From 381ac9dc09e949f313858981c88e0bb0b666acd3 Mon Sep 17 00:00:00 2001
From: "Sun, Xuehao"
Date: Mon, 28 Aug 2023 14:54:20 +0800
Subject: [PATCH] format code

Signed-off-by: Sun, Xuehao
---
 .../code-scan-neural-insights.yaml | 2 +- .../code-scan-neural-solution.yaml | 2 +- .azure-pipelines/code-scan.yml | 2 +- .../scripts/codeScan/pydocstyle/scan_path.txt | 23 +- .../scripts/codeScan/pyspelling/inc_dict.txt | 4263 ++++++++--------- .../scripts/models/collect_log_all.py | 29 +- .../scripts/models/collect_log_model.py | 122 +- .../scripts/models/new_benchmark.py | 20 +- .../scripts/models/update_yaml_config.py | 105 +- .azure-pipelines/ut-basic-no-cover.yml | 1 - .azure-pipelines/ut-neural-insights.yaml | 2 +- .github/workflows/publish.yml | 3 +- .pre-commit-config.yaml | 11 +- README.md | 9 +- docs/build_docs/source/conf.py | 66 +- docs/build_docs/source/index.rst | 3 - docs/build_docs/sphinx-requirements.txt | 6 +- docs/build_docs/update_html.py | 44 +- docs/build_docs/update_version.py | 21 +- docs/source/CODE_OF_CONDUCT.md | 2 +- docs/source/FX.md | 1 - docs/source/NAS.md | 14 +- docs/source/adaptor.md | 73 +- docs/source/add_new_adaptor.md | 62 +- docs/source/add_new_data_type.md | 12 +- docs/source/api-doc/adaptor.rst | 2 +- docs/source/api-doc/adaptor/onnxrt.rst | 1 - docs/source/api-doc/adaptor/torch_utils.rst | 2 +- docs/source/api-doc/compression.rst | 2 - docs/source/api-doc/mix_precision.rst | 2 +- docs/source/api-doc/model.rst | 2 - docs/source/api-doc/strategy.rst | 3 +- docs/source/benchmark.md | 4 +- docs/source/dataloader.md | 4 +- docs/source/dataset.md | 8 +- docs/source/diagnosis.md | 28 +- docs/source/distillation.md | 5 +- docs/source/distillation_quantization.md | 1 + docs/source/examples_readme.md | 2 +- docs/source/export.md | 36 +- docs/source/get_started.md | 8 +- docs/source/incompatible_changes.md | 24 +- docs/source/metric.md | 4 +- docs/source/migration.md | 352 +- docs/source/mixed_precision.md | 13 +- docs/source/model.md | 3 +- docs/source/objective.md | 5 +- docs/source/orchestration.md | 2 +- docs/source/pruning.md | 141 +- docs/source/pythonic_style.md | 44 +- docs/source/quantization.md | 123 +- docs/source/quantization_mixed_precision.md | 12 +- docs/source/quantization_weight_only.md | 16 +- docs/source/sigopt_strategy.md | 1 - docs/source/smooth_quant.md | 30 +- docs/source/tensorboard.md | 4 +- docs/source/transform.md | 1 - docs/source/tuning_strategies.md | 41 +- docs/source/user_yaml.md | 1 - docs/source/validated_model_list.md | 1 - neural_coder/__init__.py | 1 + neural_coder/backends/.yaml | 2 +- neural_coder/backends/nano_bf16.yaml | 2 +- .../backends/nano_bf16_channels_last.yaml | 2 +- neural_coder/backends/nano_bf16_ipex.yaml | 2 +- .../nano_bf16_ipex_channels_last.yaml | 2 +- .../backends/nano_fp32_channels_last.yaml | 2 +- neural_coder/backends/nano_fp32_ipex.yaml | 2 +- .../nano_fp32_ipex_channels_last.yaml | 2 +- neural_coder/backends/nano_gpu_to_cpu.yaml | 2 +- neural_coder/backends/nano_int8.yaml | 2 +- neural_coder/backends/nano_jit_bf16.yaml | 2 +- .../backends/nano_jit_bf16_channels_last.yaml | 2 +- neural_coder/backends/nano_jit_bf16_ipex.yaml | 2 +- .../nano_jit_bf16_ipex_channels_last.yaml | 2 +- .../backends/nano_jit_fp32_channels_last.yaml | 2 +- .../nano_jit_fp32_ipex_channels_last.yaml | 2 +- .../backends/nano_onnxruntime_fp32.yaml | 2 +- .../nano_onnxruntime_int8_qlinear.yaml | 2 +- neural_coder/backends/nano_openvino_fp32.yaml | 2 +- neural_coder/backends/nano_openvino_int8.yaml | 2 +- 
neural_coder/backends/pytorch_aliblade.yaml | 2 +- .../backends/pytorch_inc_static_quant_fx.yaml | 2 +- .../pytorch_inc_static_quant_fx_fp8.yaml | 2 +- neural_coder/backends/pytorch_jit_script.yaml | 2 +- .../backends/pytorch_jit_script_ofi.yaml | 2 +- .../backends/pytorch_mixed_precision_cpu.yaml | 2 +- .../coders/autoinc/autoinc_harness.py | 284 +- .../coders/autoinc/calib_dataloader.py | 29 +- neural_coder/coders/autoinc/domain.py | 30 +- neural_coder/coders/autoinc/eval_func.py | 93 +- neural_coder/coders/pytorch/batch_size.py | 59 +- .../pytorch/change_trainer_to_nlptrainer.py | 15 +- neural_coder/coders/pytorch/cuda_to_cpu.py | 45 +- .../coders/pytorch/dummy_dataloader.py | 57 +- neural_coder/coders/pytorch/harness.py | 227 +- neural_coder/coders/pytorch/lightning.py | 69 +- .../reclaim_inference_transformers_trainer.py | 24 +- neural_coder/coders/pytorch/reclaim_inputs.py | 34 +- neural_coder/coders/tensorflow/amp.py | 23 +- neural_coder/coders/tensorflow/inc.py | 15 +- neural_coder/coders/transform.py | 25 +- .../docs/cloud_autobench/code/resnet50.py | 1 + neural_coder/globals.py | 3 +- neural_coder/graphers/code_line.py | 139 +- neural_coder/graphers/function.py | 57 +- neural_coder/graphers/model.py | 115 +- .../graphers/preloads/transformers.yaml | 2 +- neural_coder/interface.py | 357 +- neural_coder/launcher.py | 54 +- neural_coder/utils/common.py | 3 +- neural_coder/utils/cpu_info.py | 26 +- neural_coder/utils/device.py | 21 +- neural_coder/utils/handle_user_input.py | 29 +- neural_coder/utils/line_operation.py | 37 +- neural_coder/utils/numa_launcher.py | 670 +-- neural_coder/utils/pdf_report.py | 2 +- neural_compressor/__init__.py | 12 +- neural_compressor/adaptor/__init__.py | 2 +- neural_compressor/adaptor/adaptor.py | 228 +- neural_compressor/adaptor/keras.py | 867 ++-- neural_compressor/adaptor/keras.yaml | 36 +- .../adaptor/keras_utils/__init__.py | 1 - .../adaptor/keras_utils/conv2d.py | 119 +- .../adaptor/keras_utils/dense.py | 104 +- .../adaptor/keras_utils/depthwise_conv2d.py | 63 +- .../adaptor/keras_utils/pool2d.py | 73 +- .../adaptor/keras_utils/quantizer.py | 90 +- .../adaptor/keras_utils/separable_conv2d.py | 76 +- neural_compressor/adaptor/mxnet.py | 300 +- neural_compressor/adaptor/mxnet.yaml | 8 +- .../adaptor/mxnet_utils/__init__.py | 3 +- neural_compressor/adaptor/mxnet_utils/util.py | 288 +- neural_compressor/adaptor/onnxrt.py | 1398 +++--- neural_compressor/adaptor/onnxrt.yaml | 10 +- neural_compressor/adaptor/onnxrt_cuda.yaml | 10 +- neural_compressor/adaptor/onnxrt_dml.yaml | 4 +- neural_compressor/adaptor/onnxrt_dnnl.yaml | 8 +- neural_compressor/adaptor/onnxrt_trt.yaml | 2 +- .../adaptor/ox_utils/operators/__init__.py | 2 +- .../adaptor/ox_utils/operators/activation.py | 49 +- .../adaptor/ox_utils/operators/argmax.py | 12 +- .../adaptor/ox_utils/operators/attention.py | 59 +- .../adaptor/ox_utils/operators/binary_op.py | 85 +- .../adaptor/ox_utils/operators/concat.py | 80 +- .../adaptor/ox_utils/operators/conv.py | 137 +- .../adaptor/ox_utils/operators/direct_q8.py | 40 +- .../ox_utils/operators/embed_layernorm.py | 47 +- .../adaptor/ox_utils/operators/gather.py | 62 +- .../adaptor/ox_utils/operators/gavgpool.py | 42 +- .../adaptor/ox_utils/operators/gemm.py | 97 +- .../adaptor/ox_utils/operators/lstm.py | 63 +- .../adaptor/ox_utils/operators/matmul.py | 106 +- .../adaptor/ox_utils/operators/maxpool.py | 31 +- .../adaptor/ox_utils/operators/norm.py | 7 +- .../adaptor/ox_utils/operators/ops.py | 106 +- .../adaptor/ox_utils/operators/pad.py | 35 +- 
.../adaptor/ox_utils/operators/pooling.py | 63 +- .../adaptor/ox_utils/operators/reduce.py | 37 +- .../adaptor/ox_utils/operators/resize.py | 29 +- .../adaptor/ox_utils/operators/split.py | 71 +- .../adaptor/ox_utils/operators/unary_op.py | 35 +- .../adaptor/ox_utils/smooth_quant.py | 377 +- neural_compressor/adaptor/ox_utils/util.py | 335 +- .../adaptor/ox_utils/weight_only.py | 307 +- neural_compressor/adaptor/pytorch.py | 3188 ++++++------ neural_compressor/adaptor/pytorch_cpu.yaml | 6 +- neural_compressor/adaptor/pytorch_gpu.yaml | 2 +- neural_compressor/adaptor/pytorch_ipex.yaml | 3 - neural_compressor/adaptor/query.py | 27 +- neural_compressor/adaptor/tensorflow.py | 1921 ++++---- neural_compressor/adaptor/tensorflow.yaml | 3 +- .../adaptor/tf_utils/graph_converter.py | 593 +-- .../tf_utils/graph_converter_without_calib.py | 256 +- .../graph_rewriter/bf16/bf16_convert.py | 151 +- .../generic/convert_add_to_biasadd.py | 32 +- .../graph_rewriter/generic/convert_layout.py | 40 +- .../generic/convert_leakyrelu.py | 21 +- .../generic/convert_nan_to_random.py | 21 +- .../generic/convert_placeholder_to_const.py | 43 +- .../generic/dequantize_cast_optimizer.py | 16 +- .../generic/dilated_contraction.py | 35 +- .../graph_rewriter/generic/dummy_biasadd.py | 84 +- .../generic/expanddims_optimizer.py | 22 +- .../generic/fetch_weight_from_reshape.py | 33 +- .../graph_rewriter/generic/fold_batch_norm.py | 114 +- .../graph_rewriter/generic/fold_constant.py | 35 +- .../generic/fuse_biasadd_add.py | 27 +- .../generic/fuse_column_wise_mul.py | 47 +- .../generic/fuse_conv_with_math.py | 33 +- .../generic/fuse_decomposed_bn.py | 98 +- .../generic/fuse_decomposed_in.py | 107 +- .../graph_rewriter/generic/fuse_gelu.py | 52 +- .../graph_rewriter/generic/fuse_layer_norm.py | 34 +- .../generic/fuse_pad_with_conv.py | 45 +- .../generic/fuse_pad_with_fp32_conv.py | 44 +- .../generic/fuse_reshape_transpose.py | 68 +- .../generic/graph_cse_optimizer.py | 32 +- .../graph_rewriter/generic/grappler_pass.py | 23 +- .../generic/insert_print_node.py | 135 +- .../generic/move_squeeze_after_relu.py | 69 +- .../graph_rewriter/generic/pre_optimize.py | 181 +- .../generic/remove_training_nodes.py | 11 +- .../generic/rename_batch_norm.py | 13 +- .../generic/split_shared_input.py | 16 +- .../generic/strip_equivalent_nodes.py | 14 +- .../generic/strip_unused_nodes.py | 10 +- .../generic/switch_optimizer.py | 28 +- .../tf_utils/graph_rewriter/graph_base.py | 5 +- .../graph_rewriter/int8/freeze_fake_quant.py | 77 +- .../graph_rewriter/int8/freeze_value.py | 241 +- .../int8/freeze_value_without_calib.py | 62 +- .../int8/fuse_conv_redundant_dequantize.py | 128 +- .../int8/fuse_conv_requantize.py | 1010 ++-- .../int8/fuse_matmul_redundant_dequantize.py | 138 +- .../int8/fuse_matmul_requantize.py | 691 +-- .../graph_rewriter/int8/meta_op_optimizer.py | 49 +- .../int8/post_hostconst_converter.py | 21 +- .../int8/post_quantized_op_cse.py | 42 +- .../graph_rewriter/int8/rnn_convert.py | 316 +- .../graph_rewriter/int8/scale_propagation.py | 75 +- .../graph_rewriter/onnx/onnx_graph.py | 295 +- .../tf_utils/graph_rewriter/onnx/onnx_node.py | 35 +- .../graph_rewriter/onnx/onnx_schema.py | 10 +- .../graph_rewriter/onnx/tf2onnx_utils.py | 203 +- .../graph_rewriter/qdq/insert_qdq_pattern.py | 560 ++- .../qdq/merge_duplicated_qdq.py | 18 +- .../graph_rewriter/qdq/share_qdq_y_pattern.py | 12 +- .../adaptor/tf_utils/graph_util.py | 363 +- .../quantize_graph/qat/fake_quantize.py | 45 +- .../quantize_graph/qat/quantize_config.py | 40 +- 
.../quantize_graph/qat/quantize_helper.py | 30 +- .../qat/quantize_layers/optimize_layer.py | 8 +- .../qat/quantize_layers/quantize_layer_add.py | 26 +- .../quantize_layers/quantize_layer_base.py | 17 +- .../qat/quantize_layers/quantize_layer_bn.py | 9 +- .../quantize_graph/qat/quantize_wrapper.py | 78 +- .../quantize_graph/qdq/fuse_qdq_bn.py | 202 +- .../quantize_graph/qdq/fuse_qdq_concatv2.py | 93 +- .../quantize_graph/qdq/fuse_qdq_conv.py | 1380 +++--- .../quantize_graph/qdq/fuse_qdq_deconv.py | 301 +- .../quantize_graph/qdq/fuse_qdq_in.py | 115 +- .../quantize_graph/qdq/fuse_qdq_matmul.py | 606 +-- .../quantize_graph/qdq/fuse_qdq_pooling.py | 35 +- .../quantize_graph/qdq/optimize_qdq.py | 69 +- .../quantize_graph/quantize_graph_base.py | 559 +-- .../quantize_graph/quantize_graph_bn.py | 198 +- .../quantize_graph/quantize_graph_concatv2.py | 37 +- .../quantize_graph/quantize_graph_conv.py | 267 +- .../quantize_graph_for_intel_cpu.py | 53 +- .../quantize_graph/quantize_graph_matmul.py | 216 +- .../quantize_graph/quantize_graph_pooling.py | 26 +- .../adaptor/tf_utils/quantize_graph_common.py | 138 +- .../tf_utils/smooth_quant_calibration.py | 72 +- .../adaptor/tf_utils/smooth_quant_scaler.py | 41 +- .../adaptor/tf_utils/tf2onnx_converter.py | 76 +- .../transform_graph/bias_correction.py | 85 +- .../transform_graph/graph_transform_base.py | 23 +- .../transform_graph/insert_logging.py | 136 +- .../rerange_quantized_concat.py | 276 +- neural_compressor/adaptor/tf_utils/util.py | 293 +- neural_compressor/adaptor/torch_utils/awq.py | 183 +- .../adaptor/torch_utils/bf16_convert.py | 50 +- neural_compressor/adaptor/torch_utils/gptq.py | 337 +- .../adaptor/torch_utils/hawq_metric.py | 100 +- .../layer_wise_quant/modified_pickle.py | 450 +- .../torch_utils/layer_wise_quant/quantize.py | 111 +- .../layer_wise_quant/torch_load.py | 82 +- .../torch_utils/layer_wise_quant/utils.py | 72 +- .../adaptor/torch_utils/mixed_precision.py | 2 + .../adaptor/torch_utils/model_wrapper.py | 197 +- .../adaptor/torch_utils/pattern_detector.py | 34 +- .../adaptor/torch_utils/smooth_quant.py | 478 +- .../adaptor/torch_utils/symbolic_trace.py | 13 +- neural_compressor/adaptor/torch_utils/teq.py | 174 +- neural_compressor/adaptor/torch_utils/util.py | 523 +- .../adaptor/torch_utils/weight_only.py | 288 +- neural_compressor/algorithm/__init__.py | 4 +- neural_compressor/algorithm/algorithm.py | 34 +- .../algorithm/fast_bias_correction.py | 72 +- neural_compressor/algorithm/smooth_quant.py | 20 +- .../algorithm/weight_correction.py | 62 +- neural_compressor/benchmark.py | 206 +- neural_compressor/compression/__init__.py | 2 +- neural_compressor/compression/callbacks.py | 171 +- .../compression/distillation/__init__.py | 4 +- .../compression/distillation/criterions.py | 804 ++-- neural_compressor/compression/hpo/__init__.py | 1 - .../compression/hpo/sa_optimizer.py | 60 +- .../compression/hpo/search_algorithms.py | 141 +- .../compression/hpo/search_space.py | 39 +- .../compression/pruner/README.md | 141 +- .../compression/pruner/__init__.py | 85 +- .../compression/pruner/criteria.py | 65 +- .../compression/pruner/model_slim/README.md | 148 +- .../compression/pruner/model_slim/__init__.py | 2 +- .../pruner/model_slim/auto_slim.py | 40 +- .../pruner/model_slim/pattern_analyzer.py | 290 +- .../pruner/model_slim/weight_slim.py | 138 +- .../compression/pruner/patterns/__init__.py | 11 +- .../compression/pruner/patterns/base.py | 113 +- .../compression/pruner/patterns/mha.py | 12 +- .../compression/pruner/patterns/ninm.py | 72 
+- .../compression/pruner/patterns/nxm.py | 134 +- .../compression/pruner/pruners/__init__.py | 24 +- .../compression/pruner/pruners/base.py | 21 +- .../compression/pruner/pruners/basic.py | 40 +- .../compression/pruner/pruners/block_mask.py | 47 +- .../compression/pruner/pruners/mha.py | 72 +- .../pruner/pruners/pattern_lock.py | 8 +- .../compression/pruner/pruners/progressive.py | 63 +- .../pruner/pruners/retrain_free.py | 55 +- .../compression/pruner/pruners/sparse_gpt.py | 68 +- .../compression/pruner/pruning.py | 98 +- neural_compressor/compression/pruner/regs.py | 24 +- .../compression/pruner/schedulers.py | 66 +- .../compression/pruner/tf_criteria.py | 6 +- neural_compressor/compression/pruner/utils.py | 309 +- neural_compressor/conf/__init__.py | 1 - neural_compressor/conf/config.py | 49 +- neural_compressor/conf/dotdict.py | 50 +- neural_compressor/conf/pythonic_config.py | 34 +- neural_compressor/config.py | 858 ++-- neural_compressor/contrib/__init__.py | 3 +- .../contrib/strategy/__init__.py | 4 +- neural_compressor/contrib/strategy/sigopt.py | 177 +- neural_compressor/contrib/strategy/tpe.py | 423 +- neural_compressor/data/__init__.py | 15 +- .../data/dataloaders/__init__.py | 5 +- .../data/dataloaders/base_dataloader.py | 44 +- .../data/dataloaders/dataloader.py | 126 +- .../data/dataloaders/default_dataloader.py | 73 +- neural_compressor/data/dataloaders/fetcher.py | 40 +- .../data/dataloaders/mxnet_dataloader.py | 47 +- .../data/dataloaders/onnxrt_dataloader.py | 104 +- .../data/dataloaders/pytorch_dataloader.py | 39 +- neural_compressor/data/dataloaders/sampler.py | 32 +- .../data/dataloaders/tensorflow_dataloader.py | 273 +- neural_compressor/data/datasets/__init__.py | 6 +- .../data/datasets/bert_dataset.py | 232 +- .../data/datasets/coco_dataset.py | 191 +- neural_compressor/data/datasets/dataset.py | 473 +- .../data/datasets/dummy_dataset.py | 79 +- .../data/datasets/dummy_dataset_v2.py | 157 +- .../data/datasets/imagenet_dataset.py | 124 +- .../data/datasets/style_transfer_dataset.py | 43 +- neural_compressor/data/filters/__init__.py | 3 +- neural_compressor/data/filters/coco_filter.py | 17 +- neural_compressor/data/filters/filter.py | 100 +- neural_compressor/data/transforms/__init__.py | 37 +- .../data/transforms/coco_transform.py | 18 +- .../data/transforms/imagenet_transform.py | 251 +- .../data/transforms/postprocess.py | 5 +- .../data/transforms/tokenization.py | 56 +- .../data/transforms/transform.py | 940 ++-- neural_compressor/experimental/__init__.py | 18 +- neural_compressor/experimental/benchmark.py | 339 +- .../experimental/common/__init__.py | 4 +- .../experimental/common/criterion.py | 874 ++-- .../experimental/common/dataloader.py | 67 +- .../experimental/common/metric.py | 10 +- .../experimental/common/model.py | 35 +- .../experimental/common/optimizer.py | 120 +- .../experimental/common/postprocess.py | 5 +- .../experimental/common/torch_utils.py | 19 +- neural_compressor/experimental/component.py | 261 +- .../experimental/contrib/__init__.py | 3 +- .../experimental/contrib/strategy/__init__.py | 4 +- .../experimental/contrib/strategy/sigopt.py | 142 +- .../experimental/contrib/strategy/tpe.py | 398 +- .../experimental/data/__init__.py | 3 +- .../data/dataloaders/base_dataloader.py | 42 +- .../data/dataloaders/dataloader.py | 32 +- .../data/dataloaders/default_dataloader.py | 69 +- .../experimental/data/dataloaders/fetcher.py | 34 +- .../data/dataloaders/mxnet_dataloader.py | 45 +- .../data/dataloaders/onnxrt_dataloader.py | 100 +- 
.../data/dataloaders/pytorch_dataloader.py | 37 +- .../experimental/data/dataloaders/sampler.py | 24 +- .../data/dataloaders/tensorflow_dataloader.py | 267 +- .../experimental/data/datasets/__init__.py | 3 +- .../data/datasets/bert_dataset.py | 218 +- .../data/datasets/coco_dataset.py | 185 +- .../experimental/data/datasets/dataset.py | 421 +- .../data/datasets/dummy_dataset.py | 77 +- .../data/datasets/dummy_dataset_v2.py | 153 +- .../data/datasets/imagenet_dataset.py | 110 +- .../data/datasets/style_transfer_dataset.py | 41 +- .../experimental/data/filters/__init__.py | 3 +- .../experimental/data/filters/coco_filter.py | 13 +- .../experimental/data/filters/filter.py | 80 +- .../experimental/data/transforms/__init__.py | 2 +- .../data/transforms/imagenet_transform.py | 223 +- .../data/transforms/tokenization.py | 56 +- .../experimental/data/transforms/transform.py | 904 ++-- .../experimental/distillation.py | 228 +- .../experimental/export/__init__.py | 2 - .../experimental/export/qlinear2qdq.py | 26 +- .../experimental/export/tf2onnx.py | 63 +- .../experimental/export/torch2onnx.py | 208 +- .../experimental/graph_optimization.py | 178 +- .../experimental/metric/__init__.py | 4 +- neural_compressor/experimental/metric/bleu.py | 30 +- .../experimental/metric/bleu_util.py | 48 +- .../experimental/metric/coco_label_map.py | 161 +- .../experimental/metric/coco_tools.py | 511 +- .../experimental/metric/evaluate_squad.py | 47 +- neural_compressor/experimental/metric/f1.py | 69 +- .../experimental/metric/metric.py | 684 +-- .../experimental/mixed_precision.py | 126 +- .../experimental/model_conversion.py | 157 +- .../experimental/nas/basic_nas.py | 81 +- neural_compressor/experimental/nas/dynas.py | 18 +- neural_compressor/experimental/nas/nas.py | 223 +- .../experimental/nas/nas_utils.py | 8 +- .../experimental/nas/search_algorithms.py | 33 +- .../experimental/pruner_legacy/__init__.py | 3 +- .../pruner_legacy/gradient_sensitivity.py | 125 +- .../experimental/pruner_legacy/group_lasso.py | 15 +- .../experimental/pruner_legacy/magnitude.py | 29 +- .../pruner_legacy/pattern_lock.py | 5 +- .../experimental/pruner_legacy/pruner.py | 23 +- .../pruner_legacy/util/block_mask.py | 3 +- neural_compressor/experimental/pruning.py | 224 +- .../experimental/pruning_recipes/__init__.py | 3 +- .../pruning_recipes/patterns/__init__.py | 4 +- .../pruning_recipes/patterns/pattern.py | 27 +- .../pruning_recipes/patterns/tile_pattern.py | 22 +- neural_compressor/experimental/pruning_v2.py | 225 +- .../experimental/pytorch_pruner/logger.py | 2 +- .../experimental/pytorch_pruner/patterns.py | 105 +- .../pytorch_pruner/prune_utils.py | 128 +- .../experimental/pytorch_pruner/pruner.py | 54 +- .../experimental/pytorch_pruner/pruning.py | 23 +- .../experimental/pytorch_pruner/scheduler.py | 18 +- .../experimental/quantization.py | 214 +- neural_compressor/experimental/scheduler.py | 121 +- .../experimental/strategy/__init__.py | 3 +- .../strategy/auto_mixed_precision.py | 83 +- .../experimental/strategy/basic.py | 99 +- .../experimental/strategy/bayesian.py | 135 +- .../experimental/strategy/exhaustive.py | 16 +- .../experimental/strategy/mse.py | 154 +- .../experimental/strategy/mse_v2.py | 145 +- .../experimental/strategy/random.py | 19 +- .../experimental/strategy/strategy.py | 1025 ++-- .../experimental/strategy/utils/constant.py | 36 +- .../strategy/utils/tuning_sampler.py | 256 +- .../strategy/utils/tuning_space.py | 297 +- .../strategy/utils/tuning_structs.py | 40 +- .../experimental/strategy/utils/utility.py | 
15 +- neural_compressor/metric/__init__.py | 30 +- neural_compressor/metric/bleu.py | 30 +- neural_compressor/metric/bleu_util.py | 48 +- neural_compressor/metric/coco_label_map.py | 161 +- neural_compressor/metric/coco_tools.py | 511 +- neural_compressor/metric/evaluate_squad.py | 47 +- neural_compressor/metric/f1.py | 69 +- neural_compressor/metric/metric.py | 617 +-- neural_compressor/mix_precision.py | 88 +- neural_compressor/model/__init__.py | 3 - neural_compressor/model/base_model.py | 8 +- neural_compressor/model/keras_model.py | 31 +- neural_compressor/model/model.py | 199 +- neural_compressor/model/mxnet_model.py | 19 +- neural_compressor/model/nets_factory.py | 156 +- neural_compressor/model/onnx_model.py | 267 +- neural_compressor/model/tensorflow_model.py | 611 +-- neural_compressor/model/torch_model.py | 292 +- neural_compressor/profiling/parser/factory.py | 6 +- .../profiling/parser/onnx_parser/factory.py | 4 +- .../profiling/parser/onnx_parser/parser.py | 8 +- neural_compressor/profiling/parser/parser.py | 6 +- neural_compressor/profiling/parser/result.py | 14 +- .../parser/tensorflow_parser/factory.py | 4 +- .../profiling/profiler/factory.py | 12 +- .../profiler/onnxrt_profiler/factory.py | 13 +- .../profiler/onnxrt_profiler/profiler.py | 6 +- .../profiler/tensorflow_profiler/factory.py | 17 +- .../profiler/tensorflow_profiler/profiler.py | 25 +- .../profiler/tensorflow_profiler/utils.py | 1 - neural_compressor/quantization.py | 61 +- neural_compressor/strategy/__init__.py | 3 +- neural_compressor/strategy/auto.py | 85 +- .../strategy/auto_mixed_precision.py | 123 +- neural_compressor/strategy/basic.py | 236 +- neural_compressor/strategy/bayesian.py | 124 +- neural_compressor/strategy/conservative.py | 108 +- neural_compressor/strategy/exhaustive.py | 13 +- neural_compressor/strategy/hawq_v2.py | 73 +- neural_compressor/strategy/mse.py | 176 +- neural_compressor/strategy/mse_v2.py | 110 +- neural_compressor/strategy/random.py | 17 +- neural_compressor/strategy/strategy.py | 1104 +++-- neural_compressor/strategy/utils/constant.py | 52 +- .../strategy/utils/tuning_sampler.py | 291 +- .../strategy/utils/tuning_space.py | 278 +- .../strategy/utils/tuning_structs.py | 43 +- neural_compressor/strategy/utils/utility.py | 43 +- neural_compressor/template/__init__.py | 13 + neural_compressor/template/api_doc_example.py | 42 +- .../template/graph_optimization.yaml | 2 +- neural_compressor/template/pruning.yaml | 5 +- neural_compressor/template/ptq.yaml | 2 +- neural_compressor/template/qat.yaml | 10 +- neural_compressor/training.py | 109 +- neural_compressor/utils/__init__.py | 15 +- .../utils/collect_layer_histogram.py | 16 +- neural_compressor/utils/constant.py | 104 +- .../utils/create_obj_from_config.py | 128 +- neural_compressor/utils/kl_divergence.py | 35 +- neural_compressor/utils/load_huggingface.py | 77 +- neural_compressor/utils/logger.py | 47 +- .../utils/neural_insights_utils.py | 22 +- neural_compressor/utils/options.py | 33 +- neural_compressor/utils/pytorch.py | 374 +- neural_compressor/utils/utility.py | 221 +- neural_compressor/utils/weights_details.py | 1 + neural_compressor/version.py | 1 - neural_insights/README.md | 243 +- neural_insights/bin/neural_insights.py | 2 +- .../components/diagnosis/diagnosis.py | 20 +- .../components/diagnosis/factory.py | 9 +- .../components/diagnosis/weights_details.py | 5 +- neural_insights/components/graph/collapser.py | 6 +- neural_insights/components/graph/graph.py | 3 +- .../components/model/onnxrt/model.py | 25 +- 
.../components/model/tensorflow/utils.py | 4 +- .../workload_manager/quantization_workload.py | 2 +- .../components/workload_manager/workload.py | 4 +- .../workload_manager/workload_manager.py | 21 +- .../docs/source/onnx_accuracy_debug.md | 51 +- .../docs/source/tf_accuracy_debug.md | 3 +- neural_insights/main.py | 4 +- neural_insights/ni.py | 22 +- neural_insights/requirements.txt | 4 +- neural_insights/utils/consts.py | 1 - neural_insights/utils/exceptions.py | 1 - neural_insights/utils/expiring_dict.py | 2 +- neural_insights/utils/logger.py | 1 - neural_insights/utils/utils.py | 7 +- neural_insights/web/communication.py | 1 - neural_insights/web/configuration.py | 3 +- neural_insights/web/exceptions.py | 1 - neural_insights/web/router.py | 4 +- neural_insights/web/server.py | 1 - neural_insights/web/service/__init__.py | 1 - .../web/service/request_data_processor.py | 1 - .../web/service/response_generator.py | 1 - neural_solution/README.md | 2 - neural_solution/__init__.py | 1 - neural_solution/backend/__init__.py | 1 - neural_solution/backend/cluster.py | 12 +- neural_solution/backend/result_monitor.py | 1 - neural_solution/backend/runner.py | 1 - neural_solution/backend/scheduler.py | 3 +- neural_solution/backend/task.py | 1 - neural_solution/backend/task_db.py | 1 - neural_solution/backend/task_monitor.py | 1 - neural_solution/backend/utils/__init__.py | 1 - neural_solution/backend/utils/utility.py | 1 - neural_solution/bin/__init__.py | 1 - neural_solution/bin/neural_solution.py | 1 - neural_solution/config.py | 1 - neural_solution/docs/source/README.md | 2 +- .../docs/source/description_api.md | 2 +- neural_solution/docs/source/ns_design_doc.md | 1 - .../template/task_request_description.md | 2 +- neural_solution/examples/README.md | 2 +- .../tf_example1/test.py | 14 + neural_solution/examples/hf_models/README.md | 2 +- .../examples/hf_models_grpc/README.md | 2 +- neural_solution/launcher.py | 61 +- neural_solution/requirements.txt | 8 +- neural_solution/scripts/prepare_deps.py | 1 - neural_solution/utils/__init__.py | 1 - neural_solution/utils/logger.py | 1 - neural_solution/utils/utility.py | 1 - neural_solution/version.py | 1 - pyproject.toml | 14 - requirements.txt | 20 +- setup.py | 92 +- .../mxnet_adaptor/test_adaptor_mxnet.py | 438 +- .../mxnet_adaptor/test_mxnet_query_fwk.py | 23 +- .../onnxrt_adaptor/test_adaptor_onnxrt.py | 1180 +++-- .../onnxrt_adaptor/test_onnxrt_augment.py | 557 ++- .../onnxrt_adaptor/test_onnxrt_operators.py | 2254 +++++---- .../test_weight_only_adaptor.py | 101 +- .../test_adaptor_pytorch_1.x.py | 659 +-- .../test_adaptor_pytorch_2.x.py | 294 +- .../test_weight_only_adaptor.py | 425 +- .../tensorflow_adaptor/test_bf16_convert.py | 325 +- .../test_smooth_quant_tf.py | 111 +- .../tensorflow_adaptor/test_tensorboard.py | 205 +- .../test_tensorflow_bias_correction.py | 55 +- ...est_tensorflow_calculate_op_sensitivity.py | 104 +- .../test_tensorflow_convert_layout.py | 33 +- .../test_tensorflow_data_pipline.py | 51 +- .../test_tensorflow_fold_batch_norm.py | 47 +- .../test_tensorflow_fold_const.py | 85 +- .../test_tensorflow_get_estimator_graph.py | 42 +- .../tensorflow_adaptor/test_tensorflow_gpu.py | 74 +- .../test_tensorflow_graph_cac.py | 85 +- .../test_tensorflow_graph_column_wise_mul.py | 20 +- .../test_tensorflow_graph_concat.py | 91 +- ...t_tensorflow_graph_conv_add_relu_fusion.py | 50 +- .../test_tensorflow_graph_conv_as_output.py | 196 +- .../test_tensorflow_graph_conv_fusion.py | 352 +- .../test_tensorflow_graph_conv_math.py | 38 +- 
.../test_tensorflow_graph_convert_layout.py | 30 +- ...test_tensorflow_graph_convert_leakyrelu.py | 86 +- .../test_tensorflow_graph_convert_nan.py | 26 +- .../test_tensorflow_graph_cse_optimization.py | 58 +- .../test_tensorflow_graph_debug_mode.py | 44 +- ...sorflow_graph_dequantize_cast_optimizer.py | 67 +- .../test_tensorflow_graph_dump_tensor.py | 62 +- ...t_tensorflow_graph_expanddims_optimizer.py | 34 +- ...sorflow_graph_fetch_weight_from_reshape.py | 139 +- .../test_tensorflow_graph_fold_bn.py | 92 +- ...est_tensorflow_graph_fuse_decomposed_bn.py | 368 +- .../test_tensorflow_graph_fuse_gelu.py | 268 +- .../test_tensorflow_graph_input_output.py | 150 +- .../test_tensorflow_graph_insert_logging.py | 74 +- ...test_tensorflow_graph_library_detection.py | 48 +- .../test_tensorflow_graph_matmul_fusion.py | 202 +- .../test_tensorflow_graph_meta_pass.py | 106 +- .../test_tensorflow_graph_pad_conv.py | 90 +- ...test_tensorflow_graph_post_cse_optimize.py | 122 +- .../test_tensorflow_graph_search_patterns.py | 80 +- .../test_tensorflow_graph_switch_optimizer.py | 71 +- .../test_tensorflow_graph_util.py | 90 +- .../test_tensorflow_grappler_pass.py | 25 +- .../test_tensorflow_inspect_tensor.py | 171 +- ...tensorflow_inspect_tensor_in_mse_tuning.py | 85 +- ...test_tensorflow_move_squeeze_after_relu.py | 109 +- .../test_tensorflow_quantize_input.py | 64 +- .../test_tensorflow_query_yaml.py | 108 +- .../test_tensorflow_remove_training_nodes.py | 100 +- .../tensorflow_adaptor/test_tensorflow_rnn.py | 87 +- .../test_tensorflow_set_tensor.py | 127 +- .../test_tensorflow_share_nodes_graph.py | 40 +- .../test_tensorflow_strip_equivalent_nodes.py | 34 +- .../tensorflow_adaptor/test_tf_util.py | 200 +- test/algorithm/modeling_gptj.py | 57 +- test/algorithm/test_algorithm.py | 87 +- test/algorithm/test_layer_wise_quant.py | 32 +- test/algorithm/test_smooth_quant.py | 400 +- test/algorithm/test_smooth_quant_onnx.py | 169 +- test/benchmark/test_benchmark.py | 147 +- test/benchmark/test_benchmark_2.x.py | 102 +- test/config/test_config_1.x.py | 547 ++- test/config/test_config_2.x.py | 43 +- test/config/test_config_regex.py | 104 +- test/config/test_pythonic_config.py | 294 +- test/data/test_dataloader.py | 1577 +++--- test/data/test_exp_dataloader.py | 164 +- test/data/test_exp_transformers.py | 1219 ++--- test/data/test_filter.py | 251 +- test/data/test_tokenization.py | 28 +- test/data/test_transform.py | 1219 ++--- test/distillation/test_distillation_1.x.py | 93 +- test/distillation/test_distillation_2.x.py | 121 +- .../test_self_distillation_2.x.py | 44 +- test/distributed/test_distributed_metrics.py | 84 +- test/distributed/test_distributed_pt_train.py | 42 +- .../test_distributed_tf_dataloader.py | 510 +- test/export/test_onnx_qlieanr_to_qdq.py | 795 +-- test/export/test_torch2onnx.py | 142 +- .../test_graph_optimization.py | 682 ++- test/hpo/test_hpo.py | 56 +- test/ipex/test_adaptor_ipex.py | 202 +- test/itex/test_keras_in_keras_out.py | 115 +- test/itex/test_smooth_quant_itex.py | 117 +- test/itex/test_tensorflow_itex_2.x.py | 51 +- test/itex/test_tensorflow_itex_basic.py | 301 +- ...test_tensorflow_qdq_convert_to_onnx_qdq.py | 99 +- test/metric/test_coco_tools.py | 292 +- test/metric/test_exp_metrics.py | 1073 +++-- test/metric/test_metrics.py | 1003 ++-- test/metric/test_metrics_2.x.py | 977 ++-- test/metric/test_mse.py | 145 +- test/metric/test_mse_metric.py | 112 +- test/metric/test_register_metric_transform.py | 50 +- test/mixed_precision/test_mixed_precision.py | 329 +- 
.../test_mixed_precision_keras_model.py | 90 +- test/model/test_model.py | 436 +- test/model/test_model_pytorch.py | 77 +- test/model/test_onnx_model.py | 274 +- .../test_tensorflow_auto_input_output.py | 21 +- test/nas/test_nas.py | 74 +- test/neural_coder/test_common.py | 4 +- test/neural_coder/test_line_operation.py | 4 +- test/objective/test_objective.py | 530 +- .../test_gradient_sensitivity.py | 61 +- .../pruning_1.x_v1/test_pattern_lock.py | 21 +- .../test_pruning_experimental.py | 29 +- .../test_pruning_group_lasso.py | 16 +- .../pruning_1.x_v1/test_pruning_pattern.py | 18 +- .../pruning_1.x_v1/test_pruning_pure_yaml.py | 16 +- .../pruning_1.x_v2/test_pruning.py | 35 +- .../pruning_1.x_v2/test_pruning_config.py | 33 +- .../pruning_1.x_v2/test_pruning_criteria.py | 42 +- .../pruning_1.x_v2/test_pruning_patterns.py | 33 +- .../pruning_1.x_v2/test_pruning_regs.py | 25 +- .../pruning_1.x_v2/test_pruning_schedulers.py | 30 +- .../pruning_1.x_v2/test_pruning_types.py | 29 +- .../test_pytorch_pruning_experimental.py | 27 +- .../test_auto_excluding_classifier.py | 13 +- .../pruning_2.x/test_auto_slim.py | 40 +- .../pruning_2.x/test_conv_pruning.py | 54 +- .../pruning_2.x/test_pruning.py | 40 +- .../pruning_2.x/test_pruning_block.py | 47 +- .../pruning_2.x/test_pruning_config.py | 35 +- .../pruning_2.x/test_pruning_criteria.py | 81 +- .../pruning_2.x/test_pruning_patterns.py | 34 +- .../pruning_2.x/test_pruning_progressive.py | 39 +- .../pruning_2.x/test_pruning_regs.py | 43 +- .../pruning_2.x/test_pruning_schedulers.py | 32 +- .../pruning_2.x/test_pruning_types.py | 32 +- .../pruning_2_plus.x/test_pruning.py | 33 +- .../pruning_2_plus.x/test_pruning_block.py | 29 +- .../test_pruning_retrain_free.py | 45 +- .../test_pruning_sparsegpt.py | 35 +- .../test_tensorflow_distributed_pruning.py | 62 +- .../pruning_1.x_v1/test_tensorflow_pruning.py | 295 +- .../test_tensorflow_pruning_utility.py | 38 +- .../pruning_1.x_v2/test_tensorflow_pruning.py | 295 +- .../pruning_2.x/test_pruning_keras.py | 68 +- test/quantization/test_quantization.py | 401 +- test/quantization/test_tensorflow_qat.py | 112 +- test/quantization/test_tensorflow_recipe.py | 226 +- test/quantization/test_tensorflow_recover.py | 116 +- .../test_weight_only_quantization.py | 149 +- test/requirements.txt | 24 +- test/scheduler/test_oneshot.py | 218 +- test/scheduler/test_orchestration.py | 41 +- test/scheduler/test_scheduler.py | 140 +- test/strategy/test_basic.py | 127 +- test/strategy/test_basic_1.x.py | 136 +- test/strategy/test_bayesian.py | 254 +- test/strategy/test_bayesian_1.x.py | 250 +- test/strategy/test_distributed_tuning.py | 57 +- test/strategy/test_exhaustive.py | 58 +- test/strategy/test_exhaustive_1.x.py | 64 +- test/strategy/test_hawq_v2_2.x.py | 38 +- test/strategy/test_lower_bit_sampler.py | 61 +- test/strategy/test_mse.py | 242 +- test/strategy/test_mse_1.x.py | 239 +- test/strategy/test_mse_v2.py | 105 +- test/strategy/test_mse_v2_2.x.py | 177 +- test/strategy/test_new_datatype.py | 61 +- test/strategy/test_quant_level.py | 357 +- test/strategy/test_random.py | 60 +- test/strategy/test_random_1.x.py | 59 +- test/strategy/test_sigopt.py | 119 +- test/strategy/test_sigopt_1.x.py | 101 +- test/strategy/test_tpe.py | 69 +- test/strategy/test_tpe_1.x.py | 70 +- test/strategy/test_tuning_sampler.py | 307 +- test/strategy/test_tuning_sampler_1.x.py | 266 +- test/strategy/test_tuning_space.py | 292 +- test/strategy/test_tuning_space_1.x.py | 354 +- test/strategy/test_tuning_space_v2.py | 416 +- 
test/strategy/test_tuning_space_v2_1.x.py | 288 +- test/strategy/test_utility.py | 7 +- test/tfnewapi/test_smooth_quant_newapi.py | 111 +- .../test_tensorflow_bias_correction.py | 180 +- .../test_tensorflow_fuse_reshape_transpose.py | 61 +- ...est_tensorflow_graph_biasadd_add_fusion.py | 71 +- .../test_tensorflow_graph_conv_fusion.py | 518 +- ...tensorflow_graph_conv_requantize_fusion.py | 720 +-- ...t_tensorflow_graph_depthwiseconv_fusion.py | 205 +- ..._graph_dequantize_cast_optimizer_newapi.py | 67 +- .../test_tensorflow_graph_dq_cast_fusion.py | 43 +- .../test_tensorflow_graph_fuse_gelu_newapi.py | 264 +- ...est_tensorflow_graph_fuse_pad_conv_fp32.py | 61 +- .../test_tensorflow_graph_qdq_bn_fusion.py | 230 +- ...test_tensorflow_graph_qdq_concat_fusion.py | 160 +- ...test_tensorflow_graph_qdq_conv3d_fusion.py | 667 +-- .../test_tensorflow_graph_qdq_conv_fusion.py | 548 ++- ...nsorflow_graph_qdq_depthwiseconv_fusion.py | 225 +- ...test_tensorflow_graph_qdq_matmul_fusion.py | 511 +- ...st_tensorflow_graph_qdq_new_conv_fusion.py | 87 +- ...est_tensorflow_graph_qdq_pooling_fusion.py | 83 +- .../test_tf_spr_base_distributed_metrics.py | 73 +- .../test_tf_spr_base_distributed_pruning.py | 62 +- ...t_tf_spr_base_distributed_tf_dataloader.py | 509 +- test/utils/test_huggingface.py | 35 +- test/utils/test_layer_histogram.py | 46 +- test/utils/test_logger.py | 14 +- 790 files changed, 55606 insertions(+), 48868 deletions(-) diff --git a/.azure-pipelines/code-scan-neural-insights.yaml b/.azure-pipelines/code-scan-neural-insights.yaml index 65fd843cc5d..532bcdfc581 100644 --- a/.azure-pipelines/code-scan-neural-insights.yaml +++ b/.azure-pipelines/code-scan-neural-insights.yaml @@ -80,7 +80,7 @@ stages: source $(Build.SourcesDirectory)/.azure-pipelines/scripts/change_color.sh set -e mkdir -p $(Build.SourcesDirectory)/$(CODE_SCAN_LOG_PATH) - RESET="echo -en \\E[0m \\n" # close + RESET="echo -en \\E[0m \\n" # close supported_extensions=(py, sh, yaml) git --no-pager diff --name-only $(git show-ref -s remotes/origin/$(System.PullRequest.TargetBranch)) $(Build.SourcesDirectory)/neural_insights > $(Build.SourcesDirectory)/$(CODE_SCAN_LOG_PATH)/diff.log diff --git a/.azure-pipelines/code-scan-neural-solution.yaml b/.azure-pipelines/code-scan-neural-solution.yaml index 6485f0be6dc..77e4e54962b 100644 --- a/.azure-pipelines/code-scan-neural-solution.yaml +++ b/.azure-pipelines/code-scan-neural-solution.yaml @@ -80,7 +80,7 @@ stages: source $(Build.SourcesDirectory)/.azure-pipelines/scripts/change_color.sh set -e mkdir -p $(Build.SourcesDirectory)/$(CODE_SCAN_LOG_PATH) - RESET="echo -en \\E[0m \\n" # close + RESET="echo -en \\E[0m \\n" # close supported_extensions=(py, sh, yaml) git --no-pager diff --name-only $(git show-ref -s remotes/origin/$(System.PullRequest.TargetBranch)) $(Build.SourcesDirectory)/neural_solution > $(Build.SourcesDirectory)/$(CODE_SCAN_LOG_PATH)/diff.log diff --git a/.azure-pipelines/code-scan.yml b/.azure-pipelines/code-scan.yml index 92e08366391..4401183ab57 100644 --- a/.azure-pipelines/code-scan.yml +++ b/.azure-pipelines/code-scan.yml @@ -79,7 +79,7 @@ stages: source $(Build.SourcesDirectory)/.azure-pipelines/scripts/change_color.sh set -e mkdir -p $(Build.SourcesDirectory)/$(CODE_SCAN_LOG_PATH) - RESET="echo -en \\E[0m \\n" # close + RESET="echo -en \\E[0m \\n" # close supported_extensions=(py, sh, yaml) git --no-pager diff --name-only $(git show-ref -s remotes/origin/$(System.PullRequest.TargetBranch)) $(Build.SourcesDirectory)/neural_compressor > 
$(Build.SourcesDirectory)/$(CODE_SCAN_LOG_PATH)/diff.log diff --git a/.azure-pipelines/scripts/codeScan/pydocstyle/scan_path.txt b/.azure-pipelines/scripts/codeScan/pydocstyle/scan_path.txt index 13d8f5727ef..b524f1f61db 100644 --- a/.azure-pipelines/scripts/codeScan/pydocstyle/scan_path.txt +++ b/.azure-pipelines/scripts/codeScan/pydocstyle/scan_path.txt @@ -1,18 +1,17 @@ -/neural-compressor/neural_compressor/experimental -/neural-compressor/neural_compressor/contrib -/neural-compressor/neural_compressor/strategy -/neural-compressor/neural_compressor/algorithm -/neural-compressor/neural_compressor/model -/neural-compressor/neural_compressor/utils -/neural-compressor/neural_compressor/pruner /neural-compressor/neural_compressor/adaptor/mxnet_utils /neural-compressor/neural_compressor/adaptor/ox_utils -/neural-compressor/neural_compressor/adaptor/tf_utils /neural-compressor/neural_compressor/adaptor/tensorflow.py -/neural-compressor/neural_compressor/training.py +/neural-compressor/neural_compressor/adaptor/tf_utils +/neural-compressor/neural_compressor/algorithm /neural-compressor/neural_compressor/benchmark.py -/neural-compressor/neural_compressor/quantization.py -/neural-compressor/neural_compressor/objective.py /neural-compressor/neural_compressor/config.py +/neural-compressor/neural_compressor/contrib +/neural-compressor/neural_compressor/experimental /neural-compressor/neural_compressor/mix_precision.py - +/neural-compressor/neural_compressor/model +/neural-compressor/neural_compressor/objective.py +/neural-compressor/neural_compressor/pruner +/neural-compressor/neural_compressor/quantization.py +/neural-compressor/neural_compressor/strategy +/neural-compressor/neural_compressor/training.py +/neural-compressor/neural_compressor/utils diff --git a/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt b/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt index 4ab625b8e51..d9e062b0240 100644 --- a/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt +++ b/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt @@ -1,150 +1,80 @@ -aa -aac -aae -abc +ABI +ACDC +ADDR +ADE +AGS +ALLREDUCE +AMD +AMX +APIs +APl +APm +APs +ASPLOS +AVX +AWQ +AWS +AWSSageMakerSupport Abc AbcAdaptor AbcTuneStrategy -abeja -abi -ABI -absl -abspath -abstractive -acc Acc -accuracies -acdc -actorder -ACDC +AccuracyCriterion +AccuracyLoss Acknowledgement -activations Adadelta -adam AdamW -adaptor Adaptor AddEmbeddings AddN -addr -ADDR AddV -ade -ADE -adresses AdvProp -ae -aea -af -AGS -ai -aia -ailab -al -albert -alexnet +Affine +Ajanthan AlexNet -algo -algos -alibaba Alibaba AlignImageChannel -allenai -alloc -ALLREDUCE -alsologtostderr +Alireza Aman -amazonaws -amazonlinux Amodei AmpConf -AMX -amx -analytics Analytics Anastasiia AnchorGenerator -andravin -andreamad -anisotropic -anno -anton -ap -apache -api -APIs -APl -APm -approch -APs -arg -argmax +ArcFace ArgMax -args -arxiv -arXiv -asd -astype -asym -async -atrous -att +Arial AttentionReshape -attr -attredirects AttributeProto -attrs -auc -aug -autgrad -autogenerate -autograd +AutoINC AutoMixPrecision AutoModelForSequenceClassification -autopep -Autoregressive +AutoQuant AutoTokenizer -ava +Autoregressive AverageMeter -avgloss AvgPool -avx -AVX -backend -backends -backticks -bart -barthez -bashrc -basicConfig +BERT's +BERTDataSet +BFP +BGR +BLAS +BLEU +BNInception +BV +BasicNAS BasicTokenizer -batchgenerators BatchMatMul BatchMatMulV -batchnorm BatchNorm -bayesian BayesianOptimization -bazel -bbbb -bbox -bboxes -bccf -bce -bd -bdb -bdist 
-benchmarked -benchmarking +BenchmarkConf +BenchmarkConfig Benchmarking Bengio Benoît -berkeleyvision -bert -BERT's BertAdam BertConfig -BERTDataSet BertForMaskedLM BertForNextSentencePrediction BertForPreTraining @@ -152,2578 +82,2635 @@ BertForQuestionAnswering BertForSequenceClassification BertForTokenClassification BertModel -berts -bertsquad BertTokenizer -bfloat -blockwise -BFP -BGR +BiDAF +BiLSTM Bianchi BiasAdd BibTeX -bicubic -bilibili +BigDL +BigDLNanoSupport BiliBili -bilinear BilinearImagenet -billsum -BiLSTM -binarize -binarized BinaryAdd -biomedical Biomedical -BLAS -blendcnn -BlendCnn +BladeDISC BlendCNN -bleu -BLEU -blocktime -blogpost -bn -bninception -BNInception -bobw -booktitle -bool -Boudoukh +BlendCnn BoW -boxlist +Boudoukh BoxList -br -BrainTumour BraTS -broadcasted -bs -bsnone -bugfix -buildin -builtin +BrainTumour +Btg Builtin -BV -bvlcalexnet -bzip -cadene +CCE +CCFF +CCL +CERN's +CFLAGS +CHANGELOG +CHZ +CIFAR +CLA +CLI +CLM +CLX +CMAKE +CMU +CMake +CMakeLists +CNWXA +COCODataset +COCODemo +COCOEval +COCONpy +COCORaw +COCORecord +COCOmAP +COCOmAPv +CONDA +CPUExecutionProvider +CPUs +CPX +CPz +CTRLModel +CTRLTokenizer +CUDAExecutionProvider +CUDAToolKit +CUHK +CVF +CXX +CYP Cadene -caffe Caffe -caffenet -Caffenet -cafferesnet -CaffeResnet +CaffeNet CaffeResNet +CaffeResnet +Caffenet Caiming -calib -calibrationcsv -camembert CamemBERT -canada Carbonell CascadeFullRes -cbica -cd -cded -cdn -ce -cec -CenterCrop -centernet -centerNet -centos CentOS +CenterCrop Centre -cern -CERN's -certfile Cesa -cfa -cffi -cfg -CFLAGS ChamNet +Changelog +Chatbot Chaumond -channelx -checkbox -checkboxes Cheng -chmod Cho's -chongruo Chongruo -chris Chuanqi -ci -cifar -CIFAR -circleci Cistac -cityscapes Cityscapes -cityscapesscripts -cityscapesScripts -cknowledge -ckpt -ckpts ClassPredictionTower -clcarwin Clergerie -cli -CLI -clipnorm -clm -CLM -cls -CLX -cly -cmake -CMake -CMAKE -CMakeLists -cmd -CMU -cn -cnn -cnt CoCo -cocoapi -cocoApi -cocodataset -COCODataset -COCODemo -COCOEval -COCOmAP -COCOmAPv -COCONpy -cocoraw -COCORaw -COCORecord -codalab -codecogs -codenamed CoLA -colorama +CoNLL +CodeGenerator +CodeXGLUE ColorJitter -colspan -compat -compilervars -concat +CompVis ConcatV -cond -conda -CONDA -condconv CondConv Condensenet -conf -config Config -configs -CoNLL Conneau -const -ConstantOfShape ConstDataLoader -constexpr -contaning -conv +ConstantOfShape Conv ConvBNReLU -ConvertingSSDMobilenetToONNX -convertion ConvNets -convolutional -Convolutional ConvPerStage ConvReLU -cooldown -copt -coreml +ConvertingSSDMobilenetToONNX +Convolutional CoreML -cp -cpp -cpu -cpus -CPUs -CPX -cpython -creafz -creatis -creativecommons -criteo Criteo CriteoTerabyte -croping CropResize CropToBoundingBox CrossEntropyLoss -crossvalidaton -crt -csv -ctrl -CTRLModel -CTRLTokenizer -ctuning -ctx -cuda -cudaPopCallConfiguration -cudatoolkit -CUDAToolKit -cudnn -CUHK -curr Curran -customised -custormer -cv +CustomObj CvAClvFfyA -CXX -cxxopt -cypw -cython -da -dae +DBMDZ +DCMAKE +DDP +DDR +DENABLE +DFS +DFabiansResUNet +DGAN +DKFZ +DLRM's +DMQA +DNNL +DPNs +DUC +DUNet +DUnetCNN DagnyT Dai -dailymail Danqi -darknet Darknet -datadir -datafile -dataloader -dataLoader DataLoader DataLoadermodule -dataloaders DataParallel -datapoints DataProcessor -dataset Dataset DatasetAnalyzer -datasets -datatype -datatypes -dathath Dathathri -datset -dbmdz -DBMDZ -dbox -dbs -DCMAKE -dcn -ddp -DDP -DDR -de -deberta -decapoda +Dbzg +DeBERTa +DeQuantStub +DeQuantize DecodeImage -deepengine -deeplab DeepLab -deeplabv DeepLabV 
DeepLearningExamples Delangue -DENABLE -denseblock -denselayer -densenet DenseNet -deps DepthwiseConv -dequant -dequantize -DequantizeLinear DequantStub -DeQuantStub -desc -dest -destructor -detections -detectron +DequantizeLinear Detectron -dev -devel +Dettmers +DevCloud Devlin -devtool -DFabiansResUNet -dfb -DFS -DGAN -dialogpt DialoGPT -dicts -dir -dirname Discrim -distil Distil -distilbert -DistilBert -DistilBERT DistilBERT +DistilBert DistilBertModel DistilBertTokenizer -distilgpt DistilGPT -distillated +DistilRoBERTa Distillated -distillating +DistillationConf +DistillationConfig DistilmBERT -distilrobert -distilroberta -DistilRoBERTa DistributedDataParallel DistributedOptimizer DistributedSampler -distro -dividiti Djamé -DKFZ -dl -dlabel -dlboost -dlrm -DLRM's -dmlc -DMQA -dNative -dnf -dnn -dnnl -DNNL DnnlExecutionProvider Dockerfile -doclist -docstrings -doctrings -docutils -doteq -dowmsampling -downloader -downsampled -downsampling -doxygen -dpn -DPNs -dpr DropOut -ds -dscore -dst -dtype DualPathNet -dualpathnetworks DualPathNetworks DummyDataLoader -dunet -DUNet Dupont Durand -dvdt -dw -dynamiccaly -ead +DyNAS +DyNas EAQkaohzrJbd -earlystop -eb -ece -ecotrust -edgetpu +EMC EdgeTPU -edu -ee -eer -ef -efficientnet -efficientNet +Edouard EfficientNet EfficientNets -eg -eightbit -einstein -el EleutherAI -elif -eltwise -emb -embeddings -embs -EMC -enablerepo +EmbedLayerNormalization EncodeJped -enfr -eng -ensembling -ensp -entrypoint -enum -env -environ -ep -eps -eq -erf Erf -Éric -eriklindernoren Errno -esri -et -eval -evaluator -evel -exemplarily -exising -existing -exmaple -expanduser ExperimentPlanner ExperimentPlanners -extractive +Extensibility EzjbRL -fabian +FAC +FBNet +FBResNet +FCN +FERPlus +FFFFFF +FLOPs +FP +FPN +FQqOuW +FRN +FUNSD +FWK FabiansUNet -facebook FaceBook -facebookresearch -fairseq -fallbacks -fanout -faq +FakeQuant Farhadi FashionMNIST -FasterRCNN FastFormers -fastrcnn -fatihcakirs -favourably -fb -fbgemm -FBNet -fbnetc -fbresnet -FBResNet -fc -fcn -FCN -fd -fdbf +FasterRCNN FeatureExtractor -feedbacks Feng -ffc -filename -filenames FileNotFoundError -filepath -filesystem -finbert -finetune Finetune -finetuned -finetuning -flac FlatMapDataset -flaubert -flavour -flavours +FlauBERT Flavours -floatfunctional FloatFunctional FloatTensor -FLOPs Florian -fmfn -fmt -fmtstr -fn -fname -fns -foregound -fp -FP -fpic -fPIC -fpn -FPN -FRN +FrameworkModel +Frantar FromConfig -frontend -fstack -ftfy Fu -fullres -func -functionalities -functionet -functools Funtowicz -fvcore -fw -FWK -fx -GameAI +FusedConv GANs +GCP +GEMM +GFLOPs +GLIBCXX +GLOG +GLUE +GN +GPG +GPTJ +GPTQ +GPUs +GQm +Galata +GameAI Garnett -gcc -gchhablani -gclient -gd -geffnet -gelu Gelu GeluOperator GenEfficientNet GenericPreprocessor -german -germeval GermEval -gestaltit -getitem -getsize GetStrides -GFLOPs -gh -gid Gimpel Girshick -github GitHub -githubusercontent -gitmodules -GLIBCXX -GLOG -GLUE -gluebenchmark -gluepy -gluon +GlobalAveragePool Gluon -gluoncv GluonCV -gluonnlp -gn -GN -goldsborough -goog -google -googleapis -googleblog -googlenet -googlesource +GoogleNet +Governers Goyal -gpg -GPG -gpt -GPTJ -gpu -gpus -GPUs -graphdef GraphDef GraphModule +GraphModules GraphProto Grauman -grpc -gtFile -gtFine +Graviton +Guangxuan Gui Guillaume Guoming -gz -gzY +HAWQ +HBM +HOROVOD +HPO +HTnwXegLGNAtw +HWs +HYPJUDY Haibin -haibinlin Haihao -hangzhang -hardcoding +Hanwen HasAns -hawq -HAWQ HdQ -heatmaps Hein -helloworld HelloWorld -henson -hiddenlayer -hippocampus Hippocampus HistogramObserver -hlu 
-horovod Horovod -HOROVOD -horovodrun -hostfile Hounsfield -howpublished -hyp HqEgzS -href -html -http -https Hu -hubert -huggingface +HuBERT HuggingFace HuggingFace's HuggingFacesTS -hujie -hvd HybirdBlock HybridBlock -hyperparameter -hyperparameters -icc +IC ICCV -Icelake -icpc -icx -ide -idx -ie IEEE ILSVR -ilsvrc ILSVRC -Ilya -im -imagecocodataset -ImageFolder -ImageList -imagenet +IML +IMS +INTRA +IOMP +IPEX +IRQ +ISA +Icelake +Ilya +ImageFolder +ImageList ImageNet -ImagenetRaw ImageRecord ImageRecordIter -imagesTr -imagesTs -img -imgrec -imgs -imgx -IML -impl +ImagenetRaw ImportError -IMS -ibean -inceptionresnetv InceptionResNetV -inceptionv +InceptionResnetV InceptionV -incollection IndexType -indexValue -indices -indico -inferencer -informations -infos -init InnerProduct -innersource -inp -inplace -inproceedings -inputcsv InputData InputExample InputFile Inria -insa -instanceonly -instantiation -integerops -intel -intelai IntelAI -interoperability -introudces -ints -inturn +IntelCaffe +IntelDevTools +IntelNeuralCompressor +IntelON +IntermediateLayersKnowledgeDistillationLossConfig InvertedResidual -io -ios -iOS -iou IoU -ipc -ipex -IPEX -ipynb -ipynbrun -ipython -ir -irv -ISA Isensee -isinstance -issuecomment IssueQuery IssueQueryThreads -iter IteratorGetNext -iters -intrinsics -Jäger -japanese -jemalloc +JIRA +JPEGImages +Javascript Jens +Ji Jie -jim Jingfei Jiong -JIRA -jit -jitter Joshi -jpeg -JPEGImages -jpg -jpwarren -json -jsons Julien JunWang -jupyter -kaggle -kaggleAdDisplayChallenge -kaiming +JupyterLab +JupyterLab's +Jäger +KH +KMP KaimingHe Karthik -kcho -keepbs -keepdim -keras Keskar -keyfile -keypoint Keypoint -kimiyoung -kitti -kmp -KMP KnowledgeDistillationLoss -kriz -kwargs +KnowledgeDistillationLossConfig +Kullback Kyunghyun +LLM +LLMs +LMHeadModel +LOADGEN +LOC +LOCderiv +LOCpart +LOGLEVEL +LPOT +LPOT's +LSVRC +LTS +LaTeX LabelBalance LabelShift -labelsTr Lample Lan -lang LanguageModeling Lapata -Larochelle Larey +Larochelle LastLayerShape -latencies -LaTeX Lavin -layernorm LayerNorm -layoutlm -ld -len +LayoutLM +LayoutLMv +LeakyRelu +Lecun +Leibler LessEqual -lf -lfaidata -lfs -li -libdeep -libengine -libffi -libGL -libglib -libiomp -libmlperf -librispeech +LiTS LibriSpeech -librosa -libsndfile -libstdc -libz -licence -liKE Limitting -lin -linkopt -linoxide -linux -linuxfoundation +LinkedIn ListDataset -LiTS Liu Liu's -llvmlite -lm -LMHeadModel -ln -loadgen LoadGen -LOADGEN LoadGen's +LoadImage LoadgenAPI LoadgenAPITestSettings LoadgenVersion -LoadImage -LOC -localdisk -localhost -LOCderiv -LOCpart -logdir -logfile -login -logits -LOGLEVEL LogSettings -logtostderr -longformer -lossy +LokuUdeVg Louf LowPrecisionInferenceTool -lowproposals -lowres Lp -lpot -LPOT -LPOT's -lr -lS -LSVRC -lt -LTS -lua Luan -lutzroeder -lyon Lysandre -lzma -macOS MACOSX +MALLOC MAdds +MICCAI +MKL +MLAS +MLPerf +MLefficiency +MLperf +MMLAB +MNASNet +MNIST +MNLI +MRPC +MSD +MSE +MSELoss +MSFT +MSR +MULTISCALE +MXNet +MYTASK +MYTASKNAME Madotto MagnitudePrunePolicy Maier -mainpage +MakeIterator Makefile MakefileGnProj -MakeIterator Mandar Manmatha -manylinux -mAp -mAP Mapillary -marianmt +MarkDown MaskPostProcessor -maskrcnn MaskRCNN MaskRCNNFPNFeatureExtractor -maskrnn -massa Massa -matcher -matmul MatMul +MatMulInteger +MatMulIntegerToFloat MatMulWithBias MatMulWithBiasAdd MatMulWithBiasGelu MatMulWithBiasTanh -matplotlib -matricses -maxdepth -maxindrange -maxk MaxPool -maxSizeInComplete -mbart -mBERT -mcc McCann -mcordts -md +McKinstry MeanSquaredError -measurer Medcial 
-medicaldecathlon -meetup -mem -membind -mems -messi -metabuild -metadata -metamind -MICCAI -microsoft -miguelgrinberg -Mingda -minibatch -minilm -minimalistic -minival -minloglevel -minmax +Migacz +MinMax MinMaxObserver -mins -mIoU -mIOU +Mingda +MiniLM Mirella -misalignments -miscs Mish -missmatches -MixedConv -mixnet MixNet -mixup -mkdir -mkl -MKL -mlap -mlas -MLAS -mlcommons -mll -mlm -mlp -mlpc -mlperf -MLperf -MLPerf -mlt -mmdetection -mmlab -MMLAB -mnasnet -MNASNet -mnist -MNIST -mnli -MNLI -mobilebert +MixedConv +MixedPrecision +MixedPrecisionConfig MobileBERT -mobilenet MobileNet -mobilenetv -Mobilenetv -MobilenetV -MobileNetv MobileNetV -modalities +MobileNetv +MobilenetV +Mobilenetv Modalities -modality ModelConversion -modelfeatures -modelforward -modelinput -modellogits -modelmean -modelsize -modelstd +ModelProto +ModelSize ModelTC +ModelZoo ModuleDict ModuleNotFoundError Molino -mpi -mrcnn -mrpc -MRPC -MSD -mse -MSE -msvc -mul -mult -multi +Moshe Multi -multiclass -multilabel -multinli MultiNLI -multiscale -MULTISCALE MultiStream MultiStream's MultiStreamFree -mutli -mv -mx -mxnet MxNet -MXNet MyDataset +MyLearning +MyMetric Mykhailo Myle -MyMetric -myModel -MYTASK -MYTASKNAME -Naman -namedtuple -nanohanno -Narasimhan NAS -nasnet +NASBase +NASConfig NASNet -nasnetalarge -nasnetamobile -nb -nbest -nbsp -nc NCCL -nchw NCHW -nd -ndarray NDArray -nderlu -NeelNanda -neox -nepoch -ner NER -nervanasystems -nesterov -NetEase -netron -Netron -networkbuilders -NeurIPS -neval -NewMetric -newstest -nextplatform -ng -ngatang NGPUS -ngram NHWC -ni NIC -nifti -niftis -nii -Nijmegen -Nitish -nl NLG -nli -nll -nlp NLP -nlpyang -nltk +NLPToolkit NLU -nm -nms -nn -nnodes -nnu -nnU -nnunet -nnUnet -nnUNet -nnUNetPlansv -nnUNetTrainer -nnUNetTrainers -nnUNetTrainerV NNZ -noduplicates -NoisyStudent -Nonlinearity -NonNestedTuple -NoNormalization -NonZero -noobj -np -nproc -npy -npz -nq -nrix -ns -nsample -nsamples -nsdf -nSsKchNAySU -nthreads -ntrain -num -numactl -numba -numCompleteThreads -numerics -numpy -numTest -numTraining +NPM +NUMA NVAITC -nvcc -nvidia NVIDIA NVIDIA's -nvme -nw +NVidia +Naman +Namhoon +Nano +Narasimhan +NeelNanda +NetEase +Netron +NeurIPS +NeuralCompressor +NewDataloader +NewMetric +NextPlatform +Nezha +Nijmegen +Nitish +NoNormalization +NodeJS +NoisyStudent +NonNestedTuple +NonZero +Nonlinearity +NormalFloat +Nsh +Ntsk Nx NxM -nyu -oc -ok -ol -Omer +OC OMP -onboarding -oneapi -oneAPI -onednn -oneDNN -oneshot -onlinedocs -onnx ONNX +ONNXCommunityMeetup +ONNXConfig ONNXQuantizer -onnxrt ONNXRT -onnxruntime -OnnxRuntime -oob +ONNXRTAdaptor OOM OOQtYMH -openai -OpenAI -OpenAI's -OpenAIAdam -OpenAIGPTModel -OpenAIGPTTokenizer -opencv -OpenCV -openmp -openslr -opensource -openssl -openvino -OpenVINO -openvinotoolkit -OpenWebTextCorpus -OperatorConfig OPs -opset -opsetid -optim -optimizations -Optimizations -optimizers -Optimizers -optypewise -opwise -OrderedDict ORGderiv ORGpart -os -osJJ OTH OTHderiv OTHpart +OaaS +Ofir +Omer +OnnxRuntime +OpenAI +OpenAI's +OpenAIAdam +OpenAIGPTModel +OpenAIGPTTokenizer +OpenCV +OpenMP +OpenVINO +OpenWebTextCorpus +OperatorConfig +Optimizations +Optimizers +OrderedDict Ott -oup -outdir Outlier -outliers OutputData -outputfile -ov -overfeat -overfit -overfitted -PaddingSequence +PERderiv +PERpart +PIL +PLM +PLg +PNASNet +POC +PPLM +PQ +PR +PRETAINED +PRs +PTQ +PWC +PWD +PWDEBUG +PYTHONPATH +PZ PaddingSequence -pageId -palletsprojects -panoptic Panoptic -paperswithcode -param -parametrization -params Parinov ParseDecodeImagenet ParseDecodeVoc 
-participations Parzen -pastebin -patientIDs -pb -pbar -pdf Peason -pegasus -pelee -peleenet PeleeNet Penghui Pengxin -pepy PerChannelMinMaxObserver -PERderiv -perf -perftests -PERpart -phrasebank -phy -physcpubind PhYUmn Piero Pierric -PIL -pixAcc Piyush -pjreddie -pkill -pkl -pky -plm -PLM -pls -pnasnet -PNASNet -png -POC -polynet PolyNet +Ponte Pooler -pos -postprocesing -postprocess -postprocessed -postprocessing +PostPostTrainingQuantConfig PostProcessor +PostTrainingQuantConfig PostTransform PowerTools -pplm -PPLM -PQ -pre -prebuild -prebuilt +PreSumm Prec -precisions -pred -preds -preformance Preload -preprint -preprocess -preprocessed -preprocesses -preprocessing -preprocessor +Preloading Preprocessor PreprocessorFor Preprocessors -prerelease -PreSumm -pretrain -pretrained -pretrainedmodels -pretraining -prev -prioritizies -probs -proc -productizing -profilings ProgressBar -proto Protobuf -protoc -protractortest -PRs PrunePolicy -pth -ptq -PTQ -ptr -pudae -pw -PWC -pwd -PWD -px -py -pybind -pycocotools -pyguide -pylint -pymodule -pymoo +Pruning's +PruningConf PyObject -pypi PyPI +PyPi PySUT -pytest -PythonAPI -PYTHONPATH -pytorch PyTorch -pytorchic PyTorchKnowledgeDistillationLoss -pyyaml PyYAML -PZ -qat +PythonAPI +PythonLauncher QAT -qconfig QConfig -QiaoranC -qint -qlinear +QDQ +QIntegerops QLinear -qlinearops -QnA -qnli +QLinearOps +QLinearOpsAdaptor +QLinearops +QLoRA QNLI -qps +QOperator QPS -qqp QQP -qscheme -qsl QSL -qtcreator -qtype -quant -quantile -quantizable +QiaoranC +Qlora +QnA +QuaLA +QuantConf +QuantStub Quantizable -quantization Quantization -quantize -quantized +QuantizationAwareTrainingConfig +QuantizeLinear QuantizedConv QuantizedConvReLU QuantizedInput -quantizer -quantizes Quantizes -quantizing -QuantStub QueryBackendCapability QuerySampleComplete QuerySampleLibrary -quickstart -Quickstart QuickStart +Quickstart Quoc R'emi +README +RESTful +RFB +RGB +RMSE +RNN +ROC +RPN +RPNHead +RPNPostProcessor +RTN +RTX Radboud Radford Radu -rAjHyXhTzz -rajpurkar -ramdisk RandAug RandAugment -randn RandomCrop RandomHorizontalFlip RandomResizedCrop RandomVerticalFlip Rault -rc -rcnn -readme -README +ReLU ReadmeBuild ReadmeFAQ ReadmeHtmlDocs ReadmeTests -readthedocs -realtime Realtime -rebase -recommonmark -RecordingObserver -recordio RecordIO -recurse +RecordingObserver Redmon ReduceMean -regex RegNet -rehm Rehm -reinstall -relase -relu +Releaser Relu -ReLU -repo -repo's -repo’s -repos -representating -requantize -resampled -resampling -rescale +ResNeSt +ResNeXt +ResNest +ResNet +ResNetV +ResNext Rescale ResencUNet -resize Resize ResizeCropImagenet -resized -Resizes ResizeWithRatio -resnest -ResNest -ResNeSt -resnet +Resizes Resnet -ResNet -resnetv -ResNetV -resnext -ResNext -ResNeXt -ressource -ressources -reStructuredText -ret +ResultMonitor RetinaMask -retinanet -retinaNet RetinaNet -reusability Rewon -rf -rfcn -rgb -RGB -rmax -rmin -RMSE -rn -rng -RNN -rnnt -ro -roberta RoBERTa RobertaModel RobertaTokenizer -ROC RocStories Romary -rosanneliu -rougeL -rougeLsum -rowanz -rowspan -RPN -RPNHead -RPNPostProcessor Rsqrt -rst -rtd -RTX -runhooks -runtime Runtime RuntimeError Rusia -rusiaaman Ruslan -rw -rwightman -sacremoses +SBSTD +SENet +SEP +SGD +SHA +SMBO +SMBOs +SOTA +SPIQ +SPR +SQuAD +SSDMobilenet +SSDSC +SSDSCKKB +STS +SUT +SageMaker Sagot Salakhutdinov -salesforce Salesforce Salimans -sanh Sanh -sata -SavedModel SavedModel Scalable -scaler -scatterFillKernel -sched -scikit -scm -screenshots ScriptModule -se -sed Seddah -seg -segm SegmentationMask -segmentations -seid 
-senet -SENet -sentencepiece +SelfKnowledgeDistillationLossConfig Sep -SEP SeqDataCollator -serializable +ServerAPP ServerPool -sess -setuptools -sexualized -SGD -sgd -sgmoid -SHA -sharded +SettingsPython Sharma Shen Shirish -shouldn -showEvent -shufflenet -Shufflenet ShuffleNet -shufflenetv +Shufflenet Shvets -sigmoid -signup -sigopt -Sigopt SigOpt +Sigopt SingleStream -skx Skylake -skylion -SMBO -SMBOs Smola -smoothes -sndfile +SmoothQuant +SoX Socher -socio SocketIO -softmax -somain Soricut -sota -SOTA -sox -SoX -spacings -spacy SpaCy SparseCategoricalAccuracy SparseCategoricalCrossentropy -sparsified +SparseLib Spearman -spearmanr -specificities -splitted -spm -spnasnet -sqlalchemy Sqrt -sqrt -sqSiUy Squad -SQuAD SquadF -squadpy SquadV SquaredDifference -squeezebert -squeezenet +SqueezeBERT SqueezeNet -src SrcTuple -sryqufw -ssd -SSDMobilenet -SSDSC -sshleifer -sst -stackoverflow +StableDiffusionPipeline Standley -startswith StartTest -stdout -stds -stefan -stemblock -stepsize +Startup Stoyanov -str -strided -struct -sts -STS -stsb -styleguide -Suárez -subexpression -subfolder -subfolders Subgraph -submodule -submodules Submodules -subsample -subtoken -sudo Sumanth -summarization Summarization SummaryWriter -superseeds -suported -sut -SUT +SuperBench +Supernet +SupportMatrix +Supprted Sutskever -sv -svg -swagaf -sym -symlink -symlinked -symlinks +Suyue +Suárez Symlinks -synset -sys SystemUnderTest -tanh -TaskXX -TaskXXX -tb +Szymon TBD -tbe -tbody -td -techdecoded -tencent -tensor's -tensorboard -tensorBoard -TensorBoard -tensorcore -TensorDataset -tensorflow -TensorFlow -TensorflowQuery -tensorImageSize -TensorInfo -TensorProto -teraoperations -tesla -testability -TestSettings -tf +TEQ TF TFBertForSequenceClassification -tfhub -tflite -tfp -tfrecord TFRecord TFRecordDataset -tfrecords TFRobertaModel TFSlimNetsFactory TFSlimNetsFactory's -tg -tgt -tgz -th THCudaTensor -thead -thepath -thres -thrs -Tian -Tidx -timeline -timestamps -TinyBERT -tl -tlkh -tLoss TLS -tmp -tmpfs -ToArray -ToBGR -toc -toctree TODO -tokenization -tokenize -tokenized -tokenizer -Tokenizer -tokenizers -Tokenizers -tokenizing -tol TOL -tolist -toml +TPE +TPU +TZ +TaskDB +TaskLauncher +TaskMonitor +TaskXX +TaskXXX +TensorBoard +TensorDataset +TensorFlow +TensorInfo +TensorProto +TensorRT +TensorflowQATModel +TensorflowQuery +TensorflowTopK +TensorrtExecutionProvider +TestSettings +Thalaiyasingam +Tian +Tidx +TimDettmers +TinyBERT +ToArray +ToBGR ToNDArray -toolchains ToPILImage -topk -TopK -topologies ToRange -torchaudio -torchscript +ToTensor +Tokenizer +Tokenizers +TopK +TorchDynamo TorchScript -torchtext -torchvision +TorchSmoothQuant TorchVision -toronto -totalizing -ToTensor +Torr Toutanova -tp -tpe -TPE -tpu -TPU -tqdm -traceback -trainings -trainval -trainvaltest -transfo -TransformImage TransfoXLModel TransfoXLTokenizer -travis -trigram -tstandley -tsv +TransformImage +Treebank TuneStrategy -tunings -tuningusage -tuple -tuples -txt -TZ -uber -ubuntu -ubyte +TuningCriterion UI UID -uint -uk -ultralytics -un -uncomment -uncompress -unet -Unet +UKERBljNxC UNet -unidecode -uniq -unittest -unref -unscale -unsqueeze -unstack -upenn -uploader -upscaled -Upscaled -upstreamed -url -userspace -usp -usr UTC -util -utils -valminusminival -valset -ValueError -Varshney +UmK +Unet +Upscaled VCVTNE VCVTNEPS VDPBF -vec -Veronika -veronikayurchuk -versioned -Veselin -vgg -viewpage -Villemonte -ViT -voc +VMware +VNNI VOC -VOCdevkit -VOCmAP VOCMApMetrics VOCRecord -voxel -voxels -vram +VOCdevkit +VOCmAP VRAM +VSCode VTune 
-waleedka +ValueError +Vanhoucke +Varshney +Vecchio +Veronika +Veselin +ViT +Villemonte +WARMUPS +WIDERFACE +WIP +WLYDCRB Wallach -wangg -warmup -wav -wd -webcam -Webcam -webite -webpage +Wasserblat +WeChat +WebSocket WebSockets WebText -wedam +Webcam +WeightOnlyLinear +WeightPruningConfig WeightSharedConvolutionalBoxPredictor Wformat -wget -whitelist -whl -WideResNet WideResNet Wightman -wikipedia -wikitext WikiText WilsonCity -WIP -WLYDCRB -wmt -wnd WnD -wnli Wnxu WordPiece -workdir -workflow Workflow -workflows -workspace -wrt -wwm -www -xad -xception -Xception -xchannel -xcode -xeon -Xeon -Xiang -Xiong -xl +XKeyboard XLA -xlm XLMModel XLMTokenizer -xlnet XLNet XLNetModel XLNetTokenizer -XlUH -xml -xnli XNLI -xsum -xV -xvf -xvzf XXXX -xxy -xxz -xYNrZdEAnrHk -xywh -xyxy -xz -xzvf -yacs -yaml -yamls +Xbyak +Xception +Xdiag +Xeon +Xiang +Xiao +Xiong +Xiuying +XlUH +YADWOFuj +YKd +YOLOV +YOLOv +YY +YagFgODM +Yan Yi Yiming Yinhan -yizhu -yjxiong -YKd Yoann -yolo -yolov -YOLOv -YOLOV -yosinski Yosinski YqgzY Yuanjun Yue Yunpeng Yurchuk -YY -zenodo -Zettlemoyer -zfnet +Yvinec ZFNet -zh -zhang +ZHShareTargetIDMore +Zafrir +ZeroPoint +Zettlemoyer Zhang -zhanghang Zhenzhong Zhi Zhilin Zhongyue -zhongyuezhang Zhu Zihang -zihangdai -znoexecstack -znow Zptls -zrelro -zrl -zxvf -CustomObj -ModelSize -QDQ -QLinearOps -qdq -qdqops -CodeGenerator -GEMM -SparseLib -Xbyak -brgemm -cfgs -gtests -hpp -hypotype -kd -ker -kern -sparsednn -spmm -xxxx -GraphModules -wsl -descs -gtest -IOMP -MALLOC -PRETAINED -SPR -libjemalloc -preload -thp -GCP -gcp -gif -solutionslibrary -geomean -VNNI -Preloading -DUC -duc -leftImg -roc -sklearn -CLA -cla -whitehat -codeofconduct -CYP -SBSTD -xd -samsum -IntelCaffe -baremetal -HWs -IC -KH -NVidia -OC -bolded -sparsification -tensorrt -hardwares -BenchmarkConf -PruningConf -Pruning's -DistillationConf -grey -ModelZoo -mzbert -CaffeNet -FlauBERT -GoogleNet -SqueezeBERT -iz -lvwerra -mBart -oje -za -zk -QIntegerops -QLinearops -criterions -HuBERT -csarron -gpb -howey -huawei -noah -nreimers -pruneofa -textattack -scheduler's -BiDAF -bidaf -FERPlus -ferplus -MixedPrecision -DUnetCNN -calibrationset -ndhwc -ArcFace +aa +aac +aae +aarch +abc +abcadf +abeja +abi +absl +absmax +abspath +abstractive +acc +accuracies +acdc +activations +actorder +adam +adaptor +adaptor's +addr +ade +adresses +ae +aea +af +ai +aia +aidevcloud +ailab +al +albert +aldk +alexnet +algo +algos +alibaba +aliblade +allenai +alloc +alsologtostderr +amazonaws +amazonlinux +amd +amerather +amodio +amongst +amsgrad +amx +analytics +andravin +andreamad +anisotropic +anno +anton +ap +apache +api +apis +approch +arXiv +arange arcface arcfaceresnet -nfolds -RFB -WIDERFACE -shuoyang -ultraface -XKeyboard -lscpu -qpa -vnni -xcb -DevCloud -PyPi -aidevcloud -awk -clx -devcloud -lAtr -nda -ppn -qstat -qsub -qsvr -ruserok -scp -spr -stderr -uXXXXX -QuantConf -SuperBench +archs +arg +argmax +argparse +args +arxiv +asd +astype +asym +async +aten +atrous +att +attr +attredirects +attrs +auc +aug +autgrad +autoEnabling autocast -kai -mailto -superbench -yao -Lecun -NLPToolkit -Yan -exdb -lecun -publis -yann -abcadf -bcb -INTRA -WARMUPS -ende -intra -inteltensorflow -AutoINC +autogenerate +autograd autoinc -CNWXA -ZHShareTargetIDMore -utm -youtube -zhihu -zhuanlan -AutoQuant -yottx -yrw -Changelog -CHANGELOG -codegen -feedstock -Galata -galata -Javascript -jestjs -jlpm -ui -jpserver -js -JupyterLab -jupyterlab -JupyterLab's -labextension -labextensions -NodeJS -NPM -npm -pkgs -PWDEBUG -pyproject -Releaser -releaser -sdist 
-ServerAPP -serverextension -serverIP -username -yarnpkg -BasicNAS -DyNAS -NASBase -NASConfig -Supernet -archs -dynas -evals -mbv -nas -nsga -ofa -pareto -pragma -supernet -Governers -OpenMP -cpufreq -governer -powersave -MarkDown -quantizations -NUMA -bc -cdb -deeac -eaf -IntelON -YagFgODM -eD -oQA -qq -weixin -AWS -Btg -Dbzg -GQm -IntelDevTools -LinkedIn -MSR -MyLearning +automixedprecisionmkl +autonumber +autopep +ava +avgloss +avx +awk +awq aws azuremarketplace bCO +backend +backends +backticks +baremetal +bart +barthez +bashrc +basicConfig +batchgenerators +batchnorm +bayesian +bazel +bb +bbbb +bbox +bboxes +bc +bcb +bccf +bce +bd +bdb +bdf +bdist +beba +benchmarked +benchmarking +berkeleyvision +bert +berts +bertsquad +bfc +bfloat +bicubic +bidaf +bigdl +bigscience +bilibili +bilinear +billsum +binarize +binarized +biomedical bitnami -dXY -deeplearning -iLit -linkedin -pdp -prodview -retweets -sharepoint -xmggbmga -yjyh -torchdynamo -SupportMatrix -BladeDISC -TorchDynamo -aliblade -automixedprecisionmkl -ofi -itex -extmanager -AMD -Graviton -TensorRT -aldk -cj -cwad -ec -gnq -scalable -twlt -xlarge -CompVis -Nezha -StableDiffusionPipeline -diffusers -grafftti -initializer -learnable -nezha -Supprted -mrm -qa -salti -spanbert -squadv -Alireza -MiniLM -Supprted -english -philschmid -sd -youtooz -DyNas -amd -openaccess -thecvf -weiaicunzai -Kullback -Leibler -ia -ieee -ieeexplore -modelsize -CCL +bitsandbytes +blendcnn +bleu +blocktime +blockwise +blogpost +bmm +bn +bnb +bninception +bobw +bolded +booktitle +bool +boolean +boxlist +br +brgemm +brighly +broadcasted +bs +bsnone +bugfix +buildin +builtin +bvlcalexnet +bzip +cadene +caffe +caffenet +cafferesnet +calib +calibrationcsv +calibrationset +camembert +canada +cb +cbica +ccdb +ccedc ccl -cwd -oneccl -setvars -Affine -ZeroPoint -ModelProto -graphDef -pythonic -Treebank +cd +cdb +cded +cdf +cdn +cdot +cdrdv +ce +ceba +cec +ceee ceil -brighly +centerNet +centernet +centos +cern +certfile +cfa +cff +cffi +cfg +cfgs +channelx +chatbot +checkbox +checkboxes +chmod +chongruo +chris christmas -dicoo -snowly -syncedreview -ww -EmbedLayerNormalization -FusedConv -GlobalAveragePool -LeakyRelu -ONNXRTAdaptor -QLinearOpsAdaptor -onnxrtadaptor -constfold -grappler -amsgrad -qoperator -apis -CPz -PostTrainingQuantConfig -Nsh -UmK -fe -vmware -PythonLauncher -keepachangelog -vscode -IntelNeuralCompressor -SettingsPython -VSCode -argparse -autoEnabling +ci +cifar +circleci +cityscapes +cityscapesScripts +cityscapesscripts +cj +cknowledge +ckpt +ckpts +cla +classDef +classDiagram +clcarwin +cli clickAuto clickEnable clickSetting +clipnorm +clm +cloudblogs +cls +clx +cly +cmake +cmd +cn +cnn +cnt +cocoApi +cocoapi +cocodataset +cocoraw +codalab +codebert +codecogs +codegen +codenamed +codeofconduct +codexglue +colorama +colspan +compat +compilervars +concat +cond +conda +condconv +conf +config +configs +confs connectSSH -enableHistory -historyDetail -itemName -leftIcon -outPut -settingPath -topRight -visualstudio -amodio +const +constexpr +constfold +contaning +contrib +conv +convertion +convolutional +cooldown +copt +coreml +cp +cpp +cpu +cpufreq +cpus +cpython +creafz +creatis +creativecommons +criteo +criterions +croping +crossvalidaton +crt +csarron +css +csv +ctrl +ctuning +ctx +cuda +cudaPopCallConfiguration +cudatoolkit +cudnn +curr +customised +custormer +cv +cwad +cwd +cxxopt +cypw +cython +dNative +dXY +da +dae +dailymail +darknet +dataLoader +datadir +datafile +dataloader +dataloaders +datapoints +dataset +datasets +datatype +datatypes 
+dathath +datset dbaeumer -dropdown -eslint -registerCommand -tsl -viewlet -PythonLauncher -BigDL -BigDLNanoSupport -Nano -bigdl -inferenceoptimizer -nano -SageMaker -bb -beba -ccdb -ceba +dbmdz +dbox +dbs +dcn +ddp +de +deberta +decapoda +deeac deeb -ebbce -efe -npmjs -AWSSageMakerSupport -sagemaker -xpu +deepcopy +deepengine +deeplab +deeplabv +deeplearning +deepset +denseblock +denselayer +densenet +deps +dequant +dequantization +dequantize +dequantized +desc +descs +dest +destructor +detections +detectron +dev +devcloud +devel +devtool +dfb dgpu -BenchmarkConfig -QuantizationAwareTrainingConfig -Startup +diag +dialogpt +dicoo +dicts +diffusers +dir +dirname +distil +distilbert +distilgpt +distillated +distillating +distilrobert +distilroberta +distro +distutils +dividiti +dl +dlabel +dlboost +dlrm +dmjx +dmlc +dnf +dnn +dnnl +doclist +docstrings +doctrings +docutils doesn -startup -Ajanthan -WeightPruningConfig -Namhoon -Thalaiyasingam -Torr -QOperator -MixedPrecisionConfig -mixedprecision -contrib -ONNXConfig -Arial -MatMulInteger -MatMulIntegerToFloat -QuantizeLinear -css -subgraph -AccuracyCriterion -AccuracyLoss -TuningCriterion -CHZ -IRQ -SSDSCKKB -aarch -Hanwen -Suyue -MLefficiency -cff -kawapanion -mlefficiency -DistillationConfig -SelfKnowledgeDistillationLossConfig -DistillationConfig -KnowledgeDistillationLossConfig -confs -TensorflowQATModel -HBM -Ponte -SmoothQuant -Vecchio -WeChat -QuaLA -Moshe -Ofir -Wasserblat -Zafrir -etection -parameterization -emsp -frequence -ema -ASPLOS -MSFT -NextPlatform -VMware -todo -gui -PostPostTrainingQuantConfig -MobilenetV -InceptionResnetV -zalandoresearch -emCgSTlJaAg -matsubara -yoshitomo -deepset -FAC -electra -parallelizes -IntermediateLayersKnowledgeDistillationLossConfig -distutils -glx -libgl -toolchain -bigscience -FQqOuW -HTnwXegLGNAtw -gemm -optypes -gloo -CPUExecutionProvider -CUDAExecutionProvider -TensorrtExecutionProvider -iterable -trt -LLMs -Xdiag -amongst -cdot -diag -mathbb -smoothquant -CVF -Edouard -Guangxuan -LLM -MSELoss -SPIQ -TorchSmoothQuant -Xiao -Xiuying -Yvinec -absmax -dequantization -dequantized +doteq +dowmsampling +downloader +downsampled +downsampling +doxygen +dpn +dpr dq +dropdown +ds +dscore +dst +dtype +dualpathnetworks +duc +dunet +dvdt +dw +dynamiccaly +dynas +eD +ead +eaf +earlystop +eb +ebbce +ec +ece +ecotrust +edgetpu +edu +ee +eer +ef +efb +efe +efficientNet +efficientnet +eg +eightbit +einstein +el +electra +elif +eltwise +emCgSTlJaAg +ema +emb +embeddings +embs +emsp +enableHistory +enablerepo +ende +enfr +eng +english +ensembling +ensp +entrypoint +enum +env +environ +ep +eps +eq +erf +eriklindernoren +eslint +esri +et +etection +eval +evals +evaluator +evel +exdb +exemplarily +exising +existing +exmaple +expanduser +extensibility +extmanager +extractive f'scale +fPIC +fabian +facebook +facebookresearch +fafdcd +fairseq +fallbacks +fanout +faq +fastrcnn +fatihcakirs +favourably +fb +fba +fbgemm +fbnetc +fbresnet +fc +fcn +fd +fdb +fdbf +fe +feedbacks +feedstock +ferplus +ffc +filename +filenames +filepath +filesystem +finbert +finetune +finetuned +finetuning +flac +flaubert +flavour +flavours +floatfunctional +fmfn +fmt +fmtstr +fn +fname +fns +foregound +fp +fpic +fpn frac -groupnorm -hardtanh -instancenorm -leakyrelu -llamanorm -nbias -pc -cdrdv -NeuralCompressor -zp -TensorflowTopK -mpirun -FUNSD -LayoutLMv -layoutlmv +frequence +frontend +fstack +ftfy +fullres +func +functionalities +functionet +functools funsd -layoutlmft -layoutlm -LayoutLM -nielsr -HYPJUDY -amerather -efb -netflix 
-DeBERTa +fvcore +fw +fx +gRPC +galata +gcc +gchhablani +gclient +gcp +gd +geffnet +gelu +gemm +geomean +german +germeval +gestaltit +getitem +getsize +gh +gid +gif +github +githubusercontent +gitmodules +gloo +gluebenchmark +gluepy +gluon +gluoncv +gluonnlp +glx +gn +gnq +goldsborough +goog +google +googleapis +googleblog +googlenet +googlesource +governer +gpb +gpg +gpt +gptq +gpu +gpus +grafftti +graphDef +graphdef +grappler +grey +groupnorm +grpc +gtFile +gtFine +gtest +gtests +gui +gz +gzY +haibinlin +hangzhang +hardcoding +hardswish +hardtanh +hardwares +hawq +heatmaps +helloworld +henson +hiddenlayer +hippocampus +historyDetail +hlu +horovod +horovodrun +hostfile +hostname +howey +howpublished +hpo +hpp +href +html +htmlLabels +http +https +huawei +hubert +huggingface +hujie +hvd +hyp +hyperparameter +hyperparameters +hypotype +iLit +iOS +ia +ibean +icc +icpc +icx +ide +idx +ie +ieee +ieeexplore +ilsvrc +im +imagecocodataset +imagenet +imagesTr +imagesTs +img +imgrec +imgs +imgx +impl +inceptionresnetv +inceptionv +incollection +indexValue +indices +indico +inferenceoptimizer +inferencer +informations +infos +init +initializer +innersource +inp +inplace +inproceedings +inputcsv +insa +instancenorm +instanceonly +instantiation +integerops +intel +intelai +inteltensorflow +interoperability +intra +intrinsics +introudces +ints +inturn +io +ios +iou +ip +ipc +ipex +ipynb +ipynbrun +ipython +ir +irv +isinstance +issuecomment +itemName +itemStyle +iter +iterable +iters +itex +iz +jJA +japanese +jemalloc +jestjs +jim +jit +jitter +jlpm +jpeg +jpg +jpserver +jpwarren +js +json +jsonl +jsons +jupyter +jupyterlab +kaggle +kaggleAdDisplayChallenge +kai +kaiming +kawapanion +kcho +kd +keepachangelog +keepbs +keepdim +ker +keras +kern +keyfile +keypoint +kimiyoung +kitti +kmp +kriz +kwargs +lAtr +lS +labelsTr +labextension +labextensions +lang +latencies +layernorm +layoutlm +layoutlmft +layoutlmv +ld +leakyrelu +learnable +lecun +leftIcon +leftImg +len +lf +lfaidata +lfs +li +liKE +libGL +libdeep +libengine +libffi +libgl +libglib +libiomp +libjemalloc +libmlperf +librispeech +librosa +libsndfile +libstdc +libz +licence +lin +linkedin +linkopt +linoxide +linux +linuxfoundation +llamanorm +llms +llvmlite +lm +ln +loadgen +localdisk +localhost +logdir +logfile +login +logits +logtostderr +longformer +lossy +lowproposals +lowres +lpot +lr +lscpu +lt +luYBWA +lua +lutzroeder +lvwerra +lyon +lzma +mAP +mAp +mBERT +mBart +mIOU +mIoU +macOS +mailto +mainpage +manylinux +marianmt +maskrcnn +maskrnn +massa +matcher +mathbb +matmul +matplotlib +matricses +matsubara +maxSizeInComplete +maxdepth +maxindrange +maxk +mbart +mbv +mcc +mcordts +md +measurer +medicaldecathlon +meetup +mem +membind +mems +messi +metabuild +metadata +metamind +microsoft +miguelgrinberg +minibatch +minilm +minimalistic +minival +minloglevel +minmax +mins +misalignments +miscs +missmatches +mixedprecision +mixnet +mixup +mkdir +mkl +mlap +mlas +mlcommons +mlefficiency +mll +mlm +mlp +mlpc +mlperf +mlt +mmdetection +mmlab +mnasnet +mnist +mnli +mobilebert +mobilenet +mobilenetv +modalities +modality +modelfeatures +modelforward +modelinput +modellogits +modelmean +modelsize +modelstd +mose +mpi +mpirun +mrcnn +mrm +mrpc +mse +msg +msvc +mul +mult +multi +multiclass +multilabel +multinli +multiscale +mutli +mv +mx +mxnet +myModel +mzbert +nSsKchNAySU +namedtuple +nano +nanohanno +nas +nasnet +nasnetalarge +nasnetamobile +nb +nbest +nbias +nbsp +nc +nchw +nd +nda +ndarray +nderlu +ndhwc +neox +nepoch +ner +nervanasystems +nesterov 
+netflix +netron +networkbuilders +neval +newstest +nextplatform +nezha +nf +nfolds +ng +ngatang +ngram +ni +nielsr +nifti +niftis +nii +nl +nli +nll +nlp +nlpyang +nltk +nm +nms +nn +nnU +nnUNet +nnUNetPlansv +nnUNetTrainer +nnUNetTrainerV +nnUNetTrainers +nnUnet +nnodes +nnu +nnunet +noah +noduplicates +noobj +np +npm +npmjs +nproc +npy +npz +nq +nreimers +nrix +ns +nsample +nsamples +nsdf +nsga +nthreads +ntrain +num +numCompleteThreads +numTest +numTraining +numactl +numba +numerics +numpy +nvcc +nvidia +nvme +nw +nyu +oQA +oaas +oc +ofa +ofi +oje +ok +ol +onboarding +oneAPI +oneDNN +oneapi +oneccl +onednn +oneshot +onlinedocs +onnx +onnxrt +onnxrtadaptor +onnxruntime +oob +openaccess +openai +opencv +openmp +openslr +opensource +openssl +openvino +openvinotoolkit +opset +opsetid +optim +optimizations +optimizers +optype +optypes +optypewise +opwise +os +osJJ +oup +outPut +outdir +outliers +outputfile +ov +overfeat +overfit +overfitted +pQ +pageId +palletsprojects +panoptic +paperswithcode +parallelize +parallelizes +parallelizing +param +parameterization +parametrization +params +pareto +participations +pastebin +patientIDs +pb +pbar +pc +pdf +pdp +pegasus +pelee +peleenet +pepy +percdamp +perchannel +perf +perftests +philschmid +phrasebank +phy +physcpubind +pixAcc +pjreddie +pkgs +pkill +pkl +pky +plm +pls +pnasnet +png +polynet +pos +postprocesing +postprocess +postprocessed +postprocessing +powersave +pplm +ppn +pragma +pre +prebuild +prebuilt +precisions +pred +preds +preformance +preload +preprint +preprocess +preprocessed +preprocesses +preprocessing +preprocessor +prerelease +pretrain +pretrained +pretrainedmodels +pretraining +prev +prioritizies +probs +proc +productizing +prodview +profilings +proto +protoc +protractortest +pruneofa +pth +ptq +ptr +publis +pudae +pw +pwd +px +py +pybind +pycocotools +pyguide +pylint +pymodule +pymoo +pypi +pyproject +pytest +pythonic +pytorch +pytorchic +pyyaml +qa +qat +qconfig +qdq +qdqops +qint +qlinear +qlinearops +qnli +qoperator +qpa +qps +qq +qqp +qscheme +qsl +qstat +qsub +qsvr +qtcreator +qtype +quant +quantile +quantizable +quantization +quantizations +quantize +quantized +quantizer +quantizes +quantizing +quickstart +qweight +rAjHyXhTzz +rajpurkar +ramdisk +randn +rc +rcnn +reStructuredText +readme +readthedocs +realtime +rebase +recommonmark +recordio +recurse +regex +registerCommand +rehm +reinstall +relase +releaser +relu +repo +repo's +repos +repo’s +representating +requantize +resampled +resampling +rescale +resize +resized +resnest +resnet +resnetv +resnext +ressource +ressources +ret +retinaNet +retinanet +retweets +reusability +rf +rfcn +rgb +rmax +rmin +rn +rng +rnnt +ro +roberta +roc +rosanneliu +rougeL +rougeLsum +rowanz +rowspan +rst +rtd +rtn +runhooks +runtime +ruserok +rusiaaman +rw +rwightman +sacremoses +sagemaker +salesforce +salti +samsum +sanh +sata +scalable +scaler +scatterFillKernel +sched +scheduler's +scikit +scm +scp +screenshots +sd +sdist +se +sed +seg +segm +segmentations +seid +senet +sentencepiece +sequenceDiagram +serializable +serverIP +serverextension +sess +settingPath +setuptools +setvars +sexualized +sgd +sgmoid +sharded +sharepoint +shouldn +showEvent +shufflenet +shufflenetv +shuoyang +sigmoid +signup +sigopt +sklearn +skx +skylion +smoothes +smoothquant +sndfile +snowly +socio +softmax +solutionslibrary +somain +sota +sox +spacings +spacy +spanbert +sparsednn +sparsification +sparsified +spearmanr +specificities +splitted +spm +spmm +spnasnet +spr +sqSiUy +sqlalchemy +sqrt +squadpy +squadv 
+squeezebert +squeezenet +src +sryqufw +ssd +sshleifer +sst +stackoverflow +startswith +startup +stderr +stdout +stds +stefan +stemblock +stepsize +str +strided +struct +sts +stsb +styleguide +subexpression +subfolder +subfolders +subgraph +subgraphStyle +submodule +submodules +subprocesses +subsample +subtoken +sudo +summarization +superbench +supernet +superseeds +suported +sut +sv +svg +swagaf +sym +symlink +symlinked +symlinks +syncedreview +synset +sys +tLoss +tanh +tb +tbe +tbody +td +techdecoded +tencent +tensor's +tensorBoard +tensorImageSize +tensorboard +tensorcore +tensorflow +tensorrt +teq +teraoperations +tesla +testability +textattack +tf +tfhub +tflite +tfp +tfrecord +tfrecords +tg +tgt +tgz +th +thead +thecvf +thepath +thp +thres +thrs +timeline +timestamps +tl +tlkh +tmp +tmpfs +toc +toctree +todo +tokenization +tokenize +tokenized +tokenizer +tokenizers +tokenizing +tol +tolist +toml +toolchain +toolchains +topRight +topk +topologies +torchaudio +torchdynamo +torchscript +torchtext +torchvision +toronto +totalizing +tp +tpe +tpu +tqdm +traceback +trainings +trainval +trainvaltest +transfo +travis +trigram +trt +tsl +tstandley +tsv +tunings +tuningusage +tuple +tuples +twlt +txt +uXXXXX +uber +ubuntu +ubyte +ui +uint +uk +ultraface +ultralytics +un +uncomment +uncompress +unet +unidecode unilm -aten -hardswish -gRPC -msg -OaaS -RESTful -parallelizing -cdf -fafdcd -WebSocket -ip -Extensibility -ResultMonitor -TaskDB -TaskLauncher -TaskMonitor -classDiagram -extensibility -oaas -parallelize -sequenceDiagram -wip -adaptor's -autonumber -ceee -htmlLabels -subgraphStyle -itemStyle -NewDataloader -subprocesses -LayoutLM -bfc -cb -CCE -CCFF -FFFFFF -classDef -bdf -bmm -AWQ -awq -GPTQ -gptq -RTN -rtn -gptq -percdamp -Frantar -Ji -mose -DeQuantize -FakeQuant -FrameworkModel -boolean -deepcopy -optype -perchannel -LokuUdeVg -Ntsk -PLg -UKERBljNxC -YADWOFuj -cloudblogs -dmjx -fdb -jJA +uniq +unittest +unref +unscale +unsqueeze +unstack +upenn +uploader +upscaled +upstreamed +url +username +userspace +usp +usr +util +utils +utm +valminusminival +valset +vec +veronikayurchuk +versioned +vgg +viewlet +viewpage +visualstudio +vmware +vnni +voc +voxel +voxels +vram +vscode wWLes +waleedka +wangg +warmup +wav +wd +webcam +webite +webpage +wedam +weiaicunzai +weixin +wget +whitehat +whitelist +whl +wikipedia +wikitext +wip +wmt +wnd +wnli +workdir +workflow +workflows +workspace +wrt +wsl +ww +wwm +www xHKe -PR -Chatbot -chatbot -fba -hostname -qweight -qconfig -TEQ -teq -WeightOnlyLinear -McKinstry -Migacz -MinMax -Szymon -Vanhoucke -ONNXCommunityMeetup -luYBWA -pQ -CodeXGLUE -codebert -codexglue -jsonl +xV +xYNrZdEAnrHk +xad +xcb +xception +xchannel +xcode +xd +xeon xgb xgboost -hpo -HPO -arange -nf -Dettmers -Qlora -llms -NormalFloat -QLoRA -TimDettmers -bitsandbytes -bnb -ccedc \ No newline at end of file +xl +xlarge +xlm +xlnet +xmggbmga +xml +xnli +xpu +xsum +xvf +xvzf +xxxx +xxy +xxz +xywh +xyxy +xz +xzvf +yacs +yaml +yamls +yann +yao +yarnpkg +yizhu +yjxiong +yjyh +yolo +yolov +yoshitomo +yosinski +yottx +youtooz +youtube +yrw +za +zalandoresearch +zenodo +zfnet +zh +zhang +zhanghang +zhihu +zhongyuezhang +zhuanlan +zihangdai +zk +znoexecstack +znow +zp +zrelro +zrl +zxvf +Éric diff --git a/.azure-pipelines/scripts/models/collect_log_all.py b/.azure-pipelines/scripts/models/collect_log_all.py index 85d47cf89e9..45c297af401 100644 --- a/.azure-pipelines/scripts/models/collect_log_all.py +++ b/.azure-pipelines/scripts/models/collect_log_all.py @@ -1,5 +1,6 @@ import argparse import os 
+ import requests parser = argparse.ArgumentParser(allow_abbrev=False) @@ -12,34 +13,34 @@ def main(): file_dir = args.logs_dir - summary_content = ['OS;Platform;Framework;Version;Precision;Model;Mode;Type;BS;Value;Url\n'] - tuning_info_content = ['OS;Platform;Framework;Version;Model;Strategy;Tune_time\n'] + summary_content = ["OS;Platform;Framework;Version;Precision;Model;Mode;Type;BS;Value;Url\n"] + tuning_info_content = ["OS;Platform;Framework;Version;Model;Strategy;Tune_time\n"] url_dict = parse_download_url() # get full path of all files for root, dirs, files in os.walk(file_dir): for name in files: file_name = os.path.join(root, name) print(file_name) - if '_summary.log' in name: + if "_summary.log" in name: for line in open(file_name, "r"): - if 'linux' in line: + if "linux" in line: line = line.replace("", parse_summary_log(line, url_dict)) summary_content.append(line) - if '_tuning_info.log' in name: + if "_tuning_info.log" in name: for line in open(file_name, "r"): - if 'linux' in line: + if "linux" in line: line = line.replace("", parse_tuning_log(line, url_dict)) tuning_info_content.append(line) - f = open(args.output_dir + '/summary.log', "a") + f = open(args.output_dir + "/summary.log", "a") for summary in summary_content: f.writelines(str(summary)) - f2 = open(args.output_dir + '/tuning_info.log', "a") + f2 = open(args.output_dir + "/tuning_info.log", "a") for tuning_info in tuning_info_content: f2.writelines(str(tuning_info)) def parse_tuning_log(line, url_dict): - """Parsing {Framework}-{Model}-tune.log to get tuning result""" + """Parsing {Framework}-{Model}-tune.log to get tuning result.""" result = line.split(";") OS, Platform, Framework, Version, Model, Strategy, Tune_time, Tuning_trials, URL, __ = result file_name = f"{Framework}-{Model}-tune.log" @@ -49,7 +50,7 @@ def parse_tuning_log(line, url_dict): def parse_summary_log(line, url_dict): - """Parse {Framework}-{Model}-tune.log to get benchmarking accuracy result""" + """Parse {Framework}-{Model}-tune.log to get benchmarking accuracy result.""" result = line.split(";") OS, Platform, Framework, Version, Precision, Model, Mode, Type, BS, Value, Url = result file_name = f"{Framework}-{Model}-tune.log" @@ -59,8 +60,10 @@ def parse_summary_log(line, url_dict): def parse_download_url(): - """Get azure artifact information""" - azure_artifact_api_url = f'https://dev.azure.com/lpot-inc/neural-compressor/_apis/build/builds/{args.build_id}/artifacts?api-version=5.1' + """Get azure artifact information.""" + azure_artifact_api_url = ( + f"https://dev.azure.com/lpot-inc/neural-compressor/_apis/build/builds/{args.build_id}/artifacts?api-version=5.1" + ) azure_artifacts_data = dict(requests.get(azure_artifact_api_url).json().items()) artifact_count = azure_artifacts_data.get("count") artifact_value = azure_artifacts_data.get("value") @@ -72,5 +75,5 @@ def parse_download_url(): return url_dict -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/.azure-pipelines/scripts/models/collect_log_model.py b/.azure-pipelines/scripts/models/collect_log_model.py index b219bfb3624..224911a3446 100644 --- a/.azure-pipelines/scripts/models/collect_log_model.py +++ b/.azure-pipelines/scripts/models/collect_log_model.py @@ -12,10 +12,14 @@ parser.add_argument("--stage", type=str, default="collect_log") parser.add_argument("--gap", type=float, default=0.05) args = parser.parse_args() -print('====== collecting model test log =======') -OS = 'linux' -PLATFORM = 'icx' -URL = 
'https://dev.azure.com/lpot-inc/neural-compressor/_build/results?buildId='+args.build_id+'&view=artifacts&pathAsName=false&type=publishedArtifacts' +print("====== collecting model test log =======") +OS = "linux" +PLATFORM = "icx" +URL = ( + "https://dev.azure.com/lpot-inc/neural-compressor/_build/results?buildId=" + + args.build_id + + "&view=artifacts&pathAsName=false&type=publishedArtifacts" +) OOB_MODEL_LIST = ["darknet19", "densenet-121", "resnet-101"] @@ -23,8 +27,8 @@ def get_model_tuning_dict_results(): tuning_result_dict = {} if os.path.exists(tuning_log): - print('tuning log found') - tmp = {'fp32_acc': 0, 'int8_acc': 0, 'tuning_trials': 0} + print("tuning log found") + tmp = {"fp32_acc": 0, "int8_acc": 0, "tuning_trials": 0} with open(tuning_log, "r") as f: for line in f: parse_tuning_line(line, tmp) @@ -36,11 +40,11 @@ def get_model_tuning_dict_results(): "Framework": args.framework, "Version": args.fwk_ver, "Model": args.model, - "Strategy": tmp.get('strategy', 'basic'), - "Tune_time": tmp.get('tune_time'), + "Strategy": tmp.get("strategy", "basic"), + "Tune_time": tmp.get("tune_time"), } benchmark_accuracy_result_dict = { - 'int8': { + "int8": { "OS": OS, "Platform": PLATFORM, "Framework": args.framework, @@ -49,10 +53,10 @@ def get_model_tuning_dict_results(): "Mode": "Inference", "Type": "Accuracy", "BS": 1, - "Value": tmp.get('int8_acc'), + "Value": tmp.get("int8_acc"), "Url": URL, }, - 'fp32': { + "fp32": { "OS": OS, "Platform": PLATFORM, "Framework": args.framework, @@ -61,9 +65,9 @@ def get_model_tuning_dict_results(): "Mode": "Inference", "Type": "Accuracy", "BS": 1, - "Value": tmp.get('fp32_acc'), + "Value": tmp.get("fp32_acc"), "Url": URL, - } + }, } return tuning_result_dict, benchmark_accuracy_result_dict @@ -114,7 +118,9 @@ def get_refer_data(): for value in values: precision = value[keys.index("Precision")] Type = value[keys.index("Type")] - result[f"{precision}_{Type}"] = float(value[keys.index("Value")]) if value[keys.index("Value")]!="unknown" else "unknown" + result[f"{precision}_{Type}"] = ( + float(value[keys.index("Value")]) if value[keys.index("Value")] != "unknown" else "unknown" + ) return result else: print(f"refer log file: {refer_log} not found") @@ -127,46 +133,71 @@ def collect_log(): print(f"tuning log dir is {tuning_log}") # get model tuning results if os.path.exists(tuning_log): - print('tuning log found') - tmp = {'fp32_acc': 0, 'int8_acc': 0, 'tuning_trials': 0} + print("tuning log found") + tmp = {"fp32_acc": 0, "int8_acc": 0, "tuning_trials": 0} with open(tuning_log, "r") as f: for line in f: parse_tuning_line(line, tmp) print(tmp) # oob_model no need acc - if ((args.model in OOB_MODEL_LIST) and args.framework == "tensorflow"): - tmp['fp32_acc'], tmp['int8_acc'] = "unknown", "unknown" - - results.append('{};{};{};{};FP32;{};Inference;Accuracy;1;{};{}\n'.format( - OS, PLATFORM, args.framework, args.fwk_ver, args.model, tmp['fp32_acc'], "")) - results.append('{};{};{};{};INT8;{};Inference;Accuracy;1;{};{}\n'.format( - OS, PLATFORM, args.framework, args.fwk_ver, args.model, tmp['int8_acc'], "")) - tuning_infos.append(';'.join([OS, PLATFORM, args.framework, args.fwk_ver, args.model, tmp.get('strategy', 'basic'), str( - tmp['tune_time']), str(tmp['tuning_trials']), "", f"{round(tmp['max_mem_size'] / tmp['total_mem_size'] * 100, 4)}%"])+'\n') + if (args.model in OOB_MODEL_LIST) and args.framework == "tensorflow": + tmp["fp32_acc"], tmp["int8_acc"] = "unknown", "unknown" + + results.append( + 
"{};{};{};{};FP32;{};Inference;Accuracy;1;{};{}\n".format( + OS, PLATFORM, args.framework, args.fwk_ver, args.model, tmp["fp32_acc"], "" + ) + ) + results.append( + "{};{};{};{};INT8;{};Inference;Accuracy;1;{};{}\n".format( + OS, PLATFORM, args.framework, args.fwk_ver, args.model, tmp["int8_acc"], "" + ) + ) + tuning_infos.append( + ";".join( + [ + OS, + PLATFORM, + args.framework, + args.fwk_ver, + args.model, + tmp.get("strategy", "basic"), + str(tmp["tune_time"]), + str(tmp["tuning_trials"]), + "", + f"{round(tmp['max_mem_size'] / tmp['total_mem_size'] * 100, 4)}%", + ] + ) + + "\n" + ) # get model benchmark results - for precision in ['int8', 'fp32']: + for precision in ["int8", "fp32"]: throughput = 0.0 bs = 1 for root, dirs, files in os.walk(args.logs_dir): for name in files: file_name = os.path.join(root, name) print(file_name) - if 'performance-'+precision in name: + if "performance-" + precision in name: for line in open(file_name, "r"): - result= parse_perf_line(line) + result = parse_perf_line(line) if result.get("throughput"): throughput += result.get("throughput") if result.get("batch_size"): bs = result.get("batch_size") - results.append('{};{};{};{};{};{};Inference;Performance;{};{};{}\n'.format(OS, PLATFORM, args.framework, args.fwk_ver, precision.upper(), args.model, bs, throughput, URL)) + results.append( + "{};{};{};{};{};{};Inference;Performance;{};{};{}\n".format( + OS, PLATFORM, args.framework, args.fwk_ver, precision.upper(), args.model, bs, throughput, URL + ) + ) # write model logs - f = open(args.output_dir+'/'+args.framework+'_'+args.model+'_summary.log', "a") + f = open(args.output_dir + "/" + args.framework + "_" + args.model + "_summary.log", "a") f.writelines("OS;Platform;Framework;Version;Precision;Model;Mode;Type;BS;Value;Url\n") for result in results: f.writelines(str(result)) - f2 = open(args.output_dir + '/'+args.framework+'_'+args.model+'_tuning_info.log', "a") + f2 = open(args.output_dir + "/" + args.framework + "_" + args.model + "_tuning_info.log", "a") f2.writelines("OS;Platform;Framework;Version;Model;Strategy;Tune_time\n") for tuning_info in tuning_infos: f2.writelines(str(tuning_info)) @@ -175,40 +206,43 @@ def collect_log(): def parse_tuning_line(line, tmp): tuning_strategy = re.search(r"Tuning strategy:\s+([A-Za-z]+)", line) if tuning_strategy and tuning_strategy.group(1): - tmp['strategy'] = tuning_strategy.group(1) + tmp["strategy"] = tuning_strategy.group(1) - baseline_acc = re.search(r"FP32 baseline is:\s+\[Accuracy:\s(\d+(\.\d+)?), Duration \(seconds\):\s*(\d+(\.\d+)?)\]", - line) + baseline_acc = re.search( + r"FP32 baseline is:\s+\[Accuracy:\s(\d+(\.\d+)?), Duration \(seconds\):\s*(\d+(\.\d+)?)\]", line + ) if baseline_acc and baseline_acc.group(1): - tmp['fp32_acc'] = float(baseline_acc.group(1)) + tmp["fp32_acc"] = float(baseline_acc.group(1)) - tuned_acc = re.search(r"Best tune result is:\s+\[Accuracy:\s(\d+(\.\d+)?), Duration \(seconds\):\s(\d+(\.\d+)?)\]", line) + tuned_acc = re.search( + r"Best tune result is:\s+\[Accuracy:\s(\d+(\.\d+)?), Duration \(seconds\):\s(\d+(\.\d+)?)\]", line + ) if tuned_acc and tuned_acc.group(1): - tmp['int8_acc'] = float(tuned_acc.group(1)) + tmp["int8_acc"] = float(tuned_acc.group(1)) tune_trial = re.search(r"Tune \d*\s*result is:", line) if tune_trial: - tmp['tuning_trials'] += 1 + tmp["tuning_trials"] += 1 tune_time = re.search(r"Tuning time spend:\s+(\d+(\.\d+)?)s", line) if tune_time and tune_time.group(1): - tmp['tune_time'] = int(tune_time.group(1)) + tmp["tune_time"] = int(tune_time.group(1)) 
fp32_model_size = re.search(r"The input model size is:\s+(\d+(\.\d+)?)", line) if fp32_model_size and fp32_model_size.group(1): - tmp['fp32_model_size'] = int(fp32_model_size.group(1)) + tmp["fp32_model_size"] = int(fp32_model_size.group(1)) int8_model_size = re.search(r"The output model size is:\s+(\d+(\.\d+)?)", line) if int8_model_size and int8_model_size.group(1): - tmp['int8_model_size'] = int(int8_model_size.group(1)) + tmp["int8_model_size"] = int(int8_model_size.group(1)) total_mem_size = re.search(r"Total resident size\D*([0-9]+)", line) if total_mem_size and total_mem_size.group(1): - tmp['total_mem_size'] = float(total_mem_size.group(1)) + tmp["total_mem_size"] = float(total_mem_size.group(1)) max_mem_size = re.search(r"Maximum resident set size\D*([0-9]+)", line) if max_mem_size and max_mem_size.group(1): - tmp['max_mem_size'] = float(max_mem_size.group(1)) + tmp["max_mem_size"] = float(max_mem_size.group(1)) def parse_perf_line(line): @@ -240,7 +274,7 @@ def check_status(precision, precision_upper, check_accuracy=False): assert abs(current_accuracy - refer_accuracy) <= 0.001 -if __name__ == '__main__': +if __name__ == "__main__": tuning_log = os.path.join(args.logs_dir, f"{args.framework}-{args.model}-tune.log") refer = get_refer_data() diff --git a/.azure-pipelines/scripts/models/new_benchmark.py b/.azure-pipelines/scripts/models/new_benchmark.py index daae8f0d415..b96eba16b28 100644 --- a/.azure-pipelines/scripts/models/new_benchmark.py +++ b/.azure-pipelines/scripts/models/new_benchmark.py @@ -33,7 +33,7 @@ def get_architecture(): p2 = subprocess.Popen(["grep", "Architecture"], stdin=p1.stdout, stdout=subprocess.PIPE) p3 = subprocess.Popen(["cut", "-d", ":", "-f2"], stdin=p2.stdout, stdout=subprocess.PIPE) res = None - for line in iter(p3.stdout.readline, b''): + for line in iter(p3.stdout.readline, b""): res = line.decode("utf-8").strip() return res @@ -44,7 +44,7 @@ def get_threads_per_core(): p2 = subprocess.Popen(["grep", "Thread(s) per core"], stdin=p1.stdout, stdout=subprocess.PIPE) p3 = subprocess.Popen(["cut", "-d", ":", "-f2"], stdin=p2.stdout, stdout=subprocess.PIPE) res = None - for line in iter(p3.stdout.readline, b''): + for line in iter(p3.stdout.readline, b""): res = line.decode("utf-8").strip() return res @@ -55,7 +55,7 @@ def get_threads(): p2 = subprocess.Popen(["grep", "processor"], stdin=p1.stdout, stdout=subprocess.PIPE) p3 = subprocess.Popen(["cut", "-d", ":", "-f2"], stdin=p2.stdout, stdout=subprocess.PIPE) res = [] - for line in iter(p3.stdout.readline, b''): + for line in iter(p3.stdout.readline, b""): res.append(line.decode("utf-8").strip()) return res @@ -66,7 +66,7 @@ def get_physical_ids(): p2 = subprocess.Popen(["grep", "physical id"], stdin=p1.stdout, stdout=subprocess.PIPE) p3 = subprocess.Popen(["cut", "-d", ":", "-f2"], stdin=p2.stdout, stdout=subprocess.PIPE) res = [] - for line in iter(p3.stdout.readline, b''): + for line in iter(p3.stdout.readline, b""): res.append(line.decode("utf-8").strip()) return res @@ -77,7 +77,7 @@ def get_core_ids(): p2 = subprocess.Popen(["grep", "core id"], stdin=p1.stdout, stdout=subprocess.PIPE) p3 = subprocess.Popen(["cut", "-d", ":", "-f2"], stdin=p2.stdout, stdout=subprocess.PIPE) res = [] - for line in iter(p3.stdout.readline, b''): + for line in iter(p3.stdout.readline, b""): res.append(line.decode("utf-8").strip()) return res @@ -98,13 +98,13 @@ def config_instance(cores_per_instance, num_of_instance): """Configure the multi-instance commands and trigger benchmark with sub process.""" core = [] - if 
(get_architecture() == 'aarch64' and int(get_threads_per_core()) > 1): - raise OSError('Currently no support on AMD with hyperthreads') + if get_architecture() == "aarch64" and int(get_threads_per_core()) > 1: + raise OSError("Currently no support on AMD with hyperthreads") else: bounded_threads = get_bounded_threads(get_core_ids(), get_threads(), get_physical_ids()) for i in range(0, num_of_instance): - if get_architecture() == 'x86_64': + if get_architecture() == "x86_64": core_list_idx = np.arange(0, cores_per_instance) + i * cores_per_instance core_list = np.array(bounded_threads)[core_list_idx] else: @@ -113,9 +113,9 @@ def config_instance(cores_per_instance, num_of_instance): for i in range(len(core)): core[i] = [str(j) for j in core[i]] - core[i] = ','.join(core[i]) + core[i] = ",".join(core[i]) - core = ';'.join(core) + core = ";".join(core) return core diff --git a/.azure-pipelines/scripts/models/update_yaml_config.py b/.azure-pipelines/scripts/models/update_yaml_config.py index c4af9b3c083..b594bd2c7f5 100644 --- a/.azure-pipelines/scripts/models/update_yaml_config.py +++ b/.azure-pipelines/scripts/models/update_yaml_config.py @@ -1,10 +1,10 @@ import argparse -import re import os -import psutil +import platform +import re from typing import Optional, Union -import platform +import psutil system = platform.system() try: @@ -39,19 +39,19 @@ def update_yaml_dataset(yaml, framework, dataset_location): patterns = { "root_path": { - "pattern": r'root:.*/path/to/(calibration|evaluation)/dataset/?', + "pattern": r"root:.*/path/to/(calibration|evaluation)/dataset/?", "replacement": f"root: {dataset_location}", }, "data_path": { - "pattern": r'data_path:.*/path/to/(calibration|evaluation)/dataset/?', + "pattern": r"data_path:.*/path/to/(calibration|evaluation)/dataset/?", "replacement": f"data_path: {dataset_location}", }, "image_list": { - "pattern": r'image_list:.*/path/to/(calibration|evaluation)/label/?', + "pattern": r"image_list:.*/path/to/(calibration|evaluation)/label/?", "replacement": f"image_list: {val_txt_location}", }, "data_dir": { - "pattern": r'data_dir:.*/path/to/dataset/?', + "pattern": r"data_dir:.*/path/to/dataset/?", "replacement": f"data_dir: {dataset_location}", }, } @@ -69,11 +69,11 @@ def update_yaml_dataset(yaml, framework, dataset_location): train_dataset = dataset_location + f"{os.path.sep}" + "train" patterns = { "calibration_dataset": { - "pattern": r'root:.*/path/to/calibration/dataset/?', + "pattern": r"root:.*/path/to/calibration/dataset/?", "replacement": f"root: {train_dataset}", }, "evaluation_dataset": { - "pattern": r'root:.*/path/to/evaluation/dataset/?', + "pattern": r"root:.*/path/to/evaluation/dataset/?", "replacement": f"root: {val_dataset}", }, } @@ -88,9 +88,20 @@ def update_yaml_dataset(yaml, framework, dataset_location): config.write(line) -def update_yaml_config_tuning(yaml_file, strategy = None, mode = None, batch_size = None, iteration = None, - max_trials = None, algorithm = None, timeout = None, strategy_token = None, - sampling_size = None, dtype = None, tf_new_api = None): +def update_yaml_config_tuning( + yaml_file, + strategy=None, + mode=None, + batch_size=None, + iteration=None, + max_trials=None, + algorithm=None, + timeout=None, + strategy_token=None, + sampling_size=None, + dtype=None, + tf_new_api=None, +): with open(yaml_file) as f: yaml_config = yaml.round_trip_load(f, preserve_quotes=True) @@ -124,11 +135,13 @@ def update_yaml_config_tuning(yaml_file, strategy = None, mode = None, batch_siz strategy_name = 
prev_strategy.get("name", None) prev_strategy.update({"name": strategy}) if strategy == "sigopt": - prev_strategy.update({ - "sigopt_api_token": strategy_token, - "sigopt_project_id": "lpot", - "sigopt_experiment_name": "lpot-tune", - }) + prev_strategy.update( + { + "sigopt_api_token": strategy_token, + "sigopt_project_id": "lpot", + "sigopt_experiment_name": "lpot-tune", + } + ) if strategy == "hawq": prev_strategy.update({"loss": "CrossEntropyLoss"}) print(f"Changed {strategy_name} to {strategy}") @@ -140,9 +153,7 @@ def update_yaml_config_tuning(yaml_file, strategy = None, mode = None, batch_siz tuning_config = yaml_config.get("tuning", {}) prev_exit_policy = tuning_config.get("exit_policy", {}) if not prev_exit_policy: - tuning_config.update({"exit_policy": { - "max_trials": max_trials - }}) + tuning_config.update({"exit_policy": {"max_trials": max_trials}}) else: prev_max_trials = prev_exit_policy.get("max_trials", None) prev_exit_policy.update({"max_trials": max_trials}) @@ -150,7 +161,7 @@ def update_yaml_config_tuning(yaml_file, strategy = None, mode = None, batch_siz except Exception as e: print(f"[ WARNING ] {e}") - if mode == 'accuracy': + if mode == "accuracy": try: # delete performance part in yaml if exist performance = yaml_config.get("evaluation", {}).get("performance", {}) @@ -183,7 +194,7 @@ def update_yaml_config_tuning(yaml_file, strategy = None, mode = None, batch_siz except Exception as e: print(f"[ WARNING ] {e}") - if batch_size and mode == 'latency': + if batch_size and mode == "latency": try: dataloader = yaml_config.get("evaluation", {}).get("performance", {}).get("dataloader", {}) prev_batch_size = dataloader.get("batch_size", None) @@ -222,39 +233,39 @@ def update_yaml_config_tuning(yaml_file, strategy = None, mode = None, batch_siz except Exception as e: print(f"[ WARNING ] {e}") - print(f"====== update_yaml_config_tuning ========") + print("====== update_yaml_config_tuning ========") yaml_content = yaml.round_trip_dump(yaml_config) - with open(yaml_file, 'w') as output_file: + with open(yaml_file, "w") as output_file: output_file.write(yaml_content) -def update_yaml_config_benchmark_acc(yaml_path: str, batch_size = None): +def update_yaml_config_benchmark_acc(yaml_path: str, batch_size=None): with open(yaml_path) as f: yaml_config = yaml.round_trip_load(f, preserve_quotes=True) try: accuracy = yaml_config.get("evaluation", {}).get("accuracy", {}) if not accuracy: raise AttributeError - dataloader = accuracy.get('dataloader', {}) + dataloader = accuracy.get("dataloader", {}) if dataloader: - dataloader.update({'batch_size': batch_size}) - configs = accuracy.get('configs', {}) + dataloader.update({"batch_size": batch_size}) + configs = accuracy.get("configs", {}) if configs: - del accuracy['configs'] + del accuracy["configs"] except Exception as e: print(f"[ WARNING ] {e}") - print(f"====== update_yaml_config_benchmark_acc ========") + print("====== update_yaml_config_benchmark_acc ========") yaml_content = yaml.round_trip_dump(yaml_config) - with open(yaml_path, 'w') as output_file: + with open(yaml_path, "w") as output_file: output_file.write(yaml_content) -def update_yaml_config_benchmark_perf(yaml_path: str, batch_size = None, multi_instance = None): +def update_yaml_config_benchmark_perf(yaml_path: str, batch_size=None, multi_instance=None): # Get cpu information for multi-instance total_cores = psutil.cpu_count(logical=False) total_sockets = 1 @@ -262,7 +273,7 @@ def update_yaml_config_benchmark_perf(yaml_path: str, batch_size = None, multi_i 
ncores_per_instance = ncores_per_socket iters = 100 - if multi_instance=='true': + if multi_instance == "true": ncores_per_instance = 4 iters = 500 @@ -272,30 +283,32 @@ def update_yaml_config_benchmark_perf(yaml_path: str, batch_size = None, multi_i performance = yaml_config.get("evaluation", {}).get("performance", {}) if not performance: raise AttributeError - dataloader = performance.get('dataloader', {}) + dataloader = performance.get("dataloader", {}) if dataloader: - dataloader.update({'batch_size': batch_size}) - performance.update({'iteration': iters}) - configs = performance.get('configs', {}) + dataloader.update({"batch_size": batch_size}) + performance.update({"iteration": iters}) + configs = performance.get("configs", {}) if not configs: raise AttributeError else: - configs.update({ - 'cores_per_instance': int(ncores_per_instance), - 'num_of_instance': int(ncores_per_socket // ncores_per_instance) - }) - for attr in ['intra_num_of_threads', 'inter_num_of_threads', 'kmp_blocktime']: + configs.update( + { + "cores_per_instance": int(ncores_per_instance), + "num_of_instance": int(ncores_per_socket // ncores_per_instance), + } + ) + for attr in ["intra_num_of_threads", "inter_num_of_threads", "kmp_blocktime"]: if configs.get(attr): del configs[attr] print(configs) except Exception as e: print(f"[ WARNING ] {e}") - print(f"====== update_yaml_config_benchmark_perf ========") + print("====== update_yaml_config_benchmark_perf ========") yaml_content = yaml.round_trip_dump(yaml_config) - with open(yaml_path, 'w') as output_file: + with open(yaml_path, "w") as output_file: output_file.write(yaml_content) @@ -303,7 +316,7 @@ def update_yaml_config_benchmark_perf(yaml_path: str, batch_size = None, multi_i args = parse_args() update_yaml_dataset(args.yaml, args.framework, args.dataset_location) update_yaml_config_tuning(args.yaml, strategy=args.strategy) - print('===== multi_instance={} ===='.format(args.multi_instance)) - if args.new_benchmark=='true': + print("===== multi_instance={} ====".format(args.multi_instance)) + if args.new_benchmark == "true": update_yaml_config_benchmark_acc(args.yaml, batch_size=args.batch_size) update_yaml_config_benchmark_perf(args.yaml, batch_size=args.batch_size, multi_instance=args.multi_instance) diff --git a/.azure-pipelines/ut-basic-no-cover.yml b/.azure-pipelines/ut-basic-no-cover.yml index 84a60b01f1b..1bd4c3574d3 100644 --- a/.azure-pipelines/ut-basic-no-cover.yml +++ b/.azure-pipelines/ut-basic-no-cover.yml @@ -113,4 +113,3 @@ stages: # utArtifact: "ut-ipex_v2.1" # utTestMode: "no-coverage" # utContainerName: "utTest-no-coverage" - diff --git a/.azure-pipelines/ut-neural-insights.yaml b/.azure-pipelines/ut-neural-insights.yaml index 328c34d1d54..5618d38cb79 100644 --- a/.azure-pipelines/ut-neural-insights.yaml +++ b/.azure-pipelines/ut-neural-insights.yaml @@ -28,4 +28,4 @@ stages: utScriptFileName: 'run_neural_insights' uploadPath: $(UPLOAD_PATH) utArtifact: 'ut-neural-insights' - utContainerName: "utTest-nInsights" \ No newline at end of file + utContainerName: "utTest-nInsights" diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 0a307e1d5e6..ec06b4bada1 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -17,11 +17,10 @@ jobs: git config --local --get remote.origin.url cd docs/build_docs bash build.sh latest - + - name: Push to github uses: peaceiris/actions-gh-pages@v3 with: github_token: ${{ secrets.GITHUB_TOKEN }} publish_dir: ./build_tmp/gh-pages publish_branch: gh-pages - diff --git 
a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ded4a3251da..16a8ab3c30a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -43,7 +43,7 @@ repos: args: [--unique] - id: requirements-txt-fixer - id: trailing-whitespace - files: (.*\.(py|md|rst|cmake|yaml|yml))$ + files: (.*\.(py|rst|cmake|yaml|yml))$ - repo: https://github.com/Lucas-C/pre-commit-hooks rev: v1.5.4 @@ -104,15 +104,8 @@ repos: args: [--line-length=120, --skip-errors] exclude: docs/source-app - - repo: https://github.com/pre-commit/mirrors-prettier - rev: v3.0.2 - hooks: - - id: prettier - files: \.(markdown|md)$ - args: [--prose-wrap, never] - - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.0.285 + rev: v0.0.286 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix, --no-cache] diff --git a/README.md b/README.md index d93998f7895..5881ef2174a 100644 --- a/README.md +++ b/README.md @@ -48,14 +48,16 @@ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/mobil from neural_compressor.data import DataLoader, Datasets from neural_compressor.config import PostTrainingQuantConfig -dataset = Datasets('tensorflow')['dummy'](shape=(1, 224, 224, 3)) -dataloader = DataLoader(framework='tensorflow', dataset=dataset) +dataset = Datasets("tensorflow")["dummy"](shape=(1, 224, 224, 3)) +dataloader = DataLoader(framework="tensorflow", dataset=dataset) from neural_compressor.quantization import fit + q_model = fit( model="./mobilenet_v1_1.0_224_frozen.pb", conf=PostTrainingQuantConfig(), - calib_dataloader=dataloader) + calib_dataloader=dataloader, +) ``` ## Documentation @@ -157,4 +159,3 @@ q_model = fit( ## Research Collaborations Welcome to raise any interesting research ideas on model compression techniques and feel free to reach us ([inc.maintainers@intel.com](mailto:inc.maintainers@intel.com)). Look forward to our collaborations on Intel Neural Compressor! 
- diff --git a/docs/build_docs/source/conf.py b/docs/build_docs/source/conf.py index 0436ff2c4b5..0100e43d923 100644 --- a/docs/build_docs/source/conf.py +++ b/docs/build_docs/source/conf.py @@ -5,10 +5,11 @@ import os import sys -sys.path.insert(0, os.path.abspath('../../neural_compressor/')) + +sys.path.insert(0, os.path.abspath("../../neural_compressor/")) import version as ver -version= ver.__version__ +version = ver.__version__ release = version with open("version.txt", "w") as f: @@ -19,68 +20,69 @@ # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information -project = 'Intel® Neural Compressor' -copyright = '2022, Intel® Neural Compressor, Intel' -author = 'Intel® Neural Compressor developers' +project = "Intel® Neural Compressor" +copyright = "2022, Intel® Neural Compressor, Intel" +author = "Intel® Neural Compressor developers" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration extensions = [ - 'recommonmark', - 'sphinx_markdown_tables', - 'sphinx.ext.coverage', - 'sphinx.ext.autosummary', - 'sphinx_md', - 'autoapi.extension', - 'sphinx.ext.napoleon', - 'sphinx.ext.githubpages', - "sphinx.ext.linkcode" - ] - -autoapi_dirs = ['../../neural_compressor'] + "recommonmark", + "sphinx_markdown_tables", + "sphinx.ext.coverage", + "sphinx.ext.autosummary", + "sphinx_md", + "autoapi.extension", + "sphinx.ext.napoleon", + "sphinx.ext.githubpages", + "sphinx.ext.linkcode", +] + +autoapi_dirs = ["../../neural_compressor"] autoapi_root = "autoapi" autoapi_keep_files = True autoapi_add_toctree_entry = False autosummary_generate = True -autoapi_options = ['members', - 'show-module-summary'] +autoapi_options = ["members", "show-module-summary"] autoapi_ignore = [] -templates_path = ['_templates'] +templates_path = ["_templates"] -source_suffix = ['.rst', '.md'] +source_suffix = [".rst", ".md"] # The master toctree document. 
-master_doc = 'index' +master_doc = "index" exclude_patterns = [] -pygments_style = 'sphinx' +pygments_style = "sphinx" # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output -#html_theme = 'alabaster' -html_theme = 'sphinx_rtd_theme' +# html_theme = 'alabaster' +html_theme = "sphinx_rtd_theme" + +html_static_path = ["_static"] -html_static_path = ['_static'] def skip_util_classes(app, what, name, obj, skip, options): - if what=='property' or what=='method': + if what == "property" or what == "method": skip = True return skip + def setup(app): - app.add_css_file("custom.css") - app.connect("autoapi-skip-member", skip_util_classes) + app.add_css_file("custom.css") + app.connect("autoapi-skip-member", skip_util_classes) def linkcode_resolve(domain, info): - if domain != 'py': + if domain != "py": return None - if not info['module']: + if not info["module"]: return None - filename = info['module'].replace('.', '/') + filename = info["module"].replace(".", "/") return "{}/{}.py".format(repo_url, filename) diff --git a/docs/build_docs/source/index.rst b/docs/build_docs/source/index.rst index 0669c2c47d0..9780070fec3 100644 --- a/docs/build_docs/source/index.rst +++ b/docs/build_docs/source/index.rst @@ -20,6 +20,3 @@ Sections docs/source/legal_information.md docs/source/SECURITY.md Repo - - - diff --git a/docs/build_docs/sphinx-requirements.txt b/docs/build_docs/sphinx-requirements.txt index f8fbddd715b..51d3751bde8 100644 --- a/docs/build_docs/sphinx-requirements.txt +++ b/docs/build_docs/sphinx-requirements.txt @@ -1,6 +1,6 @@ -sphinx==6.1.1 -sphinx_rtd_theme recommonmark +sphinx==6.1.1 +sphinx-autoapi sphinx-markdown-tables sphinx-md -sphinx-autoapi +sphinx_rtd_theme diff --git a/docs/build_docs/update_html.py b/docs/build_docs/update_html.py index d936b78e57f..7d3a82ffd08 100644 --- a/docs/build_docs/update_html.py +++ b/docs/build_docs/update_html.py @@ -1,47 +1,52 @@ -import os, sys import glob +import os +import sys + def find_index_path(index_file): with open(index_file, "r") as f: lines = f.readlines() for line in lines: pos = line.find('index.html" class="icon icon-home"') - if pos<0: + if pos < 0: continue - pos1 = line.rfind("\"", 0, pos) - if pos1<0: + pos1 = line.rfind('"', 0, pos) + if pos1 < 0: return "" else: - return "../" + line[pos1+1: pos] + return "../" + line[pos1 + 1 : pos] return "ignore" + def update_version_link(version, folder_name, index_file): index_buf = "" index_path = find_index_path(index_file) - if index_path=='ignore': + if index_path == "ignore": return with open(index_file, "r") as f: index_buf = f.read() - key_str='
\n {}\n
'.format(version) - version_list = '''
+ key_str = '
\n {}\n
'.format(version) + version_list = """
{}▼

Click link above to switch version

-
'''.format(index_path, folder_name) - #print(index_buf.find(key_str)) +
""".format( + index_path, folder_name + ) + # print(index_buf.find(key_str)) index_buf = index_buf.replace(key_str, version_list) - #print(index_buf) + # print(index_buf) with open(index_file, "w") as f: f.write(index_buf) def update_source_url(version, folder_name, index_file): - if 'latest'!= folder_name: + if "latest" != folder_name: return - base_url = "class=\"reference external\" href=\"https://github.com/intel/neural-compressor/blob/{}/" - repo_url = base_url.format("v"+version) + base_url = 'class="reference external" href="/intel/neural-compressor/blob/{}/' + repo_url = base_url.format("v" + version) target = base_url.format("master") with open(index_file, "r") as f: index_buf = f.read() @@ -51,18 +56,19 @@ def update_source_url(version, folder_name, index_file): f.write(index_buf) - def main(folder, version): - folder_name=os.path.basename(folder) - for index_file in glob.glob('{}/**/*.html'.format(folder),recursive = True): + folder_name = os.path.basename(folder) + for index_file in glob.glob("{}/**/*.html".format(folder), recursive=True): update_version_link(version, folder_name, index_file) update_source_url(version, folder_name, index_file) + def help(me): print("python {} html_folder version".format(me)) -if __name__=="__main__": - if len(sys.argv)<3: + +if __name__ == "__main__": + if len(sys.argv) < 3: help(sys.argv[0]) sys.exit(1) diff --git a/docs/build_docs/update_version.py b/docs/build_docs/update_version.py index a2b75a5fdd6..de67f3060fe 100644 --- a/docs/build_docs/update_version.py +++ b/docs/build_docs/update_version.py @@ -1,29 +1,34 @@ -import os, sys +import os +import sys def main(folder, version): - folder_name=os.path.basename(folder) + folder_name = os.path.basename(folder) version_file = "{}/versions.html".format(os.path.dirname(folder)) - #print(version_file) + # print(version_file) ver_buf = "" with open(version_file, "r") as f: ver_buf = f.read() - if ver_buf.find(version)>=0: + if ver_buf.find(version) >= 0: return key_str = '
  • latest
  • ' - new_ver = '''
  • latest
  • -
  • {}
  • '''.format(version, version) + new_ver = """
  • latest
  • +
  • {}
  • """.format( + version, version + ) ver_buf = ver_buf.replace(key_str, new_ver) with open(version_file, "w") as f: f.write(ver_buf) + def help(me): print("python {} html_folder version".format(me)) -if __name__=="__main__": - if len(sys.argv)<3: + +if __name__ == "__main__": + if len(sys.argv) < 3: help(sys.argv[0]) sys.exit(1) diff --git a/docs/source/CODE_OF_CONDUCT.md b/docs/source/CODE_OF_CONDUCT.md index cd9322e8a00..8d023b77469 100644 --- a/docs/source/CODE_OF_CONDUCT.md +++ b/docs/source/CODE_OF_CONDUCT.md @@ -83,4 +83,4 @@ This Code of Conduct is adapted from the [Contributor Covenant][homepage], [vers For answers to common questions about this code of conduct, see the [FAQ][FAQ-page] page. -[FAQ-page]: https://www.contributor-covenant.org/faq \ No newline at end of file +[FAQ-page]: https://www.contributor-covenant.org/faq diff --git a/docs/source/FX.md b/docs/source/FX.md index 74924784b7f..8734fe94f9b 100644 --- a/docs/source/FX.md +++ b/docs/source/FX.md @@ -129,4 +129,3 @@ For users, you will see log output below if you model failed on symbolic trace m scores.append(dscore) return [boxes,labels,scores] ``` - diff --git a/docs/source/NAS.md b/docs/source/NAS.md index cb442fbc567..431e3d87104 100644 --- a/docs/source/NAS.md +++ b/docs/source/NAS.md @@ -45,7 +45,8 @@ Simplest launcher code if NAS configuration is defined in user-defined yaml. ```python from neural_compressor.experimental import NAS -agent = NAS('/path/to/user/yaml') + +agent = NAS("/path/to/user/yaml") results = agent.search() ``` @@ -56,14 +57,15 @@ NAS class also support `NASConfig` class as it's argument. ```python from neural_compressor.conf.config import NASConfig from neural_compressor.experimental import NAS -config = NASConfig(approach='dynas', search_algorithm='nsga2') -config.dynas.supernet = 'ofa_mbv3_d234_e346_k357_w1.2' -config.dynas.metrics = ['acc', 'macs'] + +config = NASConfig(approach="dynas", search_algorithm="nsga2") +config.dynas.supernet = "ofa_mbv3_d234_e346_k357_w1.2" +config.dynas.metrics = ["acc", "macs"] config.dynas.population = 50 config.dynas.num_evals = 250 -config.dynas.results_csv_path = 'search_results.csv' +config.dynas.results_csv_path = "search_results.csv" config.dynas.batch_size = 64 -config.dynas.dataset_path = '/datasets/imagenet-ilsvrc2012' #example +config.dynas.dataset_path = "/datasets/imagenet-ilsvrc2012" # example agent = NAS(config) results = agent.search() ``` diff --git a/docs/source/adaptor.md b/docs/source/adaptor.md index 94309834e9f..4ab68df0e83 100644 --- a/docs/source/adaptor.md +++ b/docs/source/adaptor.md @@ -50,8 +50,9 @@ class AbcAdaptor(Adaptor): def quantize(self, tune_cfg, model, dataloader, q_func=None): ... - def evaluate(self, model, dataloader, postprocess=None, - metric=None, measurer=None, iteration=-1, tensorboard=False): + def evaluate( + self, model, dataloader, postprocess=None, metric=None, measurer=None, iteration=-1, tensorboard=False + ): ... def query_fw_capability(self, model): @@ -119,38 +120,38 @@ Onnxruntime already has [quantization tools](https://github.com/microsoft/onnxru The base class ONNXRTAdaptor inherits from the Adaptor class. Please refer to [onnxrt.py](../neural_compressor/adaptor/onnxrt.py). ```python - @adaptor_registry - class ONNXRT_QLinearOpsAdaptor(ONNXRTAdaptor): - @dump_elapsed_time("Pass quantize model") - def quantize(self, tune_cfg, model, data_loader, q_func=None): - ...... - - @dump_elapsed_time("Pass recover model") - def recover(self, model, q_config): - ...... 
- - def inspect_tensor(self, model, dataloader, op_list=[], - iteration_list=[], - inspect_type='activation', - save_to_disk=False, - save_path=None, - quantization_cfg=None): - ...... - - def set_tensor(self, model, tensor_dict): - ...... - - def query_fw_capability(self, model): - ...... - - def evaluate(self, input_graph, dataloader, postprocess=None, - metrics=None, measurer=None, iteration=-1, - tensorboard=False, fp32_baseline=False): - ...... - - def diagnosis_helper(self, fp32_model, int8_model, tune_cfg=None, save_path=None): - ...... - - def save(self, model, path): - ...... + @adaptor_registry + class ONNXRT_QLinearOpsAdaptor(ONNXRTAdaptor): + @dump_elapsed_time("Pass quantize model") + def quantize(self, tune_cfg, model, data_loader, q_func=None): + ...... + + @dump_elapsed_time("Pass recover model") + def recover(self, model, q_config): + ...... + + def inspect_tensor(self, model, dataloader, op_list=[], + iteration_list=[], + inspect_type='activation', + save_to_disk=False, + save_path=None, + quantization_cfg=None): + ...... + + def set_tensor(self, model, tensor_dict): + ...... + + def query_fw_capability(self, model): + ...... + + def evaluate(self, input_graph, dataloader, postprocess=None, + metrics=None, measurer=None, iteration=-1, + tensorboard=False, fp32_baseline=False): + ...... + + def diagnosis_helper(self, fp32_model, int8_model, tune_cfg=None, save_path=None): + ...... + + def save(self, model, path): + ...... ``` diff --git a/docs/source/add_new_adaptor.md b/docs/source/add_new_adaptor.md index fdea2079867..d1ac18c845e 100644 --- a/docs/source/add_new_adaptor.md +++ b/docs/source/add_new_adaptor.md @@ -57,51 +57,50 @@ To enable accuracy-aware tuning with a specific framework, we should define the Each framework adaptor should implement the `query_fw_capability` function, this function will only be invoked once and will loop over the graph/model for the quantizable operators and collect each operator's opwise details and optypewise capability. You should return a standard dict of the input model's tuning capability. The format is like below: ```python - capability = { - 'opwise': {('conv2d', 'Conv2D'): [int8_conv_config, {'weight': {'dtype': 'bf16'}, 'activation': {'dtype': 'bf16'}}, {'weight': {'dtype': 'fp32'}, 'activation': {'dtype': 'fp32'}}], ... }# all quantizable opwise key-value pair with key tuple: (node_name, node_op)} - 'optypewise': optype_wise_ability, - } +capability = { + 'opwise': {('conv2d', 'Conv2D'): [int8_conv_config, {'weight': {'dtype': 'bf16'}, 'activation': {'dtype': 'bf16'}}, {'weight': {'dtype': 'fp32'}, 'activation': {'dtype': 'fp32'}}], ... }# all quantizable opwise key-value pair with key tuple: (node_name, node_op)} + 'optypewise': optype_wise_ability, +} ``` The int8_conv_config is like below, it's parsed from the framework YAML. ```python - int8_conv_config = { +int8_conv_config = { + "weight": { + "dtype": "int8", + "algorithm": "minmax", + "granularity": "per_channel", + "scheme": "sym", + }, + "activation": { + "dtype": "int8", + "quant_mode": "static", + "algorithm": "kl", + "granularity": "per_tensor", + "scheme": "sym", + }, +} +``` +The `optype_wise_ability` exmaple config is like below. 
+ +```python +optype_wise_ability = { + 'Conv2D': { 'weight': { 'dtype': 'int8', 'algorithm': 'minmax', 'granularity': 'per_channel', - 'scheme': 'sym' + 'scheme': 'sym', }, 'activation': { 'dtype': 'int8', 'quant_mode': 'static', 'algorithm': 'kl', 'granularity': 'per_tensor', - 'scheme': 'sym' - } - } - -``` -The `optype_wise_ability` exmaple config is like below. - -```python - optype_wise_ability = { - 'Conv2D': { - 'weight': { - 'dtype': 'int8', - 'algorithm': 'minmax', - 'granularity': 'per_channel', - 'scheme': 'sym' - }, - 'activation': { - 'dtype': 'int8', - 'quant_mode': 'static', - 'algorithm': 'kl', - 'granularity': 'per_tensor', - 'scheme': 'sym' - } + 'scheme': 'sym', }, - ... #all optype wise ability - } + }, + ... #all optype wise ability +} ``` After the work above, we have implement the `query_fw_capability` API and get the tuning capability dict for the Strategy object. Then the Strategy object will fetch tuning configuration and give to the quantize API to get the quantized model. @@ -148,4 +147,3 @@ Calibration data can only approximate the data distribution of the entire datase You can use different algorithms to make the data range more in line with the real data distribution. After applying these algorithms, we obtained the data distribution range of each operator. At this time, you can generate the quantized model. This quantized model can be evaluated. If the evaluation meets the set metric goal, the entire quantization process will be over. Otherwise, a new tuning configuration will be generated until a quantized model that meets the metric requirements. - diff --git a/docs/source/add_new_data_type.md b/docs/source/add_new_data_type.md index fb91d1b285e..99b957ef42d 100644 --- a/docs/source/add_new_data_type.md +++ b/docs/source/add_new_data_type.md @@ -133,19 +133,19 @@ Once the new data type has been added to Intel® Neural Compressor, it can be us ```python from neural_compressor.config import PostTrainingQuantConfig + op_type_dict = { - 'Conv2d': { - 'weight': { - 'dtype': ['int4'], + "Conv2d": { + "weight": { + "dtype": ["int4"], }, - 'activation': { - 'dtype': ['uint4'], + "activation": { + "dtype": ["uint4"], }, }, } conf = PostTrainingQuantConfig(op_type_dict=op_type_dict) ... - ``` With this code, all `Conv2d` operators will be quantized to 4-bit, with weight using `int4` and activation using `uint4`. 
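For reference, a minimal end-to-end sketch of applying such a config (assuming a user-supplied `model` and `calib_dataloader`; it follows the `quantization.fit` pattern used elsewhere in these docs):

```python
from neural_compressor import quantization
from neural_compressor.config import PostTrainingQuantConfig

# `model` and `calib_dataloader` are placeholders for a user-supplied FP32 model
# and calibration dataloader; `op_type_dict` is the 4-bit config shown above.
conf = PostTrainingQuantConfig(op_type_dict=op_type_dict)
q_model = quantization.fit(model, conf, calib_dataloader=calib_dataloader)
q_model.save("./saved_results")
```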
diff --git a/docs/source/api-doc/adaptor.rst b/docs/source/api-doc/adaptor.rst index d943e8f8dce..2a02cd68c4a 100644 --- a/docs/source/api-doc/adaptor.rst +++ b/docs/source/api-doc/adaptor.rst @@ -7,4 +7,4 @@ The adaptor API information is available: :maxdepth: 1 adaptor/onnxrt - adaptor/torch_utils \ No newline at end of file + adaptor/torch_utils diff --git a/docs/source/api-doc/adaptor/onnxrt.rst b/docs/source/api-doc/adaptor/onnxrt.rst index 4425781121c..312103aaea5 100644 --- a/docs/source/api-doc/adaptor/onnxrt.rst +++ b/docs/source/api-doc/adaptor/onnxrt.rst @@ -5,4 +5,3 @@ ONNX Runtime neural_compressor.adaptor.ox_utils.quantizer neural_compressor.adaptor.ox_utils.calibration neural_compressor.adaptor.ox_utils.operators.ops - diff --git a/docs/source/api-doc/adaptor/torch_utils.rst b/docs/source/api-doc/adaptor/torch_utils.rst index e69bd56a021..f0545be9cec 100644 --- a/docs/source/api-doc/adaptor/torch_utils.rst +++ b/docs/source/api-doc/adaptor/torch_utils.rst @@ -7,4 +7,4 @@ The torch utils API information is available: neural_compressor.adaptor.torch_utils.bf16_convert neural_compressor.adaptor.torch_utils.hawq_metric neural_compressor.adaptor.torch_utils.symbolic_trace - neural_compressor.adaptor.torch_utils.util \ No newline at end of file + neural_compressor.adaptor.torch_utils.util diff --git a/docs/source/api-doc/compression.rst b/docs/source/api-doc/compression.rst index 1df89190057..90748603248 100644 --- a/docs/source/api-doc/compression.rst +++ b/docs/source/api-doc/compression.rst @@ -7,5 +7,3 @@ The compression API information is available: neural_compressor.compression.distillation.criterions neural_compressor.compression.pruner - - diff --git a/docs/source/api-doc/mix_precision.rst b/docs/source/api-doc/mix_precision.rst index 847cdd6c74c..7a2d095f9e8 100644 --- a/docs/source/api-doc/mix_precision.rst +++ b/docs/source/api-doc/mix_precision.rst @@ -3,4 +3,4 @@ Mix Precision .. autoapisummary:: - neural_compressor.mix_precision \ No newline at end of file + neural_compressor.mix_precision diff --git a/docs/source/api-doc/model.rst b/docs/source/api-doc/model.rst index 596ce8fa0ad..34e87ee6a52 100644 --- a/docs/source/api-doc/model.rst +++ b/docs/source/api-doc/model.rst @@ -9,5 +9,3 @@ The model API information is available: neural_compressor.model.keras_model neural_compressor.model.torch_model neural_compressor.model.onnx_model - - diff --git a/docs/source/api-doc/strategy.rst b/docs/source/api-doc/strategy.rst index 89b034c2eaa..d11d0206422 100644 --- a/docs/source/api-doc/strategy.rst +++ b/docs/source/api-doc/strategy.rst @@ -4,9 +4,8 @@ Strategy The strategy API information is available: .. autoapisummary:: - + neural_compressor.strategy.auto neural_compressor.strategy.basic neural_compressor.strategy.mse_v2 neural_compressor.strategy.hawq_v2 - diff --git a/docs/source/benchmark.md b/docs/source/benchmark.md index 4b660f3ac95..461818a53eb 100644 --- a/docs/source/benchmark.md +++ b/docs/source/benchmark.md @@ -48,11 +48,11 @@ And please make sure `cores_per_instance * num_of_instance` must be less than CP ```python from neural_compressor.config import BenchmarkConfig from neural_compressor.benchmark import fit + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) -fit(model='./int8.pb', conf=conf, b_dataloader=eval_dataloader) +fit(model="./int8.pb", conf=conf, b_dataloader=eval_dataloader) ``` ## Examples Refer to the [Benchmark example](../../examples/helloworld/tf_example5). 
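To keep `cores_per_instance * num_of_instance` within the machine's physical core budget, the instance count can be derived at run time. A minimal sketch, assuming `psutil` is available (it is not required by the benchmark API itself) and `eval_dataloader` is a user-supplied dataloader as in the example above:

```python
import psutil

from neural_compressor.benchmark import fit
from neural_compressor.config import BenchmarkConfig

cores_per_instance = 4
physical_cores = psutil.cpu_count(logical=False)
# e.g. on a 28-core machine: 28 // 4 = 7 instances, i.e. 4 * 7 = 28 cores in use.
num_of_instance = physical_cores // cores_per_instance

conf = BenchmarkConfig(
    warmup=10,
    iteration=100,
    cores_per_instance=cores_per_instance,
    num_of_instance=num_of_instance,
)
fit(model="./int8.pb", conf=conf, b_dataloader=eval_dataloader)
```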
- diff --git a/docs/source/dataloader.md b/docs/source/dataloader.md index ecb1ed8c440..78e73acbb5a 100644 --- a/docs/source/dataloader.md +++ b/docs/source/dataloader.md @@ -63,7 +63,7 @@ Users can use the unified `DataLoader` API in the following manners. from neural_compressor.data import DataLoader from neural_compressor import quantization, PostTrainingQuantConfig -dataloader = DataLoader(framework='tensorflow', dataset=dataset) +dataloader = DataLoader(framework="tensorflow", dataset=dataset) config = PostTrainingQuantConfig() q_model = quantization.fit(model, config, calib_dataloader=dataloader, eval_func=eval) ``` @@ -84,7 +84,9 @@ class NewDataloader: for input_data, label in self.dataset: yield input_data, label + from neural_compressor import quantization, PostTrainingQuantConfig + config = PostTrainingQuantConfig() dataloader = NewDataloader(batch_size, **kwargs) q_model = quantization.fit(model, config, calib_dataloader=dataloader, eval_func=eval) diff --git a/docs/source/dataset.md b/docs/source/dataset.md index 26b0e7d8207..0695d78a3ac 100644 --- a/docs/source/dataset.md +++ b/docs/source/dataset.md @@ -148,12 +148,14 @@ After defining the dataset class, pass it to the quantizer: ```python from neural_compressor.experimental import Quantization, common + quantizer = Quantization(yaml_file) -quantizer.calib_dataloader = common.DataLoader(dataset) # user can pass more optional args to dataloader such as batch_size and collate_fn +quantizer.calib_dataloader = common.DataLoader( + dataset +) # user can pass more optional args to dataloader such as batch_size and collate_fn quantizer.model = graph quantizer.eval_func = eval_func -q_model = quantizer.fit() - +q_model = quantizer.fit() ``` ## Examples diff --git a/docs/source/diagnosis.md b/docs/source/diagnosis.md index c668fb6543c..9e70d695489 100644 --- a/docs/source/diagnosis.md +++ b/docs/source/diagnosis.md @@ -62,18 +62,12 @@ Modify quantization/benchmark script to run diagnosis by adding argument `diagno ### Quantization diagnosis ```python -config = PostTrainingQuantConfig( - diagnosis=True, - ... -) +config = PostTrainingQuantConfig(diagnosis=True, ...) ``` ### Benchmark diagnosis ```python -config = BenchmarkConfig( - diagnosis=True, - ... -) +config = BenchmarkConfig(diagnosis=True, ...) ``` # Example @@ -181,13 +175,19 @@ $\sigma_x$ - input model variance ### Fallback setting example ```python -from neural_compressor import quantization, PostTrainingQuantConfig -op_name_dict = {'v0/cg/conv0/conv2d/Conv2D': {'activation': {'dtype': ['fp32']}}} -config = PostTrainingQuantConfig( - diagnosis=True, - op_name_dict=op_name_dict +from neural_compressor import quantization, PostTrainingQuantConfig + +op_name_dict = {"v0/cg/conv0/conv2d/Conv2D": {"activation": {"dtype": ["fp32"]}}} +config = PostTrainingQuantConfig( + diagnosis=True, + op_name_dict=op_name_dict, +) +q_model = quantization.fit( + model, + config, + calib_dataloader=dataloader, + eval_func=eval, ) -q_model = quantization.fit(model, config, calib_dataloader=dataloader, eval_func=eval) ``` ## See profiling data diff --git a/docs/source/distillation.md b/docs/source/distillation.md index 1a01ea51b80..7e2d6b063ff 100644 --- a/docs/source/distillation.md +++ b/docs/source/distillation.md @@ -110,7 +110,10 @@ eval_func(model) For Intermediate Layer Knowledge Distillation or Self Distillation, the only difference to above launcher code is that `distil_loss_conf` should be set accordingly as shown below. 
More detailed settings can be found in this [example](../../examples/pytorch/nlp/huggingface_models/text-classification/optimization_pipeline/distillation_for_quantization/fx/run_glue_no_trainer.py#L510) for Intermediate Layer Knowledge Distillation and this [example](../../examples/pytorch/image_recognition/torchvision_models/self_distillation/eager/main.py#L344) for Self Distillation. ```python -from neural_compressor.config import IntermediateLayersKnowledgeDistillationLossConfig, SelfKnowledgeDistillationLossConfig +from neural_compressor.config import ( + IntermediateLayersKnowledgeDistillationLossConfig, + SelfKnowledgeDistillationLossConfig, +) # for Intermediate Layer Knowledge Distillation distil_loss_conf = IntermediateLayersKnowledgeDistillationLossConfig(layer_mappings=layer_mappings) diff --git a/docs/source/distillation_quantization.md b/docs/source/distillation_quantization.md index c2ba491effb..383b3488c6f 100644 --- a/docs/source/distillation_quantization.md +++ b/docs/source/distillation_quantization.md @@ -70,6 +70,7 @@ from neural_compressor.experimental import common, Distillation, Quantization from neural_compressor.config import DistillationConfig, KnowledgeDistillationLossConfig from neural_compressor import QuantizationAwareTrainingConfig from neural_compressor.training import prepare_compression + combs = [] distillation_criterion = KnowledgeDistillationLossConfig() d_conf = DistillationConfig(teacher_model=teacher_model, criterion=distillation_criterion) diff --git a/docs/source/examples_readme.md b/docs/source/examples_readme.md index 495f34c95e8..b3fc71ebd08 100644 --- a/docs/source/examples_readme.md +++ b/docs/source/examples_readme.md @@ -9,4 +9,4 @@ View the [examples in Neural Compressor GitHub Repo](https://github.com/intel/ne ## Release Data Intel® Neural Compressor validated the quantization for 10K+ models from popular model hubs (e.g., HuggingFace Transformers, Torchvision, TensorFlow Model Hub, ONNX Model Zoo) with the performance speedup up to 4.2x on VNNI while minimizing the accuracy loss. Over 30 pruning and knowledge distillation samples are also available. -Release data for validated typical models are available [here](/docs/source/validated_model_list.md). \ No newline at end of file +Release data for validated typical models are available [here](/docs/source/validated_model_list.md). diff --git a/docs/source/export.md b/docs/source/export.md index 5989286975e..c86e6eef7bf 100644 --- a/docs/source/export.md +++ b/docs/source/export.md @@ -72,16 +72,19 @@ Here is the workflow of our export API for PyTorch/Tensorflow FP32/INT8 model. ```python from neural_compressor.experimental.common import Model from neural_compressor.config import Torch2ONNXConfig + inc_model = Model(model) fp32_onnx_config = Torch2ONNXConfig( dtype="fp32", example_inputs=torch.randn(1, 3, 224, 224), - input_names=['input'], - output_names=['output'], - dynamic_axes={"input": {0: "batch_size"}, - "output": {0: "batch_size"}}, + input_names=["input"], + output_names=["output"], + dynamic_axes={ + "input": {0: "batch_size"}, + "output": {0: "batch_size"}, + }, ) -inc_model.export('fp32-model.onnx', fp32_onnx_config) +inc_model.export("fp32-model.onnx", fp32_onnx_config) ``` #### INT8 Model Export @@ -89,17 +92,17 @@ inc_model.export('fp32-model.onnx', fp32_onnx_config) ```python # q_model is a Neural Compressor model after performing quantization. 
from neural_compressor.config import Torch2ONNXConfig + int8_onnx_config = Torch2ONNXConfig( dtype="int8", opset_version=14, - quant_format="QOperator", # or QDQ + quant_format="QOperator", # or QDQ example_inputs=torch.randn(1, 3, 224, 224), - input_names=['input'], - output_names=['output'], - dynamic_axes={"input": {0: "batch_size"}, - "output": {0: "batch_size"}}, + input_names=["input"], + output_names=["output"], + dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}}, ) -q_model.export('int8-model.onnx', int8_onnx_config) +q_model.export("int8-model.onnx", int8_onnx_config) ``` > **Note**: Two export examples covering computer vision and natural language processing tasks exist in examples. Users can leverage them to verify the accuracy and performance of the exported ONNX model. - [Image recognition](/examples/pytorch/image_recognition/torchvision_models/export/fx/) @@ -112,9 +115,10 @@ q_model.export('int8-model.onnx', int8_onnx_config) ```python from neural_compressor.experimental.common import Model from neural_compressor.config import TF2ONNXConfig + inc_model = Model(model) -config = TF2ONNXConfig(dtype='fp32') -inc_model.export('fp32-model.onnx', config) +config = TF2ONNXConfig(dtype="fp32") +inc_model.export("fp32-model.onnx", config) ``` ### INT8 Model Export @@ -122,8 +126,9 @@ inc_model.export('fp32-model.onnx', config) ```python # q_model is a Neural Compressor model after performing quantization. from neural_compressor.config import TF2ONNXConfig -config = TF2ONNXConfig(dtype='int8') -q_model.export('int8-model.onnx', config) + +config = TF2ONNXConfig(dtype="int8") +q_model.export("int8-model.onnx", config) ``` > **Note**: Some export examples of computer vision task exist in examples. Users can leverage them to verify the accuracy and performance of the exported ONNX model. @@ -158,4 +163,3 @@ This table lists the TorchScript operators that are supported by ONNX export wit | ``quantized::sigmoid`` | Since opset 10 | > **Note**: The export function may fail due to unsupported operations. Please fallback unsupported quantized ops by setting 'op_type_dict' or 'op_name_dict' in 'QuantizationAwareTrainingConfig' or 'PostTrainingQuantConfig' config. Fallback examples please refer to [Text classification](/examples/pytorch/nlp/huggingface_models/text-classification/export/fx/) - diff --git a/docs/source/get_started.md b/docs/source/get_started.md index c4e84475509..61c22912c41 100644 --- a/docs/source/get_started.md +++ b/docs/source/get_started.md @@ -18,14 +18,16 @@ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/mobil from neural_compressor.data import DataLoader, Datasets from neural_compressor.config import PostTrainingQuantConfig -dataset = Datasets('tensorflow')['dummy'](shape=(1, 224, 224, 3)) -dataloader = DataLoader(framework='tensorflow', dataset=dataset) +dataset = Datasets("tensorflow")["dummy"](shape=(1, 224, 224, 3)) +dataloader = DataLoader(framework="tensorflow", dataset=dataset) from neural_compressor.quantization import fit + q_model = fit( model="./mobilenet_v1_1.0_224_frozen.pb", conf=PostTrainingQuantConfig(), - calib_dataloader=dataloader) + calib_dataloader=dataloader, +) ``` ## Validated Models diff --git a/docs/source/incompatible_changes.md b/docs/source/incompatible_changes.md index a4d24defcf8..4ffcac0a1c1 100644 --- a/docs/source/incompatible_changes.md +++ b/docs/source/incompatible_changes.md @@ -12,26 +12,28 @@ Refer to below examples for details. 
```python # user facing API example in v1.1 -quantizer = Quantization('/path/to/user.yaml') -ds = dataset('/path/to/dataset') +quantizer = Quantization("/path/to/user.yaml") +ds = dataset("/path/to/dataset") dataloader = quantizer.dataloader(ds, batch_size=100) -quantizer.metric('metric', metric) -q_model = quantizer('/path/to/model', q_dataloader = dataloader, eval_dataloader = dataloader) -... # user to write framework specific code to save q_model - +quantizer.metric("metric", metric) +q_model = quantizer( + "/path/to/model", + q_dataloader=dataloader, + eval_dataloader=dataloader, +) +... # user to write framework specific code to save q_model ``` ```python # user facing API example in v1.2 quantizer = Quantization(conf.yaml) -quantizer.model = '/path/to/model' -dl = dataset('/path/to/dataset') +quantizer.model = "/path/to/model" +dl = dataset("/path/to/dataset") quantizer.calib_dataloader = common.DataLoader(dl, batch_size=32) quantizer.eval_dataloader = common.DataLoader(dl, batch_size=32) -quantizer.metric = common.Metric(custom_metric) +quantizer.metric = common.Metric(custom_metric) q_model = quantizer.fit() -q_model.save('/path/to/output/dir') # explicitly call to save q_model - +q_model.save("/path/to/output/dir") # explicitly call to save q_model ``` ## Built-in transform/dataset/metric APIs diff --git a/docs/source/metric.md b/docs/source/metric.md index a515ee5f784..dc0844c8341 100644 --- a/docs/source/metric.md +++ b/docs/source/metric.md @@ -100,7 +100,7 @@ from neural_compressor import quantization, PostTrainingQuantConfig top1 = Metric(name="topk", k=1) config = PostTrainingQuantConfig() -q_model = fit(model, config, calib_dataloader=calib_dataloader, eval_dataloader=eval_dataloader,eval_metric=top1) +q_model = fit(model, config, calib_dataloader=calib_dataloader, eval_dataloader=eval_dataloader, eval_metric=top1) ``` ### Build Custom Metric with Python API @@ -133,7 +133,7 @@ from neural_compressor import quantization, PostTrainingQuantConfig new_metric = NewMetric() config = PostTrainingQuantConfig() -q_model = fit(model, config, calib_dataloader=calib_dataloader, eval_dataloader=eval_dataloader,eval_metric=new_metric) +q_model = fit(model, config, calib_dataloader=calib_dataloader, eval_dataloader=eval_dataloader, eval_metric=new_metric) ``` ## Example diff --git a/docs/source/migration.md b/docs/source/migration.md index 033f7d2cb64..d1d964d9a43 100644 --- a/docs/source/migration.md +++ b/docs/source/migration.md @@ -33,24 +33,31 @@ model = AutoModelForSequenceClassification.from_pretrained(model_name_or_path) tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) val_dataset = ... val_dataloader = torch.utils.data.Dataloader( - val_dataset, - batch_size=args.batch_size, shuffle=False, - num_workers=args.workers, ping_memory=True) + val_dataset, + batch_size=args.batch_size, + shuffle=False, + num_workers=args.workers, + ping_memory=True, +) + + def eval_func(model): ... 
+ # Quantization code from neural_compressor.experimental import Quantization, common + calib_dataloader = eval_dataloader -quantizer = Quantization('conf.yaml') +quantizer = Quantization("conf.yaml") quantizer.eval_func = eval_func quantizer.calib_dataloader = calib_dataloader quantizer.model = common.Model(model) model = quantizer.fit() from neural_compressor.utils.load_huggingface import save_for_huggingface_upstream -save_for_huggingface_upstream(model, tokenizer, output_dir) +save_for_huggingface_upstream(model, tokenizer, output_dir) ``` We formulate the `conf.yaml` as in (https://github.com/intel/neural-compressor/blob/master/neural_compressor/template/ptq.yaml) @@ -62,34 +69,34 @@ In Intel Neural Compressor 2.X, we integrate the `conf.yaml` into `main.py` to s from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion PostTrainingQuantConfig( - ## model: this parameter does not need to specially be defined; - backend="default", # framework: set as "default" when framework was tensorflow, pytorch, pytorch_fx, onnxrt_integer and onnxrt_qlinear. Set as "ipex" when framework was pytorch_ipex, mxnet is currently unsupported; - inputs="image_tensor", # input: same as in the conf.yaml; - outputs="num_detections,detection_boxes,detection_scores,detection_classes", # output: same as in the conf.yaml; - device="cpu", # device: same as in the conf.yaml; - approach="static", # approach: set as "static" when approach was "post_training_static_quant". Set as "dynamic" when approach was "post_training_dynamic_quant"; - ## recipes: this parameter does not need to specially be defined; - calibration_sampling_size=[1000, 2000], # sampling_size: same as in the conf.yaml; - ## transform: this parameter does not need to specially be defined; - ## model_wise: this parameter does not need to specially be defined; - op_name_dict=op_dict, # op_wise: same as in the conf.yaml; - ## evaluation: these parameters do not need to specially be defined; - strategy="basic", # tuning.strategy.name: same as in the conf.yaml; - ## tuning.strategy.sigopt_api_token, tuning.strategy.sigopt_project_id and tuning.strategy.sigopt_experiment_name do not need to specially defined; - objective="performance", # tuning.objective: same as in the conf.yaml; - performance_only=False, # tuning.performance_only: same as in the conf.yaml; - tuning_criterion=tuning_criterion, - accuracy_criterion=accuracy_criterion, - ## tuning.random_seed and tuning.tensorboard: these parameters do not need to specially be defined; + ## model: this parameter does not need to specially be defined; + backend="default", # framework: set as "default" when framework was tensorflow, pytorch, pytorch_fx, onnxrt_integer and onnxrt_qlinear. Set as "ipex" when framework was pytorch_ipex, mxnet is currently unsupported; + inputs="image_tensor", # input: same as in the conf.yaml; + outputs="num_detections,detection_boxes,detection_scores,detection_classes", # output: same as in the conf.yaml; + device="cpu", # device: same as in the conf.yaml; + approach="static", # approach: set as "static" when approach was "post_training_static_quant". 
Set as "dynamic" when approach was "post_training_dynamic_quant"; + ## recipes: this parameter does not need to specially be defined; + calibration_sampling_size=[1000, 2000], # sampling_size: same as in the conf.yaml; + ## transform: this parameter does not need to specially be defined; + ## model_wise: this parameter does not need to specially be defined; + op_name_dict=op_dict, # op_wise: same as in the conf.yaml; + ## evaluation: these parameters do not need to specially be defined; + strategy="basic", # tuning.strategy.name: same as in the conf.yaml; + ## tuning.strategy.sigopt_api_token, tuning.strategy.sigopt_project_id and tuning.strategy.sigopt_experiment_name do not need to specially defined; + objective="performance", # tuning.objective: same as in the conf.yaml; + performance_only=False, # tuning.performance_only: same as in the conf.yaml; + tuning_criterion=tuning_criterion, + accuracy_criterion=accuracy_criterion, + ## tuning.random_seed and tuning.tensorboard: these parameters do not need to specially be defined; ) -accuracy_criterion=AccuracyCriterion( - tolerable_loss=0.01, # relative: same as in the conf.yaml; +accuracy_criterion = AccuracyCriterion( + tolerable_loss=0.01, # relative: same as in the conf.yaml; +) +tuning_criterion = TuningCriterion( + timeout=0, # timeout: same as in the conf.yaml; + max_trials=100, # max_trials: same as in the conf.yaml; ) -tuning_criterion=TuningCriterion( - timeout=0, # timeout: same as in the conf.yaml; - max_trials=100, # max_trials: same as in the conf.yaml; -) ``` Following is a simple demo about how to quantize the model with PTQ in Intel Neural Compressor 2.X. @@ -100,19 +107,27 @@ model = AutoModelForSequenceClassification.from_pretrained(model_name_or_path) tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) val_dataset = ... val_dataloader = torch.utils.data.Dataloader( - val_dataset, - batch_size=args.batch_size, shuffle=False, - num_workers=args.workers, ping_memory=True) + val_dataset, + batch_size=args.batch_size, + shuffle=False, + num_workers=args.workers, + ping_memory=True, +) + + def eval_func(model): ... + # Quantization code from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion + tuning_criterion = TuningCriterion(max_trials=600) conf = PostTrainingQuantConfig(approach="static", tuning_criterion=tuning_criterion) q_model = fit(model, conf=conf, calib_dataloader=eval_dataloader, eval_func=eval_func) from neural_compressor.utils.load_huggingface import save_for_huggingface_upstream + save_for_huggingface_upstream(q_model, tokenizer, training_args.output_dir) ``` @@ -131,25 +146,29 @@ In Intel Neural Compressor 1.X, the difference between the QAT and PTQ is that w model = AutoModelForSequenceClassification.from_pretrained(model_name_or_path) tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) + def eval_func(model): ... + def train_func(model): ... + trainer = Trainer(...) 
# Quantization code from neural_compressor.experimental import Quantization, common -quantizer = Quantization('conf.yaml') + +quantizer = Quantization("conf.yaml") quantizer.eval_func = eval_func quantizer.q_func = train_func quantizer.model = common.Model(model) model = quantizer.fit() from neural_compressor.utils.load_huggingface import save_for_huggingface_upstream -save_for_huggingface_upstream(model, tokenizer, output_dir) +save_for_huggingface_upstream(model, tokenizer, output_dir) ``` Similar to PTQ, it requires a `conf.yaml` (https://github.com/intel/neural-compressor/blob/master/neural_compressor/template/qat.yaml) to define the quantization configuration in Intel Neural Compressor 1.X. @@ -162,26 +181,26 @@ In Intel Neural Compressor 2.X, this `conf.yaml` is set via the `QuantizationAwa from neural_compressor.config import QuantizationAwareTrainingConfig QuantizationAwareTrainingConfig( - ## model: this parameter does not need to specially be defined; - backend="default", # framework: set as "default" when framework was tensorflow, pytorch, pytorch_fx, onnxrt_integer and onnxrt_qlinear. Set as "ipex" when framework was pytorch_ipex, mxnet is currently unsupported; - inputs="image_tensor", # input: same as in the conf.yaml; - outputs="num_detections,detection_boxes,detection_scores,detection_classes", # output: same as in the conf.yaml; - device="cpu", # device: same as in the conf.yaml; - ## approach: this parameter does not need to specially be defined; - ## train: these parameters do not need to specially be defined; - ## model_wise: this parameter does not need to specially be defined; - op_name_dict=op_dict, # op_wise: same as in the conf.yaml; - ## evaluation: these parameters do not need to specially be defined; - strategy="basic", # tuning.strategy.name: same as in the conf.yaml; - ## tuning.strategy.sigopt_api_token, tuning.strategy.sigopt_project_id and tuning.strategy.sigopt_experiment_name do not need to specially defined; - relative=0.01, # relative: same as in the conf.yaml; - timeout=0, # timeout: same as in the conf.yaml; - max_trials=100, # max_trials: same as in the conf.yaml; - objective="performance", # tuning.objective: same as in the conf.yaml; - performance_only=False, # tuning.performance_only: same as in the conf.yaml; - ## tuning.random_seed and tuning.tensorboard: these parameters do not need to specially be defined; - ## diagnosis: these parameters do not need to specially be defined; -) + ## model: this parameter does not need to specially be defined; + backend="default", # framework: set as "default" when framework was tensorflow, pytorch, pytorch_fx, onnxrt_integer and onnxrt_qlinear. 
Set as "ipex" when framework was pytorch_ipex, mxnet is currently unsupported; + inputs="image_tensor", # input: same as in the conf.yaml; + outputs="num_detections,detection_boxes,detection_scores,detection_classes", # output: same as in the conf.yaml; + device="cpu", # device: same as in the conf.yaml; + ## approach: this parameter does not need to specially be defined; + ## train: these parameters do not need to specially be defined; + ## model_wise: this parameter does not need to specially be defined; + op_name_dict=op_dict, # op_wise: same as in the conf.yaml; + ## evaluation: these parameters do not need to specially be defined; + strategy="basic", # tuning.strategy.name: same as in the conf.yaml; + ## tuning.strategy.sigopt_api_token, tuning.strategy.sigopt_project_id and tuning.strategy.sigopt_experiment_name do not need to specially defined; + relative=0.01, # relative: same as in the conf.yaml; + timeout=0, # timeout: same as in the conf.yaml; + max_trials=100, # max_trials: same as in the conf.yaml; + objective="performance", # tuning.objective: same as in the conf.yaml; + performance_only=False, # tuning.performance_only: same as in the conf.yaml; + ## tuning.random_seed and tuning.tensorboard: these parameters do not need to specially be defined; + ## diagnosis: these parameters do not need to specially be defined; +) ``` In Intel Neural Compressor 2.X, we introduce a `compression manager` to control the training process. It requires to insert a pair of hook `callbacks.on_train_begin` and `callbacks.on_train_end` at the begin of the training and the end of the training. Thus, the quantization code is updated as: @@ -193,17 +212,21 @@ In Intel Neural Compressor 2.X, we introduce a `compression manager` to control model = AutoModelForSequenceClassification.from_pretrained(model_name_or_path) tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) + def eval_func(model): ... + def train_func(model): ... + trainer = Trainer(...) 
# Quantization code from neural_compressor.training import prepare_compression from neural_compressor.config import QuantizationAwareTrainingConfig + conf = QuantizationAwareTrainingConfig() compression_manager = prepare_compression(model, conf) compression_manager.callbacks.on_train_begin() @@ -211,8 +234,8 @@ trainer.train() compression_manager.callbacks.on_train_end() from neural_compressor.utils.load_huggingface import save_for_huggingface_upstream -save_for_huggingface_upstream(compression_manager.model, tokenizer, training_args.output_dir) +save_for_huggingface_upstream(compression_manager.model, tokenizer, training_args.output_dir) ``` ## Pruning @@ -225,7 +248,8 @@ In Intel Neural Compressor 1.X, the Pruning config is still defined by an extra ```python from neural_compressor.experimental import Pruning, common -prune = Pruning('conf.yaml') + +prune = Pruning("conf.yaml") prune.model = model prune.train_func = pruning_func model = prune.fit() @@ -253,10 +277,8 @@ def pruning_func(model): for step, batch in enumerate(train_dataloader): prune.on_step_begin(step) batch = tuple(t.to(args.device) for t in batch) - inputs = {'input_ids': batch[0], - 'attention_mask': batch[1], - 'labels': batch[3]} - #inputs['token_type_ids'] = batch[2] + inputs = {"input_ids": batch[0], "attention_mask": batch[1], "labels": batch[3]} + # inputs['token_type_ids'] = batch[2] outputs = model(**inputs) loss = outputs[0] # model outputs are always tuple in transformers (see doc) @@ -273,8 +295,10 @@ def pruning_func(model): optimizer.step() scheduler.step() # Update learning rate schedule model.zero_grad() - + prune.on_step_end() + + ... ``` @@ -299,66 +323,67 @@ WeightPruningConfig( We also need to replace the hooks in the training code. The newly defined hooks are included in `compression manager` and listed as follows, ```python - on_train_begin() : Execute at the beginning of training phase. - on_epoch_begin(epoch) : Execute at the beginning of each epoch. - on_step_begin(batch) : Execute at the beginning of each batch. - on_step_end() : Execute at the end of each batch. - on_epoch_end() : Execute at the end of each epoch. - on_before_optimizer_step() : Execute before optimization step. - on_after_optimizer_step() : Execute after optimization step. - on_train_end() : Execute at the ending of training phase. +on_train_begin() : Execute at the beginning of training phase. +on_epoch_begin(epoch) : Execute at the beginning of each epoch. +on_step_begin(batch) : Execute at the beginning of each batch. +on_step_end() : Execute at the end of each batch. +on_epoch_end() : Execute at the end of each epoch. +on_before_optimizer_step() : Execute before optimization step. +on_after_optimizer_step() : Execute after optimization step. +on_train_end() : Execute at the ending of training phase. ``` The final Pruning code is updated as follows, ```python - config = { ## pruner - 'target_sparsity': 0.9, # Target sparsity ratio of modules. - 'pruning_type': "snip_momentum", # Default pruning type. - 'pattern': "4x1", # Default pruning pattern. - 'op_names': ['layer1.*'], # A list of modules that would be pruned. - 'excluded_op_names': ['layer3.*'], # A list of modules that would not be pruned. - 'start_step': 0, # Step at which to begin pruning. - 'end_step': 10, # Step at which to end pruning. - 'pruning_scope': "global", # Default pruning scope. - 'pruning_frequency': 1, # Frequency of applying pruning. - 'min_sparsity_ratio_per_op': 0.0, # Minimum sparsity ratio of each module. 
- 'max_sparsity_ratio_per_op': 0.98, # Maximum sparsity ratio of each module. - 'sparsity_decay_type': "exp", # Function applied to control pruning rate. - 'pruning_op_types': ['Conv', 'Linear'], # Types of op that would be pruned. - } - - from neural_compressor.training import prepare_compression, WeightPruningConfig - ##setting configs - pruning_configs=[ - {"op_names": ['layer1.*'], "pattern":'4x1'}, - {"op_names": ['layer2.*'], "pattern":'1x1', 'target_sparsity':0.5} - ] - configs = WeightPruningConfig( - pruning_configs=pruning_configs, - target_sparsity=config.target_sparsity, - pattern=config.pattern, - pruning_frequency=config.pruning_frequency, - start_step=config.start_step, - end_step=config.end_step, - pruning_type=config.pruning_type, - ) - compression_manager = prepare_compression(model=model, confs=configs) - compression_manager.callbacks.on_train_begin() ## insert hook - for epoch in range(num_train_epochs): - model.train() - for step, batch in enumerate(train_dataloader): - compression_manager.callbacks.on_step_begin(step) ## insert hook - outputs = model(**batch) - loss = outputs.loss - loss.backward() - compression_manager.callbacks.on_before_optimizer_step() ## insert hook - optimizer.step() - compression_manager.callbacks.on_after_optimizer_step() ## insert hook - lr_scheduler.step() - model.zero_grad() - ... - compression_manager.callbacks.on_train_end() +config = { ## pruner + "target_sparsity": 0.9, # Target sparsity ratio of modules. + "pruning_type": "snip_momentum", # Default pruning type. + "pattern": "4x1", # Default pruning pattern. + "op_names": ["layer1.*"], # A list of modules that would be pruned. + "excluded_op_names": ["layer3.*"], # A list of modules that would not be pruned. + "start_step": 0, # Step at which to begin pruning. + "end_step": 10, # Step at which to end pruning. + "pruning_scope": "global", # Default pruning scope. + "pruning_frequency": 1, # Frequency of applying pruning. + "min_sparsity_ratio_per_op": 0.0, # Minimum sparsity ratio of each module. + "max_sparsity_ratio_per_op": 0.98, # Maximum sparsity ratio of each module. + "sparsity_decay_type": "exp", # Function applied to control pruning rate. + "pruning_op_types": ["Conv", "Linear"], # Types of op that would be pruned. +} + +from neural_compressor.training import prepare_compression, WeightPruningConfig + +##setting configs +pruning_configs = [ + {"op_names": ["layer1.*"], "pattern": "4x1"}, + {"op_names": ["layer2.*"], "pattern": "1x1", "target_sparsity": 0.5}, +] +configs = WeightPruningConfig( + pruning_configs=pruning_configs, + target_sparsity=config.target_sparsity, + pattern=config.pattern, + pruning_frequency=config.pruning_frequency, + start_step=config.start_step, + end_step=config.end_step, + pruning_type=config.pruning_type, +) +compression_manager = prepare_compression(model=model, confs=configs) +compression_manager.callbacks.on_train_begin() ## insert hook +for epoch in range(num_train_epochs): + model.train() + for step, batch in enumerate(train_dataloader): + compression_manager.callbacks.on_step_begin(step) ## insert hook + outputs = model(**batch) + loss = outputs.loss + loss.backward() + compression_manager.callbacks.on_before_optimizer_step() ## insert hook + optimizer.step() + compression_manager.callbacks.on_after_optimizer_step() ## insert hook + lr_scheduler.step() + model.zero_grad() +... 
+compression_manager.callbacks.on_train_end() ``` ## Distillation @@ -416,28 +441,31 @@ def train_func(model): for nepoch in range(epochs): model.train() cnt = 0 - loss_sum = 0. - iter_bar = tqdm(train_dataloader, desc='Iter (loss=X.XXX)') + loss_sum = 0.0 + iter_bar = tqdm(train_dataloader, desc="Iter (loss=X.XXX)") for batch in iter_bar: teacher_logits, input_ids, segment_ids, input_mask, target = batch cnt += 1 output = model(input_ids, segment_ids, input_mask) loss = criterion(output, target) loss = distiller.on_after_compute_loss( - {'input_ids':input_ids, 'segment_ids':segment_ids, 'input_mask':input_mask}, + {"input_ids": input_ids, "segment_ids": segment_ids, "input_mask": input_mask}, output, loss, - teacher_logits) + teacher_logits, + ) optimizer.zero_grad() loss.backward() optimizer.step() if cnt >= iters: break - print('Average Loss: {}'.format(loss_sum / cnt)) + print("Average Loss: {}".format(loss_sum / cnt)) distiller.on_epoch_end() + from neural_compressor.experimental import Distillation, common from neural_compressor.experimental.common.criterion import PyTorchKnowledgeDistillationLoss + distiller = Distillation(conf.yaml) distiller.student_model = model distiller.teacher_model = teacher @@ -454,9 +482,9 @@ The new distillation API also introduce `compression manager` to conduct the tra from neural_compressor.config import DistillationConfig DistillationConfig( - criterion=KnowledgeDistillationLoss, # criterion: same as in the conf.yaml; - optimizer=SGD, # optimizer: same as in the conf.yaml; -) + criterion=KnowledgeDistillationLoss, # criterion: same as in the conf.yaml; + optimizer=SGD, # optimizer: same as in the conf.yaml; +) ``` The newly updated distillation code is shown as follows, @@ -501,12 +529,13 @@ The recent growth of Deep Learning has driven the development of more complex mo The user can add dataloader and metric in `conf.yaml` to execute evaluation. ```python from neural_compressor.experimental import MixedPrecision, common + dataset = Dataset() -converter = MixedPrecision('conf.yaml') +converter = MixedPrecision("conf.yaml") converter.metric = Metric() -converter.precisions = 'bf16' +converter.precisions = "bf16" converter.eval_dataloader = common.DataLoader(dataset) -converter.model = './model.pb' +converter.model = "./model.pb" output_model = converter() ``` @@ -547,23 +576,23 @@ In 2.X version, we integrate the config information in `MixedPrecisionConfig`, l from neural_compressor.config import MixedPrecisionConfig, TuningCriterion, AccuracyCriterion MixedPrecisionConfig( - ## model: this parameter does not need to specially be defined; - backend="default", # framework: set as "default" when framework was tensorflow, pytorch, pytorch_fx, onnxrt_integer and onnxrt_qlinear. Set as "ipex" when framework was pytorch_ipex, mxnet is currently unsupported; - inputs="image_tensor", # input: same as in the conf.yaml; - outputs="num_detections,detection_boxes,detection_scores,detection_classes", # output: same as in the conf.yaml; - device="cpu", # device: same as in the conf.yaml; - tuning_criterion=tuning_criterion, - accuracy_criterion=accuracy_criterion, - ## tuning.random_seed and tuning.tensorboard: these parameters do not need to specially be defined; + ## model: this parameter does not need to specially be defined; + backend="default", # framework: set as "default" when framework was tensorflow, pytorch, pytorch_fx, onnxrt_integer and onnxrt_qlinear. 
Set as "ipex" when framework was pytorch_ipex, mxnet is currently unsupported; + inputs="image_tensor", # input: same as in the conf.yaml; + outputs="num_detections,detection_boxes,detection_scores,detection_classes", # output: same as in the conf.yaml; + device="cpu", # device: same as in the conf.yaml; + tuning_criterion=tuning_criterion, + accuracy_criterion=accuracy_criterion, + ## tuning.random_seed and tuning.tensorboard: these parameters do not need to specially be defined; ) -accuracy_criterion=AccuracyCriterion( - tolerable_loss=0.01, # relative: same as in the conf.yaml; +accuracy_criterion = AccuracyCriterion( + tolerable_loss=0.01, # relative: same as in the conf.yaml; +) +tuning_criterion = TuningCriterion( + timeout=0, # timeout: same as in the conf.yaml; + max_trials=100, # max_trials: same as in the conf.yaml; ) -tuning_criterion=TuningCriterion( - timeout=0, # timeout: same as in the conf.yaml; - max_trials=100, # max_trials: same as in the conf.yaml; -) ``` The update demo is shown as follows, @@ -575,7 +604,7 @@ from neural_compressor.config import MixedPrecisionConfig conf = MixedPrecisionConfig() converted_model = mix_precision.fit(model, config=conf) -converted_model.save('./path/to/save/') +converted_model.save("./path/to/save/") ``` ## Orchestration @@ -589,6 +618,7 @@ Intel Neural Compressor 1.X mainly relies on a `Scheduler` class to automaticall Following is an example how to set the `Scheduler` for Orchestration process. If the user wants to execute the pruning and quantization-aware training with one-shot way, ```python from neural_compressor.experimental import Quantization, Pruning, Scheduler + prune = Pruning(prune_conf.yaml) quantizer = Quantization(quantization_aware_training_conf.yaml) scheduler = Scheduler() @@ -628,7 +658,7 @@ train_loop: compression_manager.on_step_end() compression_manager.on_epoch_end() compression_manager.on_train_end() - + model.save('./path/to/save') ``` @@ -667,9 +697,10 @@ evaluation: # optional. used to config And then, the user can get the accuracy with, ```python -dataset = Dataset() # dataset class that implement __getitem__ method or __iter__ method +dataset = Dataset() # dataset class that implement __getitem__ method or __iter__ method from neural_compressor.experimental import Benchmark, common from neural_compressor.conf.config import BenchmarkConf + conf = BenchmarkConf(config.yaml) evaluator = Benchmark(conf) evaluator.dataloader = common.DataLoader(dataset, batch_size=batch_size) @@ -686,19 +717,19 @@ In Intel Neural Compressor 2.X, we optimize the code to make it simple and clear from neural_compressor.config import BenchmarkConfig BenchmarkConfig( - ## model: this parameter does not need to specially be defined; - backend="default", # framework: set as "default" when framework was tensorflow, pytorch, pytorch_fx, onnxrt_integer and onnxrt_qlinear. 
Set as "ipex" when framework was pytorch_ipex, mxnet is currently unsupported; - inputs="image_tensor", # input: same as in the conf.yaml; - outputs="num_detections,detection_boxes,detection_scores,detection_classes", # output: same as in the conf.yaml; - device="cpu", # device: same as in the conf.yaml; - warmup=10, # warmup: same as in the conf.yaml; - iteration=100, # iteration: same as in the conf.yaml; - cores_per_instance=4, # cores_per_instance: same as in the conf.yaml; - num_of_instance=7, # num_of_instance: same as in the conf.yaml; - inter_num_of_threads=1, # inter_num_of_threads: same as in the conf.yaml; - intra_num_of_threads=4, # intra_num_of_threads: same as in the conf.yaml; - ## dataloader: this parameter does not need to specially be defined; -) + ## model: this parameter does not need to specially be defined; + backend="default", # framework: set as "default" when framework was tensorflow, pytorch, pytorch_fx, onnxrt_integer and onnxrt_qlinear. Set as "ipex" when framework was pytorch_ipex, mxnet is currently unsupported; + inputs="image_tensor", # input: same as in the conf.yaml; + outputs="num_detections,detection_boxes,detection_scores,detection_classes", # output: same as in the conf.yaml; + device="cpu", # device: same as in the conf.yaml; + warmup=10, # warmup: same as in the conf.yaml; + iteration=100, # iteration: same as in the conf.yaml; + cores_per_instance=4, # cores_per_instance: same as in the conf.yaml; + num_of_instance=7, # num_of_instance: same as in the conf.yaml; + inter_num_of_threads=1, # inter_num_of_threads: same as in the conf.yaml; + intra_num_of_threads=4, # intra_num_of_threads: same as in the conf.yaml; + ## dataloader: this parameter does not need to specially be defined; +) ``` The new example in Intel Neural Compressor 2.X should be updated as, @@ -706,11 +737,12 @@ The new example in Intel Neural Compressor 2.X should be updated as, ```python from neural_compressor.config import BenchmarkConfig from neural_compressor.benchmark import fit + conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) -fit(model='./int8.pb', config=conf, b_dataloader=eval_dataloader) +fit(model="./int8.pb", config=conf, b_dataloader=eval_dataloader) ``` ## Examples -User could refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/README.md) for more details about the migration from Intel Neural Compressor 1.X to Intel Neural Compressor 2.X. \ No newline at end of file +User could refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/README.md) for more details about the migration from Intel Neural Compressor 1.X to Intel Neural Compressor 2.X. diff --git a/docs/source/mixed_precision.md b/docs/source/mixed_precision.md index 185444ca0e5..91bdcf27318 100644 --- a/docs/source/mixed_precision.md +++ b/docs/source/mixed_precision.md @@ -147,9 +147,9 @@ To get a bf16/fp16 model, users can use the Mixed Precision API as follows. 
from neural_compressor import mix_precision from neural_compressor.config import MixedPrecisionConfig -conf = MixedPrecisionConfig() # default precision is bf16 +conf = MixedPrecisionConfig() # default precision is bf16 converted_model = mix_precision.fit(model, conf=conf) -converted_model.save('./path/to/save/') +converted_model.save("./path/to/save/") ``` - FP16: @@ -159,11 +159,12 @@ from neural_compressor import mix_precision from neural_compressor.config import MixedPrecisionConfig conf = MixedPrecisionConfig( - backend='onnxrt_cuda_ep', - device='gpu', - precisions='fp16') + backend="onnxrt_cuda_ep", + device="gpu", + precisions="fp16", +) converted_model = mix_precision.fit(model, conf=conf) -converted_model.save('./path/to/save/') +converted_model.save("./path/to/save/") ``` ## Examples diff --git a/docs/source/model.md b/docs/source/model.md index 479a33614b0..b9eb3bfd9c3 100644 --- a/docs/source/model.md +++ b/docs/source/model.md @@ -111,6 +111,7 @@ Users can create, use, and save models in the following manners: ```python from neural_compressor.model import Model + inc_model = Model(input_model) ``` @@ -121,6 +122,6 @@ from neural_compressor import quantization from neural_compressor.config import PostTrainingQuantConfig conf = PostTrainingQuantConfig() -q_model = quantization.fit(model = inc_model, conf=conf) +q_model = quantization.fit(model=inc_model, conf=conf) q_model.save("saved_result") ``` diff --git a/docs/source/objective.md b/docs/source/objective.md index 4ede1540955..34fc7c577ac 100644 --- a/docs/source/objective.md +++ b/docs/source/objective.md @@ -53,8 +53,8 @@ Users can specify a built-in objective in `neural_compressor.config.TuningCriter ```python from neural_compressor.config import TuningCriterion -tuning_criterion = TuningCriterion(objective='accuracy') +tuning_criterion = TuningCriterion(objective="accuracy") ``` ### Config Multiple Objectives @@ -63,7 +63,8 @@ Users can specify built-in multiple objectives in `neural_compressor.config.Tuni ```python from neural_compressor.config import TuningCriterion -tuning_criterion = TuningCriterion(objective=['performance', 'accuracy']) + +tuning_criterion = TuningCriterion(objective=["performance", "accuracy"]) ``` ## Example diff --git a/docs/source/orchestration.md b/docs/source/orchestration.md index 3942fee2f37..2128e28660e 100644 --- a/docs/source/orchestration.md +++ b/docs/source/orchestration.md @@ -83,7 +83,7 @@ train_loop: compression_manager.on_step_end() compression_manager.on_epoch_end() compression_manager.on_train_end() - + model.save('./path/to/save') ``` diff --git a/docs/source/pruning.md b/docs/source/pruning.md index aa9cc4cdf2f..7d6c7117f5d 100644 --- a/docs/source/pruning.md +++ b/docs/source/pruning.md @@ -266,35 +266,35 @@ The following section exemplifies how to use hooks in user pass-in training func - Step 1: Define a dict-like configuration in your training codes. Usually only 5-7 configuration items need to be identified. For customized pruning, a configuration template is shown below: ```python - configs = [ - { ## Example of a regular configuration - "op_names": ['layer1.*'], # A list of modules that would be pruned. All linear/conv layers will be hooked when op_names is not explicitly defined. - "start_step": 1, # Step at which to begin pruning, if a gradient-based criterion is used (e.g., snip-momentum), start_step should be equal to or greater than 1. - "end_step": 10000, # Step at which to end pruning, for one-shot pruning start_step = end_step. 
- "excluded_op_names": ['.*embeddings*'], # A list of modules that would not be pruned. - 'target_sparsity': 0.9, # Target sparsity ratio of modules. - "pruning_frequency": 250, # Frequency of applying pruning, The recommended setting is one fortieth of the pruning steps. - "pattern": "4x1", # Default pruning pattern. - }, # The missing parameter items would be complemented by default settings (i.e. start_step = 1) - - - # It also supports setting multiple pruners, and fine-grained pruning by partition. - { ## pruner2 - 'target_sparsity': 0.9, # Target sparsity ratio of modules. - 'pruning_type': "snip_momentum", # Default pruning type. - 'pattern': "4x1", # Default pruning pattern. - 'op_names': ['layer2.*'], # A list of modules that would be pruned. - 'excluded_op_names': ['layer3.*'], # A list of modules that would not be pruned. - 'start_step': 1, # Step at which to begin pruning. - 'end_step': 10, # Step at which to end pruning. - 'pruning_scope': "global", # Default pruning scope. - 'pruning_frequency': 1, # Frequency of applying pruning. - 'min_sparsity_ratio_per_op': 0.0, # Minimum sparsity ratio of each module. - 'max_sparsity_ratio_per_op': 0.98, # Maximum sparsity ratio of each module. - 'sparsity_decay_type': "exp", # Function applied to control pruning rate. - 'pruning_op_types': ['Conv', 'Linear'], # Types of op that would be pruned. - } - ] + configs = [ + { ## Example of a regular configuration + "op_names": [ + "layer1.*" + ], # A list of modules that would be pruned. All linear/conv layers will be hooked when op_names is not explicitly defined. + "start_step": 1, # Step at which to begin pruning, if a gradient-based criterion is used (e.g., snip-momentum), start_step should be equal to or greater than 1. + "end_step": 10000, # Step at which to end pruning, for one-shot pruning start_step = end_step. + "excluded_op_names": [".*embeddings*"], # A list of modules that would not be pruned. + "target_sparsity": 0.9, # Target sparsity ratio of modules. + "pruning_frequency": 250, # Frequency of applying pruning, The recommended setting is one fortieth of the pruning steps. + "pattern": "4x1", # Default pruning pattern. + }, # The missing parameter items would be complemented by default settings (i.e. start_step = 1) + # It also supports setting multiple pruners, and fine-grained pruning by partition. + { ## pruner2 + "target_sparsity": 0.9, # Target sparsity ratio of modules. + "pruning_type": "snip_momentum", # Default pruning type. + "pattern": "4x1", # Default pruning pattern. + "op_names": ["layer2.*"], # A list of modules that would be pruned. + "excluded_op_names": ["layer3.*"], # A list of modules that would not be pruned. + "start_step": 1, # Step at which to begin pruning. + "end_step": 10, # Step at which to end pruning. + "pruning_scope": "global", # Default pruning scope. + "pruning_frequency": 1, # Frequency of applying pruning. + "min_sparsity_ratio_per_op": 0.0, # Minimum sparsity ratio of each module. + "max_sparsity_ratio_per_op": 0.98, # Maximum sparsity ratio of each module. + "sparsity_decay_type": "exp", # Function applied to control pruning rate. + "pruning_op_types": ["Conv", "Linear"], # Types of op that would be pruned. + }, + ] ``` - Step 2: Enable pruning functionalities @@ -302,49 +302,50 @@ The following section exemplifies how to use hooks in user pass-in training func [**Experimental option** ]Modify model and optimizer. 
```python - from neural_compressor import WeightPruningConfig - from neural_compressor.experimental.compression import prepare_pruning - config = WeightPruningConfig(configs) - prepare_pruning(config, model, optimizer) # modify model and optimizer - for epoch in range(num_train_epochs): - model.train() - for step, batch in enumerate(train_dataloader): - outputs = model(**batch) - loss = outputs.loss - loss.backward() - optimizer.step() - lr_scheduler.step() - model.zero_grad() - ``` + from neural_compressor import WeightPruningConfig + from neural_compressor.experimental.compression import prepare_pruning + + config = WeightPruningConfig(configs) + prepare_pruning(config, model, optimizer) # modify model and optimizer + for epoch in range(num_train_epochs): + model.train() + for step, batch in enumerate(train_dataloader): + outputs = model(**batch) + loss = outputs.loss + loss.backward() + optimizer.step() + lr_scheduler.step() + model.zero_grad() + ``` - [**Stable Option** ]Insert Hook functions in your codes. - - ```python - """ All you need is to insert following API functions to your codes: - on_train_begin() # Setup pruners - on_step_begin() # Prune weights - on_before_optimizer_step() # Do weight regularization - on_after_optimizer_step() # Update weights' criteria, mask weights - on_train_end() # End of pruner, print sparse information - """ - from neural_compressor.training import prepare_compression, WeightPruningConfig - config = WeightPruningConfig(configs) - compression_manager = prepare_compression(model, config) # Define a pruning object. - compression_manager.callbacks.on_train_begin() ## insert hook - for epoch in range(num_train_epochs): - model.train() - for step, batch in enumerate(train_dataloader): - compression_manager.callbacks.on_step_begin(step) - outputs = model(**batch) - loss = outputs.loss - loss.backward() - compression_manager.callbacks.on_before_optimizer_step() - optimizer.step() - compression_manager.callbacks.on_after_optimizer_step() - lr_scheduler.step() - model.zero_grad() - compression_manager.callbacks.on_train_end() + [**Stable Option** ]Insert Hook functions in your codes. + + ```python + """ All you need is to insert following API functions to your codes: + on_train_begin() # Setup pruners + on_step_begin() # Prune weights + on_before_optimizer_step() # Do weight regularization + on_after_optimizer_step() # Update weights' criteria, mask weights + on_train_end() # End of pruner, print sparse information + """ + from neural_compressor.training import prepare_compression, WeightPruningConfig + config = WeightPruningConfig(configs) + compression_manager = prepare_compression(model, config) # Define a pruning object. + compression_manager.callbacks.on_train_begin() ## insert hook + for epoch in range(num_train_epochs): + model.train() + for step, batch in enumerate(train_dataloader): + compression_manager.callbacks.on_step_begin(step) + outputs = model(**batch) + loss = outputs.loss + loss.backward() + compression_manager.callbacks.on_before_optimizer_step() + optimizer.step() + compression_manager.callbacks.on_after_optimizer_step() + lr_scheduler.step() + model.zero_grad() + compression_manager.callbacks.on_train_end() ``` In the case mentioned above, pruning process can be done by pre-defined hooks in Neural Compressor. Users need to place those hooks inside the training function. 
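As a usage sketch of the hook flow documented above, the snippet below wires a single pruner configuration into a toy training loop. The toy model, optimizer, and synthetic batches are illustrative assumptions and are not part of this patch; only the `WeightPruningConfig`, `prepare_compression`, and callback calls follow the documented API.

```python
# Minimal sketch of the stable hook-based pruning flow.
# The toy model, optimizer, and synthetic batches are assumptions for illustration only.
import torch
from torch import nn
from neural_compressor.training import WeightPruningConfig, prepare_compression

model = nn.Sequential(nn.Linear(16, 32), nn.ReLU(), nn.Linear(32, 2))
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

# One pruner config; unspecified fields fall back to defaults as described in Step 1.
configs = [
    {"target_sparsity": 0.8, "pattern": "4x1", "start_step": 1, "end_step": 20, "pruning_frequency": 5},
]
compression_manager = prepare_compression(model, WeightPruningConfig(configs))

compression_manager.callbacks.on_train_begin()
for step in range(1, 21):
    x = torch.randn(8, 16)  # synthetic batch
    y = torch.randint(0, 2, (8,))
    compression_manager.callbacks.on_step_begin(step)
    loss = criterion(model(x), y)
    loss.backward()
    compression_manager.callbacks.on_before_optimizer_step()
    optimizer.step()
    compression_manager.callbacks.on_after_optimizer_step()
    model.zero_grad()
compression_manager.callbacks.on_train_end()
```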
diff --git a/docs/source/pythonic_style.md b/docs/source/pythonic_style.md index 78f4b4abc00..d036e9775d5 100644 --- a/docs/source/pythonic_style.md +++ b/docs/source/pythonic_style.md @@ -51,19 +51,22 @@ from neural_compressor import config ``` * Next, assign values to the attributes of *config.quantization* to use specific configurations, and pass the config to *Quantization* API. ```python -config.quantization.inputs = ['image'] # list of str -config.quantization.outputs = ['out'] # list of str -config.quantization.backend = 'onnxrt_integerops' # support tensorflow, tensorflow_itex, pytorch, pytorch_ipex, pytorch_fx, onnxrt_qlinearops, onnxrt_integerops, onnxrt_qdq, onnxrt_qoperator, mxnet -config.quantization.approach = 'post_training_dynamic_quant' # support post_training_static_quant, post_training_dynamic_quant, quant_aware_training -config.quantization.device = 'cpu' # support cpu, gpu -config.quantization.op_type_dict = {'Conv': {'weight': {'dtype': ['fp32']}, 'activation': {'dtype': ['fp32']}}} # dict -config.quantization.strategy = 'mse' # support basic, mse, bayesian, random, exhaustive -config.quantization.objective = 'accuracy' # support performance, accuracy, modelsize, footprint -config.quantization.timeout = 100 # int, default is 0 -config.quantization.accuracy_criterion.relative = 0.5 # float, default is 0.01 -config.quantization.reduce_range = False # bool. default value depends on hardware, True if cpu supports VNNI instruction, otherwise is False -config.quantization.use_bf16 = False # bool +config.quantization.inputs = ["image"] # list of str +config.quantization.outputs = ["out"] # list of str +config.quantization.backend = "onnxrt_integerops" # support tensorflow, tensorflow_itex, pytorch, pytorch_ipex, pytorch_fx, onnxrt_qlinearops, onnxrt_integerops, onnxrt_qdq, onnxrt_qoperator, mxnet +config.quantization.approach = "post_training_dynamic_quant" # support post_training_static_quant, post_training_dynamic_quant, quant_aware_training +config.quantization.device = "cpu" # support cpu, gpu +config.quantization.op_type_dict = {"Conv": {"weight": {"dtype": ["fp32"]}, "activation": {"dtype": ["fp32"]}}} # dict +config.quantization.strategy = "mse" # support basic, mse, bayesian, random, exhaustive +config.quantization.objective = "accuracy" # support performance, accuracy, modelsize, footprint +config.quantization.timeout = 100 # int, default is 0 +config.quantization.accuracy_criterion.relative = 0.5 # float, default is 0.01 +config.quantization.reduce_range = ( + False # bool. default value depends on hardware, True if cpu supports VNNI instruction, otherwise is False +) +config.quantization.use_bf16 = False # bool from neural_compressor.experimental import Quantization + quantizer = Quantization(config) ``` @@ -72,15 +75,18 @@ To specify distillation configurations, users can assign values to the corresponding attributes. ```python from neural_compressor import config -config.distillation.optimizer = {'SGD': {'learning_rate': 0.0001}} + +config.distillation.optimizer = {"SGD": {"learning_rate": 0.0001}} from neural_compressor.experimental import Distillation + distiller = Distillation(config) ``` #### Pruning To specify pruning configurations, users can assign values to the corresponding attributes. 
```python from neural_compressor import config + config.pruning.weight_compression.initial_sparsity = 0.0 config.pruning.weight_compression.target_sparsity = 0.9 config.pruning.weight_compression.max_sparsity_ratio_per_layer = 0.98 @@ -95,6 +101,7 @@ config.pruning.weight_compression.prune_domain = "global" config.pruning.weight_compression.pattern = "tile_pattern_1x1" from neural_compressor.experimental import Pruning + prune = Pruning(config) ``` #### NAS @@ -103,8 +110,10 @@ corresponding attributes. ```python from neural_compressor import config -config.nas.approach = 'dynas' + +config.nas.approach = "dynas" from neural_compressor.experimental import NAS + nas = NAS(config) ``` @@ -114,6 +123,7 @@ To specify benchmark configurations, users can assign values to the corresponding attributes. ```python from neural_compressor import config + config.benchmark.warmup = 10 config.benchmark.iteration = 10 config.benchmark.cores_per_instance = 10 @@ -122,6 +132,7 @@ config.benchmark.inter_num_of_threads = 10 config.benchmark.intra_num_of_threads = 10 from neural_compressor.experimental import Benchmark + benchmark = Benchmark(config) ``` ### Pythonic API for Framework Configurations @@ -130,7 +141,6 @@ capabilities as in YAML files. Users can specify a framework's (eg. ONNX Runtime assigning values to corresponding attributes. ```python -config.onnxruntime.precisions = ['int8', 'uint8'] -config.onnxruntime.graph_optimization_level = 'DISABLE_ALL' # only onnxruntime has graph_optimization_level attribute +config.onnxruntime.precisions = ["int8", "uint8"] +config.onnxruntime.graph_optimization_level = "DISABLE_ALL" # only onnxruntime has graph_optimization_level attribute ``` - diff --git a/docs/source/quantization.md b/docs/source/quantization.md index a9c770e729b..ef7ccfe8662 100644 --- a/docs/source/quantization.md +++ b/docs/source/quantization.md @@ -223,20 +223,26 @@ This means user could leverage Intel(R) Neural Compressor to directly generate a model = ResNet50() val_dataset = ... val_dataloader = torch.utils.data.Dataloader( - val_dataset, - batch_size=args.batch_size, shuffle=False, - num_workers=args.workers, ping_memory=True) + val_dataset, + batch_size=args.batch_size, + shuffle=False, + num_workers=args.workers, + ping_memory=True, +) # Quantization code from neural_compressor import quantization from neural_compressor.config import PostTrainingQuantConfig -conf = PostTrainingQuantConfig() # default approach is "auto", you can set "dynamic":PostTrainingQuantConfig(approach="dynamic") -q_model = quantization.fit(model=model, - conf=conf, - calib_dataloader=val_dataloader) -q_model.save('./output') - +conf = ( + PostTrainingQuantConfig() +) # default approach is "auto", you can set "dynamic":PostTrainingQuantConfig(approach="dynamic") +q_model = quantization.fit( + model=model, + conf=conf, + calib_dataloader=val_dataloader, +) +q_model.save("./output") ``` 2. With Accuracy Aware Tuning @@ -246,40 +252,50 @@ This means user could leverage the advance feature of Intel(R) Neural Compressor ``` python # main.py + # Original code def validate(val_loader, model, criterion, args): ... return top1.avg + model = ResNet50() val_dataset = ... 
val_dataloader = torch.utils.data.Dataloader( - val_dataset, - batch_size=args.batch_size, shuffle=False, - num_workers=args.workers, ping_memory=True) + val_dataset, + batch_size=args.batch_size, + shuffle=False, + num_workers=args.workers, + ping_memory=True, +) # Quantization code from neural_compressor import quantization from neural_compressor.config import PostTrainingQuantConfig conf = PostTrainingQuantConfig() -q_model = quantization.fit(model=model, - conf=conf, - calib_dataloader=val_dataloader, - eval_func=validate) -q_model.save('./output') +q_model = quantization.fit( + model=model, + conf=conf, + calib_dataloader=val_dataloader, + eval_func=validate, +) +q_model.save("./output") ``` or ```python from neural_compressor.metric import METRICS -metrics = METRICS('pytorch') -top1 = metrics['topk']() -q_model = quantization.fit(model=model, - conf=conf, - calib_dataloader=val_dataloader, - eval_dataloader=val_dataloader, - eval_metric=top1) + +metrics = METRICS("pytorch") +top1 = metrics["topk"]() +q_model = quantization.fit( + model=model, + conf=conf, + calib_dataloader=val_dataloader, + eval_dataloader=val_dataloader, + eval_metric=top1, +) ``` ### Quantization Aware Training @@ -293,25 +309,30 @@ This method only requires the user to call the callback function during the trai model = ResNet50() train_dataset = ... train_dataloader = torch.utils.data.Dataloader( - train_dataset, - batch_size=args.batch_size, shuffle=True, - num_workers=args.workers, ping_memory=True) + train_dataset, + batch_size=args.batch_size, + shuffle=True, + num_workers=args.workers, + ping_memory=True, +) criterion = ... + # Quantization code def train_func(model): ... + from neural_compressor import QuantizationAwareTrainingConfig from neural_compressor.training import prepare_compression + conf = QuantizationAwareTrainingConfig() compression_manager = prepare_compression(model, conf) compression_manager.callbacks.on_train_begin() model = compression_manager.model train_func(model) compression_manager.callbacks.on_train_end() -compression_manager.save('./output') - +compression_manager.save("./output") ``` 2. With Accuracy Aware Tuning @@ -324,26 +345,33 @@ This method requires the user to provide training function and evaluation functi model = ResNet50() val_dataset = ... val_dataloader = torch.utils.data.Dataloader( - val_dataset, - batch_size=args.batch_size, shuffle=False, - num_workers=args.workers, ping_memory=True) + val_dataset, + batch_size=args.batch_size, + shuffle=False, + num_workers=args.workers, + ping_memory=True, +) criterion = ... + def validate(val_loader, model, criterion, args): ... return top1.avg + # Quantization code def train_func(model): ... 
return model # user should return a best performance model here + from neural_compressor import QuantizationAwareTrainingConfig from neural_compressor.training import prepare_compression, fit + conf = QuantizationAwareTrainingConfig() compression_manager = prepare_compression(model, conf) q_model = fit(compression_manager=compression_manager, train_func=train_func, eval_func=validate) -compression_manager.save('./output') +compression_manager.save("./output") ``` ### Specify Quantization Rules @@ -354,42 +382,32 @@ Intel(R) Neural Compressor support specify quantization rules by operator name o op_name_dict = { "layer1.0.conv1": { "activation": { - "dtype": ["fp32"] + "dtype": ["fp32"], }, "weight": { - "dtype": ["fp32"] - } + "dtype": ["fp32"], + }, }, "layer2.0.conv1": { "activation": { "dtype": ["uint8"], "algorithm": ["minmax"], "granularity": ["per_tensor"], - "scheme": ["sym"] + "scheme": ["sym"], }, "weight": { "dtype": ["int8"], "algorithm": ["minmax"], "granularity": ["per_channel"], - "scheme": ["sym"] - } + "scheme": ["sym"], + }, }, } conf = PostTrainingQuantConfig(op_name_dict=op_name_dict) - ``` 2. Example of `op_type_dict` ```python -op_type_dict = { - 'Conv': { - 'weight': { - 'dtype': ['fp32'] - }, - 'activation': { - 'dtype': ['fp32'] - } - } -} +op_type_dict = {"Conv": {"weight": {"dtype": ["fp32"]}, "activation": {"dtype": ["fp32"]}}} conf = PostTrainingQuantConfig(op_type_dict=op_type_dict) ``` @@ -416,12 +434,11 @@ Example of recipe: recipes = { "smooth_quant": True, "smooth_quant_args": { - "alpha": 0.5 # default value is 0.5 - }, + "alpha": 0.5, + }, # default value is 0.5 "fast_bias_correction": False, } conf = PostTrainingQuantConfig(recipes=recipes) - ``` ### Specify Quantization Backend and Device diff --git a/docs/source/quantization_mixed_precision.md b/docs/source/quantization_mixed_precision.md index 1bd66a8cbee..d3372721b52 100644 --- a/docs/source/quantization_mixed_precision.md +++ b/docs/source/quantization_mixed_precision.md @@ -6,11 +6,13 @@ BF16 conversion during quantization is default ON. 
To force disable it, users ne from neural_compressor.config import PostPostTrainingQuantConfig from neural_compressor import quantization -conf = PostTrainingQuantConfig(excluded_precisions=['bf16']) -q_model = quantization.fit(model_origin, - conf, - calib_dataloader=dataloader, - calib_func=eval_func) +conf = PostTrainingQuantConfig(excluded_precisions=["bf16"]) +q_model = quantization.fit( + model_origin, + conf, + calib_dataloader=dataloader, + calib_func=eval_func, +) ``` ### Tensorflow diff --git a/docs/source/quantization_weight_only.md b/docs/source/quantization_weight_only.md index 1c58769de0c..23590da044d 100644 --- a/docs/source/quantization_weight_only.md +++ b/docs/source/quantization_weight_only.md @@ -85,14 +85,14 @@ To support low memory inference, Neural Compressor implemented WeightOnlyLinear, ### **User code**: ```python conf = PostTrainingQuantConfig( - approach='weight_only', + approach="weight_only", op_type_dict={ - '.*':{ # re.match + ".*": { # re.match "weight": { - 'bits': 8, # 1-8 bit - 'group_size': -1, # -1 (per-channel) - 'scheme': 'sym', - 'algorithm': 'RTN', + "bits": 8, # 1-8 bit + "group_size": -1, # -1 (per-channel) + "scheme": "sym", + "algorithm": "RTN", }, }, }, @@ -102,13 +102,13 @@ conf = PostTrainingQuantConfig( }, ) q_model = quantization.fit(model, conf, eval_func=eval_func) -q_model.save('saved_results') +q_model.save("saved_results") compressed_model = q_model.export_compressed_model( compression_dtype=torch.int32, compression_dim=1, scale_dtype=torch.float16, ) -torch.save(compressed_model.state_dict(), 'compressed_model.pt') +torch.save(compressed_model.state_dict(), "compressed_model.pt") ``` The saved_results folder contains two files: `best_model.pt` and `qconfig.json`, and the generated q_model is a fake quantized model. diff --git a/docs/source/sigopt_strategy.md b/docs/source/sigopt_strategy.md index 699f6ace58d..6e142465ee9 100644 --- a/docs/source/sigopt_strategy.md +++ b/docs/source/sigopt_strategy.md @@ -81,4 +81,3 @@ conf = PostTrainingQuantConfig( |--------|-------------|-------------|----------------| | basic | 0.8299 | 0.8294 | 85.0837 | | sigopt | 0.8299 | 0.8291 | 83.4469 | - diff --git a/docs/source/smooth_quant.md b/docs/source/smooth_quant.md index 9905e04c1c8..be14f002f72 100644 --- a/docs/source/smooth_quant.md +++ b/docs/source/smooth_quant.md @@ -38,10 +38,13 @@ Suppose the weight tensor is: ```python import torch + W = torch.Tensor( - [[0.6839, 0.4741, 0.7451], - [0.9301, 0.1742, 0.6835]] - ) + [ + [0.6839, 0.4741, 0.7451], + [0.9301, 0.1742, 0.6835], + ] +) ``` According to the formula (1), we need scale $S$ and zero point $Z$ to calculate the integer matrix. @@ -58,13 +61,13 @@ The per-tensor quantization function is: ```python def quantize(x, num_bits=8): - q_min, q_max = 0, 2. ** num_bits - 1. - scale = (torch.max(x) - torch.min(x)) / (2 ** num_bits - 1) + q_min, q_max = 0, 2.0**num_bits - 1.0 + scale = (torch.max(x) - torch.min(x)) / (2**num_bits - 1) scale = torch.clip(scale, min=1e-5) zp = torch.round(0 - (torch.min(x)) / scale) q_x = x / scale + zp q_x.clamp_(q_min, q_max).round_() - print(f'scale = {scale}, zp = {zp}') + print(f"scale = {scale}, zp = {zp}") return q_x, scale, zp ``` @@ -111,15 +114,16 @@ Similarly, the example of per-channel quantization is as follows: ```python def quantize_per_channel(x, num_bits=8): - q_min, q_max = 0, 2. ** num_bits - 1. 
+ q_min, q_max = 0, 2.0**num_bits - 1.0 x_tmp = x.detach().reshape(x.shape[0], -1) - scales = x_tmp.max(dim=-1, keepdim=True)[0] / (2 ** num_bits - 1) - zp = torch.round(0 - x_tmp.min(dim=-1, keepdim=True)[0].divide(scales)) + scales = x_tmp.max(dim=-1, keepdim=True)[0] / (2**num_bits - 1) + zp = torch.round(0 - x_tmp.min(dim=-1, keepdim=True)[0].divide(scales)) q_x = x_tmp.divide(scales) + zp q_x.clamp_(q_min, q_max).round_() - print(f'scale = {scales}, \n zp = {zp}') + print(f"scale = {scales}, \n zp = {zp}") return q_x, scale, zp + def dequantize_per_channel(q_x, scales, zp): print(q_x, scales, zp) print(scales * (q_x - zp)) @@ -160,12 +164,13 @@ Using per-tensor scale quantization to show the process. ```python def quantize_per_tensor_absmax(x, n_bits=8): scales = x.abs().max() - q_max = 2**(n_bits-1)-1 + q_max = 2 ** (n_bits - 1) - 1 scales.clamp_(min=1e-5).div_(q_max) q_x = x / scales q_x = q_x.clamp_(-q_max, q_max).round_() return q_x, scales + def dequantize(q_x, scale): return scale * q_x ``` @@ -295,8 +300,9 @@ In our experiments, an $\alpha$ range of [0.3, 0.7] with a step_size of 0.05 is ```python from neural_compressor.adaptor.torch_utils.smooth_quant import TorchSmoothQuant + sq = TorchSmoothQuant(model, dataloader) -sq.transform(alpha) ##alpha could be a float or a string 'auto' +sq.transform(alpha) ##alpha could be a float or a string 'auto' ``` please note that we rely on torch jit to analyze the model. If you are using huggingface model, you could set torchscript to True when loading the model or set the return_dict to False" diff --git a/docs/source/tensorboard.md b/docs/source/tensorboard.md index 4486b8a4011..670f7930417 100644 --- a/docs/source/tensorboard.md +++ b/docs/source/tensorboard.md @@ -91,7 +91,7 @@ def _post_eval_hook(self, model, **args): ...... op_name = key.strip(".activation_post_process") summary[op_name + ".output"] = observer_dict[key].get_tensor_value() - + for iter in summary[op_name + ".output"]: #Record output tensor, for fused op only record the parent op output ...... @@ -113,7 +113,7 @@ def _post_eval_hook(self, model, **args): torch.dequantize(state_dict[key])) else: writer.add_histogram(op + "/fp32", state_dict[key]) - + ``` diff --git a/docs/source/transform.md b/docs/source/transform.md index 180238a4b87..ce1fa55bb43 100644 --- a/docs/source/transform.md +++ b/docs/source/transform.md @@ -115,4 +115,3 @@ Neural Compressor supports built-in preprocessing methods on different framework | ResizeCropImagenet(height, width, random_crop, resize_side, random_flip_left_right, mean_value, scale) | **height** (int): Height of the result
    **width** (int): Width of the result<br>    **random_crop** (bool, default=False): whether to random crop<br>    **resize_side** (int, default=256): desired shape after resize operation<br>    **random_flip_left_right** (bool, default=False): whether to random flip left and right<br>    **mean_value** (list, default=[0.0,0.0,0.0]): mean for each channel<br>    **scale** (float, default=1.0): std value | Combination of a series of transforms which is applicable to images in Imagenet | ResizeCropImagenet:<br>       height: 224<br>       width: 224<br>       random_crop: False<br>       resize_side: 256<br>       random_flip_left_right: False<br>       mean_value: [123.68, 116.78, 103.94]<br>       scale: 0.017 |
| Cast(dtype) | **dtype** (str, default ='float32'): The target data type | Convert image to given dtype | Cast:<br>       dtype: float32 |
| ResizeWithRatio(min_dim, max_dim, padding) | **min_dim** (int, default=800): Resizes the image such that its smaller dimension == min_dim<br>    **max_dim** (int, default=1365): Ensures that the image longest side does not exceed this value<br>    **padding** (bool, default=False): If true, pads image with zeros so its size is max_dim x max_dim | Resize image with aspect ratio and pad it to max shape(optional). If the image is padded, the label will be processed at the same time. The input image should be np.array. | ResizeWithRatio:<br>       min_dim: 800<br>       max_dim: 1365<br>
       padding: True | - diff --git a/docs/source/tuning_strategies.md b/docs/source/tuning_strategies.md index 860749fc1d5..31062a3e319 100644 --- a/docs/source/tuning_strategies.md +++ b/docs/source/tuning_strategies.md @@ -67,11 +67,11 @@ User can control the tuning process by setting the exit policy by specifying the ```python from neural_compressor.config import TuningCriterion -tuning_criterion=TuningCriterion( - timeout=0, # optional. tuning timeout (seconds). When set to 0, early stopping is enabled. - max_trials=100, # optional. max tuning times. combined with the `timeout` field to decide when to exit tuning. - strategy="basic", # optional. name of the tuning strategy. - strategy_kwargs=None, # optional. see concrete tuning strategy for available settings. +tuning_criterion = TuningCriterion( + timeout=0, # optional. tuning timeout (seconds). When set to 0, early stopping is enabled. + max_trials=100, # optional. max tuning times. combined with the `timeout` field to decide when to exit tuning. + strategy="basic", # optional. name of the tuning strategy. + strategy_kwargs=None, # optional. see concrete tuning strategy for available settings. ) ``` @@ -83,8 +83,8 @@ User can set the accuracy criteria by specifying the `higher_is_better`, `criter from neural_compressor.config import AccuracyCriterion accuracy_criterion = AccuracyCriterion( - higher_is_better=True, # optional. - criterion='relative', # optional. Available values are 'relative' and 'absolute'. + higher_is_better=True, # optional. + criterion="relative", # optional. Available values are 'relative' and 'absolute'. tolerable_loss=0.01, # optional. ) ``` @@ -228,11 +228,8 @@ from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion conf = PostTrainingQuantConfig( quant_level=1, - tuning_criterion=TuningCriterion( - strategy="basic" # optional. name of tuning strategy. - ), + tuning_criterion=TuningCriterion(strategy="basic"), # optional. name of tuning strategy. ) - ``` ### MSE @@ -255,11 +252,8 @@ from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion conf = PostTrainingQuantConfig( quant_level=1, - tuning_criterion=TuningCriterion( - strategy="mse" - ), + tuning_criterion=TuningCriterion(strategy="mse"), ) - ``` ### MSE_V2 @@ -278,7 +272,7 @@ conf = PostTrainingQuantConfig( quant_level=1, tuning_criterion=TuningCriterion( strategy="mse_v2", - strategy_kwargs={"confidence_batches": 2} # optional. the number of batches to score the op impact. + strategy_kwargs={"confidence_batches": 2}, # optional. the number of batches to score the op impact. ), ) ``` @@ -303,7 +297,7 @@ conf = PostTrainingQuantConfig( quant_level=1, tuning_criterion=TuningCriterion( strategy="hawq_v2", - strategy_kwargs={"hawq_v2_loss": model_loss} # required. the loss function for calculating the hessian trace. + strategy_kwargs={"hawq_v2_loss": model_loss}, # required. the loss function for calculating the hessian trace. ), ) ``` @@ -337,10 +331,9 @@ conf = PostTrainingQuantConfig( tuning_criterion=TuningCriterion( timeout=0, # optional. tuning timeout (seconds). When set to 0, early stopping is enabled. max_trials=100, # optional. max tuning times. combined with the `timeout` field to decide when to exit tuning. 
- strategy="bayesian" + strategy="bayesian", ), ) - ``` ### Exhaustive @@ -471,9 +464,7 @@ from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion conf = PostTrainingQuantConfig( quant_level=1, - tuning_criterion=TuningCriterion( - strategy="tpe" - ) + tuning_criterion=TuningCriterion(strategy="tpe"), ) ``` @@ -514,17 +505,15 @@ For example, user can implement an `Abc` strategy like below: ```python @strategy_registry class AbcTuneStrategy(TuneStrategy): - def __init__(self, model, conf, q_dataloader, q_func=None, - eval_dataloader=None, eval_func=None, dicts=None): + def __init__(self, model, conf, q_dataloader, q_func=None, eval_dataloader=None, eval_func=None, dicts=None): ... def next_tune_cfg(self): # generate the next tuning config ... - + def traverse(self): for tune_cfg in self.next_tune_cfg(): # do quantization ... - ``` diff --git a/docs/source/user_yaml.md b/docs/source/user_yaml.md index f13a99ccaf6..14d4157ab78 100644 --- a/docs/source/user_yaml.md +++ b/docs/source/user_yaml.md @@ -164,4 +164,3 @@ tuning: timeout: 0 # tuning timeout (seconds), 0 means early stop random_seed: 9527 # random seed ``` - diff --git a/docs/source/validated_model_list.md b/docs/source/validated_model_list.md index 9895339671f..15c852d2180 100644 --- a/docs/source/validated_model_list.md +++ b/docs/source/validated_model_list.md @@ -3003,4 +3003,3 @@ For more complete information about performance and benchmark results, visit www - diff --git a/neural_coder/__init__.py b/neural_coder/__init__.py index 0119b529198..7bf18cd4956 100644 --- a/neural_coder/__init__.py +++ b/neural_coder/__init__.py @@ -15,5 +15,6 @@ from .interface import enable from .interface import bench from .interface import superbench + # from .interface import superreport from .interface import auto_quant diff --git a/neural_coder/backends/.yaml b/neural_coder/backends/.yaml index 6fd135a50aa..1e3b1fa1501 100644 --- a/neural_coder/backends/.yaml +++ b/neural_coder/backends/.yaml @@ -48,4 +48,4 @@ transformation: - above: - - - \ No newline at end of file + - diff --git a/neural_coder/backends/nano_bf16.yaml b/neural_coder/backends/nano_bf16.yaml index 2fe2bf7a4b0..afecf6908f1 100644 --- a/neural_coder/backends/nano_bf16.yaml +++ b/neural_coder/backends/nano_bf16.yaml @@ -24,4 +24,4 @@ transformation: - 1 order: - below: - above: \ No newline at end of file + above: diff --git a/neural_coder/backends/nano_bf16_channels_last.yaml b/neural_coder/backends/nano_bf16_channels_last.yaml index 47127983f3b..d0f3987ef29 100644 --- a/neural_coder/backends/nano_bf16_channels_last.yaml +++ b/neural_coder/backends/nano_bf16_channels_last.yaml @@ -24,4 +24,4 @@ transformation: - 1 order: - below: - above: \ No newline at end of file + above: diff --git a/neural_coder/backends/nano_bf16_ipex.yaml b/neural_coder/backends/nano_bf16_ipex.yaml index a79635d55c8..6e8db6f8719 100644 --- a/neural_coder/backends/nano_bf16_ipex.yaml +++ b/neural_coder/backends/nano_bf16_ipex.yaml @@ -24,4 +24,4 @@ transformation: - 1 order: - below: - above: \ No newline at end of file + above: diff --git a/neural_coder/backends/nano_bf16_ipex_channels_last.yaml b/neural_coder/backends/nano_bf16_ipex_channels_last.yaml index c8ecf0917d0..3b99ef12f35 100644 --- a/neural_coder/backends/nano_bf16_ipex_channels_last.yaml +++ b/neural_coder/backends/nano_bf16_ipex_channels_last.yaml @@ -24,4 +24,4 @@ transformation: - 1 order: - below: - above: \ No newline at end of file + above: diff --git a/neural_coder/backends/nano_fp32_channels_last.yaml 
b/neural_coder/backends/nano_fp32_channels_last.yaml index f6027539929..9516ba4d50d 100644 --- a/neural_coder/backends/nano_fp32_channels_last.yaml +++ b/neural_coder/backends/nano_fp32_channels_last.yaml @@ -24,4 +24,4 @@ transformation: - 1 order: - below: - above: \ No newline at end of file + above: diff --git a/neural_coder/backends/nano_fp32_ipex.yaml b/neural_coder/backends/nano_fp32_ipex.yaml index a8c69963c56..c6fc4329b38 100644 --- a/neural_coder/backends/nano_fp32_ipex.yaml +++ b/neural_coder/backends/nano_fp32_ipex.yaml @@ -24,4 +24,4 @@ transformation: - 1 order: - below: - above: \ No newline at end of file + above: diff --git a/neural_coder/backends/nano_fp32_ipex_channels_last.yaml b/neural_coder/backends/nano_fp32_ipex_channels_last.yaml index 53094440d9e..aea74db737a 100644 --- a/neural_coder/backends/nano_fp32_ipex_channels_last.yaml +++ b/neural_coder/backends/nano_fp32_ipex_channels_last.yaml @@ -24,4 +24,4 @@ transformation: - 1 order: - below: - above: \ No newline at end of file + above: diff --git a/neural_coder/backends/nano_gpu_to_cpu.yaml b/neural_coder/backends/nano_gpu_to_cpu.yaml index ae9ebd842ec..426f72ce980 100644 --- a/neural_coder/backends/nano_gpu_to_cpu.yaml +++ b/neural_coder/backends/nano_gpu_to_cpu.yaml @@ -21,4 +21,4 @@ transformation: [+] patch_torch() order: - below: - above: \ No newline at end of file + above: diff --git a/neural_coder/backends/nano_int8.yaml b/neural_coder/backends/nano_int8.yaml index c15cbbe51a4..cb846256435 100644 --- a/neural_coder/backends/nano_int8.yaml +++ b/neural_coder/backends/nano_int8.yaml @@ -24,4 +24,4 @@ transformation: - 1 order: - below: - above: \ No newline at end of file + above: diff --git a/neural_coder/backends/nano_jit_bf16.yaml b/neural_coder/backends/nano_jit_bf16.yaml index 275eb9d0225..122d93ca717 100644 --- a/neural_coder/backends/nano_jit_bf16.yaml +++ b/neural_coder/backends/nano_jit_bf16.yaml @@ -24,4 +24,4 @@ transformation: - 1 order: - below: - above: \ No newline at end of file + above: diff --git a/neural_coder/backends/nano_jit_bf16_channels_last.yaml b/neural_coder/backends/nano_jit_bf16_channels_last.yaml index ffa22db618c..cf7e1437c4a 100644 --- a/neural_coder/backends/nano_jit_bf16_channels_last.yaml +++ b/neural_coder/backends/nano_jit_bf16_channels_last.yaml @@ -24,4 +24,4 @@ transformation: - 1 order: - below: - above: \ No newline at end of file + above: diff --git a/neural_coder/backends/nano_jit_bf16_ipex.yaml b/neural_coder/backends/nano_jit_bf16_ipex.yaml index 21397985fc5..1a237c6edf8 100644 --- a/neural_coder/backends/nano_jit_bf16_ipex.yaml +++ b/neural_coder/backends/nano_jit_bf16_ipex.yaml @@ -24,4 +24,4 @@ transformation: - 1 order: - below: - above: \ No newline at end of file + above: diff --git a/neural_coder/backends/nano_jit_bf16_ipex_channels_last.yaml b/neural_coder/backends/nano_jit_bf16_ipex_channels_last.yaml index b41fbf71578..603db6942f8 100644 --- a/neural_coder/backends/nano_jit_bf16_ipex_channels_last.yaml +++ b/neural_coder/backends/nano_jit_bf16_ipex_channels_last.yaml @@ -24,4 +24,4 @@ transformation: - 1 order: - below: - above: \ No newline at end of file + above: diff --git a/neural_coder/backends/nano_jit_fp32_channels_last.yaml b/neural_coder/backends/nano_jit_fp32_channels_last.yaml index bdc9a3154e4..c30a1767175 100644 --- a/neural_coder/backends/nano_jit_fp32_channels_last.yaml +++ b/neural_coder/backends/nano_jit_fp32_channels_last.yaml @@ -24,4 +24,4 @@ transformation: - 1 order: - below: - above: \ No newline at end of file + above: diff --git 
a/neural_coder/backends/nano_jit_fp32_ipex_channels_last.yaml b/neural_coder/backends/nano_jit_fp32_ipex_channels_last.yaml index d434e58c886..54514000ac5 100644 --- a/neural_coder/backends/nano_jit_fp32_ipex_channels_last.yaml +++ b/neural_coder/backends/nano_jit_fp32_ipex_channels_last.yaml @@ -24,4 +24,4 @@ transformation: - 1 order: - below: - above: \ No newline at end of file + above: diff --git a/neural_coder/backends/nano_onnxruntime_fp32.yaml b/neural_coder/backends/nano_onnxruntime_fp32.yaml index 820ad2441c2..c29e3410bc4 100644 --- a/neural_coder/backends/nano_onnxruntime_fp32.yaml +++ b/neural_coder/backends/nano_onnxruntime_fp32.yaml @@ -24,4 +24,4 @@ transformation: - 1 order: - below: - above: \ No newline at end of file + above: diff --git a/neural_coder/backends/nano_onnxruntime_int8_qlinear.yaml b/neural_coder/backends/nano_onnxruntime_int8_qlinear.yaml index 7fd4e09989b..bb5b35557da 100644 --- a/neural_coder/backends/nano_onnxruntime_int8_qlinear.yaml +++ b/neural_coder/backends/nano_onnxruntime_int8_qlinear.yaml @@ -24,4 +24,4 @@ transformation: - 1 order: - below: - above: \ No newline at end of file + above: diff --git a/neural_coder/backends/nano_openvino_fp32.yaml b/neural_coder/backends/nano_openvino_fp32.yaml index d6b88ecf712..6b9324cec39 100644 --- a/neural_coder/backends/nano_openvino_fp32.yaml +++ b/neural_coder/backends/nano_openvino_fp32.yaml @@ -24,4 +24,4 @@ transformation: - 1 order: - below: - above: \ No newline at end of file + above: diff --git a/neural_coder/backends/nano_openvino_int8.yaml b/neural_coder/backends/nano_openvino_int8.yaml index b5c8ae7d045..9f1b70814e7 100644 --- a/neural_coder/backends/nano_openvino_int8.yaml +++ b/neural_coder/backends/nano_openvino_int8.yaml @@ -24,4 +24,4 @@ transformation: - 1 order: - below: - above: \ No newline at end of file + above: diff --git a/neural_coder/backends/pytorch_aliblade.yaml b/neural_coder/backends/pytorch_aliblade.yaml index 28d6c3df150..ba29ac86548 100644 --- a/neural_coder/backends/pytorch_aliblade.yaml +++ b/neural_coder/backends/pytorch_aliblade.yaml @@ -22,4 +22,4 @@ transformation: [+] MODEL_NAME = torch_blade.optimize(MODEL_NAME, allow_tracing=True, model_inputs=tuple(INPUT_NAME)) order: - below: - above: \ No newline at end of file + above: diff --git a/neural_coder/backends/pytorch_inc_static_quant_fx.yaml b/neural_coder/backends/pytorch_inc_static_quant_fx.yaml index 6fd7a5a3b0a..f77801ebc51 100644 --- a/neural_coder/backends/pytorch_inc_static_quant_fx.yaml +++ b/neural_coder/backends/pytorch_inc_static_quant_fx.yaml @@ -35,7 +35,7 @@ transformation: [+] MODEL_NAME = torch.jit.freeze(MODEL_NAME) [+] except: [+] pass - + order: - below: above: diff --git a/neural_coder/backends/pytorch_inc_static_quant_fx_fp8.yaml b/neural_coder/backends/pytorch_inc_static_quant_fx_fp8.yaml index 783170f8026..419c5256588 100644 --- a/neural_coder/backends/pytorch_inc_static_quant_fx_fp8.yaml +++ b/neural_coder/backends/pytorch_inc_static_quant_fx_fp8.yaml @@ -35,7 +35,7 @@ transformation: [+] MODEL_NAME = torch.jit.freeze(MODEL_NAME) [+] except: [+] pass - + order: - below: above: diff --git a/neural_coder/backends/pytorch_jit_script.yaml b/neural_coder/backends/pytorch_jit_script.yaml index 31b39647838..014cc5177f1 100644 --- a/neural_coder/backends/pytorch_jit_script.yaml +++ b/neural_coder/backends/pytorch_jit_script.yaml @@ -33,4 +33,4 @@ transformation: - pytorch_ipex_int8_static_quant - pytorch_ipex_int8_dynamic_quant - pytorch_channels_last - above: \ No newline at end of file + above: diff 
--git a/neural_coder/backends/pytorch_jit_script_ofi.yaml b/neural_coder/backends/pytorch_jit_script_ofi.yaml index 53c00d6445f..2e9c5868d86 100644 --- a/neural_coder/backends/pytorch_jit_script_ofi.yaml +++ b/neural_coder/backends/pytorch_jit_script_ofi.yaml @@ -32,4 +32,4 @@ transformation: - pytorch_ipex_int8_static_quant - pytorch_ipex_int8_dynamic_quant - pytorch_channels_last - above: \ No newline at end of file + above: diff --git a/neural_coder/backends/pytorch_mixed_precision_cpu.yaml b/neural_coder/backends/pytorch_mixed_precision_cpu.yaml index 2239f8e304c..ad4337f5e57 100644 --- a/neural_coder/backends/pytorch_mixed_precision_cpu.yaml +++ b/neural_coder/backends/pytorch_mixed_precision_cpu.yaml @@ -25,4 +25,4 @@ transformation: - below: above: - below: - above: \ No newline at end of file + above: diff --git a/neural_coder/coders/autoinc/autoinc_harness.py b/neural_coder/coders/autoinc/autoinc_harness.py index 307151dc710..df68b91706b 100644 --- a/neural_coder/coders/autoinc/autoinc_harness.py +++ b/neural_coder/coders/autoinc/autoinc_harness.py @@ -16,20 +16,21 @@ import os import re import sys + import yaml from ... import globals from ...utils.line_operation import ( get_line_indent_level, - is_eval_func_model_name, get_line_left_hand_side, get_line_wo_comment, - single_line_comment_or_empty_line_detection + is_eval_func_model_name, + single_line_comment_or_empty_line_detection, ) -logging.basicConfig(level=globals.logging_level, - format='%(asctime)s %(levelname)s %(message)s', - datefmt='%a, %d %b %Y %H:%M:%S +0000') +logging.basicConfig( + level=globals.logging_level, format="%(asctime)s %(levelname)s %(message)s", datefmt="%a, %d %b %Y %H:%M:%S +0000" +) logger = logging.getLogger(__name__) @@ -39,36 +40,33 @@ def __init__(self, backend): def print_info(self): for i in globals.list_model_def_instance: - logger.debug(f"i.print_info(): {i.print_info()}") + logger.debug(f"i.print_info(): {i.print_info()}") # collect file transformation info and register in globals # (i.e. 
which file to add which lines at which location) def register_transformation(self): - backend_file = open(os.path.dirname(__file__) + - "/../../backends/" + self.backend + ".yaml") + backend_file = open(os.path.dirname(__file__) + "/../../backends/" + self.backend + ".yaml") backend_dict = yaml.load(backend_file, Loader=yaml.BaseLoader) logger.debug(f"backend_dict: {backend_dict}") bk_trans_location = backend_dict["transformation"]["location"] # string bk_trans_content = backend_dict["transformation"]["content"] # string bk_trans_order = backend_dict["transformation"]["order"] # list - + # modular design if globals.use_modular: content = globals.modular_item bk_trans_content = ["[+] " + content.replace("\n", "\n[+] ")[:-5]] - + list_code = [] history = set() for i in globals.list_code_path: - list_code.append(open(i, 'r').read()) + list_code.append(open(i, "r").read()) for loc in bk_trans_location: - # PART 1 - "model_definition_line" if "insert_below_model_definition_line" in loc: - for ins in globals.list_model_def_instance: model_name = ins.model_name - if model_name in history and globals.code_domain == 'torchvision': + if model_name in history and globals.code_domain == "torchvision": continue else: history.add(model_name) @@ -76,7 +74,7 @@ def register_transformation(self): model_def_line_idx = ins.model_def_line_idx file_path_idx = globals.list_code_path.index(file_path) - lines = list_code[file_path_idx].split('\n') + lines = list_code[file_path_idx].split("\n") line_idx = 0 # to check if this model has an inference line is in the file @@ -84,10 +82,11 @@ def register_transformation(self): to_transform = False for i in range(len(lines)): line = lines[i] - if model_name + "(" in line or \ - (model_name + "." in line and line.find(model_name) < line.find(".") and "(" in line): + if model_name + "(" in line or ( + model_name + "." 
in line and line.find(model_name) < line.find(".") and "(" in line + ): to_transform = True - if not to_transform and globals.code_domain == 'onnx': + if not to_transform and globals.code_domain == "onnx": pass elif not to_transform: continue @@ -98,9 +97,9 @@ def register_transformation(self): for i in range(len(lines)): line = lines[i] if not single_line_comment_or_empty_line_detection(line): - if ("DataLoader(" in line and "=" in line and line.find("=") < line.find("DataLoader")) \ - or ("dataloader" in line and "=" in line and \ - line.find("=") > line.find("dataloader")): + if ("DataLoader(" in line and "=" in line and line.find("=") < line.find("DataLoader")) or ( + "dataloader" in line and "=" in line and line.find("=") > line.find("dataloader") + ): dataloader_def_line_indent_level = get_line_indent_level(line) dataloader_name = get_line_left_hand_side(line) dataloader_def_line_idx = i @@ -114,7 +113,7 @@ def register_transformation(self): if not single_line_comment_or_empty_line_detection(line): if is_eval_func and "[coder-enabled]" not in line: inference_line = line - input_name = line[line.find("(")+1:line.find(")")].replace("*","") + input_name = line[line.find("(") + 1 : line.find(")")].replace("*", "") # get "c" in "a = b(**c)" # search input definition in this file (if any) @@ -125,7 +124,7 @@ def register_transformation(self): if input_name in line and "=" in line and line.find("=") > line.find(input_name): input_def_line_indent_level = get_line_indent_level(line) input_def_line_idx = i - + # serach model definition line and its end line index # (only has 1 model definition line, because it's in loop of globals.list_model_def_instance) for i in range(len(lines)): @@ -148,28 +147,32 @@ def register_transformation(self): ### check bk_trans_content_this = bk_trans_content[bk_trans_location.index(loc)] - if file_path_idx == 0 and (globals.code_domain in ['transformers_trainer', 'torchvision', 'onnx']): + if file_path_idx == 0 and (globals.code_domain in ["transformers_trainer", "torchvision", "onnx"]): pass - elif ("INPUT_NAME" in bk_trans_content_this and input_name == "") \ - or ("DATALOADER_NAME" in bk_trans_content_this and dataloader_name == "") \ - or ("INFERENCE_LINE" in bk_trans_content_this and inference_line == ""): - logger.info(f"Skipped due to not having enough information required by " - "the transformation content specified in the config file " - "(e.g. INPUT_NAME, DATALOADER_NAME, INFERENCE_LINE). " - f"File path: {file_path}") + elif ( + ("INPUT_NAME" in bk_trans_content_this and input_name == "") + or ("DATALOADER_NAME" in bk_trans_content_this and dataloader_name == "") + or ("INFERENCE_LINE" in bk_trans_content_this and inference_line == "") + ): + logger.info( + f"Skipped due to not having enough information required by " + "the transformation content specified in the config file " + "(e.g. INPUT_NAME, DATALOADER_NAME, INFERENCE_LINE). 
" + f"File path: {file_path}" + ) continue ### location # search for features to put below them - ''' + """ Example (psuedo-code): model = Net() # jit script begin mark model = torch.jit.script(model) # jit script end mark (feature name + model name to handle multi-model situation) model = ipex.optimize(model, "fp32") # "ipex fp32" must be put below "jit script" - ''' + """ put_below_idx = 0 for i in range(len(lines)): for item in bk_trans_order[0]["below"]: @@ -187,7 +190,7 @@ def register_transformation(self): # location assignment (below model def / dataloader def / input def) torchvision_indent = -1 - if file_path_idx == 0 and globals.code_domain == 'transformers_trainer': + if file_path_idx == 0 and globals.code_domain == "transformers_trainer": for i in range(len(lines)): line = lines[i] if re.findall("trainer = .*Trainer", line): @@ -198,19 +201,26 @@ def register_transformation(self): i_search = 1 while do_search: following_line = lines[i + i_search] - if ")" in following_line and \ - following_line.count(")") > following_line.count("("): + if ")" in following_line and following_line.count(")") > following_line.count( + "(" + ): do_search = False i_search += 1 trans_insert_location = i + i_search trans_insert_location = min(max(trans_insert_location, put_below_idx), put_above_idx) - elif file_path_idx == 0 and globals.code_domain == 'torchvision': + elif file_path_idx == 0 and globals.code_domain == "torchvision": trans_insert_location = 1 for i in range(len(lines)): line = lines[i] - if "val_loader" in line and "aux_val_loader" not in line \ - and ("torch.utils.data.DataLoader" in line \ - or "utils.data.DataLoader" in line or "DataLoader" in line): + if ( + "val_loader" in line + and "aux_val_loader" not in line + and ( + "torch.utils.data.DataLoader" in line + or "utils.data.DataLoader" in line + or "DataLoader" in line + ) + ): torchvision_indent = get_line_indent_level(line) if "(" in line and line.count(")") == line.count("("): trans_insert_location = i + 1 @@ -219,84 +229,107 @@ def register_transformation(self): i_search = 1 while do_search: following_line = lines[i + i_search] - if ")" in following_line and \ - following_line.count(")") > following_line.count("("): + if ")" in following_line and following_line.count(")") > following_line.count( + "(" + ): do_search = False i_search += 1 trans_insert_location = i + i_search trans_insert_location = min(max(trans_insert_location, put_below_idx), put_above_idx) else: if "insert_below_model_definition_line" in loc: - trans_insert_location = \ - min(max(model_definition_end_line_idx, - put_below_idx), put_above_idx) + trans_insert_location = min( + max(model_definition_end_line_idx, put_below_idx), put_above_idx + ) if "insert_below_dataloader_definition_line" in loc: try: dataloader_def_line_idx except: - logger.warning(f"Skipped due to not having dataloader definition required by " - "the transformation content specified in the config file. " - f"File path: {file_path}") + logger.warning( + f"Skipped due to not having dataloader definition required by " + "the transformation content specified in the config file. 
" + f"File path: {file_path}" + ) continue - trans_insert_location = max(trans_insert_location, - min(max(dataloader_def_line_idx + 1, - put_below_idx), put_above_idx)) + trans_insert_location = max( + trans_insert_location, + min(max(dataloader_def_line_idx + 1, put_below_idx), put_above_idx), + ) if "insert_below_input_definition_line" in loc: try: input_def_line_idx except: - logger.warning(f"Skipped due to not having input definition required by " - "the transformation content specified in the config file. " - f"File path: {file_path}") + logger.warning( + f"Skipped due to not having input definition required by " + "the transformation content specified in the config file. " + f"File path: {file_path}" + ) continue - trans_insert_location = max(trans_insert_location, - min(max(input_def_line_idx + 1, - put_below_idx), put_above_idx)) - - insert_indent_level = get_line_indent_level(lines[trans_insert_location - 1]) \ - if torchvision_indent == -1 else torchvision_indent + trans_insert_location = max( + trans_insert_location, min(max(input_def_line_idx + 1, put_below_idx), put_above_idx) + ) + + insert_indent_level = ( + get_line_indent_level(lines[trans_insert_location - 1]) + if torchvision_indent == -1 + else torchvision_indent + ) ### content # lines to insert lines_to_insert = bk_trans_content_this - if globals.code_domain == 'transformers_trainer': - lines_to_insert = lines_to_insert \ - .replace("EVAL_FUNC_LINES", globals.list_eval_func_lines[0]) \ - .replace("DATALOADER_NAME", globals.list_calib_dataloader_name[0]) - elif globals.code_domain == 'transformers_no_trainer': + if globals.code_domain == "transformers_trainer": + lines_to_insert = lines_to_insert.replace( + "EVAL_FUNC_LINES", globals.list_eval_func_lines[0] + ).replace("DATALOADER_NAME", globals.list_calib_dataloader_name[0]) + elif globals.code_domain == "transformers_no_trainer": pass - elif globals.code_domain == 'torchvision': - lines_to_insert = lines_to_insert \ - .replace("EVAL_FUNC_LINES", globals.list_eval_func_lines[0]) \ - .replace("DATALOADER_NAME", globals.list_calib_dataloader_name[0]) - elif globals.code_domain =='onnx': - lines_to_insert = lines_to_insert \ - .replace("EVAL_FUNCTION_NAME", globals.list_eval_func_name[0]) \ - .replace("DATALOADER_NAME", globals.list_calib_dataloader_name[0]) + elif globals.code_domain == "torchvision": + lines_to_insert = lines_to_insert.replace( + "EVAL_FUNC_LINES", globals.list_eval_func_lines[0] + ).replace("DATALOADER_NAME", globals.list_calib_dataloader_name[0]) + elif globals.code_domain == "onnx": + lines_to_insert = lines_to_insert.replace( + "EVAL_FUNCTION_NAME", globals.list_eval_func_name[0] + ).replace("DATALOADER_NAME", globals.list_calib_dataloader_name[0]) else: - lines_to_insert = lines_to_insert \ - .replace("DATALOADER_NAME", dataloader_name)\ - .replace("def eval_func", "# def eval_func") + lines_to_insert = lines_to_insert.replace("DATALOADER_NAME", dataloader_name).replace( + "def eval_func", "# def eval_func" + ) - optimum_quant_config_line = \ + optimum_quant_config_line = ( 'IncQuantizationConfig.from_pretrained("' + globals.optimum_quant_config + '")' + ) # replace [+] indication with empty - lines_to_insert = lines_to_insert.replace( - "[+] ", " " * insert_indent_level) + lines_to_insert = lines_to_insert.replace("[+] ", " " * insert_indent_level) # add begin indicator - lines_to_insert = " " * insert_indent_level + "# [NeuralCoder] " + \ - self.backend + " for " + model_name + " [Beginning Line]\n" + lines_to_insert + lines_to_insert = ( + " " 
* insert_indent_level + + "# [NeuralCoder] " + + self.backend + + " for " + + model_name + + " [Beginning Line]\n" + + lines_to_insert + ) # replace INDICATIONS with real stuff - lines_to_insert = lines_to_insert \ - .replace("MODEL_NAME", model_name) \ - .replace("INPUT_NAME", input_name) \ - .replace("EVAL_FUNC_LINES", "# return 1") \ - .replace("OPTIMUM_QUANT_CONFIG", optimum_quant_config_line) \ + lines_to_insert = ( + lines_to_insert.replace("MODEL_NAME", model_name) + .replace("INPUT_NAME", input_name) + .replace("EVAL_FUNC_LINES", "# return 1") + .replace("OPTIMUM_QUANT_CONFIG", optimum_quant_config_line) .replace("\n", " # [coder-enabled]\n") + ) # add end indicator - lines_to_insert += " # [coder-enabled]\n" + \ - " " * insert_indent_level + "# [NeuralCoder] " + self.backend + " for " + \ - model_name + " [Ending Line] # [coder-enabled]" + lines_to_insert += ( + " # [coder-enabled]\n" + + " " * insert_indent_level + + "# [NeuralCoder] " + + self.backend + + " for " + + model_name + + " [Ending Line] # [coder-enabled]" + ) ### register @@ -312,13 +345,14 @@ def register_transformation(self): globals.list_trans_insert_lines_to_insert[idx].append(lines_to_insert) # PART 2 - "inference line" - if "indent_inference_line" in loc or \ - "insert_above_inference_line" in loc or \ - "insert_below_inference_line" in loc: - + if ( + "indent_inference_line" in loc + or "insert_above_inference_line" in loc + or "insert_below_inference_line" in loc + ): for file_path in globals.list_code_path: - code = open(file_path, 'r').read() - lines = code.split('\n') + code = open(file_path, "r").read() + lines = code.split("\n") line_idx = 0 for i in range(len(lines)): line = lines[i] @@ -326,15 +360,15 @@ def register_transformation(self): is_eval_func, eval_func_type = is_eval_func_model_name(model_name, line) if is_eval_func and "[coder-enabled]" not in line: if eval_func_type == "non-forward": - pass # do something - + pass # do something + inference_line_indent_level = get_line_indent_level(line) if "indent_inference_line" in loc: bk_trans_content_this = bk_trans_content[bk_trans_location.index(loc)] add_indent_level = int(bk_trans_content_this) - trans_indent_location = [] + trans_indent_location = [] # indent can have multiple location, so is a list of numbers trans_indent_level = [] @@ -360,7 +394,7 @@ def register_transformation(self): globals.list_trans_indent_modified_file.append(file_path) globals.list_trans_indent_location_idxs.append(trans_indent_location) globals.list_trans_indent_level.append(trans_indent_level) - else: + else: idx = globals.list_trans_indent_modified_file.index(file_path) for i in trans_indent_location: globals.list_trans_indent_location_idxs[idx].append(i) @@ -371,7 +405,7 @@ def register_transformation(self): idx_offset = 0 elif "insert_below_inference_line" in loc: idx_offset = 1 - + if "insert_above_inference_line" in loc or "insert_below_inference_line" in loc: bk_trans_content_this = bk_trans_content[bk_trans_location.index(loc)] @@ -384,32 +418,39 @@ def register_transformation(self): # lines to insert lines_to_insert = bk_trans_content_this # replace [+] indication with empty - lines_to_insert = lines_to_insert.replace( - "[+] ", " " * insert_indent_level) + lines_to_insert = lines_to_insert.replace("[+] ", " " * insert_indent_level) # add begin indicator - lines_to_insert = " " * insert_indent_level + "# [NeuralCoder] " + \ - self.backend + " [Beginning Line] \n" + lines_to_insert - # replace INDICATIONS with real stuff + lines_to_insert = ( + " " * 
insert_indent_level + + "# [NeuralCoder] " + + self.backend + + " [Beginning Line] \n" + + lines_to_insert + ) + # replace INDICATIONS with real stuff # (for now, inference_line related transformations ) # (have nothing to do with input, dataloader etc, ) # (so no need to put replaces here.) lines_to_insert = lines_to_insert.replace("\n", " # [coder-enabled]\n") # add end indicator - lines_to_insert += " # [coder-enabled]\n" + \ - " " * insert_indent_level + "# [NeuralCoder] " + \ - self.backend + " [Ending Line] # [coder-enabled]" + lines_to_insert += ( + " # [coder-enabled]\n" + + " " * insert_indent_level + + "# [NeuralCoder] " + + self.backend + + " [Ending Line] # [coder-enabled]" + ) # customized argument if self.backend == "pytorch_benchmark": - lines_to_insert = lines_to_insert.replace("NUM_BENCHMARK_ITERATION", - globals.num_benchmark_iteration) - lines_to_insert = lines_to_insert.replace("ACCURACY_MODE", - str(False)) - lines_to_insert = lines_to_insert.replace("EVAL_FUNC_LINES", - line.strip()) + lines_to_insert = lines_to_insert.replace( + "NUM_BENCHMARK_ITERATION", globals.num_benchmark_iteration + ) + lines_to_insert = lines_to_insert.replace("ACCURACY_MODE", str(False)) + lines_to_insert = lines_to_insert.replace("EVAL_FUNC_LINES", line.strip()) ### register - + if file_path not in globals.list_trans_insert_modified_file: globals.list_trans_insert_modified_file.append(file_path) globals.list_trans_insert_location_idxs.append([trans_insert_location]) @@ -429,12 +470,7 @@ def register_transformation(self): # PART 3 - for customized location - - logger.debug( - f"globals.list_trans_insert_modified_file: {globals.list_trans_insert_modified_file}") - logger.debug( - f"globals.list_trans_insert_location_idxs: {globals.list_trans_insert_location_idxs}") - logger.debug( - f"globals.list_trans_insert_number_insert_lines: {globals.list_trans_insert_number_insert_lines}") - logger.debug( - f"globals.list_trans_insert_lines_to_insert: {globals.list_trans_insert_lines_to_insert}") + logger.debug(f"globals.list_trans_insert_modified_file: {globals.list_trans_insert_modified_file}") + logger.debug(f"globals.list_trans_insert_location_idxs: {globals.list_trans_insert_location_idxs}") + logger.debug(f"globals.list_trans_insert_number_insert_lines: {globals.list_trans_insert_number_insert_lines}") + logger.debug(f"globals.list_trans_insert_lines_to_insert: {globals.list_trans_insert_lines_to_insert}") diff --git a/neural_coder/coders/autoinc/calib_dataloader.py b/neural_coder/coders/autoinc/calib_dataloader.py index 7d448b8e588..8d287837676 100644 --- a/neural_coder/coders/autoinc/calib_dataloader.py +++ b/neural_coder/coders/autoinc/calib_dataloader.py @@ -13,30 +13,33 @@ # limitations under the License. import logging + from ... 
import globals + class Calib_Dataloader(object): def __init__(self): pass + def register_transformation(self): - if globals.code_domain == 'transformers_trainer': - globals.list_calib_dataloader_name.append('trainer.get_eval_dataloader()') - elif globals.code_domain == 'transformers_no_trainer': + if globals.code_domain == "transformers_trainer": + globals.list_calib_dataloader_name.append("trainer.get_eval_dataloader()") + elif globals.code_domain == "transformers_no_trainer": pass - elif globals.code_domain == 'torchvision': - globals.list_calib_dataloader_name.append('val_loader') - elif globals.code_domain == 'onnx': - codes = open(globals.list_code_path[0], 'r').read().split('\n') + elif globals.code_domain == "torchvision": + globals.list_calib_dataloader_name.append("val_loader") + elif globals.code_domain == "onnx": + codes = open(globals.list_code_path[0], "r").read().split("\n") for line in codes: - line = line.strip() - if 'loader' in line and '=' in line: + line = line.strip() + if "loader" in line and "=" in line: end = 0 for i in range(len(line)): - if line[i] == '=': + if line[i] == "=": end = i - if line[end-1] == ' ': - globals.list_calib_dataloader_name.append(line[:end-1]) + if line[end - 1] == " ": + globals.list_calib_dataloader_name.append(line[: end - 1]) else: globals.list_calib_dataloader_name.append(line[:end]) - else: # random model + else: # random model pass diff --git a/neural_coder/coders/autoinc/domain.py b/neural_coder/coders/autoinc/domain.py index d321b421682..ed4437d7455 100644 --- a/neural_coder/coders/autoinc/domain.py +++ b/neural_coder/coders/autoinc/domain.py @@ -13,20 +13,22 @@ # limitations under the License. import re + + def determine_domain(path) -> str: - codes = open(path, 'r').read() - if ('import torchvision.models' in codes or 'from torchvision.models' in codes) and 'val_loader' in codes: - return 'torchvision' - elif re.search(r'from (.*)transformers import', codes) and re.search(r'(.*)Model(.*)', codes): - if 'Trainer' in codes or 'trainer' in codes: - return 'transformers_trainer' + codes = open(path, "r").read() + if ("import torchvision.models" in codes or "from torchvision.models" in codes) and "val_loader" in codes: + return "torchvision" + elif re.search(r"from (.*)transformers import", codes) and re.search(r"(.*)Model(.*)", codes): + if "Trainer" in codes or "trainer" in codes: + return "transformers_trainer" else: - return 'transformers_no_trainer' - elif 'onnx.load(' in codes: - return 'onnx' - elif 'keras.Sequential' in codes: - return 'keras_script' - elif 'from tensorflow import' in codes or 'import tensorflow' in codes: - return 'tensorflow_keras_model' + return "transformers_no_trainer" + elif "onnx.load(" in codes: + return "onnx" + elif "keras.Sequential" in codes: + return "keras_script" + elif "from tensorflow import" in codes or "import tensorflow" in codes: + return "tensorflow_keras_model" else: - return 'random model' + return "random model" diff --git a/neural_coder/coders/autoinc/eval_func.py b/neural_coder/coders/autoinc/eval_func.py index 716fcce2603..9ea65d36315 100644 --- a/neural_coder/coders/autoinc/eval_func.py +++ b/neural_coder/coders/autoinc/eval_func.py @@ -19,57 +19,58 @@ class Eval_Func(object): def __init__(self): pass + def register_transformation(self): - if globals.code_domain == 'transformers_trainer': + if globals.code_domain == "transformers_trainer": lines = [ - 'trainer.model = model', - 'metrics = trainer.evaluate() # check if all tasks do not have parameters in evaluate()', - 'keys = [', - 
' "eval_accuracy",', - ' "eval_bleu",', - ' "eval_matthews_correlation",', - ' "eval_pearsonr",', - ' "eval_precision",', - ' "eval_recall",', - ' "eval_rouge",', - ' "eval_sacrebleu",', - ' "eval_spearmanr",', - ' "eval_mcc",', - ' "eval_acc",', - ' "eval_acc_and_f1",', - ' "eval_corr",', - ' "eval_mnli/acc",', - ' "eval_mnli-mm/acc",', - ' "eval_exact_match",', - ' "eval_f1",', - '] # METRIC_TAGS in transformers', - 'for key in keys:', - ' if key in metrics.keys():', - ' return metrics[key]', - 'assert False, \"No metric returned, Please check inference metric!\"' - ] + "trainer.model = model", + "metrics = trainer.evaluate() # check if all tasks do not have parameters in evaluate()", + "keys = [", + ' "eval_accuracy",', + ' "eval_bleu",', + ' "eval_matthews_correlation",', + ' "eval_pearsonr",', + ' "eval_precision",', + ' "eval_recall",', + ' "eval_rouge",', + ' "eval_sacrebleu",', + ' "eval_spearmanr",', + ' "eval_mcc",', + ' "eval_acc",', + ' "eval_acc_and_f1",', + ' "eval_corr",', + ' "eval_mnli/acc",', + ' "eval_mnli-mm/acc",', + ' "eval_exact_match",', + ' "eval_f1",', + "] # METRIC_TAGS in transformers", + "for key in keys:", + " if key in metrics.keys():", + " return metrics[key]", + 'assert False, "No metric returned, Please check inference metric!"', + ] for index, line in enumerate(lines): if index != 0: - lines[index] = '[+] ' + ' ' * 4 + line - lines = '\n'.join(lines) + lines[index] = "[+] " + " " * 4 + line + lines = "\n".join(lines) globals.list_eval_func_lines.append(lines) - elif globals.code_domain == 'transformers_no_trainer': + elif globals.code_domain == "transformers_no_trainer": pass - elif globals.code_domain == 'torchvision': + elif globals.code_domain == "torchvision": # search for 'validate()' - codes = open(globals.list_code_path[0], 'r').read().split('\n') + codes = open(globals.list_code_path[0], "r").read().split("\n") lines = [] for index, line in enumerate(codes): - if 'def validate(' in line: + if "def validate(" in line: start = index start_indent = get_line_indent_level(codes[start]) - for i in range(start+1, len(codes)): - if codes[i] == '': + for i in range(start + 1, len(codes)): + if codes[i] == "": continue line_indent = get_line_indent_level(codes[i]) if line_indent > start_indent: change_indent = line_indent - 4 - lines.append(' ' * change_indent + codes[i].lstrip()) + lines.append(" " * change_indent + codes[i].lstrip()) # no 'print' else: break @@ -77,19 +78,19 @@ def register_transformation(self): else: pass for index, line in enumerate(lines): - if 'return' in line: + if "return" in line: indent = get_line_indent_level(line) line_list = line.split() - line_list[1] = 'float(' + line_list[1] + ')' - lines[index] = ' ' * indent + ' '.join(line_list) + line_list[1] = "float(" + line_list[1] + ")" + lines[index] = " " * indent + " ".join(line_list) for index, line in enumerate(lines): if index != 0: - lines[index] = '[+] ' + ' ' * 8 + line - lines = '\n'.join(lines) + lines[index] = "[+] " + " " * 8 + line + lines = "\n".join(lines) globals.list_eval_func_lines.append(lines) - elif globals.code_domain == 'onnx': + elif globals.code_domain == "onnx": # look for sess = onnxruntime.InferenceSession(MODEL_NAME.SerializeToString(), None) - codes = open(globals.list_code_path[0], 'r').read().split('\n') + codes = open(globals.list_code_path[0], "r").read().split("\n") start = 0 for idx, line in enumerate(codes): if "onnxruntime.InferenceSession(" in line: @@ -99,12 +100,12 @@ def register_transformation(self): target = None for i in range(start, -1, 
-1): if "def" in codes[i] and (line_indent - get_line_indent_level(codes[i])) == 4: - target = codes[i].split(' ')[1] + target = codes[i].split(" ")[1] break func_name = None for i in range(len(target)): - if target[i] == '(': + if target[i] == "(": globals.list_eval_func_name.append(target[:i]) break - else: # random model + else: # random model pass diff --git a/neural_coder/coders/pytorch/batch_size.py b/neural_coder/coders/pytorch/batch_size.py index 68dd7687366..ec61e19d70f 100644 --- a/neural_coder/coders/pytorch/batch_size.py +++ b/neural_coder/coders/pytorch/batch_size.py @@ -21,7 +21,7 @@ def __init__(self, file) -> None: self.result = [] def transform(self): - lines = self.file.split('\n') + lines = self.file.split("\n") for line in lines: if self.not_modify(line): new_line = self.modify(line) @@ -29,53 +29,52 @@ def transform(self): else: self.result.append(line) for index, line in enumerate(self.result): - if index != len(self.result)-1: - self.result[index] += '\n' - return ''.join(self.result) + if index != len(self.result) - 1: + self.result[index] += "\n" + return "".join(self.result) def not_modify(self, s): - if 'batch_size' in s and '=' in s: + if "batch_size" in s and "=" in s: return True return False def modify(self, s): - idx = s.find('batch_size') + idx = s.find("batch_size") s_right = s[idx:] - if ' = ' in s_right: - index = s.find(' = ') + if " = " in s_right: + index = s.find(" = ") s_left = s[:index] - if 'batch_size' in s_left: - if ',' in s_left: - index1 = s_left.find(',') - index2 = s_left.find('batch_size') + if "batch_size" in s_left: + if "," in s_left: + index1 = s_left.find(",") + index2 = s_left.find("batch_size") if index1 > index2: slice1 = s_left[:index1] else: s_left1 = s_left[:index2] s_right = s_left[index2:] - index3 = s_left1.rfind(',') - if ',' in s_right: - index4 = s_right.find(',') + len(s_left1) - slice1 = s_left[index3+2:index4] + index3 = s_left1.rfind(",") + if "," in s_right: + index4 = s_right.find(",") + len(s_left1) + slice1 = s_left[index3 + 2 : index4] else: - slice1 = s_left[index3+2:index] - s1 = slice1 + ' = ' + globals.target_batch_size - s = s[:] + '\n' + s1 + slice1 = s_left[index3 + 2 : index] + s1 = slice1 + " = " + globals.target_batch_size + s = s[:] + "\n" + s1 else: - s_right = s[index+3:] - s_right = s_right.replace( - s_right, globals.target_batch_size) - s = s_left + ' = ' + s_right - elif 'batch_size=' in s: - idx = s.find('batch_size=') + s_right = s[index + 3 :] + s_right = s_right.replace(s_right, globals.target_batch_size) + s = s_left + " = " + s_right + elif "batch_size=" in s: + idx = s.find("batch_size=") s_right = s[idx:] - idx2 = s_right.find('batch_size') - if ',' in s_right: - index2 = s_right.find(',') + idx2 = s_right.find("batch_size") + if "," in s_right: + index2 = s_right.find(",") old = s_right[idx2:index2] s = s.replace(old, "batch_size=" + globals.target_batch_size) - elif ')' in s_right: - index2 = s_right.find(')') + elif ")" in s_right: + index2 = s_right.find(")") old = s_right[idx2:index2] s = s.replace(old, "batch_size=" + globals.target_batch_size) else: diff --git a/neural_coder/coders/pytorch/change_trainer_to_nlptrainer.py b/neural_coder/coders/pytorch/change_trainer_to_nlptrainer.py index 512310c46e9..7c0d71ce534 100644 --- a/neural_coder/coders/pytorch/change_trainer_to_nlptrainer.py +++ b/neural_coder/coders/pytorch/change_trainer_to_nlptrainer.py @@ -15,13 +15,14 @@ from ...utils.line_operation import get_line_indent_level + class TrainerToNLPTrainer(object): def __init__(self, 
file) -> None: self.file = file self.result = [] def transform(self): - lines = self.file.split('\n') + lines = self.file.split("\n") for line in lines: if self.is_modify(line): @@ -30,17 +31,17 @@ def transform(self): else: self.result.append(line) for index, line in enumerate(self.result): - if index != len(self.result)-1: - self.result[index] += '\n' - return ''.join(self.result) + if index != len(self.result) - 1: + self.result[index] += "\n" + return "".join(self.result) def is_modify(self, s): - if 'trainer = Trainer(' in s: + if "trainer = Trainer(" in s: return True else: return False def modify(self, s): - old = 'Trainer' - s = s.replace(old, 'NLPTrainer') + old = "Trainer" + s = s.replace(old, "NLPTrainer") return s diff --git a/neural_coder/coders/pytorch/cuda_to_cpu.py b/neural_coder/coders/pytorch/cuda_to_cpu.py index 6c28a327f1c..2d6c9c8a13e 100644 --- a/neural_coder/coders/pytorch/cuda_to_cpu.py +++ b/neural_coder/coders/pytorch/cuda_to_cpu.py @@ -15,13 +15,14 @@ from ...utils.line_operation import get_line_indent_level + class CudaToCpu(object): def __init__(self, file) -> None: self.file = file self.result = [] def transform(self): - lines = self.file.split('\n') + lines = self.file.split("\n") # determine if jump the whole file (in cases where: args.device, args.cuda etc) to_jump = False for line in lines: @@ -29,10 +30,10 @@ def transform(self): to_jump = True break - if to_jump: # this file do not need transformation + if to_jump: # this file do not need transformation for line in lines: self.result.append(line) - else: # this file might need transformation + else: # this file might need transformation for line in lines: if self.is_delete(line): indent_level = get_line_indent_level(line) @@ -44,44 +45,42 @@ def transform(self): else: self.result.append(line) for index, line in enumerate(self.result): - if index != len(self.result)-1: - self.result[index] += '\n' - return ''.join(self.result) + if index != len(self.result) - 1: + self.result[index] += "\n" + return "".join(self.result) def is_jump_file(self, s): - if "args.device" in s \ - or "args.cpu" in s \ - or "args.gpu" in s \ - or "args.cuda" in s \ - or "torch.cuda.is_available()" in s: + if ( + "args.device" in s + or "args.cpu" in s + or "args.gpu" in s + or "args.cuda" in s + or "torch.cuda.is_available()" in s + ): return True else: return False def is_delete(self, s): - if 'cuda.' in s and '=' not in s and "if" not in s: + if "cuda." 
in s and "=" not in s and "if" not in s: return True else: return False def is_modify(self, s): - if '\'cuda\'' in s \ - or '"cuda"' in s \ - or '\'cuda:0\'' in s \ - or '"cuda:0"' in s \ - or 'cuda()' in s: + if "'cuda'" in s or '"cuda"' in s or "'cuda:0'" in s or '"cuda:0"' in s or "cuda()" in s: return True else: return False def change_to_cpu(self, s): - if '\'cuda\'' in s or '\'cuda:0\'' in s: - old = '\'cuda\'' if '\'cuda\'' in s else '\'cuda:0\'' - s = s.replace(old, '\'cpu\'') + if "'cuda'" in s or "'cuda:0'" in s: + old = "'cuda'" if "'cuda'" in s else "'cuda:0'" + s = s.replace(old, "'cpu'") elif '"cuda"' in s or '"cuda:0"' in s: old = '"cuda"' if '"cuda"' in s else '"cuda:0"' s = s.replace(old, '"cpu"') - elif 'cuda()' in s: - old = 'cuda' - s = s.replace(old, 'cpu') + elif "cuda()" in s: + old = "cuda" + s = s.replace(old, "cpu") return s diff --git a/neural_coder/coders/pytorch/dummy_dataloader.py b/neural_coder/coders/pytorch/dummy_dataloader.py index c52972f4801..64e745cdacf 100644 --- a/neural_coder/coders/pytorch/dummy_dataloader.py +++ b/neural_coder/coders/pytorch/dummy_dataloader.py @@ -12,14 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ... import globals -from ...utils.line_operation import get_line_indent_level, is_eval_func_model_name, get_line_left_hand_side - import logging -logging.basicConfig(level=globals.logging_level, - format='%(asctime)s %(levelname)s %(message)s', - datefmt='%a, %d %b %Y %H:%M:%S +0000') +from ... import globals +from ...utils.line_operation import get_line_indent_level, get_line_left_hand_side, is_eval_func_model_name + +logging.basicConfig( + level=globals.logging_level, format="%(asctime)s %(levelname)s %(message)s", datefmt="%a, %d %b %Y %H:%M:%S +0000" +) logger = logging.getLogger(__name__) @@ -31,12 +31,12 @@ def print_info(self): for i in self.list_model_def_instance: logger.debug(f"i.print_info(): {i.print_info()}") - # collect file transformation info and register (store) in globals + # collect file transformation info and register (store) in globals # (i.e. 
which file to add which lines at which location) - def register_transformation(self): + def register_transformation(self): list_code = [] for i in globals.list_code_path: - list_code.append(open(i, 'r').read()) + list_code.append(open(i, "r").read()) for ins in self.list_model_def_instance: model_name = ins.model_name @@ -47,12 +47,12 @@ def register_transformation(self): # transformation file_path_idx = globals.list_code_path.index(file_path) - lines = list_code[file_path_idx].split('\n') + lines = list_code[file_path_idx].split("\n") line_idx = 0 # search DataLoader dataloader_name = "" - for i in range(len(lines)): # each item is a str of this code line + for i in range(len(lines)): # each item is a str of this code line line = lines[i] if "DataLoader(" in line and "=" in line and line.find("=") < line.find("DataLoader"): dataloader_name = get_line_left_hand_side(line) @@ -64,10 +64,10 @@ def register_transformation(self): input_dimension_str = "3, 224, 224)" for i in range(len(lines)): line = lines[i] - if ("input" in line and "=" in line and \ - line.find("=") > line.find("input")) or \ - ("image" in line and "=" in line and line.find("=") > line.find("image")): - input_dimension_str = line[line.find(",")+2:] + if ("input" in line and "=" in line and line.find("=") > line.find("input")) or ( + "image" in line and "=" in line and line.find("=") > line.find("image") + ): + input_dimension_str = line[line.find(",") + 2 :] for i in range(len(lines)): line = lines[i] @@ -77,8 +77,9 @@ def register_transformation(self): lines_to_insert += " " * indent_level + "import torch" + "\n" lines_to_insert += " " * indent_level + "from torch.utils.data import Dataset" + "\n" lines_to_insert += " " * indent_level + "class DummyDataset(Dataset):" + "\n" - lines_to_insert += " " * indent_level + \ - " def __init__(self, *shapes, num_samples: int = 10000):" + "\n" + lines_to_insert += ( + " " * indent_level + " def __init__(self, *shapes, num_samples: int = 10000):" + "\n" + ) lines_to_insert += " " * indent_level + " super().__init__()" + "\n" lines_to_insert += " " * indent_level + " self.shapes = shapes" + "\n" lines_to_insert += " " * indent_level + " self.num_samples = num_samples" + "\n" @@ -91,26 +92,28 @@ def register_transformation(self): lines_to_insert += " " * indent_level + " sample.append(spl)" + "\n" lines_to_insert += " " * indent_level + " return sample" + "\n" lines_to_insert += " " * indent_level + "from torch.utils.data import DataLoader" + "\n" - lines_to_insert += " " * indent_level + \ - "dummy_dataset = DummyDataset((" + input_dimension_str + ", (1, ))" + "\n" - lines_to_insert += " " * indent_level + \ - "dummy_dataloader = DataLoader(dummy_dataset, batch_size=1)" + lines_to_insert += ( + " " * indent_level + + "dummy_dataset = DummyDataset((" + + input_dimension_str + + ", (1, ))" + + "\n" + ) + lines_to_insert += ( + " " * indent_level + "dummy_dataloader = DataLoader(dummy_dataset, batch_size=1)" + ) trans_insert_location = 0 if file_path not in globals.list_trans_insert_modified_file: globals.list_trans_insert_modified_file.append(file_path) globals.list_trans_insert_location_idxs.append([trans_insert_location]) - globals.list_trans_insert_number_insert_lines.append( - [lines_to_insert.count("\n") + 1] - ) + globals.list_trans_insert_number_insert_lines.append([lines_to_insert.count("\n") + 1]) globals.list_trans_insert_lines_to_insert.append([lines_to_insert]) else: idx = globals.list_trans_insert_modified_file.index(file_path) 
globals.list_trans_insert_location_idxs[idx].append(trans_insert_location) - globals.list_trans_insert_number_insert_lines[idx].append( - lines_to_insert.count("\n") + 1 - ) + globals.list_trans_insert_number_insert_lines[idx].append(lines_to_insert.count("\n") + 1) globals.list_trans_insert_lines_to_insert[idx].append(lines_to_insert) line_idx += 1 diff --git a/neural_coder/coders/pytorch/harness.py b/neural_coder/coders/pytorch/harness.py index 9191e291886..eb91af756ad 100644 --- a/neural_coder/coders/pytorch/harness.py +++ b/neural_coder/coders/pytorch/harness.py @@ -12,23 +12,24 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging +import os +import sys + +import yaml + from ... import globals from ...utils.line_operation import ( get_line_indent_level, - is_eval_func_model_name, get_line_left_hand_side, get_line_wo_comment, - single_line_comment_or_empty_line_detection + is_eval_func_model_name, + single_line_comment_or_empty_line_detection, ) -import logging -import yaml -import sys -import os - -logging.basicConfig(level=globals.logging_level, - format='%(asctime)s %(levelname)s %(message)s', - datefmt='%a, %d %b %Y %H:%M:%S +0000') +logging.basicConfig( + level=globals.logging_level, format="%(asctime)s %(levelname)s %(message)s", datefmt="%a, %d %b %Y %H:%M:%S +0000" +) logger = logging.getLogger(__name__) @@ -43,8 +44,7 @@ def print_info(self): # collect file transformation info and register in globals # (i.e. which file to add which lines at which location) def register_transformation(self): - backend_file = open(os.path.dirname(__file__) + - "/../../backends/" + self.backend + ".yaml") + backend_file = open(os.path.dirname(__file__) + "/../../backends/" + self.backend + ".yaml") backend_dict = yaml.load(backend_file, Loader=yaml.BaseLoader) logger.debug(f"backend_dict: {backend_dict}") @@ -54,20 +54,18 @@ def register_transformation(self): list_code = [] for i in globals.list_code_path: - list_code.append(open(i, 'r').read()) + list_code.append(open(i, "r").read()) for loc in bk_trans_location: - # PART 1 - "model_definition_line" if "insert_below_model_definition_line" in loc: - for ins in globals.list_model_def_instance: model_name = ins.model_name file_path = ins.file_path model_def_line_idx = ins.model_def_line_idx file_path_idx = globals.list_code_path.index(file_path) - lines = list_code[file_path_idx].split('\n') + lines = list_code[file_path_idx].split("\n") line_idx = 0 # to check if this model has an inference line is in the file @@ -75,8 +73,9 @@ def register_transformation(self): to_transform = False for i in range(len(lines)): line = lines[i] - if model_name + "(" in line or \ - (model_name + "." in line and line.find(model_name) < line.find(".") and "(" in line): + if model_name + "(" in line or ( + model_name + "." 
in line and line.find(model_name) < line.find(".") and "(" in line + ): to_transform = True if not to_transform: continue @@ -88,9 +87,9 @@ def register_transformation(self): for i in range(len(lines)): line = lines[i] if not single_line_comment_or_empty_line_detection(line): - if ("DataLoader(" in line and "=" in line and line.find("=") < line.find("DataLoader")) \ - or ("dataloader" in line and "=" in line and \ - line.find("=") > line.find("dataloader")): + if ("DataLoader(" in line and "=" in line and line.find("=") < line.find("DataLoader")) or ( + "dataloader" in line and "=" in line and line.find("=") > line.find("dataloader") + ): dataloader_def_line_indent_level = get_line_indent_level(line) dataloader_name = get_line_left_hand_side(line) dataloader_def_line_idx = i @@ -104,7 +103,7 @@ def register_transformation(self): if not single_line_comment_or_empty_line_detection(line): if is_eval_func and "[coder-enabled]" not in line: inference_line = line - input_name = line[line.find("(")+1:line.find(")")].replace("*","") + input_name = line[line.find("(") + 1 : line.find(")")].replace("*", "") # get "c" in "a = b(**c)" # search input definition in this file (if any) @@ -115,7 +114,7 @@ def register_transformation(self): if input_name in line and "=" in line and line.find("=") > line.find(input_name): input_def_line_indent_level = get_line_indent_level(line) input_def_line_idx = i - + # search trainer definition in this file (for transformers trainer only) trainer_def_line_idx = -1 for i in range(len(lines)): @@ -138,8 +137,7 @@ def register_transformation(self): i_search = 1 while do_search: following_line = lines[line_idx + i_search] - if ")" in following_line \ - and following_line.count(")") > following_line.count("("): + if ")" in following_line and following_line.count(")") > following_line.count("("): do_search = False i_search += 1 model_definition_end_line_idx = line_idx + i_search @@ -149,26 +147,30 @@ def register_transformation(self): bk_trans_content_this = bk_trans_content[bk_trans_location.index(loc)] - if ("INPUT_NAME" in bk_trans_content_this and input_name == "") \ - or ("DATALOADER_NAME" in bk_trans_content_this and dataloader_name == "") \ - or ("INFERENCE_LINE" in bk_trans_content_this and inference_line == ""): - logger.info(f"Skipped due to not having enough information required by " - "the transformation content specified in the config file " - "(e.g. INPUT_NAME, DATALOADER_NAME, INFERENCE_LINE). " - f"File path: {file_path}") + if ( + ("INPUT_NAME" in bk_trans_content_this and input_name == "") + or ("DATALOADER_NAME" in bk_trans_content_this and dataloader_name == "") + or ("INFERENCE_LINE" in bk_trans_content_this and inference_line == "") + ): + logger.info( + f"Skipped due to not having enough information required by " + "the transformation content specified in the config file " + "(e.g. INPUT_NAME, DATALOADER_NAME, INFERENCE_LINE). 
" + f"File path: {file_path}" + ) continue ### location # search for features to put below them - ''' + """ Example (psuedo-code): model = Net() # jit script begin mark model = torch.jit.script(model) # jit script end mark (feature name + model name to handle multi-model situation) model = ipex.optimize(model, "fp32") # "ipex fp32" must be put below "jit script" - ''' + """ put_below_idx = 0 for i in range(len(lines)): for item in bk_trans_order[0]["below"]: @@ -183,12 +185,10 @@ def register_transformation(self): line = lines[i] if item in line and model_name in line: put_above_idx = min(put_above_idx, i) - + # location assignment (below model def / dataloader def / input def) if "insert_below_model_definition_line" in loc: - trans_insert_location = \ - min(max(model_definition_end_line_idx, - put_below_idx), put_above_idx) + trans_insert_location = min(max(model_definition_end_line_idx, put_below_idx), put_above_idx) if trainer_def_line_idx > 0: trans_insert_location = trainer_def_line_idx - 1 # for transformers trainer to put right above trainer def @@ -196,49 +196,66 @@ def register_transformation(self): try: dataloader_def_line_idx except: - logger.warning(f"Skipped due to not having dataloader definition required by " - "the transformation content specified in the config file. " - f"File path: {file_path}") + logger.warning( + f"Skipped due to not having dataloader definition required by " + "the transformation content specified in the config file. " + f"File path: {file_path}" + ) continue - trans_insert_location = max(trans_insert_location, - min(max(dataloader_def_line_idx + 1, - put_below_idx), put_above_idx)) + trans_insert_location = max( + trans_insert_location, min(max(dataloader_def_line_idx + 1, put_below_idx), put_above_idx) + ) if "insert_below_input_definition_line" in loc: try: input_def_line_idx except: - logger.warning(f"Skipped due to not having input definition required by " - "the transformation content specified in the config file. " - f"File path: {file_path}") + logger.warning( + f"Skipped due to not having input definition required by " + "the transformation content specified in the config file. 
" + f"File path: {file_path}" + ) continue - trans_insert_location = max(trans_insert_location, - min(max(input_def_line_idx + 1, - put_below_idx), put_above_idx)) - + trans_insert_location = max( + trans_insert_location, min(max(input_def_line_idx + 1, put_below_idx), put_above_idx) + ) + insert_indent_level = get_line_indent_level(lines[trans_insert_location - 1]) - if trainer_def_line_idx > 0: # for transformers trainer to put right above trainer def + if trainer_def_line_idx > 0: # for transformers trainer to put right above trainer def insert_indent_level = get_line_indent_level(lines[trans_insert_location]) ### content # lines to insert lines_to_insert = bk_trans_content_this # replace [+] indication with empty - lines_to_insert = lines_to_insert.replace( - "[+] ", " " * insert_indent_level) + lines_to_insert = lines_to_insert.replace("[+] ", " " * insert_indent_level) # add begin indicator - lines_to_insert = " " * insert_indent_level + "# [NeuralCoder] " + \ - self.backend + " for " + model_name + " [Beginning Line]\n" + lines_to_insert + lines_to_insert = ( + " " * insert_indent_level + + "# [NeuralCoder] " + + self.backend + + " for " + + model_name + + " [Beginning Line]\n" + + lines_to_insert + ) # replace INDICATIONS with real stuff - lines_to_insert = lines_to_insert \ - .replace("MODEL_NAME", model_name) \ - .replace("INPUT_NAME", input_name) \ - .replace("DATALOADER_NAME", dataloader_name) \ - .replace("INFERENCE_LINE", inference_line.strip()) \ + lines_to_insert = ( + lines_to_insert.replace("MODEL_NAME", model_name) + .replace("INPUT_NAME", input_name) + .replace("DATALOADER_NAME", dataloader_name) + .replace("INFERENCE_LINE", inference_line.strip()) .replace("\n", " # [coder-enabled]\n") + ) # add end indicator - lines_to_insert += " # [coder-enabled]\n" + \ - " " * insert_indent_level + "# [NeuralCoder] " + self.backend + " for " + \ - model_name + " [Ending Line] # [coder-enabled]" + lines_to_insert += ( + " # [coder-enabled]\n" + + " " * insert_indent_level + + "# [NeuralCoder] " + + self.backend + + " for " + + model_name + + " [Ending Line] # [coder-enabled]" + ) ### register @@ -254,13 +271,14 @@ def register_transformation(self): globals.list_trans_insert_lines_to_insert[idx].append(lines_to_insert) # PART 2 - "inference line" - if "indent_inference_line" in loc or \ - "insert_above_inference_line" in loc or \ - "insert_below_inference_line" in loc: - + if ( + "indent_inference_line" in loc + or "insert_above_inference_line" in loc + or "insert_below_inference_line" in loc + ): for file_path in globals.list_code_path: - code = open(file_path, 'r').read() - lines = code.split('\n') + code = open(file_path, "r").read() + lines = code.split("\n") line_idx = 0 for i in range(len(lines)): line = lines[i] @@ -268,7 +286,7 @@ def register_transformation(self): is_eval_func, eval_func_type = is_eval_func_model_name(model_name, line) if is_eval_func and "[coder-enabled]" not in line: if eval_func_type == "non-forward": - pass # do something + pass # do something inference_line = line inference_line_indent_level = get_line_indent_level(line) @@ -302,7 +320,7 @@ def register_transformation(self): globals.list_trans_indent_modified_file.append(file_path) globals.list_trans_indent_location_idxs.append(trans_indent_location) globals.list_trans_indent_level.append(trans_indent_level) - else: + else: idx = globals.list_trans_indent_modified_file.index(file_path) for i in trans_indent_location: globals.list_trans_indent_location_idxs[idx].append(i) @@ -322,11 +340,14 @@ def 
register_transformation(self): if ")" in following_line: do_search = False i_search += 1 - inference_line = \ - inference_line + "\n" + \ - " " * (get_line_indent_level(line) + 4) + following_line + inference_line = ( + inference_line + + "\n" + + " " * (get_line_indent_level(line) + 4) + + following_line + ) idx_offset = i_search - + if "insert_above_inference_line" in loc or "insert_below_inference_line" in loc: bk_trans_content_this = bk_trans_content[bk_trans_location.index(loc)] @@ -339,32 +360,41 @@ def register_transformation(self): # lines to insert lines_to_insert = bk_trans_content_this # replace [+] indication with empty - lines_to_insert = lines_to_insert.replace( - "[+] ", " " * insert_indent_level) + lines_to_insert = lines_to_insert.replace("[+] ", " " * insert_indent_level) # add begin indicator - lines_to_insert = " " * insert_indent_level + "# [NeuralCoder] " + \ - self.backend + " [Beginning Line] \n" + lines_to_insert - # replace INDICATIONS with real stuff + lines_to_insert = ( + " " * insert_indent_level + + "# [NeuralCoder] " + + self.backend + + " [Beginning Line] \n" + + lines_to_insert + ) + # replace INDICATIONS with real stuff # (for now, inference_line related transformations ) # (have nothing to do with input, dataloader etc, ) # (so no need to put replaces here.) lines_to_insert = lines_to_insert.replace("\n", " # [coder-enabled]\n") # add end indicator - lines_to_insert += " # [coder-enabled]\n" + \ - " " * insert_indent_level + "# [NeuralCoder] " + \ - self.backend + " [Ending Line] # [coder-enabled]" + lines_to_insert += ( + " # [coder-enabled]\n" + + " " * insert_indent_level + + "# [NeuralCoder] " + + self.backend + + " [Ending Line] # [coder-enabled]" + ) # customized argument if self.backend == "pytorch_benchmark": - lines_to_insert = lines_to_insert.replace("NUM_BENCHMARK_ITERATION", - globals.num_benchmark_iteration) - lines_to_insert = lines_to_insert.replace("ACCURACY_MODE", - str(False)) - lines_to_insert = lines_to_insert.replace("INFERENCE_LINE", - inference_line.strip()) + lines_to_insert = lines_to_insert.replace( + "NUM_BENCHMARK_ITERATION", globals.num_benchmark_iteration + ) + lines_to_insert = lines_to_insert.replace("ACCURACY_MODE", str(False)) + lines_to_insert = lines_to_insert.replace( + "INFERENCE_LINE", inference_line.strip() + ) ### register - + if file_path not in globals.list_trans_insert_modified_file: globals.list_trans_insert_modified_file.append(file_path) globals.list_trans_insert_location_idxs.append([trans_insert_location]) @@ -380,17 +410,12 @@ def register_transformation(self): ) globals.list_trans_insert_lines_to_insert[idx].append(lines_to_insert) - break # already transformed this line, so skip any further model_name search + break # already transformed this line, so skip any further model_name search line_idx += 1 # PART 3 - for customized location - - logger.debug( - f"globals.list_trans_insert_modified_file: {globals.list_trans_insert_modified_file}") - logger.debug( - f"globals.list_trans_insert_location_idxs: {globals.list_trans_insert_location_idxs}") - logger.debug( - f"globals.list_trans_insert_number_insert_lines: {globals.list_trans_insert_number_insert_lines}") - logger.debug( - f"globals.list_trans_insert_lines_to_insert: {globals.list_trans_insert_lines_to_insert}") + logger.debug(f"globals.list_trans_insert_modified_file: {globals.list_trans_insert_modified_file}") + logger.debug(f"globals.list_trans_insert_location_idxs: {globals.list_trans_insert_location_idxs}") + 
logger.debug(f"globals.list_trans_insert_number_insert_lines: {globals.list_trans_insert_number_insert_lines}") + logger.debug(f"globals.list_trans_insert_lines_to_insert: {globals.list_trans_insert_lines_to_insert}") diff --git a/neural_coder/coders/pytorch/lightning.py b/neural_coder/coders/pytorch/lightning.py index 44e4afa307b..383432e2c3f 100644 --- a/neural_coder/coders/pytorch/lightning.py +++ b/neural_coder/coders/pytorch/lightning.py @@ -12,74 +12,75 @@ # See the License for the specific language governing permissions and # limitations under the License. + class Lightning(object): def __init__(self, file) -> None: self.file = file self.result = [] def transform(self): - lines = self.file.split('\n') + lines = self.file.split("\n") for line in lines: if self.not_add_accelerator(line) or self.not_add_precision(line): new_line = self.add(line) if self.not_modify(new_line): new_line = self.modify(new_line) - self.result.append(new_line) + self.result.append(new_line) elif self.not_modify(line): new_line = self.modify(line) - self.result.append(new_line) + self.result.append(new_line) if not self.not_add_accelerator(line) and not self.not_add_precision(line) and not self.not_modify(line): - if line == '' and self.result[-1] == '': + if line == "" and self.result[-1] == "": continue self.result.append(line) for index, line in enumerate(self.result): - if index != len(self.result)-1: - self.result[index] += '\n' - return ''.join(self.result) + if index != len(self.result) - 1: + self.result[index] += "\n" + return "".join(self.result) - def not_add_precision(self,s): - if 'Trainer' in s: - if 'precision' not in s: + def not_add_precision(self, s): + if "Trainer" in s: + if "precision" not in s: return True else: return False return False - def not_add_accelerator(self,s): - if 'Trainer' in s: - if 'accelerator' not in s: + def not_add_accelerator(self, s): + if "Trainer" in s: + if "accelerator" not in s: return True else: return False return False - def add(self,s): - if 'Trainer' in s: - if 'precision' not in s: - s_index = s.find(')') - s = s[:s_index] + ', precision=\"bf16\"' + s[s_index:] - if 'accelerator' not in s: - s_index = s.find(')') - s = s[:s_index] + ', accelerator=\"cpu\"' + s[s_index:] + def add(self, s): + if "Trainer" in s: + if "precision" not in s: + s_index = s.find(")") + s = s[:s_index] + ', precision="bf16"' + s[s_index:] + if "accelerator" not in s: + s_index = s.find(")") + s = s[:s_index] + ', accelerator="cpu"' + s[s_index:] return s def not_modify(self, s): - if 'bf16' in s and 'cpu' in s: + if "bf16" in s and "cpu" in s: return False return True def modify(self, s): - if '16' in s: - old = '16' - s = s.replace(old,'\"bf16\"') - if '32' in s: - old = '32' - s = s.replace(old,'\"bf16\"') - if '\"gpu\"' in s: - old = '\"gpu\"' - s = s.replace(old,'\"cpu\"') - if '\"tpu\"' in s: - old = '\"tpu\"' - s = s.replace(old,'\"cpu\"') + if "16" in s: + old = "16" + s = s.replace(old, '"bf16"') + if "32" in s: + old = "32" + s = s.replace(old, '"bf16"') + if '"gpu"' in s: + old = '"gpu"' + s = s.replace(old, '"cpu"') + if '"tpu"' in s: + old = '"tpu"' + s = s.replace(old, '"cpu"') return s diff --git a/neural_coder/coders/pytorch/reclaim_inference_transformers_trainer.py b/neural_coder/coders/pytorch/reclaim_inference_transformers_trainer.py index f10fc91afc6..82f324e26b3 100644 --- a/neural_coder/coders/pytorch/reclaim_inference_transformers_trainer.py +++ b/neural_coder/coders/pytorch/reclaim_inference_transformers_trainer.py @@ -12,35 +12,35 @@ # See the License for 
the specific language governing permissions and # limitations under the License. +import logging + from ... import globals from ...utils.line_operation import ( get_line_indent_level, - is_eval_func_model_name, get_line_left_hand_side, - single_line_comment_or_empty_line_detection + is_eval_func_model_name, + single_line_comment_or_empty_line_detection, ) -import logging - -logging.basicConfig(level=globals.logging_level, - format='%(asctime)s %(levelname)s %(message)s', - datefmt='%a, %d %b %Y %H:%M:%S +0000') +logging.basicConfig( + level=globals.logging_level, format="%(asctime)s %(levelname)s %(message)s", datefmt="%a, %d %b %Y %H:%M:%S +0000" +) logger = logging.getLogger(__name__) class ReclaimInferenceTransformersTrainer(object): def __init__(self, list_model_def_instance): self.list_model_def_instance = list_model_def_instance - + def print_info(self): for i in self.list_model_def_instance: logger.debug(f"i.print_info(): {i.print_info()}") - # collect file transformation info and register (store) in globals + # collect file transformation info and register (store) in globals # (i.e. which file to add which lines at which location) - def register_transformation(self): + def register_transformation(self): file_path = globals.list_code_path[0] - lines = open(file_path, 'r').read().split('\n') + lines = open(file_path, "r").read().split("\n") line_idx = 0 for i in range(len(lines)): @@ -68,7 +68,7 @@ def register_transformation(self): globals.list_trans_insert_lines_to_insert[idx].append(lines_to_insert) line_idx += 1 - + logger.debug(f"globals.list_trans_insert_modified_file: {globals.list_trans_insert_modified_file}") logger.debug(f"globals.list_trans_insert_location_idxs: {globals.list_trans_insert_location_idxs}") logger.debug(f"globals.list_trans_insert_number_insert_lines: {globals.list_trans_insert_number_insert_lines}") diff --git a/neural_coder/coders/pytorch/reclaim_inputs.py b/neural_coder/coders/pytorch/reclaim_inputs.py index edd6564fcc5..16397012e37 100644 --- a/neural_coder/coders/pytorch/reclaim_inputs.py +++ b/neural_coder/coders/pytorch/reclaim_inputs.py @@ -12,36 +12,36 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging + from ... import globals from ...utils.line_operation import ( get_line_indent_level, - is_eval_func_model_name, get_line_left_hand_side, - single_line_comment_or_empty_line_detection + is_eval_func_model_name, + single_line_comment_or_empty_line_detection, ) -import logging - -logging.basicConfig(level=globals.logging_level, - format='%(asctime)s %(levelname)s %(message)s', - datefmt='%a, %d %b %Y %H:%M:%S +0000') +logging.basicConfig( + level=globals.logging_level, format="%(asctime)s %(levelname)s %(message)s", datefmt="%a, %d %b %Y %H:%M:%S +0000" +) logger = logging.getLogger(__name__) class ReclaimInputs(object): def __init__(self, list_model_def_instance): self.list_model_def_instance = list_model_def_instance - + def print_info(self): for i in self.list_model_def_instance: logger.debug(f"i.print_info(): {i.print_info()}") - # collect file transformation info and register (store) in globals + # collect file transformation info and register (store) in globals # (i.e. 
which file to add which lines at which location) - def register_transformation(self): + def register_transformation(self): list_code = [] for i in globals.list_code_path: - list_code.append(open(i, 'r').read()) + list_code.append(open(i, "r").read()) for ins in self.list_model_def_instance: model_name = ins.model_name @@ -49,10 +49,10 @@ def register_transformation(self): model_def_line_idx = ins.model_def_line_idx function_def_line_idx = ins.function_def_line_idx class_name = ins.class_name - + # transformation file_path_idx = globals.list_code_path.index(file_path) - lines = list_code[file_path_idx].split('\n') + lines = list_code[file_path_idx].split("\n") line_idx = 0 # search inference line in this file, and also input_name @@ -63,7 +63,7 @@ def register_transformation(self): is_eval_func, eval_func_type = is_eval_func_model_name(model_name, line) if is_eval_func and "[coder-enabled]" not in line: inference_line = line - input_name = line[line.find("(")+1:line.find(")")].replace("*","") # get "c" in "a = b(**c)" + input_name = line[line.find("(") + 1 : line.find(")")].replace("*", "") # get "c" in "a = b(**c)" # if there is already a "input = xxx", then quit this function if input_name != "": @@ -72,7 +72,7 @@ def register_transformation(self): if not single_line_comment_or_empty_line_detection(line): if input_name in line and "=" in line and line.find(input_name) < line.find("="): return - + # add the created lines for inputs if inference_line != "" and input_name != "": for i in range(len(lines)): @@ -97,9 +97,9 @@ def register_transformation(self): globals.list_trans_insert_location_idxs[idx].append(trans_insert_location) globals.list_trans_insert_number_insert_lines[idx].append(lines_to_insert.count("\n") + 1) globals.list_trans_insert_lines_to_insert[idx].append(lines_to_insert) - + line_idx += 1 - + logger.debug(f"globals.list_trans_insert_modified_file: {globals.list_trans_insert_modified_file}") logger.debug(f"globals.list_trans_insert_location_idxs: {globals.list_trans_insert_location_idxs}") logger.debug(f"globals.list_trans_insert_number_insert_lines: {globals.list_trans_insert_number_insert_lines}") diff --git a/neural_coder/coders/tensorflow/amp.py b/neural_coder/coders/tensorflow/amp.py index b1bd8510f9d..70302d78d4a 100644 --- a/neural_coder/coders/tensorflow/amp.py +++ b/neural_coder/coders/tensorflow/amp.py @@ -14,6 +14,7 @@ from ...utils.line_operation import get_line_left_hand_side + class TensorFlowKerasAMP(object): def __init__(self, file) -> None: self.file = file @@ -23,19 +24,21 @@ def __init__(self, file) -> None: def transform(self): # import pdb # pdb.set_trace() - lines = self.file.split('\n') + lines = self.file.split("\n") for line in lines: if self.is_modify(line): - if '.ConfigProto()'in line: # TF AMP + if ".ConfigProto()" in line: # TF AMP config_name = get_line_left_hand_side(line) new_line_1 = "from tensorflow.core.protobuf import rewriter_config_pb2" - new_line_2 = config_name + \ - ".graph_options.rewrite_options.auto_mixed_precision_mkl = " + \ - "rewriter_config_pb2.RewriterConfig.ON" + new_line_2 = ( + config_name + + ".graph_options.rewrite_options.auto_mixed_precision_mkl = " + + "rewriter_config_pb2.RewriterConfig.ON" + ) self.result.append(line) self.result.append(new_line_1) self.result.append(new_line_2) - elif 'keras' in line and 'import' in line: # Keras AMP + elif "keras" in line and "import" in line: # Keras AMP if not self.keras_edited_flag: new_line_1 = "from tensorflow.keras.mixed_precision import experimental as mixed_precision" 
new_line_2 = "policy = mixed_precision.Policy('mixed_bfloat16')" @@ -50,12 +53,12 @@ def transform(self): else: self.result.append(line) for index, line in enumerate(self.result): - if index != len(self.result)-1: - self.result[index] += '\n' - return ''.join(self.result) + if index != len(self.result) - 1: + self.result[index] += "\n" + return "".join(self.result) def is_modify(self, s): - if '.ConfigProto()' in s or ('keras' in s and 'import' in s): + if ".ConfigProto()" in s or ("keras" in s and "import" in s): return True else: return False diff --git a/neural_coder/coders/tensorflow/inc.py b/neural_coder/coders/tensorflow/inc.py index 2bd63fc869c..837dff143fb 100644 --- a/neural_coder/coders/tensorflow/inc.py +++ b/neural_coder/coders/tensorflow/inc.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ...utils.line_operation import get_line_left_hand_side, get_line_indent_level +from ...utils.line_operation import get_line_indent_level, get_line_left_hand_side + class TensorFlowKerasINC(object): def __init__(self, file) -> None: @@ -22,7 +23,7 @@ def __init__(self, file) -> None: def transform(self): # import pdb # pdb.set_trace() - lines = self.file.split('\n') + lines = self.file.split("\n") for line in lines: if self.is_modify(line): model_name = "model" @@ -37,13 +38,13 @@ def transform(self): else: self.result.append(line) for index, line in enumerate(self.result): - if index != len(self.result)-1: - self.result[index] += '\n' - return ''.join(self.result) + if index != len(self.result) - 1: + self.result[index] += "\n" + return "".join(self.result) def is_modify(self, s): - if 'model = tf.' in s or 'model = load_model(' in s: - if 'self.model' not in s: + if "model = tf." in s or "model = load_model(" in s: + if "self.model" not in s: return True else: return False diff --git a/neural_coder/coders/transform.py b/neural_coder/coders/transform.py index f602f8aec38..c553cbbb87b 100644 --- a/neural_coder/coders/transform.py +++ b/neural_coder/coders/transform.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .. import globals import logging -logging.basicConfig(level=globals.logging_level, - format='%(asctime)s %(levelname)s %(message)s', - datefmt='%a, %d %b %Y %H:%M:%S +0000') +from .. 
import globals + +logging.basicConfig( + level=globals.logging_level, format="%(asctime)s %(levelname)s %(message)s", datefmt="%a, %d %b %Y %H:%M:%S +0000" +) logger = logging.getLogger(__name__) @@ -29,16 +30,12 @@ def execute_insert_transformation(list_transformed_code): trans_lines_to_insert = globals.list_trans_insert_lines_to_insert[index] # sort trans_location_idxs and sort the other lists accordingly - trans_number_insert_lines = [ - i for _, i in sorted(zip(trans_location_idxs, trans_number_insert_lines)) - ] - trans_lines_to_insert = [ - i for _, i in sorted(zip(trans_location_idxs, trans_lines_to_insert)) - ] + trans_number_insert_lines = [i for _, i in sorted(zip(trans_location_idxs, trans_number_insert_lines))] + trans_lines_to_insert = [i for _, i in sorted(zip(trans_location_idxs, trans_lines_to_insert))] trans_location_idxs = sorted(trans_location_idxs) - + file_path_idx = globals.list_code_path.index(file_path) - lines_transformed = list_transformed_code[file_path_idx].split('\n') + lines_transformed = list_transformed_code[file_path_idx].split("\n") # math t = [0] @@ -47,7 +44,7 @@ def execute_insert_transformation(list_transformed_code): u = u + n t.append(u) t = t[:-1] - + logger.debug(f"t: {t}") trans_location_idxs = [sum(i) for i in zip(trans_location_idxs, t)] logger.debug(f"trans_location_idxs after adjustment: {trans_location_idxs}") @@ -73,7 +70,7 @@ def execute_indent_transformation(list_transformed_code): trans_indent_level = globals.list_trans_indent_level[index] file_path_idx = globals.list_code_path.index(file_path) - lines_transformed = list_transformed_code[file_path_idx].split('\n') + lines_transformed = list_transformed_code[file_path_idx].split("\n") for idx in trans_location_idxs: # actual transformation (indent) this_indent_level = trans_indent_level[trans_location_idxs.index(idx)] diff --git a/neural_coder/docs/cloud_autobench/code/resnet50.py b/neural_coder/docs/cloud_autobench/code/resnet50.py index 9161661cdfb..13caa61334a 100644 --- a/neural_coder/docs/cloud_autobench/code/resnet50.py +++ b/neural_coder/docs/cloud_autobench/code/resnet50.py @@ -14,6 +14,7 @@ import torch import torchvision.models as models + model = models.resnet50(pretrained=True) model.eval() batch_size = 1 diff --git a/neural_coder/globals.py b/neural_coder/globals.py index 9b402ec85c2..4603e5bc46b 100644 --- a/neural_coder/globals.py +++ b/neural_coder/globals.py @@ -50,7 +50,8 @@ # modular design use_modular = True -modular_item = "" # str +modular_item = "" # str + def reset_globals(): global list_code_path diff --git a/neural_coder/graphers/code_line.py b/neural_coder/graphers/code_line.py index 966d2d7c55c..128482f9735 100644 --- a/neural_coder/graphers/code_line.py +++ b/neural_coder/graphers/code_line.py @@ -12,12 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import pprint from typing import List + from .. 
import globals -from ..utils.line_operation import get_line_indent_level -from ..utils.line_operation import multi_line_comment_detection -from ..utils.line_operation import single_line_comment_or_empty_line_detection -import pprint +from ..utils.line_operation import ( + get_line_indent_level, + multi_line_comment_detection, + single_line_comment_or_empty_line_detection, +) class CodeLine: @@ -49,29 +52,30 @@ def print_info(self): def register_code_line(): if globals.print_code_line_info: - print("{:<100} {:<10} {:<20} {:<20} {:<20} {:<40} {:<20} \ + print( + "{:<100} {:<10} {:<20} {:<20} {:<20} {:<40} {:<20} \ {:<20} {:<20} {:<20} {:<20} {:<20} {:<20} {:<20} {:<20}".format( - 'line', - 'line_idx', - 'is_class_def_line', - 'is_in_class', - 'class_name', - 'parent_class_name', - 'class_def_line_idx', - 'class_end_line_idx', - 'is_func_def_line', - 'is_in_func', - 'func_name', - 'func_return_idx', - 'return_item', - 'func_def_line_idx', - 'func_end_line_idx',) + "line", + "line_idx", + "is_class_def_line", + "is_in_class", + "class_name", + "parent_class_name", + "class_def_line_idx", + "class_end_line_idx", + "is_func_def_line", + "is_in_func", + "func_name", + "func_return_idx", + "return_item", + "func_def_line_idx", + "func_end_line_idx", + ) ) - for path in globals.list_code_path: - code = open(path, 'r').read() - lines = code.split('\n') + code = open(path, "r").read() + lines = code.split("\n") line_idx = 0 is_multi_line_comment = False @@ -100,11 +104,11 @@ def register_code_line(): CL.indent_level = get_line_indent_level(line) is_multi_line_comment, end_multi_line_comment_flag = multi_line_comment_detection( - line, is_multi_line_comment, end_multi_line_comment_flag) + line, is_multi_line_comment, end_multi_line_comment_flag + ) CL.is_multi_line_comment = is_multi_line_comment - is_single_line_comment_or_empty = single_line_comment_or_empty_line_detection( - line) + is_single_line_comment_or_empty = single_line_comment_or_empty_line_detection(line) CL.is_single_line_comment_or_empty = is_single_line_comment_or_empty # class @@ -114,9 +118,8 @@ def register_code_line(): is_class_def_line = True line_ls = line.lstrip() if "(" in line_ls: # "class A(B):" - class_name = line_ls[line_ls.find(" ")+1:line_ls.find("(")] - parent_content = line_ls[line_ls.find( - "(")+1:line_ls.find(")")] + class_name = line_ls[line_ls.find(" ") + 1 : line_ls.find("(")] + parent_content = line_ls[line_ls.find("(") + 1 : line_ls.find(")")] if "," in parent_content: # "class A(B, C):" parent_class_name = [] parent_content_items = parent_content.split(", ") @@ -125,7 +128,7 @@ def register_code_line(): else: # "class A(B):" parent_class_name = [parent_content] else: # "class A:" - class_name = line_ls[line_ls.find(" ")+1:line_ls.find(":")] + class_name = line_ls[line_ls.find(" ") + 1 : line_ls.find(":")] parent_class_name = [] # search for class end line @@ -141,25 +144,24 @@ def register_code_line(): except: # end of file situation class_end_line_idx = search_idx break - following_indent_level = get_line_indent_level( - following_line) + following_indent_level = get_line_indent_level(following_line) _is_multi_line_comment, _end_multi_line_comment_flag = multi_line_comment_detection( - following_line, _is_multi_line_comment, _end_multi_line_comment_flag) - _is_single_line_comment_or_empty = single_line_comment_or_empty_line_detection( - following_line) + following_line, _is_multi_line_comment, _end_multi_line_comment_flag + ) + _is_single_line_comment_or_empty = 
single_line_comment_or_empty_line_detection(following_line) # judge_1: indent is equal to def indent judge_1 = following_indent_level <= class_def_indent_level # judge_2: not starting with")" try: - judge_2 = True if ( - following_line != "" and following_line[following_indent_level] != ")") else False + judge_2 = ( + True if (following_line != "" and following_line[following_indent_level] != ")") else False + ) except: judge_2 = False # judge_3: is not a comment or empty line - judge_3 = True if ( - not _is_multi_line_comment and not _is_single_line_comment_or_empty) else False + judge_3 = True if (not _is_multi_line_comment and not _is_single_line_comment_or_empty) else False if judge_1 and judge_2 and judge_3: search_following_lines = False @@ -188,7 +190,7 @@ def register_code_line(): if not is_in_func and "def " in line: is_in_func = True is_func_def_line = True - func_name = line[line.find("def")+4:line.find("(")] + func_name = line[line.find("def") + 4 : line.find("(")] # search for func end line func_def_indent_level = get_line_indent_level(line) @@ -203,30 +205,28 @@ def register_code_line(): except: # end of file situation func_end_line_idx = search_idx break - following_indent_level = get_line_indent_level( - following_line) + following_indent_level = get_line_indent_level(following_line) if "return" in following_line: func_return_idx = search_idx - return_item = following_line[following_line.find( - "return")+7:].strip() + return_item = following_line[following_line.find("return") + 7 :].strip() _is_multi_line_comment, _end_multi_line_comment_flag = multi_line_comment_detection( - following_line, _is_multi_line_comment, _end_multi_line_comment_flag) - _is_single_line_comment_or_empty = single_line_comment_or_empty_line_detection( - following_line) + following_line, _is_multi_line_comment, _end_multi_line_comment_flag + ) + _is_single_line_comment_or_empty = single_line_comment_or_empty_line_detection(following_line) # judge_1: indent is equal to def indent judge_1 = following_indent_level <= func_def_indent_level # judge_2: not starting with")" try: - judge_2 = True if ( - following_line != "" and following_line[following_indent_level] != ")") else False + judge_2 = ( + True if (following_line != "" and following_line[following_indent_level] != ")") else False + ) except: judge_2 = False # judge_3: is not a comment or empty line - judge_3 = True if ( - not _is_multi_line_comment and not _is_single_line_comment_or_empty) else False + judge_3 = True if (not _is_multi_line_comment and not _is_single_line_comment_or_empty) else False if judge_1 and judge_2 and judge_3: search_following_lines = False @@ -250,28 +250,27 @@ def register_code_line(): CL.func_end_line_idx = func_end_line_idx if globals.print_code_line_info: - print("{:<100} {:<10} {:<20} {:<20} {:<20} {:<40} {:<20} \ + print( + "{:<100} {:<10} {:<20} {:<20} {:<20} {:<40} {:<20} \ {:<20} {:<20} {:<20} {:<20} {:<20} {:<20} {:<20} {:<20}".format( - line[0:100], - line_idx, - is_class_def_line, - is_in_class, - class_name, - str( - parent_class_name), - class_def_line_idx, - class_end_line_idx, - is_func_def_line, - is_in_func, - func_name, - func_return_idx, - return_item[0:20], - func_def_line_idx, - func_end_line_idx) + line[0:100], + line_idx, + is_class_def_line, + is_in_class, + class_name, + str(parent_class_name), + class_def_line_idx, + class_end_line_idx, + is_func_def_line, + is_in_func, + func_name, + func_return_idx, + return_item[0:20], + func_def_line_idx, + func_end_line_idx, + ) ) - - 
globals.list_code_line_instance.append(CL) line_idx += 1 diff --git a/neural_coder/graphers/function.py b/neural_coder/graphers/function.py index dfef282d48f..4740b0a3d4f 100644 --- a/neural_coder/graphers/function.py +++ b/neural_coder/graphers/function.py @@ -12,38 +12,39 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging from typing import List -from ..utils.line_operation import get_line_indent_level + from .. import globals -import logging +from ..utils.line_operation import get_line_indent_level -logging.basicConfig(level=globals.logging_level, - format='%(asctime)s %(levelname)s %(message)s', - datefmt='%a, %d %b %Y %H:%M:%S +0000') +logging.basicConfig( + level=globals.logging_level, format="%(asctime)s %(levelname)s %(message)s", datefmt="%a, %d %b %Y %H:%M:%S +0000" +) logger = logging.getLogger(__name__) def register_func_wrap_pair(): - """ register all relationships of ( [function name] : [return_item] ) pair of the list of code path provided + """Register all relationships of ( [function name] : [return_item] ) pair of the list of code path provided but only for "return xxx()" (return a function w/o class prefix) or "return xxx" (return an instance) e.g. def a1(): - return b1() + + return b1() def b1(): return x def c(): return T.q() - INPUT: + INPUT: ["example.py"] (above code snippet) OUTPUT: globals.list_all_function_return_item = ["b1", "x"] globals.list_all_function_name = ["a1", "b1"] """ - logger.info( - f"Analyzing function wrapping relationship for call graph analysis...") + logger.info("Analyzing function wrapping relationship for call graph analysis...") for path in globals.list_code_path: - code = open(path, 'r').read() - lines = code.split('\n') + code = open(path, "r").read() + lines = code.split("\n") line_idx = 0 is_in_function = False func_end_line_idx = -1 @@ -57,7 +58,7 @@ def c(): # handle function's defnition line, to initiate a function if not is_in_function and "def " in line: # only deal with outermost def - function_name = line[line.find("def")+4:line.find("(")] + function_name = line[line.find("def") + 4 : line.find("(")] def_indent_level = get_line_indent_level(line) function_def_line_idx = line_idx @@ -80,8 +81,9 @@ def c(): judge_1 = following_indent_level <= def_indent_level # judge_2: not starting with")" try: - judge_2 = True if ( - following_line != "" and following_line[following_indent_level] != ")") else False + judge_2 = ( + True if (following_line != "" and following_line[following_indent_level] != ")") else False + ) except: judge_2 = False # judge_3: is not a comment @@ -109,7 +111,7 @@ def c(): if is_in_function and line_idx < func_end_line_idx: # handle return if "return" in line: - line_s = line[line.find("return")+7:].strip() + line_s = line[line.find("return") + 7 :].strip() # line_s common case: 1. "" 2. "xxx" 3. "xxx, xxx" 3. "xxx()" 4. "xxx(xxx)" 5. "xxx(xxx, xxx)" if line_s == "": # case 1 pass @@ -118,36 +120,31 @@ def c(): elif 'f"' in line or "#" in line or "if" in line or "." 
in line or '""' in line or "+" in line: pass elif "(" in line_s: # case 4 or case 5 - return_item = line_s[:line_s.find("(")] - globals.list_all_function_return_item.append( - return_item) + return_item = line_s[: line_s.find("(")] + globals.list_all_function_return_item.append(return_item) globals.list_all_function_name.append(function_name) elif ", " in line_s: # case 3 ls = line_s.split(", ") for return_item in ls: - globals.list_all_function_return_item.append( - return_item) - globals.list_all_function_name.append( - function_name) + globals.list_all_function_return_item.append(return_item) + globals.list_all_function_name.append(function_name) else: # case 2 return_item = line_s - globals.list_all_function_return_item.append( - return_item) + globals.list_all_function_return_item.append(return_item) globals.list_all_function_name.append(function_name) line_idx += 1 continue - logger.debug( - f"globals.list_all_function_name: {globals.list_all_function_name}") - logger.debug( - f"globals.list_all_function_return_item: {globals.list_all_function_return_item}") + logger.debug(f"globals.list_all_function_name: {globals.list_all_function_name}") + logger.debug(f"globals.list_all_function_return_item: {globals.list_all_function_return_item}") def get_all_wrap_children(base_function_name: str) -> List: - """get all wrapper children names of the base function name + """Get all wrapper children names of the base function name e.g. class Net(nn.Module): + xxx # def _resnet(): diff --git a/neural_coder/graphers/model.py b/neural_coder/graphers/model.py index 28b20c4195a..70739b124ea 100644 --- a/neural_coder/graphers/model.py +++ b/neural_coder/graphers/model.py @@ -14,15 +14,16 @@ # FOR PYTORCH ONLY -from .function import get_all_wrap_children -from ..utils.line_operation import get_line_indent_level, of_definition_format +import logging from typing import List + from .. import globals -import logging +from ..utils.line_operation import get_line_indent_level, of_definition_format +from .function import get_all_wrap_children -logging.basicConfig(level=globals.logging_level, - format='%(asctime)s %(levelname)s %(message)s', - datefmt='%a, %d %b %Y %H:%M:%S +0000') +logging.basicConfig( + level=globals.logging_level, format="%(asctime)s %(levelname)s %(message)s", datefmt="%a, %d %b %Y %H:%M:%S +0000" +) logger = logging.getLogger(__name__) @@ -61,11 +62,16 @@ def print_info(self): # search nnModule classes def register_nnModule_class(): - logger.info(f"Analyzing nn.Module class definitions in all files ...") + logger.info("Analyzing nn.Module class definitions in all files ...") # search raw nnModule class (e.g. 
class ClassName(nn.Module):) for cl in globals.list_code_line_instance: - parent_class_has_nnModule = list(set(cl.parent_class_name) & set( - ["nn.Module", "torch.nn.Module", "nn.Sequential", "torch.Sequential", "_BaseAutoModelClass"])) != [] + parent_class_has_nnModule = ( + list( + set(cl.parent_class_name) + & set(["nn.Module", "torch.nn.Module", "nn.Sequential", "torch.Sequential", "_BaseAutoModelClass"]) + ) + != [] + ) if cl.is_class_def_line and parent_class_has_nnModule: CD = ClassDefinition( class_name=cl.class_name, @@ -85,8 +91,7 @@ def register_nnModule_class(): while do_search: list_child_class_name = [] for cl in globals.list_code_line_instance: - parent_class_has_nnModule = list( - set(cl.parent_class_name) & set(search_scope)) != [] + parent_class_has_nnModule = list(set(cl.parent_class_name) & set(search_scope)) != [] if cl.is_class_def_line and parent_class_has_nnModule: CD = ClassDefinition( class_name=cl.class_name, @@ -111,8 +116,7 @@ def register_nnModule_class(): # search nnModule instance definition def register_nnModule_instance_definition(): - logger.info( - f"Analyzing nn.Module instance (model instance) definitions in all files ...") + logger.info("Analyzing nn.Module instance (model instance) definitions in all files ...") # search model definition lines like "model_name = ClassName(xxx)" def_cl = [] @@ -120,10 +124,12 @@ def register_nnModule_instance_definition(): if not cl.is_multi_line_comment and not cl.is_single_line_comment_or_empty: is_def, lhs, rhs = of_definition_format(cl.line_content) stripped = cl.line_content.replace(" ", "") - if is_def and \ - rhs in globals.list_class_name + ["Module", "Sequential"] and \ - cl.class_name not in globals.list_class_name and \ - "(" not in cl.return_item: + if ( + is_def + and rhs in globals.list_class_name + ["Module", "Sequential"] + and cl.class_name not in globals.list_class_name + and "(" not in cl.return_item + ): def_cl.append(cl) elif is_def and "__dict__[args.arch]" in cl.line_content: def_cl.append(cl) @@ -134,8 +140,11 @@ def register_nnModule_instance_definition(): def_cl.append(cl) elif is_def and "keras.Sequential" in cl.line_content: def_cl.append(cl) - elif is_def and "." in stripped and \ - stripped[stripped.find("=") + 1: stripped.find(".")] in globals.list_class_name: + elif ( + is_def + and "." in stripped + and stripped[stripped.find("=") + 1 : stripped.find(".")] in globals.list_class_name + ): def_cl.append(cl) list_lhs = [] @@ -176,8 +185,11 @@ def register_nnModule_instance_definition(): globals.list_model_def_instance.append(MD) elif list_is_in_func[i]: # situation 2: "model = Net()" is inside a function # situation 2-1: the function does not return another model's name, and is not __init__ - if list_return_item[i] not in list_lhs and \ - list_func_name[i] != "__init__" and "tokenizer" not in list_lhs[i]: + if ( + list_return_item[i] not in list_lhs + and list_func_name[i] != "__init__" + and "tokenizer" not in list_lhs[i] + ): # register this model globals.list_model_name.append(list_lhs[i]) MD = ModelDefinition( @@ -193,13 +205,15 @@ def register_nnModule_instance_definition(): globals.list_model_def_instance.append(MD) # situation 2-2: the function returns another model's name elif list_return_item[i] in list_lhs: - globals.list_wrapper_base_function_name.append( - list_func_name[i]) + globals.list_wrapper_base_function_name.append(list_func_name[i]) # register function_name like "xxx" in "def xxx() ... 
return NNModuleClass()" for cl in globals.list_code_line_instance: - if cl.is_in_func and cl.line_idx == cl.func_return_idx and \ - cl.return_item[:cl.return_item.find("(")] in globals.list_class_name: + if ( + cl.is_in_func + and cl.line_idx == cl.func_return_idx + and cl.return_item[: cl.return_item.find("(")] in globals.list_class_name + ): globals.list_wrapper_base_function_name.append(cl.func_name) # for all base function_name (that returns nnModule instance), @@ -208,47 +222,53 @@ def register_nnModule_instance_definition(): globals.list_wrapper_children_function_name = [] for i in globals.list_wrapper_base_function_name: globals.list_wrapper_children_function_name += get_all_wrap_children(i) - globals.list_wrapper_all_function_name = globals.list_wrapper_base_function_name + \ - globals.list_wrapper_children_function_name + globals.list_wrapper_all_function_name = ( + globals.list_wrapper_base_function_name + globals.list_wrapper_children_function_name + ) globals.list_wrapper_all_function_name = list(set(globals.list_wrapper_all_function_name)) # register function_name like "xxx" in "def xxx() ... model = some_wrapper_function() ... return model" for cl in globals.list_code_line_instance: if cl.is_in_func and not cl.is_multi_line_comment and not cl.is_single_line_comment_or_empty: is_def, lhs, rhs = of_definition_format(cl.line_content) - if is_def and \ - rhs in globals.list_wrapper_all_function_name and \ - cl.class_name not in globals.list_class_name and \ - cl.return_item == lhs: + if ( + is_def + and rhs in globals.list_wrapper_all_function_name + and cl.class_name not in globals.list_class_name + and cl.return_item == lhs + ): globals.list_wrapper_base_function_name.append(cl.func_name) # (again) # for all base function_name (that returns nnModule instance), # find all wrapper function_name of the base wrapper function_name - globals.list_wrapper_base_function_name = list( - set(globals.list_wrapper_base_function_name)) + globals.list_wrapper_base_function_name = list(set(globals.list_wrapper_base_function_name)) for i in globals.list_wrapper_base_function_name: globals.list_wrapper_children_function_name += get_all_wrap_children(i) - globals.list_wrapper_all_function_name += globals.list_wrapper_base_function_name + \ - globals.list_wrapper_children_function_name - globals.list_wrapper_all_function_name = list( - set(globals.list_wrapper_all_function_name)) + globals.list_wrapper_all_function_name += ( + globals.list_wrapper_base_function_name + globals.list_wrapper_children_function_name + ) + globals.list_wrapper_all_function_name = list(set(globals.list_wrapper_all_function_name)) # print all wrapper function names for debug purpose - logger.debug( - f"globals.list_wrapper_all_function_name: {globals.list_wrapper_all_function_name}") + logger.debug(f"globals.list_wrapper_all_function_name: {globals.list_wrapper_all_function_name}") for cl in globals.list_code_line_instance: if not cl.is_multi_line_comment and not cl.is_single_line_comment_or_empty and cl.func_name != "__init__": is_def, lhs, rhs = of_definition_format(cl.line_content) - if is_def and \ - rhs in globals.list_wrapper_all_function_name and \ - rhs not in ["self.model", "model", "self.call", "call"] and \ - "forward" not in rhs and \ - "config" not in lhs and "congfig" not in lhs and "," not in lhs and "inference" not in lhs and \ - "tokenizer" not in lhs and \ - cl.class_name not in globals.list_class_name and \ - cl.func_name not in globals.list_wrapper_all_function_name: + if ( + is_def + and rhs in 
globals.list_wrapper_all_function_name + and rhs not in ["self.model", "model", "self.call", "call"] + and "forward" not in rhs + and "config" not in lhs + and "congfig" not in lhs + and "," not in lhs + and "inference" not in lhs + and "tokenizer" not in lhs + and cl.class_name not in globals.list_class_name + and cl.func_name not in globals.list_wrapper_all_function_name + ): # register this model globals.list_model_name.append(lhs) MD = ModelDefinition( @@ -264,7 +284,6 @@ def register_nnModule_instance_definition(): MD.print_info() globals.list_model_def_instance.append(MD) - globals.list_model_name = list(set(globals.list_model_name)) # print all model names for debug purpose diff --git a/neural_coder/graphers/preloads/transformers.yaml b/neural_coder/graphers/preloads/transformers.yaml index eacda4ca635..b1ebe26de23 100644 --- a/neural_coder/graphers/preloads/transformers.yaml +++ b/neural_coder/graphers/preloads/transformers.yaml @@ -2157,4 +2157,4 @@ class: 'LayoutLMv3Encoder', 'Data2VecAudioForCTC', 'NezhaLayer' - ] \ No newline at end of file + ] diff --git a/neural_coder/interface.py b/neural_coder/interface.py index 9b5b2ee841d..98206ee3e06 100644 --- a/neural_coder/interface.py +++ b/neural_coder/interface.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging import os +import re import subprocess -import logging import time + import yaml -import re from . import globals @@ -27,19 +28,20 @@ def detect_device_(logger): # device detection - logger.info(f"Device detection started ...") + logger.info("Device detection started ...") from .utils.device import detect_device + detect_device() if globals.device == "cpu_with_amx": - logger.info(f"Device: CPU with AMX") + logger.info("Device: CPU with AMX") elif globals.device == "cpu_without_amx": - logger.info(f"Device: CPU without AMX") + logger.info("Device: CPU without AMX") elif globals.device == "intel_gpu": - logger.info(f"Device: Intel(R) GPU") + logger.info("Device: Intel(R) GPU") elif globals.device == "cuda": - logger.info(f"Device: CUDA") + logger.info("Device: CUDA") elif globals.device == "mutli": - logger.info(f"Device: Multi-Device") + logger.info("Device: Multi-Device") def enable( @@ -63,22 +65,19 @@ def enable( ncore_per_instance=-1, # only for "self_defined" mode ninstances=-1, # only for "self_defined" mode bench_batch_size=-1, # only for "self_defined" mode - test_code_line=False, # print code line info for debug use + test_code_line=False, # print code line info for debug use cache_load_transformers=True, - optimum_quant_config="", # only for HF optimum optimizations, yaml or hub path + optimum_quant_config="", # only for HF optimum optimizations, yaml or hub path use_inc=True, use_modular=False, modular_item="", ): - """enable a feature or a couple of features for the code - - """ + """Enable a feature or a couple of features for the code.""" ### Preparation # set up workspace - ws_path = "neural_coder_workspace/" + \ - "enable" + str(time.time()).replace(".","") + "/" + ws_path = "neural_coder_workspace/" + "enable" + str(time.time()).replace(".", "") + "/" os.makedirs(ws_path) # user parameters @@ -89,10 +88,9 @@ def enable( # set up logging logger = logging.getLogger(ws_path) logger.setLevel(globals.logging_level) - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s: - %(message)s', - datefmt='%Y-%m-%d %H:%M:%S') + formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s: - %(message)s", 
datefmt="%Y-%m-%d %H:%M:%S") - fh = logging.FileHandler(ws_path + 'enable.log') + fh = logging.FileHandler(ws_path + "enable.log") fh.setLevel(globals.logging_level) fh.setFormatter(formatter) ch = logging.StreamHandler() @@ -106,12 +104,12 @@ def enable( detect_device_(logger) # print key inputs - logger.info(f"Enabling started ...") + logger.info("Enabling started ...") logger.info(f"code: {code}") logger.info(f"features: {features}") # feature list for reference - ''' + """ feature_list = [ "pytorch_jit_script", "pytorch_jit_script_ofi", @@ -149,13 +147,14 @@ def enable( "onnx_inc_dynamic_quant", "inc_auto", ] - ''' + """ ### Enable Benchmark (if run_bench) - + if run_bench: # add "pytorch_change_batch_size" to features from .utils.cpu_info import get_num_cpu_cores + ncores = get_num_cpu_cores() if mode == "throughput": target_batch_size = 2 * ncores @@ -173,21 +172,21 @@ def enable( if "pytorch_benchmark" not in features: features.append("pytorch_benchmark") - logger.info( - f"Will perform benchmark on [{mode}] mode with batch size [{target_batch_size}] ...") + logger.info(f"Will perform benchmark on [{mode}] mode with batch size [{target_batch_size}] ...") #### Feature Enabling - globals.num_benchmark_iteration = str(num_benchmark_iteration + 10) # 10: warmup iteration number + globals.num_benchmark_iteration = str(num_benchmark_iteration + 10) # 10: warmup iteration number globals.cache_load_transformers = cache_load_transformers globals.optimum_quant_config = optimum_quant_config globals.use_modular = use_modular globals.modular_item = modular_item - + # move "pytorch_benchmark" to the last from .utils.common import move_element_to_last + features = move_element_to_last(features, "pytorch_benchmark") # not in harness scope @@ -199,17 +198,19 @@ def enable( "tensorflow_inc", "change_trainer_to_nlptrainer", ] - + # # features that need creating dummy dataloader (when needed) first # if "pytorch_inc_static_quant_fx" in features or \ # "pytorch_inc_static_quant_ipex" in features: # features = ["pytorch_dummy_dataloader"] + features - + # features that need reclaiming inputs first (e.g. 
for "for step, inputs in enumerate(dataloader)") - if "pytorch_jit_trace" in features or \ - "pytorch_jit_trace_ofi" in features or \ - "pytorch_inc_static_quant_fx" in features or \ - "pytorch_inc_static_quant_ipex" in features: + if ( + "pytorch_jit_trace" in features + or "pytorch_jit_trace_ofi" in features + or "pytorch_inc_static_quant_fx" in features + or "pytorch_inc_static_quant_ipex" in features + ): features = ["pytorch_reclaim_inputs"] + features # intel_extension_for_transformers @@ -223,9 +224,11 @@ def enable( globals.reset_globals() from .utils import handle_user_input + globals.list_code_path, num_user_code_path = handle_user_input.get_all_code_path(code) from .coders.autoinc import domain + code_domain = domain.determine_domain(globals.list_code_path[0]) if code_domain == "transformers_trainer": if "pytorch_benchmark" in features: @@ -236,10 +239,10 @@ def enable( ## Feature Transformation for idx_feature, feature in enumerate(features): - # "inc_auto" auto selection of feature according to fwk if feature == "inc_auto": from .coders.autoinc import domain + code_domain = domain.determine_domain(globals.list_code_path[0]) if code_domain == "keras_script": feature = "keras_inc" @@ -254,6 +257,7 @@ def enable( globals.reset_globals() from .utils import handle_user_input + globals.list_code_path, num_user_code_path = handle_user_input.get_all_code_path(code) if len(transformed_list_code_path) > 0: globals.list_code_path = transformed_list_code_path @@ -265,56 +269,59 @@ def enable( # by the order in code_path # global behaviors - logger.info( - f"Performing code transformation for feature: [{feature}] ...") + logger.info(f"Performing code transformation for feature: [{feature}] ...") for i in globals.list_code_path: - list_transformed_code.append(open(i, 'r').read()) + list_transformed_code.append(open(i, "r").read()) ## 1. 
Features in Harness Scope if feature not in features_outside_harness: + from .coders.transform import execute_indent_transformation, execute_insert_transformation from .graphers.code_line import register_code_line - from .graphers.model import register_nnModule_class, register_nnModule_instance_definition from .graphers.function import register_func_wrap_pair - from .coders.transform import execute_insert_transformation, execute_indent_transformation + from .graphers.model import register_nnModule_class, register_nnModule_instance_definition # code analysis (call graph, type inference etc) register_code_line() register_func_wrap_pair() register_nnModule_class() if cache_load_transformers: - preload_file = open(os.path.dirname(__file__) + - "/graphers/preloads/" + "transformers" + ".yaml") + preload_file = open(os.path.dirname(__file__) + "/graphers/preloads/" + "transformers" + ".yaml") preload_dict = yaml.load(preload_file, Loader=yaml.BaseLoader) globals.list_class_name += preload_dict["class"] register_nnModule_instance_definition() # register transformation - if feature == "pytorch_dummy_dataloader": # is not in harness scope, but needs call graph and type inference + if ( + feature == "pytorch_dummy_dataloader" + ): # is not in harness scope, but needs call graph and type inference from .coders.pytorch.dummy_dataloader import DummyDataLoader + opt = DummyDataLoader(globals.list_model_def_instance) opt.register_transformation() elif feature == "pytorch_reclaim_inputs": from .coders.pytorch.reclaim_inputs import ReclaimInputs + opt = ReclaimInputs(globals.list_model_def_instance) opt.register_transformation() elif feature == "pytorch_reclaim_inference_transformers_trainer": from .coders.pytorch.reclaim_inference_transformers_trainer import ReclaimInferenceTransformersTrainer + opt = ReclaimInferenceTransformersTrainer(globals.list_model_def_instance) opt.register_transformation() elif feature in [ - "pytorch_inc_dynamic_quant", - "pytorch_inc_static_quant_fx", - "pytorch_inc_static_quant_ipex", - "pytorch_inc_huggingface_optimum_static", - "pytorch_inc_huggingface_optimum_dynamic", - "onnx_inc_static_quant_qlinear", - "onnx_inc_static_quant_qdq", - "onnx_inc_dynamic_quant", - "intel_extension_for_transformers", - ]: - + "pytorch_inc_dynamic_quant", + "pytorch_inc_static_quant_fx", + "pytorch_inc_static_quant_ipex", + "pytorch_inc_huggingface_optimum_static", + "pytorch_inc_huggingface_optimum_dynamic", + "onnx_inc_static_quant_qlinear", + "onnx_inc_static_quant_qdq", + "onnx_inc_dynamic_quant", + "intel_extension_for_transformers", + ]: # determine domain from .coders.autoinc.domain import determine_domain + globals.code_domain = determine_domain(globals.list_code_path[0]) # for transformers code, enable optimum-intel api by default @@ -336,6 +343,7 @@ def enable( from .coders.autoinc.autoinc_harness import AutoInc_Harness from .coders.autoinc.calib_dataloader import Calib_Dataloader from .coders.autoinc.eval_func import Eval_Func + opt = Calib_Dataloader() opt.register_transformation() @@ -346,6 +354,7 @@ def enable( opt.register_transformation() else: from .coders.pytorch.harness import Harness + opt = Harness(backend=feature) opt.register_transformation() @@ -361,26 +370,32 @@ def enable( if "batch_size" in list_transformed_code[0]: # entry code has "batch_size" globals.batch_size_changed = True from .coders.pytorch.batch_size import BatchSizeCoder + globals.target_batch_size = str(target_batch_size) list_transformed_code[i] = BatchSizeCoder(list_transformed_code[i]).transform() # 
CUDA to CPU if "pytorch_cuda_to_cpu" in features: from .coders.pytorch.cuda_to_cpu import CudaToCpu + list_transformed_code[i] = CudaToCpu(list_transformed_code[i]).transform() # Lightning if "pytorch_lightning_bf16_cpu" in features: from .coders.pytorch.lightning import Lightning + list_transformed_code[i] = Lightning(list_transformed_code[i]).transform() # TF & Keras AMP if "tensorflow_mixed_precision" in features: from .coders.tensorflow.amp import TensorFlowKerasAMP + list_transformed_code[i] = TensorFlowKerasAMP(list_transformed_code[i]).transform() if feature == "tensorflow_inc": from .coders.tensorflow.inc import TensorFlowKerasINC + list_transformed_code[i] = TensorFlowKerasINC(list_transformed_code[i]).transform() # Change Trainer to NLPTrainer (only for intel_extension_for_pytorch) if "change_trainer_to_nlptrainer" in features: from .coders.pytorch.change_trainer_to_nlptrainer import TrainerToNLPTrainer + list_transformed_code[i] = TrainerToNLPTrainer(list_transformed_code[i]).transform() logger.info(f"Code transformation for feature: [{feature}] finished.") @@ -404,6 +419,7 @@ def enable( globals.print_code_line_info = True from .utils import handle_user_input + globals.list_code_path, num_user_code_path = handle_user_input.get_all_code_path(code) if len(transformed_list_code_path) > 0: globals.list_code_path = transformed_list_code_path @@ -415,12 +431,12 @@ def enable( # by the order in code_path for i in globals.list_code_path: - list_transformed_code.append(open(i, 'r').read()) - + list_transformed_code.append(open(i, "r").read()) + + from .coders.transform import execute_indent_transformation, execute_insert_transformation from .graphers.code_line import register_code_line - from .graphers.model import register_nnModule_class, register_nnModule_instance_definition from .graphers.function import register_func_wrap_pair - from .coders.transform import execute_insert_transformation, execute_indent_transformation + from .graphers.model import register_nnModule_class, register_nnModule_instance_definition # code analysis (call graph, type inference etc) register_code_line() @@ -438,8 +454,7 @@ def enable( if path_transformed[-25:] == "_nc_enabled_nc_enabled.py": continue cmd_gen_patch = "diff -up " + path + " " + path_transformed - sp_gen_patch = subprocess.Popen( - cmd_gen_patch, env=os.environ, shell=True, stdout=subprocess.PIPE) # nosec + sp_gen_patch = subprocess.Popen(cmd_gen_patch, env=os.environ, shell=True, stdout=subprocess.PIPE) # nosec sp_gen_patch.wait() this_patch, _ = sp_gen_patch.communicate() this_patch = str(this_patch)[2:-1] @@ -447,14 +462,15 @@ def enable( if save_patch_path == "": save_patch_path = ws_path + "neural_coder_patch" open(save_patch_path + patch_suffix, "w").write( - whole_patch_user_code.replace(r'\n', '\n').replace(r'\t', '\t').replace(r"\'", "\'")) - abs_patch_path = os.path.abspath( - save_patch_path + patch_suffix) + whole_patch_user_code.replace(r"\n", "\n").replace(r"\t", "\t").replace(r"\'", "'") + ) + abs_patch_path = os.path.abspath(save_patch_path + patch_suffix) logger.info(f"The patch is saved to: [{abs_patch_path}]") if overwrite: sp_overwrite = subprocess.Popen( - "patch -d/ -p0 < " + abs_patch_path, env=os.environ, shell=True, stdout=subprocess.PIPE) # nosec + "patch -d/ -p0 < " + abs_patch_path, env=os.environ, shell=True, stdout=subprocess.PIPE + ) # nosec sp_overwrite.wait() # os.remove(abs_patch_path) # remove patch after overwrite @@ -464,7 +480,8 @@ def enable( path_transformed = path[:-3] + "_nc_enabled.py" cmd_gen_patch = 
"diff -up " + path + " " + path_transformed sp_gen_patch = subprocess.Popen( - cmd_gen_patch, env=os.environ, shell=True, stdout=subprocess.PIPE) # nosec + cmd_gen_patch, env=os.environ, shell=True, stdout=subprocess.PIPE + ) # nosec sp_gen_patch.wait() this_patch, _ = sp_gen_patch.communicate() this_patch = str(this_patch)[2:-1] @@ -472,11 +489,10 @@ def enable( if save_patch_path == "": save_patch_path = ws_path + "neural_coder_patch_import_modules" open(save_patch_path + patch_suffix, "w").write( - whole_patch_import_modules.replace(r'\n', '\n').replace(r'\t', '\t').replace(r"\'", "\'")) - abs_patch_path = os.path.abspath( - save_patch_path + patch_suffix) - logger.info( - f"The patch for imported modules is saved to: [{abs_patch_path}]") + whole_patch_import_modules.replace(r"\n", "\n").replace(r"\t", "\t").replace(r"\'", "'") + ) + abs_patch_path = os.path.abspath(save_patch_path + patch_suffix) + logger.info(f"The patch for imported modules is saved to: [{abs_patch_path}]") # remove copy for imports if remove_copy: @@ -520,16 +536,15 @@ def bench( ninstances=-1, # only for "self_defined" mode bench_batch_size=-1, # only for "self_defined" mode ): - """benchmark on either "optimized code", or "patch" + "original code" + """Benchmark on either "optimized code", or "patch" + "original code" it does not enable benchmark code lines, or enable change of batch size all the enabling should be done within enable API which means the "optimized code" should already have "pytorch_benchmark" and "pytorch_change_batch_size" enabled or the "patch" should already have the code modification - for "pytorch_benchmark" and "pytorch_change_batch_size" in it - """ + for "pytorch_benchmark" and "pytorch_change_batch_size" in it.""" # set up workspace - ws_path = "neural_coder_workspace/" + "bench" + str(time.time()).replace(".","") + "/" + ws_path = "neural_coder_workspace/" + "bench" + str(time.time()).replace(".", "") + "/" os.makedirs(ws_path) # set up logging @@ -538,10 +553,9 @@ def bench( logger = logging.getLogger(ws_path) logger.setLevel(globals.logging_level) - formatter = logging.Formatter( - '%(asctime)s - %(name)s - %(levelname)s: - %(message)s', datefmt='%Y-%m-%d %H:%M:%S') + formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s: - %(message)s", datefmt="%Y-%m-%d %H:%M:%S") - fh = logging.FileHandler(ws_path + 'bench.log') + fh = logging.FileHandler(ws_path + "bench.log") fh.setLevel(globals.logging_level) fh.setFormatter(formatter) ch = logging.StreamHandler() @@ -555,7 +569,7 @@ def bench( detect_device_(logger) # print key inputs - logger.info(f"Benchmarking started ...") + logger.info("Benchmarking started ...") logger.info(f"code: {code}") logger.info(f"mode: {mode}") @@ -568,28 +582,29 @@ def bench( elif type(code) == str: entry_code = code else: - logger.error( - f"You have to specify an entry_code of your code: [{code}]") + logger.error(f"You have to specify an entry_code of your code: [{code}]") quit() # patch if patch_path != "": sp_patch = subprocess.Popen( - "patch -d/ -p0 < " + patch_path, env=os.environ, shell=True, stdout=subprocess.PIPE) # nosec + "patch -d/ -p0 < " + patch_path, env=os.environ, shell=True, stdout=subprocess.PIPE + ) # nosec sp_patch.wait() # if mode is "self_defined", user must specify ncpi, nins and bs if mode == "self_defined": if ncore_per_instance == -1 or ninstances == -1 or bench_batch_size == -1: logger.error( - f"You have to specify ncore_per_instance," - f"ninstances and bench_batch_size for self-defined benchmark mode.") + "You have 
to specify ncore_per_instance," + "ninstances and bench_batch_size for self-defined benchmark mode." + ) quit() # numactl from .utils import numa_launcher - from .utils.cpu_info import get_num_cpu_cores + ncores = get_num_cpu_cores() # numactl setup for different modes @@ -612,38 +627,36 @@ def bench( # set cpu env variables if cpu_set_env: - cmd_env = '' - cmd_env += 'export LD_PRELOAD=${CONDA_PREFIX}/lib/libjemalloc.so' - cmd_env += ' && ' - cmd_env += 'export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libiomp5.so' - cmd_env += ' && ' + cmd_env = "" + cmd_env += "export LD_PRELOAD=${CONDA_PREFIX}/lib/libjemalloc.so" + cmd_env += " && " + cmd_env += "export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libiomp5.so" + cmd_env += " && " cmd_env += 'export MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto,' cmd_env += 'dirty_decay_ms:9000000000,muzzy_decay_ms:9000000000"' - cmd_env += ' && ' + cmd_env += " && " cmd_env += 'export KMP_AFFINITY="granularity=fine,compact,1,0"' - cmd_env += ' && ' - cmd_env += 'export KMP_BLOCKTIME=1' - cmd_env += ' && ' - cmd_env += 'export DNNL_PRIMITIVE_CACHE_CAPACITY=1024' - cmd_env += ' && ' - cmd_env += 'export KMP_SETTINGS=1' - - sp_set_env = subprocess.Popen( - cmd_env, env=os.environ, shell=True, stdout=subprocess.PIPE) # nosec + cmd_env += " && " + cmd_env += "export KMP_BLOCKTIME=1" + cmd_env += " && " + cmd_env += "export DNNL_PRIMITIVE_CACHE_CAPACITY=1024" + cmd_env += " && " + cmd_env += "export KMP_SETTINGS=1" + + sp_set_env = subprocess.Popen(cmd_env, env=os.environ, shell=True, stdout=subprocess.PIPE) # nosec sp_set_env.wait() # benchmark - logger.info(f"Start benchmark on the code ...") + logger.info("Start benchmark on the code ...") bench_log_path = ws_path + "performance.log" os.remove(bench_log_path) if os.path.exists(bench_log_path) else 0 args = [args] - numa_launcher.exec_launcher( - ncore_per_instance, ninstances, entry_code, args, bench_log_path) + numa_launcher.exec_launcher(ncore_per_instance, ninstances, entry_code, args, bench_log_path) # get performance (throughput and latency) - bench_log = open(bench_log_path, "r", encoding='unicode_escape').read().split('\n') + bench_log = open(bench_log_path, "r", encoding="unicode_escape").read().split("\n") IPS = [] MSPI = 0 count_MSPI = 0 @@ -657,30 +670,30 @@ def bench( for line in bench_log: if "Neural_Coder_Bench_IPS" in line: try: - IPS.append(float(line[line.find(":")+3:])) + IPS.append(float(line[line.find(":") + 3 :])) except ValueError as ve: pass if "Neural_Coder_Bench_MSPI" in line: try: - MSPI += float(line[line.find(":")+3:]) + MSPI += float(line[line.find(":") + 3 :]) count_MSPI += 1 except ValueError as ve: pass if "Neural_Coder_Bench_P50" in line: try: - P50 += float(line[line.find(":")+3:]) + P50 += float(line[line.find(":") + 3 :]) count_P50 += 1 except ValueError as ve: pass if "Neural_Coder_Bench_P90" in line: try: - P90 += float(line[line.find(":")+3:]) + P90 += float(line[line.find(":") + 3 :]) count_P90 += 1 except ValueError as ve: pass if "Neural_Coder_Bench_P99" in line: try: - P99 += float(line[line.find(":")+3:]) + P99 += float(line[line.find(":") + 3 :]) count_P99 += 1 except ValueError as ve: pass @@ -688,7 +701,7 @@ def bench( try: acc_int8 = float(re.search(r"\d+\.\d+", line).group()) acc_fp32 = float(re.search(r"(?<=\|)\d+\.\d+", line).group()) - acc_delta = round((acc_int8 - acc_fp32) / acc_fp32 * 100, 2) # percent of increase/decrease + acc_delta = round((acc_int8 - acc_fp32) / acc_fp32 * 100, 2) # percent of increase/decrease 
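A minimal illustrative sketch of what this log-parsing section computes (not an excerpt from the patch; all numbers are made up): the loop collects per-instance IPS samples and the FP32/INT8 accuracies from the benchmark log, and the following hunk turns them into the reported FPS and accuracy delta.

    ips_samples = [12.0, 12.4, 12.2]      # per-instance iterations/second parsed from the log (example values)
    ninstances, bench_batch_size = 4, 8   # example launcher configuration
    # batch-size multiplier applies only when the batch size was actually changed; otherwise it is 1
    FPS = round(sum(ips_samples) / len(ips_samples) * ninstances * bench_batch_size, 3)
    acc_fp32, acc_int8 = 0.760, 0.755     # hypothetical FP32 / INT8 accuracies parsed from the log
    acc_delta = round((acc_int8 - acc_fp32) / acc_fp32 * 100, 2)  # percent change; negative means an accuracy drop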
except ValueError as ve: pass @@ -698,7 +711,7 @@ def bench( IPS[-1] = IPS[-2] try: - if globals.batch_size_changed: # only times BS if BS has been modified, otherwise times 1 + if globals.batch_size_changed: # only times BS if BS has been modified, otherwise times 1 FPS = round(sum(IPS) / len(IPS) * ninstances * bench_batch_size, 3) else: FPS = round(sum(IPS) / len(IPS) * ninstances * 1, 3) @@ -731,7 +744,8 @@ def bench( # unpatch if patch_path != "": sp_unpatch = subprocess.Popen( - "patch -R -d/ -p0 < " + patch_path, env=os.environ, shell=True, stdout=subprocess.PIPE) # nosec + "patch -R -d/ -p0 < " + patch_path, env=os.environ, shell=True, stdout=subprocess.PIPE + ) # nosec sp_unpatch.wait() return [FPS, MSPI, P50, P90, P99, acc_delta], mode, os.path.abspath(ws_path) @@ -755,10 +769,8 @@ def superbench( use_inc=True, auto_quant=False, ): - # set up workspace - ws_path = "neural_coder_workspace/" + \ - "superbench" + str(time.time()).replace(".","") + "/" + ws_path = "neural_coder_workspace/" + "superbench" + str(time.time()).replace(".", "") + "/" os.makedirs(ws_path) # set up logging @@ -767,10 +779,9 @@ def superbench( logger = logging.getLogger(ws_path) logger.setLevel(globals.logging_level) - formatter = logging.Formatter( - '%(asctime)s - %(name)s - %(levelname)s: - %(message)s', datefmt='%Y-%m-%d %H:%M:%S') + formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s: - %(message)s", datefmt="%Y-%m-%d %H:%M:%S") - fh = logging.FileHandler(ws_path+'superbench.log') + fh = logging.FileHandler(ws_path + "superbench.log") fh.setLevel(globals.logging_level) fh.setFormatter(formatter) ch = logging.StreamHandler() @@ -785,12 +796,12 @@ def superbench( # print key inputs if auto_quant: - logger.info(f"Auto-Quant started ...") + logger.info("Auto-Quant started ...") logger.info(f"Code: {code}") logger.info(f"Benchmark Mode: {mode} mode") logger.debug(f"Number of benchmark iterations: {num_benchmark_iteration}") else: - logger.info(f"SuperBench started ...") + logger.info("SuperBench started ...") logger.info(f"Code: {code}") logger.info(f"Benchmark Mode: {mode} mode") logger.debug(f"Sweep Objective: {sweep_objective}") @@ -806,12 +817,12 @@ def superbench( elif type(code) == str: entry_code = code else: - logger.error( - f"You have to specify an entry_code of your code: [{code}]") + logger.error(f"You have to specify an entry_code of your code: [{code}]") quit() # detect device compatibility of entry code from .utils.device import detect_code_device_compatibility + detect_code_device_compatibility(entry_code) if sweep_objective == "feature": @@ -859,6 +870,7 @@ def superbench( standalones = [] standalones.append("") from itertools import combinations + for num_items in range(len(standalones_pool)): list_comb = list(combinations(standalones_pool, num_items + 1)) for item in list_comb: @@ -927,7 +939,8 @@ def superbench( if iteration_dynamic_adjust: num_benchmark_iteration = max(int(300 / (t_end - t_start)), 5) logger.debug( - f"Adjusted number of benchmark iterations after dry-run is {num_benchmark_iteration}") + f"Adjusted number of benchmark iterations after dry-run is {num_benchmark_iteration}" + ) dry_run = False def remove_if_have(list, element): @@ -941,29 +954,30 @@ def remove_if_have(list, element): if auto_quant: # convert feature name to display name for better user experience - if features == ['pytorch_inc_dynamic_quant']: + if features == ["pytorch_inc_dynamic_quant"]: features_display = "Intel INT8 (Dynamic)" - elif features == ['pytorch_inc_static_quant_fx']: + 
elif features == ["pytorch_inc_static_quant_fx"]: features_display = "Intel INT8 (Static)" - elif features == ['pytorch_inc_static_quant_ipex']: + elif features == ["pytorch_inc_static_quant_ipex"]: features_display = "Intel INT8 (IPEX)" - elif features == ['pytorch_inc_bf16']: + elif features == ["pytorch_inc_bf16"]: features_display = "Intel BF16" elif features == []: features_display = "The Original Model" logger.info( - f"Benchmark result (performance) of {features_display}" - f" is {bench_performance[0]} (FPS)") - logger.info( - f"Benchmark result (accuracy delta) of {features_display} is {bench_performance[5]} %") + f"Benchmark result (performance) of {features_display}" f" is {bench_performance[0]} (FPS)" + ) + logger.info(f"Benchmark result (accuracy delta) of {features_display} is {bench_performance[5]} %") else: logger.info( f"Benchmark result (performance) of optimization set [{features}]" - f" is [{bench_performance[0]}] (FPS)") + f" is [{bench_performance[0]}] (FPS)" + ) logger.info( f"Benchmark result (accuracy delta) of optimization set [{features}]" - f" is [{bench_performance[5]}] %") + f" is [{bench_performance[5]}] %" + ) d = {} # initialize dict d["features"] = features @@ -981,8 +995,11 @@ def remove_if_have(list, element): # print result print(f"Superbench result of sweeping [{sweep_objective}] printed below with sorted FPS: ") - print("{:<20} {:<20} {:<20} {:<120}".format( - 'Numactl Mode', 'Performance (FPS)', 'Accuracy Delta (%)', 'Features Applied')) + print( + "{:<20} {:<20} {:<20} {:<120}".format( + "Numactl Mode", "Performance (FPS)", "Accuracy Delta (%)", "Features Applied" + ) + ) sort_index = sorted( range(len(list_FPS)), @@ -1021,16 +1038,16 @@ def remove_if_have(list, element): if list_features[i] == []: original_model_performance = list_FPS[i] break - + if auto_quant: # convert feature name to display name for better user experience - if list_optimization_set_top3[0] == ['pytorch_inc_dynamic_quant']: + if list_optimization_set_top3[0] == ["pytorch_inc_dynamic_quant"]: best_optimization_display = "Intel INT8 (Dynamic)" - elif list_optimization_set_top3[0] == ['pytorch_inc_static_quant_fx']: + elif list_optimization_set_top3[0] == ["pytorch_inc_static_quant_fx"]: best_optimization_display = "Intel INT8 (Static)" - elif list_optimization_set_top3[0] == ['pytorch_inc_static_quant_ipex']: + elif list_optimization_set_top3[0] == ["pytorch_inc_static_quant_ipex"]: best_optimization_display = "Intel INT8 (IPEX)" - elif list_optimization_set_top3[0] == ['pytorch_inc_bf16']: + elif list_optimization_set_top3[0] == ["pytorch_inc_bf16"]: best_optimization_display = "Intel BF16" elif list_optimization_set_top3[0] == []: best_optimization_display = "The Original Model" @@ -1072,11 +1089,13 @@ def remove_if_have(list, element): result_p99_thp = [] if bench_feature == []: logger.error( - f'You must specify a feature (optimization set) ' - f'for benchmark when "sweep_objective" is "bench_config"') + "You must specify a feature (optimization set) " + 'for benchmark when "sweep_objective" is "bench_config"' + ) quit() else: from .utils.cpu_info import get_num_cpu_cores + ncores = get_num_cpu_cores() list_ncpi = [1, 2, 4, 8] for i in [1, 2, 4, 8]: @@ -1089,8 +1108,19 @@ def remove_if_have(list, element): for this_ncpi in list_ncpi: ncore_per_instance = this_ncpi ninstances = int(ncores / this_ncpi) - list_bs = [1, 2, 4, 8, this_ncpi * 1, this_ncpi * 2, this_ncpi * - 4, this_ncpi * 8, this_ncpi * 16, this_ncpi * 32, this_ncpi * 64] + list_bs = [ + 1, + 2, + 4, + 8, + 
this_ncpi * 1, + this_ncpi * 2, + this_ncpi * 4, + this_ncpi * 8, + this_ncpi * 16, + this_ncpi * 32, + this_ncpi * 64, + ] list_bs = list(set(list_bs)) list_bs.sort() if logging_level == "debug": @@ -1100,7 +1130,6 @@ def remove_if_have(list, element): for this_bs in list_bs: bench_batch_size = this_bs try: - if dry_run: t_start = time.time() @@ -1125,16 +1154,14 @@ def remove_if_have(list, element): num_benchmark_iteration = max(int(300 / (t_end - t_start)), 5) logger.debug( f"Adjusted number of benchmark iterations after dry-run is " - f"{num_benchmark_iteration}") + f"{num_benchmark_iteration}" + ) dry_run = False socket_regular_thp = bench_performance[0] - socket_p50_thp = round( - 1000 / bench_performance[2] * ninstances * bench_batch_size, 3) - socket_p90_thp = round( - 1000 / bench_performance[3] * ninstances * bench_batch_size, 3) - socket_p99_thp = round( - 1000 / bench_performance[4] * ninstances * bench_batch_size, 3) + socket_p50_thp = round(1000 / bench_performance[2] * ninstances * bench_batch_size, 3) + socket_p90_thp = round(1000 / bench_performance[3] * ninstances * bench_batch_size, 3) + socket_p99_thp = round(1000 / bench_performance[4] * ninstances * bench_batch_size, 3) result_ncpi.append(ncore_per_instance) result_nins.append(ninstances) @@ -1168,21 +1195,25 @@ def remove_if_have(list, element): elif item is result_p99_thp: display_item_name = "Throughput based on P99-Latency" - print("{:<30} {:<30} {:<30} {:<30}".format( - 'Num Cores Per Instance', 'Num of Instances', 'Batch Size', display_item_name)) - sort_index = sorted( - range(len(item)), key=lambda k: item[k], reverse=True) + print( + "{:<30} {:<30} {:<30} {:<30}".format( + "Num Cores Per Instance", "Num of Instances", "Batch Size", display_item_name + ) + ) + sort_index = sorted(range(len(item)), key=lambda k: item[k], reverse=True) for i in sort_index: - print("{:<30} {:<30} {:<30} {:<30}".format(str(result_ncpi[i]), str( - result_nins[i]), str(result_bs[i]), str(item[i]))) + print( + "{:<30} {:<30} {:<30} {:<30}".format( + str(result_ncpi[i]), str(result_nins[i]), str(result_bs[i]), str(item[i]) + ) + ) list_config_best_ncpi = [] list_config_best_nins = [] list_config_best_bs = [] list_config_best_performance = [] for item in [result_regular_thp, result_p50_thp, result_p90_thp, result_p99_thp]: - sort_index = sorted( - range(len(item)), key=lambda k: item[k], reverse=True) + sort_index = sorted(range(len(item)), key=lambda k: item[k], reverse=True) for i in sort_index: list_config_best_ncpi.append(result_ncpi[i]) list_config_best_nins.append(result_nins[i]) @@ -1224,13 +1255,13 @@ def remove_if_have(list, element): # # pricing: https://aws.amazon.com/ec2/pricing/on-demand/ # import subprocess # res = subprocess.Popen( -# "grep 'DMI' /var/log/dmesg", -# shell=True, -# stdout=subprocess.PIPE, -# stderr=subprocess.PIPE, +# "grep 'DMI' /var/log/dmesg", +# shell=True, +# stdout=subprocess.PIPE, +# stderr=subprocess.PIPE, # ) # res.wait() -# result = res.stdout.read() +# result = res.stdout.read() # result = str(result, encoding="utf-8") # cloud_vendor = result.split()[4] + ' ' + result.split()[5] # if cloud_vendor == 'Amazon EC2': @@ -1250,7 +1281,7 @@ def remove_if_have(list, element): # cloud_vendor="Intel internal machine" # cloud_instance_type=bare_metal_machine_type # cloud_unit_price="1" - + # report = PDFReport( # path=save_path, # list_optimization_set_top3=res1, diff --git a/neural_coder/launcher.py b/neural_coder/launcher.py index 4ef71fb1c39..43446712e35 100644 --- a/neural_coder/launcher.py +++ 
b/neural_coder/launcher.py @@ -2,10 +2,10 @@ import shutil import subprocess import sys +from argparse import REMAINDER, ArgumentParser -from argparse import ArgumentParser, REMAINDER -class Launcher(): +class Launcher: def parse_args(): """ Helper function parsing the command line options @@ -13,31 +13,31 @@ def parse_args(): """ parser = ArgumentParser(description="command-launch a Python script with quantization auto-enabled") - parser.add_argument("-o", "--opt", type=str, default="", - help="optimization feature to enable") + parser.add_argument("-o", "--opt", type=str, default="", help="optimization feature to enable") - parser.add_argument("-a", "--approach", type=str, default="auto", + parser.add_argument("-a", "--approach", type=str, default="auto", help="quantization approach (strategy)") - help="quantization approach (strategy)") + parser.add_argument("--config", type=str, default="", help="quantization configuration file path") - parser.add_argument('--config', type=str, default="", - help='quantization configuration file path') + parser.add_argument( + "-b", "--bench", default=False, action="store_true", help="conduct auto_quant benchmark instead of enable" + ) - parser.add_argument('-b', '--bench', default=False, action='store_true', - help='conduct auto_quant benchmark instead of enable') - - parser.add_argument('-e', '--enable', default=False, action='store_true', - help='only do enable, not overwrite or run program') + parser.add_argument( + "-e", "--enable", default=False, action="store_true", help="only do enable, not overwrite or run program" + ) # positional - parser.add_argument("script", type=str, - help="The full path to the script to be launched. " - "followed by all the arguments for the script") + parser.add_argument( + "script", + type=str, + help="The full path to the script to be launched. 
" "followed by all the arguments for the script", + ) # script args - parser.add_argument('script_args', nargs=REMAINDER) + parser.add_argument("script_args", nargs=REMAINDER) return parser.parse_args() - + def execute( args, use_modular=False, @@ -48,9 +48,10 @@ def execute( script_copied = args.script[:-3] + "_optimized.py" shutil.copy(args.script, script_copied) - if not args.bench: # "enable and run" or "only enable" + if not args.bench: # "enable and run" or "only enable" # optimize on copied script with Neural Coder from neural_coder import enable + if args.opt == "": if args.approach == "static": args.opt = "pytorch_inc_static_quant_fx" @@ -63,12 +64,12 @@ def execute( features = [args.opt] else: features = args.opt.split(",") - + # modular design modular_item = "" if use_modular: modular_item = modular_pattern[args.opt] - + # execute optimization enabling enable( code=script_copied, @@ -79,23 +80,24 @@ def execute( use_inc=use_inc, ) - if not args.enable: # enable and run + if not args.enable: # enable and run # execute on copied script, which has already been optimized cmd = [] - cmd.append(sys.executable) # "/xxx/xxx/python" + cmd.append(sys.executable) # "/xxx/xxx/python" cmd.append("-u") cmd.append(script_copied) cmd.extend(args.script_args) - cmd = " ".join(cmd) # list convert to string + cmd = " ".join(cmd) # list convert to string process = subprocess.Popen(cmd, env=os.environ, shell=True) # nosec process.wait() - else: # auto_quant + else: # auto_quant from neural_coder import auto_quant + auto_quant( code=script_copied, - args=' '.join(args.script_args), # convert list of strings to a single string + args=" ".join(args.script_args), # convert list of strings to a single string use_inc=use_inc, ) diff --git a/neural_coder/utils/common.py b/neural_coder/utils/common.py index d03ba4e3be4..474f4cf0643 100644 --- a/neural_coder/utils/common.py +++ b/neural_coder/utils/common.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. + def move_element_to_front(list, element): if element in list: idx = list.index(element) @@ -23,4 +24,4 @@ def move_element_to_last(list, element): if element in list: idx = list.index(element) list.insert(len(list), list.pop(idx)) - return list \ No newline at end of file + return list diff --git a/neural_coder/utils/cpu_info.py b/neural_coder/utils/cpu_info.py index c44907a88e0..276b1271fe6 100644 --- a/neural_coder/utils/cpu_info.py +++ b/neural_coder/utils/cpu_info.py @@ -12,33 +12,31 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import subprocess import os +import subprocess def get_num_cpu_cores() -> int: - - cmd_cpu_info = '' + cmd_cpu_info = "" cmd_cpu_info += "sockets_num=$(lscpu |grep 'Socket(s):' |sed 's/[^0-9]//g')" - cmd_cpu_info += ' && ' + cmd_cpu_info += " && " cmd_cpu_info += "cores_per_socket=$(lscpu |grep 'Core(s) per socket:' |sed 's/[^0-9]//g')" - cmd_cpu_info += ' && ' + cmd_cpu_info += " && " cmd_cpu_info += 'phsical_cores_num=$( echo "${sockets_num} * ${cores_per_socket}" |bc )' - cmd_cpu_info += ' && ' + cmd_cpu_info += " && " cmd_cpu_info += "numa_nodes_num=$(lscpu |grep 'NUMA node(s):' |sed 's/[^0-9]//g')" - cmd_cpu_info += ' && ' + cmd_cpu_info += " && " cmd_cpu_info += 'cores_per_node=$( echo "${phsical_cores_num} / ${numa_nodes_num}" |bc )' - cmd_cpu_info += ' && ' + cmd_cpu_info += " && " cmd_cpu_info += "echo ${cores_per_node}" - original_lang_val = os.environ.get('LANG') - os.environ['LANG'] = 'C' - sp_grep_cpu_info = subprocess.Popen( - cmd_cpu_info, env=os.environ, shell=True, stdout=subprocess.PIPE) # nosec + original_lang_val = os.environ.get("LANG") + os.environ["LANG"] = "C" + sp_grep_cpu_info = subprocess.Popen(cmd_cpu_info, env=os.environ, shell=True, stdout=subprocess.PIPE) # nosec sp_grep_cpu_info.wait() if original_lang_val: - os.environ['LANG'] = original_lang_val + os.environ["LANG"] = original_lang_val else: - del os.environ['LANG'] + del os.environ["LANG"] log_cpu_info, _ = sp_grep_cpu_info.communicate() ncores = int(str(log_cpu_info)[2:-3]) diff --git a/neural_coder/utils/device.py b/neural_coder/utils/device.py index e7ff4f3854e..164c17fd5c4 100644 --- a/neural_coder/utils/device.py +++ b/neural_coder/utils/device.py @@ -21,11 +21,12 @@ def detect_device(): try: import torch + if torch.cuda.is_available(): globals.device = "cuda" return except: - pass # cuda tf wip + pass # cuda tf wip if check_has('clinfo | grep "Intel(R) Graphics"'): globals.device = "intel_gpu" @@ -38,21 +39,17 @@ def detect_device(): globals.device = "cpu_without_amx" return + def check_has(s): cmd = s try: - sp = subprocess.Popen( - cmd, - env=os.environ, - shell=True, # nosec - stdout=subprocess.PIPE - ) # nosec + sp = subprocess.Popen(cmd, env=os.environ, shell=True, stdout=subprocess.PIPE) # nosec # nosec sp.wait() sp, _ = sp.communicate() has = bool(len(sp.decode()) > 0) # 0: no, >0: yes except: has = False - print('Checking failed.') + print("Checking failed.") return has @@ -60,14 +57,15 @@ def detect_code_device_compatibility(code_path): # handle github py url if "github.com" in code_path and ".py" in code_path: import requests - code_path = code_path.replace("github.com", "raw.githubusercontent.com").replace("/blob","") + + code_path = code_path.replace("github.com", "raw.githubusercontent.com").replace("/blob", "") r = requests.get(code_path) save_py_path = "./neural_coder_workspace/model_analyze_device.py" f = open(save_py_path, "wb") f.write(r.content) code_path = save_py_path - lines = open(code_path, 'r').read().split('\n') + lines = open(code_path, "r").read().split("\n") for line in lines: if "torch.cuda.is_available()" in line: globals.list_code_device_compatibility.append("cuda") @@ -88,5 +86,4 @@ def detect_code_device_compatibility(code_path): if "args.gpu" in line: globals.list_code_device_compatibility.append("gpu") - globals.list_code_device_compatibility = \ - list(set(globals.list_code_device_compatibility)) + globals.list_code_device_compatibility = list(set(globals.list_code_device_compatibility)) diff --git a/neural_coder/utils/handle_user_input.py 
b/neural_coder/utils/handle_user_input.py index 846df845602..35515b13129 100644 --- a/neural_coder/utils/handle_user_input.py +++ b/neural_coder/utils/handle_user_input.py @@ -12,14 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging import os from typing import List + from .. import globals -import logging -logging.basicConfig(level=globals.logging_level, - format='%(asctime)s %(levelname)s %(message)s', - datefmt='%a, %d %b %Y %H:%M:%S +0000') +logging.basicConfig( + level=globals.logging_level, format="%(asctime)s %(levelname)s %(message)s", datefmt="%a, %d %b %Y %H:%M:%S +0000" +) logger = logging.getLogger(__name__) @@ -31,7 +32,7 @@ def get_all_code_path(user_input: str) -> List: import_path = [] # if import_path intersects user_code_path, clear import_path: this is - # for cases where there is import of self folder and we only care about + # for cases where there is import of self folder and we only care about # the main file (user_code_path) itself if len(list(set(user_code_path).intersection(set(import_path)))) > 0: import_path = [] @@ -65,7 +66,8 @@ def get_user_code_path(user_input: str) -> List: # get list of file path if user_input_type == "url_repo": from git import Repo - Repo.clone_from(user_input, "./cloned_github_repo") + + Repo.clone_from(user_input, "./cloned_github_repo") dir_input = "./cloned_github_repo" if user_input_type == "folder": dir_input = user_input @@ -74,7 +76,8 @@ def get_user_code_path(user_input: str) -> List: list_path.append(os.path.abspath(user_input)) elif user_input_type == "url_py": import requests - user_input = user_input.replace("github.com", "raw.githubusercontent.com").replace("/blob","") + + user_input = user_input.replace("github.com", "raw.githubusercontent.com").replace("/blob", "") r = requests.get(user_input) save_py_path = "./neural_coder_workspace/model.py" f = open(save_py_path, "wb") @@ -93,7 +96,6 @@ def get_user_code_path(user_input: str) -> List: def get_imports_path(user_code_path: List) -> List: - pip_name_exceptions = [ "argparse", "ast", @@ -176,7 +178,7 @@ def get_imports_path(user_code_path: List) -> List: # get list of pip name for path in user_code_path: - lines = open(path, 'r').read().split('\n') + lines = open(path, "r").read().split("\n") for line in lines: is_import_line = False if line[0:6] == "import" and line[0:8] != "import ." 
and "," not in line: # to-do: handle "," case @@ -191,11 +193,11 @@ def get_imports_path(user_code_path: List) -> List: if space_idx == -1 and dot_idx == -1: pip_name = line[start:] elif space_idx > 0 and dot_idx == -1: - pip_name = line[start: start + space_idx] + pip_name = line[start : start + space_idx] elif space_idx == -1 and dot_idx > 0: - pip_name = line[start: start + dot_idx] + pip_name = line[start : start + dot_idx] elif space_idx > 0 and dot_idx > 0: - pip_name = line[start: start + min(space_idx, dot_idx)] + pip_name = line[start : start + min(space_idx, dot_idx)] list_pip_name.append(pip_name) list_pip_name = list(set(list_pip_name).difference(set(pip_name_exceptions))) for item in list_pip_name: @@ -216,10 +218,11 @@ def get_imports_path(user_code_path: List) -> List: quit() import inspect + for i in list_pip_name: try: pip_dir_path = inspect.getsourcefile(eval(i)) - pip_dir_path = pip_dir_path[0:pip_dir_path.rfind("/")] + pip_dir_path = pip_dir_path[0 : pip_dir_path.rfind("/")] for path, dir_list, file_list in os.walk(pip_dir_path): for file_name in file_list: file_path = os.path.join(path, file_name) diff --git a/neural_coder/utils/line_operation.py b/neural_coder/utils/line_operation.py index dcfcaf1bf38..abac3fba6ff 100644 --- a/neural_coder/utils/line_operation.py +++ b/neural_coder/utils/line_operation.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. + # get line's indent level def get_line_indent_level(line: str) -> int: if list(set(line)) == [" "]: @@ -68,21 +69,25 @@ def single_line_comment_or_empty_line_detection(line: str) -> bool: return this_line_is_single_line_comment_or_empty_line -# determine if line is a eval func of model_name, +# determine if line is a eval func of model_name, # like "xxx = model_name(yyy)" or "model_name(yyy)" or "model_name.some_func(yyy)" def is_eval_func_model_name(model_name: str, line: str) -> str: - line_ = line.replace(' ', '') + line_ = line.replace(" ", "") # model(input) judge_1 = line_.find(model_name + "(") > -1 - judge_2 = (line_.find("=") > 0 and - line_.find("=") < line_.find(model_name) and - line_[line_.find("=")+1:line_.find("(")] == model_name) or line_.find(model_name) == 0 + judge_2 = ( + line_.find("=") > 0 + and line_.find("=") < line_.find(model_name) + and line_[line_.find("=") + 1 : line_.find("(")] == model_name + ) or line_.find(model_name) == 0 # model.some_func(input) judge_3 = line_.find(model_name + ".") > -1 judge_4 = line_.find("(") > -1 - judge_5 = (line_.find("=") > 0 and - line_.find("=") < line_.find(model_name) and - line_[line_.find("=")+1:line_.find(".")] == model_name) or line_.find(model_name) == 0 + judge_5 = ( + line_.find("=") > 0 + and line_.find("=") < line_.find(model_name) + and line_[line_.find("=") + 1 : line_.find(".")] == model_name + ) or line_.find(model_name) == 0 exclude_function_list = [ "__init__", "to", @@ -99,7 +104,7 @@ def is_eval_func_model_name(model_name: str, line: str) -> str: "features", "freeze_feature_encoder", ] - judge_6 = line_[line_.find(".")+1:line_.find("(")] not in exclude_function_list + judge_6 = line_[line_.find(".") + 1 : line_.find("(")] not in exclude_function_list judge_7 = "model.config" not in line and "model.features" not in line judge_8 = "model(**inputs)" in line @@ -115,29 +120,29 @@ def is_eval_func_model_name(model_name: str, line: str) -> str: # get lhs of line of format "xxx = yyy" def get_line_left_hand_side(line: str) -> str: - line_ = line.replace(' ', '') - lhs = 
line_[:line_.find("=")] + line_ = line.replace(" ", "") + lhs = line_[: line_.find("=")] return lhs # determine if line is for format "xxx = yyy(zzz)" and get lhs and rhs of "=" def of_definition_format(line: str): - line_ = line.replace(' ', '') + line_ = line.replace(" ", "") is_def = False lhs = "" rhs = "" if "=" in line_ and "(" in line_ and line_.find("=") < line_.find("("): is_def = True - lhs = line_[:line_.find("=")] - rhs = line_[line_.find("=")+1:line_.find("(")] + lhs = line_[: line_.find("=")] + rhs = line_[line_.find("=") + 1 : line_.find("(")] if "." not in rhs: pass else: - rhs = rhs[rhs.find(".")+1:] + rhs = rhs[rhs.find(".") + 1 :] return is_def, lhs, rhs # get the line without comment def get_line_wo_comment(line: str): - line = line[:line.find("#")].rstrip() + line = line[: line.find("#")].rstrip() return line diff --git a/neural_coder/utils/numa_launcher.py b/neural_coder/utils/numa_launcher.py index defe7bc2c4d..1f50a8398a5 100644 --- a/neural_coder/utils/numa_launcher.py +++ b/neural_coder/utils/numa_launcher.py @@ -13,39 +13,36 @@ # limitations under the License. from __future__ import absolute_import, division, print_function, unicode_literals -import sys + +import glob +import logging +import os import platform +import re import subprocess -import os +import sys +from argparse import REMAINDER, ArgumentParser, RawTextHelpFormatter +from datetime import datetime from os.path import expanduser -import re -import glob + import numpy as np -from argparse import ArgumentParser, REMAINDER -from argparse import RawTextHelpFormatter -import logging import psutil -from datetime import datetime -format_str = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' +format_str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" logging.basicConfig(level=logging.INFO, format=format_str) logger = logging.getLogger(__name__) -class CPUinfo(): - ''' - Get CPU inforamation, such as cores list and NUMA information. 
- ''' +class CPUinfo: + """Get CPU inforamation, such as cores list and NUMA information.""" def __init__(self): - self.cpuinfo = [] if platform.system() == "Windows": raise RuntimeError("Windows platform is not supported!!!") elif platform.system() == "Linux": args = ["lscpu", "--parse=CPU,Core,Socket,Node"] - lscpu_info = subprocess.check_output( - args, universal_newlines=True).split("\n") + lscpu_info = subprocess.check_output(args, universal_newlines=True).split("\n") # Get information about cpu, core, socket and node for line in lscpu_info: @@ -58,15 +55,15 @@ def __init__(self): def get_socket_info(self): self.sockets = int(max([line[2] for line in self.cpuinfo])) + 1 self.socket_physical_cores = [] # socket_id is index - self.socket_logical_cores = [] # socket_id is index + self.socket_logical_cores = [] # socket_id is index self.physical_core_socket_map = {} # phyical core to numa node id - self.logical_core_socket_map = {} # logical core to numa node id + self.logical_core_socket_map = {} # logical core to numa node id self.nodes = int(max([line[3] for line in self.cpuinfo])) + 1 self.node_physical_cores = [] # node_id is index - self.node_logical_cores = [] # node_id is index + self.node_logical_cores = [] # node_id is index self.physical_core_node_map = {} # phyical core to numa node id - self.logical_core_node_map = {} # logical core to numa node id + self.logical_core_node_map = {} # logical core to numa node id for socket_id in range(self.sockets): cur_socket_physical_core = [] @@ -75,8 +72,7 @@ def get_socket_info(self): if socket_id == int(line[2]): if int(line[1]) not in cur_socket_physical_core: cur_socket_physical_core.append(int(line[1])) - self.physical_core_socket_map[int( - line[1])] = int(socket_id) + self.physical_core_socket_map[int(line[1])] = int(socket_id) cur_socket_logical_core.append(int(line[0])) self.logical_core_socket_map[int(line[0])] = int(socket_id) self.socket_physical_cores.append(cur_socket_physical_core) @@ -86,12 +82,11 @@ def get_socket_info(self): cur_node_physical_core = [] cur_node_logical_core = [] for line in self.cpuinfo: - nid = line[3] if line[3] != '' else '0' + nid = line[3] if line[3] != "" else "0" if node_id == int(nid): if int(line[1]) not in cur_node_physical_core: cur_node_physical_core.append(int(line[1])) - self.physical_core_node_map[int( - line[1])] = int(node_id) + self.physical_core_node_map[int(line[1])] = int(node_id) cur_node_logical_core.append(int(line[0])) self.logical_core_node_map[int(line[0])] = int(node_id) self.node_physical_cores.append(cur_node_physical_core) @@ -126,28 +121,26 @@ def get_all_logical_cores(self): return np.array(self.node_logical_cores).flatten().tolist() def numa_aware_check(self, core_list): - ''' - Check whether all cores in core_list are in the same NUMA node. cross NUMA will reduce perforamnce. + """Check whether all cores in core_list are in the same NUMA node. + + cross NUMA will reduce perforamnce. We strongly advice to not use cores on different nodes. 
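A minimal sketch of the cross-NUMA condition this check warns about, using a toy core-to-node mapping (values are illustrative, not taken from the patch):

    logical_core_node_map = {0: 0, 1: 0, 56: 1, 57: 1}   # toy mapping: logical core id -> NUMA node
    core_list = [0, 56]                                  # an instance whose cores span two nodes
    numa_ids = sorted({logical_core_node_map[c] for c in core_list})
    crosses_numa = len(numa_ids) > 1                     # True here, so the launcher would log the warning

Keeping each instance's cores on a single node avoids the remote-memory accesses that the warning refers to.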
- ''' + """ cores_numa_map = self.logical_core_node_map if len(core_list) < 1: return True numa_ids = [] for core in core_list: numa_id = cores_numa_map[core] - if not numa_id in numa_ids: + if numa_id not in numa_ids: numa_ids.append(numa_id) if len(numa_ids) > 1: - logger.warning("Numa Aware: cores:{} on different NUMA nodes:{}".format( - str(core_list), str(numa_ids))) + logger.warning("Numa Aware: cores:{} on different NUMA nodes:{}".format(str(core_list), str(numa_ids))) return numa_ids -class Launcher(): - r""" - Base class for launcher - """ +class Launcher: + r"""Base class for launcher.""" def __init__(self): self.cpuinfo = CPUinfo() @@ -156,22 +149,25 @@ def launch(self, args): pass def add_lib_preload(self, lib_type=None): - ''' - Enale TCMalloc/JeMalloc/intel OpenMP - ''' + """Enable TCMalloc/JeMalloc/Intel OpenMP.""" library_paths = [] if "CONDA_PREFIX" in os.environ: library_paths.append(os.environ["CONDA_PREFIX"] + "/lib/") if "VIRTUAL_ENV" in os.environ: library_paths.append(os.environ["VIRTUAL_ENV"] + "/lib/") - library_paths += ["{}/.local/lib/".format(expanduser("~")), "/usr/local/lib/", - "/usr/local/lib64/", "/usr/lib/", "/usr/lib64/"] + library_paths += [ + "{}/.local/lib/".format(expanduser("~")), + "/usr/local/lib/", + "/usr/local/lib64/", + "/usr/lib/", + "/usr/lib64/", + ] lib_find = False lib_set = False for item in os.getenv("LD_PRELOAD", "").split(":"): - if item.endswith('lib{}.so'.format(lib_type)): + if item.endswith("lib{}.so".format(lib_type)): lib_set = True break if not lib_set: @@ -180,8 +176,7 @@ def add_lib_preload(self, lib_type=None): matches = glob.glob(library_file) if len(matches) > 0: if "LD_PRELOAD" in os.environ: - os.environ["LD_PRELOAD"] = matches[0] + \ - ":" + os.environ["LD_PRELOAD"] + os.environ["LD_PRELOAD"] = matches[0] + ":" + os.environ["LD_PRELOAD"] else: os.environ["LD_PRELOAD"] = matches[0] lib_find = True @@ -189,43 +184,47 @@ def add_lib_preload(self, lib_type=None): return lib_set or lib_find def set_memory_allocator(self, enable_tcmalloc=True, enable_jemalloc=False, use_default_allocator=False): - ''' - Enable TCMalloc/JeMalloc with LD_PRELOAD and set configuration for JeMalloc. + """Enable TCMalloc/JeMalloc with LD_PRELOAD and set configuration for JeMalloc. + By default, PTMalloc will be used for PyTorch, but TCMalloc and JeMalloc can get better memory reuse and reduce page faults to improve performance. - ''' + """ if enable_tcmalloc and enable_jemalloc: - logger.error( - "Unable to enable TCMalloc and JEMalloc at the same time") + logger.error("Unable to enable TCMalloc and JEMalloc at the same time") exit(-1) if enable_tcmalloc: find_tc = self.add_lib_preload(lib_type="tcmalloc") if not find_tc: - logger.warning("Unable to find the {} library file lib{}.so in $CONDA_PREFIX/lib \ + logger.warning( + "Unable to find the {} library file lib{}.so in $CONDA_PREFIX/lib \ or $VIRTUAL_ENV/lib" - " or /.local/lib/ or /usr/local/lib/ or /usr/local/lib64/ or /usr/lib \ + " or /.local/lib/ or /usr/local/lib/ or /usr/local/lib64/ or /usr/lib \ or /usr/lib64 or " - "{}/.local/lib/ so the LD_PRELOAD environment variable will not be set." - "you can use 'conda install -c conda-forge gperftools' to install tcmalloc" - .format("TCmalloc", "tcmalloc", expanduser("~"))) + "{}/.local/lib/ so the LD_PRELOAD environment variable will not be set."
+ "you can use 'conda install -c conda-forge gperftools' to install tcmalloc".format( + "TCmalloc", "tcmalloc", expanduser("~") + ) + ) else: logger.info("Use TCMalloc memory allocator") elif enable_jemalloc: find_je = self.add_lib_preload(lib_type="jemalloc") if not find_je: - logger.warning("Unable to find the {} library file lib{}.so in \ + logger.warning( + "Unable to find the {} library file lib{}.so in \ $CONDA_PREFIX/lib or $VIRTUAL_ENV/lib" - " or /.local/lib/ or /usr/local/lib/ or /usr/local/lib64/ or \ + " or /.local/lib/ or /usr/local/lib/ or /usr/local/lib64/ or \ /usr/lib or /usr/lib64 or " - "{}/.local/lib/ so the LD_PRELOAD environment variable will not be set." - "you can use 'conda install -c conda-forge jemalloc' to install jemalloc" - .format("JeMalloc", "jemalloc", expanduser("~"))) + "{}/.local/lib/ so the LD_PRELOAD environment variable will not be set." + "you can use 'conda install -c conda-forge jemalloc' to install jemalloc".format( + "JeMalloc", "jemalloc", expanduser("~") + ) + ) else: logger.info("Use JeMalloc memory allocator") - self.set_env( - 'MALLOC_CONF', "oversize_threshold:1,background_thread:true,metadata_thp:auto") + self.set_env("MALLOC_CONF", "oversize_threshold:1,background_thread:true,metadata_thp:auto") elif use_default_allocator: pass @@ -239,11 +238,14 @@ def set_memory_allocator(self, enable_tcmalloc=True, enable_jemalloc=False, use_ if find_je: logger.info("Use JeMalloc memory allocator") return - logger.warning("Neither TCMalloc nor JeMalloc is found in $CONDA_PREFIX/lib or $VIRTUAL_ENV/lib" - " or /.local/lib/ or /usr/local/lib/ or /usr/local/lib64/ or /usr/lib or /usr/lib64 or " - "{}/.local/lib/ so the LD_PRELOAD environment variable will not be set. \ - This may drop the performance" - .format(expanduser("~"))) + logger.warning( + "Neither TCMalloc nor JeMalloc is found in $CONDA_PREFIX/lib or $VIRTUAL_ENV/lib" + " or /.local/lib/ or /usr/local/lib/ or /usr/local/lib64/ or /usr/lib or /usr/lib64 or " + "{}/.local/lib/ so the LD_PRELOAD environment variable will not be set. \ + This may drop the performance".format( + expanduser("~") + ) + ) def logger_env(self, env_name=""): if env_name in os.environ: @@ -255,8 +257,11 @@ def set_env(self, env_name, env_value=None): if env_name not in os.environ: os.environ[env_name] = env_value elif os.environ[env_name] != env_value: - logger.warning("{} in environment variable is {} while the value you set is {}".format( - env_name, os.environ[env_name], env_value)) + logger.warning( + "{} in environment variable is {} while the value you set is {}".format( + env_name, os.environ[env_name], env_value + ) + ) logger.warning("Resetting {} to {}".format(env_name, env_value)) os.environ[env_name] = env_value self.logger_env(env_name) @@ -265,41 +270,46 @@ def set_env(self, env_name, env_value=None): # In scenario that use all cores on all nodes, including logical cores, # setting KMP_AFFINITY disables logical cores. # In this case, KMP_AFFINITY should not be set. - def set_multi_thread_and_allocator(self, ncore_per_instance, disable_iomp=False, - set_kmp_affinity=True, enable_tcmalloc=True, - enable_jemalloc=False, use_default_allocator=False): - ''' - Set multi-thread configuration and enable Intel openMP and TCMalloc/JeMalloc. - By default, GNU openMP and PTMalloc are used in PyTorch. 
+ def set_multi_thread_and_allocator( + self, + ncore_per_instance, + disable_iomp=False, + set_kmp_affinity=True, + enable_tcmalloc=True, + enable_jemalloc=False, + use_default_allocator=False, + ): + """Set multi-thread configuration and enable Intel openMP and TCMalloc/JeMalloc. + + By default, GNU openMP and PTMalloc are used in PyTorch. but Intel openMP and TCMalloc/JeMalloc are better alternatives to get performance benifit. - ''' - self.set_memory_allocator( - enable_tcmalloc, enable_jemalloc, use_default_allocator) + """ + self.set_memory_allocator(enable_tcmalloc, enable_jemalloc, use_default_allocator) self.set_env("OMP_NUM_THREADS", str(ncore_per_instance)) if not disable_iomp: find_iomp = self.add_lib_preload(lib_type="iomp5") if not find_iomp: - logger.warning("Unable to find the {} library file lib{}.so \ + logger.warning( + "Unable to find the {} library file lib{}.so \ in $CONDA_PREFIX/lib or $VIRTUAL_ENV/lib" - " or /.local/lib/ or /usr/local/lib/ or \ + " or /.local/lib/ or /usr/local/lib/ or \ /usr/local/lib64/ or /usr/lib or /usr/lib64 or " - "{}/.local/lib/ so the LD_PRELOAD environment variable will not be set." - "you can use 'conda install intel-openm' to install intel openMP" - .format("iomp", "iomp5", expanduser("~"))) + "{}/.local/lib/ so the LD_PRELOAD environment variable will not be set." + "you can use 'conda install intel-openm' to install intel openMP".format( + "iomp", "iomp5", expanduser("~") + ) + ) else: logger.info("Using Intel OpenMP") if set_kmp_affinity: - self.set_env("KMP_AFFINITY", - "granularity=fine,compact,1,0") + self.set_env("KMP_AFFINITY", "granularity=fine,compact,1,0") self.set_env("KMP_BLOCKTIME", "1") self.logger_env("LD_PRELOAD") class MultiInstanceLauncher(Launcher): - r""" - Launcher for single instance and multi-instance - """ + r"""Launcher for single instance and multi-instance.""" def launch(self, args): processes = [] @@ -308,12 +318,14 @@ def launch(self, args): if args.core_list: # user specify what cores will be used by params cores = [int(x) for x in args.core_list.split(",")] if args.ncore_per_instance == -1: - logger.error( - "please specify the '--ncore_per_instance' if you have pass the --core_list params") + logger.error("please specify the '--ncore_per_instance' if you have pass the --core_list params") exit(-1) elif args.ninstances > 1 and args.ncore_per_instance * args.ninstances < len(cores): - logger.warning("only first {} cores will be used, but you specify {} cores in core_list".format( - args.ncore_per_instance * args.ninstances, len(cores))) + logger.warning( + "only first {} cores will be used, but you specify {} cores in core_list".format( + args.ncore_per_instance * args.ninstances, len(cores) + ) + ) else: args.ninstances = len(cores) // args.ncore_per_instance @@ -339,9 +351,12 @@ def launch(self, args): args.throughput_mode = True elif args.ncore_per_instance == -1 and args.ninstances != -1: if args.ninstances > len(cores): - logger.error("there are {} total cores but you specify {} ninstances; \ + logger.error( + "there are {} total cores but you specify {} ninstances; \ please make sure ninstances <= total_cores)".format( - len(cores), args.ninstances)) + len(cores), args.ninstances + ) + ) exit(-1) else: args.ncore_per_instance = len(cores) // args.ninstances @@ -349,37 +364,39 @@ def launch(self, args): args.ninstances = len(cores) // args.ncore_per_instance else: if args.ninstances * args.ncore_per_instance > len(cores): - logger.error( - "Please make sure ninstances * ncore_per_instance <= 
total_cores") + logger.error("Please make sure ninstances * ncore_per_instance <= total_cores") exit(-1) if args.latency_mode: logger.warning( - '--latency_mode is exclusive to --ninstances, \ + "--latency_mode is exclusive to --ninstances, \ --ncore_per_instance, --node_id and --use_logical_core. \ - They won\'t take effect even they are set explicitly.') + They won't take effect even they are set explicitly." + ) args.ncore_per_instance = 4 cores = self.cpuinfo.get_all_physical_cores() args.ninstances = len(cores) // args.ncore_per_instance if args.throughput_mode: logger.warning( - '--throughput_mode is exclusive to --ninstances, \ + "--throughput_mode is exclusive to --ninstances, \ --ncore_per_instance, --node_id and --use_logical_core. \ - They won\'t take effect even they are set explicitly.') + They won't take effect even they are set explicitly." + ) args.ninstances = self.cpuinfo.node_nums() cores = self.cpuinfo.get_all_physical_cores() args.ncore_per_instance = len(cores) // args.ninstances if args.ninstances > 1 and args.instance_idx != -1: - logger.info("assigning {} cores for instance {}".format( - args.ncore_per_instance, args.instance_idx)) - - self.set_multi_thread_and_allocator(args.ncore_per_instance, - args.disable_iomp, - set_kmp_affinity, - args.enable_tcmalloc, - args.enable_jemalloc, - args.use_default_allocator) + logger.info("assigning {} cores for instance {}".format(args.ncore_per_instance, args.instance_idx)) + + self.set_multi_thread_and_allocator( + args.ncore_per_instance, + args.disable_iomp, + set_kmp_affinity, + args.enable_tcmalloc, + args.enable_jemalloc, + args.use_default_allocator, + ) os.environ["LAUNCH_CMD"] = "#" for i in range(args.ninstances): cmd = [] @@ -388,30 +405,30 @@ def launch(self, args): cmd = ["numactl"] cores = sorted(cores) if args.instance_idx == -1: # sequentially assign ncores_per_instance to ninstances - core_list = cores[i * args.ncore_per_instance: ( - i + 1) * args.ncore_per_instance] + core_list = cores[i * args.ncore_per_instance : (i + 1) * args.ncore_per_instance] else: # assign ncores_per_instance from instance_idx - core_list = cores[args.instance_idx * args.ncore_per_instance: ( - args.instance_idx + 1) * args.ncore_per_instance] + core_list = cores[ + args.instance_idx * args.ncore_per_instance : (args.instance_idx + 1) * args.ncore_per_instance + ] core_ranges = [] for core in core_list: if len(core_ranges) == 0: - range_elem = {'start': core, 'end': core} + range_elem = {"start": core, "end": core} core_ranges.append(range_elem) else: - if core - core_ranges[-1]['end'] == 1: - core_ranges[-1]['end'] = core + if core - core_ranges[-1]["end"] == 1: + core_ranges[-1]["end"] = core else: - range_elem = {'start': core, 'end': core} + range_elem = {"start": core, "end": core} core_ranges.append(range_elem) for r in core_ranges: - cur_process_cores = cur_process_cores + \ - "{}-{},".format(r['start'], r['end']) + cur_process_cores = cur_process_cores + "{}-{},".format(r["start"], r["end"]) cur_process_cores = cur_process_cores[:-1] numa_params = "-C {} ".format(cur_process_cores) - numa_params += "-m {}".format(",".join( - [str(numa_id) for numa_id in self.cpuinfo.numa_aware_check(core_list)])) + numa_params += "-m {}".format( + ",".join([str(numa_id) for numa_id in self.cpuinfo.numa_aware_check(core_list)]) + ) cmd.extend(numa_params.split()) with_python = not args.no_python if with_python: @@ -420,17 +437,16 @@ def launch(self, args): if args.module: cmd.append("-m") cmd.append(args.program) - log_name = 
args.log_file_prefix + \ - "_instance_{}_cores_".format( - i) + cur_process_cores.replace(',', '_') + ".log" + log_name = ( + args.log_file_prefix + "_instance_{}_cores_".format(i) + cur_process_cores.replace(",", "_") + ".log" + ) log_name = os.path.join(args.log_path, log_name) cmd.extend(args.program_args) os.environ["LAUNCH_CMD"] += " ".join(cmd) + ",#" cmd_s = " ".join(cmd) cmd_s = "{} 2>&1 | tee -a {}".format(cmd_s, args.log_path) logger.info(cmd_s) - process = subprocess.Popen( - cmd_s, env=os.environ, shell=True) # nosec + process = subprocess.Popen(cmd_s, env=os.environ, shell=True) # nosec processes.append(process) if args.instance_idx != -1: # launches single instance, instance_idx, only @@ -440,25 +456,22 @@ def launch(self, args): for process in processes: process.wait() if process.returncode != 0: - raise subprocess.CalledProcessError( - returncode=process.returncode, cmd=cmd_s) + raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd_s) class DistributedTrainingLauncher(Launcher): - r""" - Launcher for distributed traning with MPI launcher - """ + r"""Launcher for distributed training with MPI launcher.""" def get_mpi_pin_domain(self, nproc_per_node, ccl_worker_count, total_cores): - ''' - I_MPI_PIN_DOMAIN specify the cores used for every MPI process. + """I_MPI_PIN_DOMAIN specifies the cores used for every MPI process. + The first ccl_worker_count cores of every rank for ccl communication and the other cores will be used to do computation. For example: on CascadeLake 8280 CPU, 2 ranks on one node. ccl_worker_count=4 CCL_WORKER_COUNT=4 CCL_WORKER_AFFINITY="0,1,2,3,28,29,30,31" I_MPI_PIN_DOMAIN=[0xffffff0,0xffffff0000000] - ''' + """ ppn = nproc_per_node cores_per_rank = total_cores // ppn pin_domain = "[" @@ -467,20 +480,21 @@ def get_mpi_pin_domain(self, nproc_per_node, ccl_worker_count, total_cores): begin = proc * cores_per_rank + ccl_worker_count end = proc * cores_per_rank + cores_per_rank - 1 for i in range(begin, end + 1): - domain_binary |= (1 << i) + domain_binary |= 1 << i pin_domain += hex(domain_binary) + "," pin_domain += "]" return pin_domain def get_ccl_worker_affinity(self, nproc_per_node, ccl_worker_count, total_cores): - ''' - Computation and communication use different cores when using oneCCL - backend for distributed training. we use first ccl_worker_count cores of + """Computation and communication use different cores when using oneCCL + backend for distributed training. + + We use the first ccl_worker_count cores of every rank for ccl communication - ''' + """ ppn = nproc_per_node cores_per_rank = total_cores // ppn - affinity = '' + affinity = "" for proc in range(ppn): for ccl_worker in range(ccl_worker_count): affinity += str(proc * cores_per_rank + ccl_worker) + "," @@ -488,15 +502,16 @@ def get_ccl_worker_affinity(self, nproc_per_node, ccl_worker_count, total_cores) return affinity def launch(self, args): - ''' - Set ENVs and launch MPI process for distributed training.
- ''' + """Set ENVs and launch MPI process for distributed training.""" if args.nnodes > 1 and not os.path.exists(args.hostfile): - raise ValueError("hostfile is necessary when you use multi-node distributed training," - "Please create hostfile which include the ip list you used for distributed running") + raise ValueError( + "hostfile is necessary when you use multi-node distributed training," + "Please create hostfile which include the ip list you used for distributed running" + ) elif args.nnodes > 1: - ipv4_addr_pattern \ - = r"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$" + ipv4_addr_pattern = ( + r"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$" + ) ip_list = [] with open(args.hostfile) as f: for line in f: @@ -509,8 +524,9 @@ def launch(self, args): # ip_list.append(line) ip_list.append(line) if len(ip_list) < args.nnodes: - logger.error("The number of IP {} should greater than nnodes parameters {}".format( - len(ip_list), args.nnodes)) + logger.error( + "The number of IP {} should greater than nnodes parameters {}".format(len(ip_list), args.nnodes) + ) exit(-1) master_check = False dic = psutil.net_if_addrs() @@ -522,22 +538,26 @@ def launch(self, args): if not master_check: logger.error( "MASTER_ADDR is incorrect. Please make sure the first line {} \ - in your hostfile is ip address of the current node".format(ip_list[0])) + in your hostfile is ip address of the current node".format( + ip_list[0] + ) + ) exit(-1) logger.info("Begin to validate the ip connect") args.master_addr = ip_list[0] for ip in ip_list[1:]: completed_process = subprocess.run( - "ssh -o PasswordAuthentication=no {} ':'".format(ip), shell=True) # nosec + "ssh -o PasswordAuthentication=no {} ':'".format(ip), shell=True + ) # nosec if completed_process.returncode != 0: logger.error( "Passwordless SSH login to {} failed, please \ - make sure you have setup SSH public key right") + make sure you have setup SSH public key right" + ) exit(-1) else: - logger.info("connection from master node {} to slave node {} is OK".format( - args.master_addr, ip)) + logger.info("connection from master node {} to slave node {} is OK".format(args.master_addr, ip)) total_cores_per_node = self.cpuinfo.physical_core_nums() if args.use_logical_core: @@ -546,30 +566,31 @@ def launch(self, args): # set distributed related environmental variables self.set_env("MASTER_ADDR", args.master_addr) self.set_env("MASTER_PORT", str(args.master_port)) - mpi_pin_domain = self.get_mpi_pin_domain( - args.nproc_per_node, args.ccl_worker_count, total_cores_per_node) + mpi_pin_domain = self.get_mpi_pin_domain(args.nproc_per_node, args.ccl_worker_count, total_cores_per_node) self.set_env("I_MPI_PIN_DOMAIN", mpi_pin_domain) ppn = args.nproc_per_node cores_per_rank = total_cores_per_node // ppn opm_num_threads = cores_per_rank - args.ccl_worker_count - self.set_multi_thread_and_allocator(opm_num_threads, - args.disable_iomp, - True, - args.enable_tcmalloc, - args.enable_jemalloc, - args.use_default_allocator) + self.set_multi_thread_and_allocator( + opm_num_threads, + args.disable_iomp, + True, + args.enable_tcmalloc, + args.enable_jemalloc, + args.use_default_allocator, + ) self.set_env("CCL_WORKER_COUNT", str(args.ccl_worker_count)) - ccl_affinity = self.get_ccl_worker_affinity( - args.nproc_per_node, args.ccl_worker_count, total_cores_per_node) + ccl_affinity = self.get_ccl_worker_affinity(args.nproc_per_node, args.ccl_worker_count, total_cores_per_node) 
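For reference, the I_MPI_PIN_DOMAIN value computed above can be reproduced with a minimal standalone sketch of get_mpi_pin_domain's core logic; the 2-rank, 4-CCL-worker, 56-core figures below are illustrative assumptions, not values taken from this patch.

    def pin_domain(nproc_per_node, ccl_worker_count, total_cores):
        # Each rank owns total_cores // nproc_per_node cores; the first
        # ccl_worker_count of them are excluded from the domain because they
        # are reserved for oneCCL worker threads (see CCL_WORKER_AFFINITY).
        cores_per_rank = total_cores // nproc_per_node
        masks = []
        for proc in range(nproc_per_node):
            mask = 0
            for core in range(proc * cores_per_rank + ccl_worker_count, (proc + 1) * cores_per_rank):
                mask |= 1 << core
            masks.append(hex(mask))
        return "[" + ",".join(masks) + "]"

    print(pin_domain(2, 4, 56))  # -> "[0xffffff0,0xffffff00000000]"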
self.set_env("CCL_WORKER_AFFINITY", ccl_affinity) os.environ["LAUNCH_CMD"] = "#" - cmd = ['mpiexec.hydra'] + cmd = ["mpiexec.hydra"] mpi_config = "-l -np {} -ppn {} -genv I_MPI_PIN_DOMAIN={} -genv OMP_NUM_THREADS={} ".format( - args.nnodes * args.nproc_per_node, args.nproc_per_node, mpi_pin_domain, opm_num_threads) + args.nnodes * args.nproc_per_node, args.nproc_per_node, mpi_pin_domain, opm_num_threads + ) mpi_config += args.more_mpi_params if args.nnodes > 1: mpi_config += " -hostfile {}".format(args.hostfile) @@ -590,91 +611,144 @@ def launch(self, args): def add_distributed_training_params(parser): - cpuinfo = CPUinfo() node_nums = cpuinfo.node_nums() - group = parser.add_argument_group( - "Distributed Training Parameters With oneCCL backend") - group.add_argument("--nnodes", metavar='\b', type=int, default=1, - help="The number of nodes to use for distributed " - "training") - group.add_argument("--nproc_per_node", metavar='\b', type=int, default=node_nums, - help="The number of processes to launch on each node") + group = parser.add_argument_group("Distributed Training Parameters With oneCCL backend") + group.add_argument( + "--nnodes", metavar="\b", type=int, default=1, help="The number of nodes to use for distributed " "training" + ) + group.add_argument( + "--nproc_per_node", + metavar="\b", + type=int, + default=node_nums, + help="The number of processes to launch on each node", + ) # ccl control - group.add_argument("--ccl_worker_count", metavar='\b', default=4, type=int, - help="Core numbers per rank used for ccl communication") + group.add_argument( + "--ccl_worker_count", metavar="\b", default=4, type=int, help="Core numbers per rank used for ccl communication" + ) # mpi control - group.add_argument("--master_addr", metavar='\b', default="127.0.0.1", type=str, - help="Master node (rank 0)'s address, should be either " - "the IP address or the hostname of node 0, for " - "single node multi-proc training, the " - "--master_addr can simply be 127.0.0.1") - group.add_argument("--master_port", metavar='\b', default=29500, type=int, - help="Master node (rank 0)'s free port that needs to " - "be used for communication during distributed " - "training") - group.add_argument("--hostfile", metavar='\b', default="hostfile", type=str, - help="Hostfile is necessary for multi-node multi-proc " - "training. hostfile includes the node address list " - "node address which should be either the IP address" - "or the hostname.") - group.add_argument("--more_mpi_params", metavar='\b', default="", type=str, - help="User can pass more parameters for mpiexec.hydra " - "except for -np -ppn -hostfile and -genv I_MPI_PIN_DOMAIN") + group.add_argument( + "--master_addr", + metavar="\b", + default="127.0.0.1", + type=str, + help="Master node (rank 0)'s address, should be either " + "the IP address or the hostname of node 0, for " + "single node multi-proc training, the " + "--master_addr can simply be 127.0.0.1", + ) + group.add_argument( + "--master_port", + metavar="\b", + default=29500, + type=int, + help="Master node (rank 0)'s free port that needs to " + "be used for communication during distributed " + "training", + ) + group.add_argument( + "--hostfile", + metavar="\b", + default="hostfile", + type=str, + help="Hostfile is necessary for multi-node multi-proc " + "training. 
hostfile includes the node address list " + "node address which should be either the IP address" + "or the hostname.", + ) + group.add_argument( + "--more_mpi_params", + metavar="\b", + default="", + type=str, + help="User can pass more parameters for mpiexec.hydra " + "except for -np -ppn -hostfile and -genv I_MPI_PIN_DOMAIN", + ) def add_memory_allocator_params(parser): - group = parser.add_argument_group("Memory Allocator Parameters") # allocator control - group.add_argument("--enable_tcmalloc", action='store_true', default=False, - help="Enable tcmalloc allocator") - group.add_argument("--enable_jemalloc", action='store_true', default=False, - help="Enable jemalloc allocator") - group.add_argument("--use_default_allocator", action='store_true', default=False, - help="Use default memory allocator") + group.add_argument("--enable_tcmalloc", action="store_true", default=False, help="Enable tcmalloc allocator") + group.add_argument("--enable_jemalloc", action="store_true", default=False, help="Enable jemalloc allocator") + group.add_argument( + "--use_default_allocator", action="store_true", default=False, help="Use default memory allocator" + ) def add_multi_instance_params(parser): - group = parser.add_argument_group("Multi-instance Parameters") # multi-instance control - group.add_argument("--ncore_per_instance", metavar='\b', default=-1, type=int, - help="Cores per instance") - group.add_argument("--ninstances", metavar='\b', default=-1, type=int, - help="For multi-instance,\ \ - you should give the cores number you used for per instance.") - group.add_argument("--instance_idx", metavar='\b', default="-1", type=int, - help="Specify instance index to assign ncores_per_instance for instance_idx; \ + group.add_argument("--ncore_per_instance", metavar="\b", default=-1, type=int, help="Cores per instance") + group.add_argument( + "--ninstances", + metavar="\b", + default=-1, + type=int, + help="For multi-instance,\ \ + you should give the cores number you used for per instance.", + ) + group.add_argument( + "--instance_idx", + metavar="\b", + default="-1", + type=int, + help="Specify instance index to assign ncores_per_instance for instance_idx; \ otherwise ncore_per_instance will be assigned sequentially to ninstances. \ Please refer to https://github.com/intel/intel-extension-for-pytorch/\ - blob/master/docs/tutorials/performance_tuning/launch_script.md") - group.add_argument("--latency_mode", action='store_true', default=False, - help="By detault 4 core per instance and use all physical cores") - group.add_argument("--throughput_mode", action='store_true', default=False, - help="By default one instance per node and use all physical cores") - group.add_argument("--node_id", metavar='\b', default=-1, type=int, - help="node id for multi-instance, by default all nodes will be used") - group.add_argument("--use_logical_core", action='store_true', default=False, - help="Whether only use physical cores") - group.add_argument("--disable_numactl", action='store_true', default=False, - help="Disable numactl") - group.add_argument("--core_list", metavar='\b', default=None, type=str, - help="Specify the core list as 'core_id, core_id, ....', \ - otherwise, all the cores will be used.") - group.add_argument("--log_path", metavar='\b', default="", type=str, - help="The log file directory. 
Default path is '', \ - which means disable logging to files.") - group.add_argument("--log_file_prefix", metavar='\b', default="run", type=str, - help="log file prefix") + blob/master/docs/tutorials/performance_tuning/launch_script.md", + ) + group.add_argument( + "--latency_mode", + action="store_true", + default=False, + help="By detault 4 core per instance and use all physical cores", + ) + group.add_argument( + "--throughput_mode", + action="store_true", + default=False, + help="By default one instance per node and use all physical cores", + ) + group.add_argument( + "--node_id", + metavar="\b", + default=-1, + type=int, + help="node id for multi-instance, by default all nodes will be used", + ) + group.add_argument("--use_logical_core", action="store_true", default=False, help="Whether only use physical cores") + group.add_argument("--disable_numactl", action="store_true", default=False, help="Disable numactl") + group.add_argument( + "--core_list", + metavar="\b", + default=None, + type=str, + help="Specify the core list as 'core_id, core_id, ....', \ + otherwise, all the cores will be used.", + ) + group.add_argument( + "--log_path", + metavar="\b", + default="", + type=str, + help="The log file directory. Default path is '', \ + which means disable logging to files.", + ) + group.add_argument("--log_file_prefix", metavar="\b", default="run", type=str, help="log file prefix") def add_kmp_iomp_params(parser): - group = parser.add_argument_group("IOMP Parameters") - group.add_argument("--disable_iomp", action='store_true', default=False, - help="By default, we use Intel OpenMP and libiomp5.so will be add to LD_PRELOAD") + group.add_argument( + "--disable_iomp", + action="store_true", + default=False, + help="By default, we use Intel OpenMP and libiomp5.so will be add to LD_PRELOAD", + ) def parse_args(): @@ -684,40 +758,53 @@ def parse_args(): """ parser = ArgumentParser( description="This is a script for launching PyTorch training and inference on Intel Xeon CPU " - "with optimal configurations. Now, single instance inference/training, multi-instance " - "inference/training and distributed training with oneCCL backend is enabled. " - "To get the peak performance on Intel Xeon CPU, the script optimizes the configuration " - "of thread and memory management. For thread management, the script configures thread " - "affinity and the preload of Intel OMP library. For memory management, it configures " - "NUMA binding and preload optimized memory allocation library (e.g. tcmalloc, jemalloc) " - "\n################################# Basic usage ############################# \n" - "\n 1. single instance\n" - "\n >>> python -m intel_extension_for_pytorch.cpu.launch python_script args \n" - "\n2. multi-instance \n" - "\n >>> python -m intel_extension_for_pytorch.cpu.launch --ninstances xxx \ + "with optimal configurations. Now, single instance inference/training, multi-instance " + "inference/training and distributed training with oneCCL backend is enabled. " + "To get the peak performance on Intel Xeon CPU, the script optimizes the configuration " + "of thread and memory management. For thread management, the script configures thread " + "affinity and the preload of Intel OMP library. For memory management, it configures " + "NUMA binding and preload optimized memory allocation library (e.g. tcmalloc, jemalloc) " + "\n################################# Basic usage ############################# \n" + "\n 1. 
single instance\n" + "\n >>> python -m intel_extension_for_pytorch.cpu.launch python_script args \n" + "\n2. multi-instance \n" + "\n >>> python -m intel_extension_for_pytorch.cpu.launch --ninstances xxx \ --ncore_per_instance xx python_script args\n" - "\n3. Single-Node multi-process distributed training\n" - "\n >>> python -m intel_extension_for_pytorch.cpu.launch --distributed python_script args\n" - "\n4. Multi-Node multi-process distributed training: (e.g. two nodes)\n" - "\n rank 0: *(IP: 192.168.10.10, and has a free port: 295000)*\n" - "\n >>> python -m intel_extension_for_pytorch.cpu.launch --distributed --nproc_per_node=2\n" - "\n --nnodes=2 --hostfile hostfile python_script args\n" - "\n############################################################################# \n", - formatter_class=RawTextHelpFormatter) - - parser.add_argument("--multi_instance", action='store_true', default=False, - help="Enable multi-instance, by default one instance per node") - - parser.add_argument('--distributed', action='store_true', default=False, - help='Enable distributed training.') - parser.add_argument("-m", "--module", default=False, action="store_true", - help="Changes each process to interpret the launch script " - "as a python module, executing with the same behavior as" - "'python -m'.") - - parser.add_argument("--no_python", default=False, action="store_true", - help="Do not prepend the --program script with \"python\" - just exec " - "it directly. Useful when the script is not a Python script.") + "\n3. Single-Node multi-process distributed training\n" + "\n >>> python -m intel_extension_for_pytorch.cpu.launch --distributed python_script args\n" + "\n4. Multi-Node multi-process distributed training: (e.g. two nodes)\n" + "\n rank 0: *(IP: 192.168.10.10, and has a free port: 295000)*\n" + "\n >>> python -m intel_extension_for_pytorch.cpu.launch --distributed --nproc_per_node=2\n" + "\n --nnodes=2 --hostfile hostfile python_script args\n" + "\n############################################################################# \n", + formatter_class=RawTextHelpFormatter, + ) + + parser.add_argument( + "--multi_instance", + action="store_true", + default=False, + help="Enable multi-instance, by default one instance per node", + ) + + parser.add_argument("--distributed", action="store_true", default=False, help="Enable distributed training.") + parser.add_argument( + "-m", + "--module", + default=False, + action="store_true", + help="Changes each process to interpret the launch script " + "as a python module, executing with the same behavior as" + "'python -m'.", + ) + + parser.add_argument( + "--no_python", + default=False, + action="store_true", + help='Do not prepend the --program script with "python" - just exec ' + "it directly. Useful when the script is not a Python script.", + ) add_memory_allocator_params(parser) add_kmp_iomp_params(parser) @@ -725,12 +812,14 @@ def parse_args(): add_distributed_training_params(parser) add_multi_instance_params(parser) # positional - parser.add_argument("program", type=str, - help="The full path to the proram/script to be launched. " - "followed by all the arguments for the script") + parser.add_argument( + "program", + type=str, + help="The full path to the proram/script to be launched. 
" "followed by all the arguments for the script", + ) # rest from the training program - parser.add_argument('program_args', nargs=REMAINDER) + parser.add_argument("program_args", nargs=REMAINDER) return parser.parse_args() @@ -741,14 +830,37 @@ def exec_launcher(ncore_per_instance, ninstances, program, program_args, log_pat # args = parse_args() import argparse + args = argparse.Namespace( - multi_instance=True, distributed=False, module=False, no_python=False, enable_tcmalloc=False, - enable_jemalloc=False, use_default_allocator=False, disable_iomp=False, nnodes=1, nproc_per_node=2, - ccl_worker_count=4, master_addr='127.0.0.1', master_port=29500, hostfile='hostfile', more_mpi_params='', - ncore_per_instance=ncore_per_instance, ninstances=ninstances, instance_idx=-1, latency_mode=False, - throughput_mode=False, node_id=-1, - use_logical_core=False, disable_numactl=False, core_list=None, log_path=log_path, log_file_prefix='run', - program=program, program_args=program_args) + multi_instance=True, + distributed=False, + module=False, + no_python=False, + enable_tcmalloc=False, + enable_jemalloc=False, + use_default_allocator=False, + disable_iomp=False, + nnodes=1, + nproc_per_node=2, + ccl_worker_count=4, + master_addr="127.0.0.1", + master_port=29500, + hostfile="hostfile", + more_mpi_params="", + ncore_per_instance=ncore_per_instance, + ninstances=ninstances, + instance_idx=-1, + latency_mode=False, + throughput_mode=False, + node_id=-1, + use_logical_core=False, + disable_numactl=False, + core_list=None, + log_path=log_path, + log_file_prefix="run", + program=program, + program_args=program_args, + ) # if args.log_path: # path = os.path.dirname(args.log_path if args.log_path.endswith('/') else args.log_path + '/') @@ -763,19 +875,16 @@ def exec_launcher(ncore_per_instance, ninstances, program, program_args, log_pat # logger.addHandler(fileHandler) if args.distributed and args.multi_instance: - raise RuntimeError( - "Either args.distributed or args.multi_instance should be set") + raise RuntimeError("Either args.distributed or args.multi_instance should be set") if args.latency_mode and args.throughput_mode: - raise RuntimeError( - "Either args.latency_mode or args.throughput_mode should be set") + raise RuntimeError("Either args.latency_mode or args.throughput_mode should be set") if args.nnodes > 1: args.distributed = True if not args.no_python and not args.program.endswith(".py"): - logger.error( - "For non Python script, you should use '--no_python' parameter.") + logger.error("For non Python script, you should use '--no_python' parameter.") exit() # Verify LD_PRELOAD @@ -787,8 +896,7 @@ def exec_launcher(ncore_per_instance, ninstances, program, program_args, log_pat if len(matches) > 0: lst_valid.append(item) else: - logger.warning( - "{} doesn't exist. Removing it from LD_PRELOAD.".format(item)) + logger.warning("{} doesn't exist. 
Removing it from LD_PRELOAD.".format(item)) if len(lst_valid) > 0: os.environ["LD_PRELOAD"] = ":".join(lst_valid) else: @@ -802,4 +910,4 @@ def exec_launcher(ncore_per_instance, ninstances, program, program_args, log_pat launcher.launch(args) for x in sorted(set(os.environ.keys()) - env_before): - logger.debug('{0}={1}'.format(x, os.environ[x])) + logger.debug("{0}={1}".format(x, os.environ[x])) diff --git a/neural_coder/utils/pdf_report.py b/neural_coder/utils/pdf_report.py index ab1d4f26116..5b7b42fa376 100644 --- a/neural_coder/utils/pdf_report.py +++ b/neural_coder/utils/pdf_report.py @@ -400,7 +400,7 @@ # bc.categoryAxis.categoryNames = ["Default: " + str(format(TCO_raw, ',')) + " (sample/$)", # "Optimized: " + str(format(TCO_accelerated, ',')) + " (sample/$)"] # drawing.add(bc) - + # # add label # lab = Label() # lab.setOrigin(0, 0) diff --git a/neural_compressor/__init__.py b/neural_compressor/__init__.py index 2cf56182e99..5ee86bf561a 100644 --- a/neural_compressor/__init__.py +++ b/neural_compressor/__init__.py @@ -14,13 +14,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Intel® Neural Compressor: An open-source Python library supporting popular model compression techniques.""" from .version import __version__ + # we need to set a global 'NA' backend, or Model can't be used -from .config import DistillationConfig, PostTrainingQuantConfig, \ - WeightPruningConfig, QuantizationAwareTrainingConfig, \ - MixedPrecisionConfig +from .config import ( + DistillationConfig, + PostTrainingQuantConfig, + WeightPruningConfig, + QuantizationAwareTrainingConfig, + MixedPrecisionConfig, +) from .contrib import * from .model import * from .metric import * diff --git a/neural_compressor/adaptor/__init__.py b/neural_compressor/adaptor/__init__.py index 6cfe19e402f..017ce8369f5 100644 --- a/neural_compressor/adaptor/__init__.py +++ b/neural_compressor/adaptor/__init__.py @@ -22,7 +22,7 @@ modules = glob.glob(join(dirname(__file__), "*.py")) for f in modules: - if isfile(f) and not f.startswith('__') and not f.endswith('__init__.py'): + if isfile(f) and not f.startswith("__") and not f.endswith("__init__.py"): __import__(basename(f)[:-3], globals(), locals(), level=1) __all__ = ["FRAMEWORKS"] diff --git a/neural_compressor/adaptor/adaptor.py b/neural_compressor/adaptor/adaptor.py index 0eda5e19bcb..a0aabda849c 100644 --- a/neural_compressor/adaptor/adaptor.py +++ b/neural_compressor/adaptor/adaptor.py @@ -17,185 +17,183 @@ from abc import abstractmethod -'''The framework backends supported by neural_compressor, including tensorflow, mxnet and pytorch. +"""The framework backends supported by neural_compressor, including tensorflow, mxnet and pytorch. User could add new backend support by implementing new Adaptor subclass under this directory. The naming convention of new Adaptor subclass should be something like ABCAdaptor, user could choose this framework backend by setting "abc" string in framework field of yaml. FRAMEWORKS variable is used to store all implemented Adaptor subclasses of framework backends. -''' +""" FRAMEWORKS = {} def adaptor_registry(cls): - '''The class decorator used to register all Adaptor subclasses. - - Args: - cls (class): The class of register. - ''' - assert cls.__name__.endswith( - 'Adaptor'), "The name of subclass of Adaptor should end with \'Adaptor\' substring." 
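The registration convention spelled out in the module docstring above amounts to the following; ABCAdaptor and the "abc" framework string are hypothetical names used only to illustrate the naming rule, not part of this patch.

    from neural_compressor.adaptor.adaptor import Adaptor, adaptor_registry

    @adaptor_registry              # runs at class-definition time
    class ABCAdaptor(Adaptor):     # class name must end with "Adaptor"
        def __init__(self, framework_specific_info):
            super().__init__(framework_specific_info)

    # FRAMEWORKS["abc"] now maps to ABCAdaptor, so setting "abc" in the
    # framework field of a yaml selects this backend.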
- if cls.__name__[:-len('Adaptor')].lower() in FRAMEWORKS: - raise ValueError('Cannot have two frameworks with the same name.') - FRAMEWORKS[cls.__name__[:-len('Adaptor')].lower()] = cls + """The class decorator used to register all Adaptor subclasses. + + Args: + cls (class): The class of register. + """ + assert cls.__name__.endswith("Adaptor"), "The name of subclass of Adaptor should end with 'Adaptor' substring." + if cls.__name__[: -len("Adaptor")].lower() in FRAMEWORKS: + raise ValueError("Cannot have two frameworks with the same name.") + FRAMEWORKS[cls.__name__[: -len("Adaptor")].lower()] = cls return cls class Adaptor(object): - '''The base class of framework adaptor layer. - - ''' + """The base class of framework adaptor layer.""" def __init__(self, framework_specific_info): pass @abstractmethod def quantize(self, tune_cfg, model, dataloader, q_func=None): - '''The function is used to do calibration and quanitization in post-training quantization. - - Args: - tune_cfg(dict): The chosen tuning configuration. - model (object): The model to do calibration. - dataloader(object): The dataloader used to load calibration dataset. - q_func (optional): training function for quantization aware training mode. - ''' + """The function is used to do calibration and quanitization in post-training quantization. + + Args: + tune_cfg(dict): The chosen tuning configuration. + model (object): The model to do calibration. + dataloader(object): The dataloader used to load calibration dataset. + q_func (optional): training function for quantization aware training mode. + """ raise NotImplementedError @abstractmethod - def evaluate(self, model, dataloader, postprocess=None, - metric=None, measurer=None, iteration=-1, tensorboard=False): - '''The function is used to run evaluation on validation dataset. - - Args: - model (object): The model to do calibration. - dataloader (generator): generate the data and labels. - postprocess (object, optional): process the result from the model - metric (object, optional): Depends on model category. Defaults to None. - measurer (object, optional): for precise benchmark measurement. - iteration(int, optional): control steps of mini-batch - tensorboard (boolean, optional): for tensorboard inspect tensor. - ''' + def evaluate( + self, model, dataloader, postprocess=None, metric=None, measurer=None, iteration=-1, tensorboard=False + ): + """The function is used to run evaluation on validation dataset. + + Args: + model (object): The model to do calibration. + dataloader (generator): generate the data and labels. + postprocess (object, optional): process the result from the model + metric (object, optional): Depends on model category. Defaults to None. + measurer (object, optional): for precise benchmark measurement. + iteration(int, optional): control steps of mini-batch + tensorboard (boolean, optional): for tensorboard inspect tensor. + """ raise NotImplementedError @abstractmethod def query_fw_capability(self, model): - '''The function is used to return framework tuning capability. + """The function is used to return framework tuning capability. - Args: - model (object): The model to query quantization tuning capability. - ''' + Args: + model (object): The model to query quantization tuning capability. + """ raise NotImplementedError @abstractmethod def query_fused_patterns(self, model): - '''The function is used to run fused patterns in framework. + """The function is used to run fused patterns in framework. - Args: - model (object): The model to do calibration. 
+ Args: + model (object): The model to do calibration. - Return: - [['conv', 'relu'], ['conv', 'relu', 'bn']] - ''' + Return: + [['conv', 'relu'], ['conv', 'relu', 'bn']] + """ raise NotImplementedError @abstractmethod - def inspect_tensor(self, model, dataloader, op_list=[], iteration_list=[], - inspect_type='activation', save_to_disk=False): - '''The function is used by tune strategy class for dumping tensor info. - - Args: - model (object): The model to inspect. - dataloader (object): The dataloader used to feed into. - op_list (list): The op name in the fp32 model for dumpping. - iteration_list (list): The iteration list containing iterations to dump. - inspect_type (str): The valid value are 'weight', 'activation', 'all'. - save_to_disk (bool): Save to disk or memory. - - Return: - Numpy Array Dict - { - 'weight': { - 'node0_name': {'weight0_name': numpy.array, 'bias0_name': numpy.array, ...}, - 'node1_name': {'weight1_name': numpy.array, 'bias1_name': numpy.array, ...}, - ... - }, - 'activation': [ - # iter 0 - { - 'node0_name': {'output0_name': numpy.array, 'output1_name': numpy.array, ...} - 'node1_name': {'output1_name': numpy.array, 'output1_name': numpy.array, ...} - ... - }, - # iter 1 - ... - ] - } - ''' + def inspect_tensor( + self, model, dataloader, op_list=[], iteration_list=[], inspect_type="activation", save_to_disk=False + ): + """The function is used by tune strategy class for dumping tensor info. + + Args: + model (object): The model to inspect. + dataloader (object): The dataloader used to feed into. + op_list (list): The op name in the fp32 model for dumpping. + iteration_list (list): The iteration list containing iterations to dump. + inspect_type (str): The valid value are 'weight', 'activation', 'all'. + save_to_disk (bool): Save to disk or memory. + + Return: + Numpy Array Dict + { + 'weight': { + 'node0_name': {'weight0_name': numpy.array, 'bias0_name': numpy.array, ...}, + 'node1_name': {'weight1_name': numpy.array, 'bias1_name': numpy.array, ...}, + ... + }, + 'activation': [ + # iter 0 + { + 'node0_name': {'output0_name': numpy.array, 'output1_name': numpy.array, ...} + 'node1_name': {'output1_name': numpy.array, 'output1_name': numpy.array, ...} + ... + }, + # iter 1 + ... + ] + } + """ raise NotImplementedError @abstractmethod def set_tensor(self, model, tensor_dict): - '''The function is used by tune strategy class for setting tensor back to model. - - Args: - model (object): The model to set tensor. Usually it is quantized model. - tensor_dict (dict): The tensor dict to set. Note the numpy array contains float - value, adaptor layer has the responsibility to quantize to - int8 or int32 to set into the quantized model if needed. - The dict format is something like: - { - 'weight0_name': numpy.array, - 'bias0_name': numpy.array, - ... - } - ''' + """The function is used by tune strategy class for setting tensor back to model. + + Args: + model (object): The model to set tensor. Usually it is quantized model. + tensor_dict (dict): The tensor dict to set. Note the numpy array contains float + value, adaptor layer has the responsibility to quantize to + int8 or int32 to set into the quantized model if needed. + The dict format is something like: + { + 'weight0_name': numpy.array, + 'bias0_name': numpy.array, + ... + } + """ raise NotImplementedError def quantize_input(self, model): - ''' quantize the model to be able to take quantized input + """Quantize the model to be able to take quantized input. 
- Args: - model (object): The model to quantize input - Return: - model (object): The quantized input model - scale (float): The scale for dataloader to generate quantized input - ''' - return model, 1. + Args: + model (object): The model to quantize input + Return: + model (object): The quantized input model + scale (float): The scale for dataloader to generate quantized input + """ + return model, 1.0 @abstractmethod def _pre_eval_hook(self, model, *args, **kwargs): - '''The function is used to do some preprocession before evaluation phase. + """The function is used to do some preprocessing before the evaluation phase. Return: model - ''' + """ raise NotImplementedError @abstractmethod def _post_eval_hook(self, model, *args, **kwargs): - '''The function is used to do some post process after complete evaluation. - ''' + """The function is used to do some post-processing after the evaluation completes.""" raise NotImplementedError @abstractmethod def save(self, model, path): - '''The function is used by tune strategy class for saving model. + """The function is used by tune strategy class for saving model. - Args: - model (object): The model to saved. - path (string): The path where to save. - ''' + Args: + model (object): The model to be saved. + path (string): The path where to save. + """ raise NotImplementedError @abstractmethod def convert(self, model, source, destinatin): - '''The function is used to convert a source model format to another. + """The function is used to convert a source model format to another. - Args: - model (neural_compressor.model): base model to be converted. - source (string): The source model format. - destination (string): The destination model format. - ''' + Args: + model (neural_compressor.model): base model to be converted. + source (string): The source model format. + destination (string): The destination model format. + """ raise NotImplementedError diff --git a/neural_compressor/adaptor/keras.py b/neural_compressor/adaptor/keras.py index b80abf5d6d0..4a7e3bf36be 100644 --- a/neural_compressor/adaptor/keras.py +++ b/neural_compressor/adaptor/keras.py @@ -15,73 +15,93 @@ # See the License for the specific language governing permissions and # limitations under the License.
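The KerasAdaptor below is registered through the same adaptor_registry decorator, so callers can look it up from FRAMEWORKS; a rough sketch of that lookup follows, where the framework_specific_info keys and values are illustrative assumptions rather than a documented contract.

    from neural_compressor.adaptor.adaptor import FRAMEWORKS

    # "KerasAdaptor" minus the "Adaptor" suffix, lower-cased, is the registry key.
    adaptor_cls = FRAMEWORKS["keras"]
    adaptor = adaptor_cls({
        "approach": "post_training_static_quant",  # assumed value
        "device": "cpu",                           # read in KerasAdaptor.__init__
        "backend": "itex",                         # assumed value; __init__ reads this key
    })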
-import os import copy import json -import yaml import math -import numpy as np +import os from collections import OrderedDict, UserDict -from .query import QueryBackendCapability -from .adaptor import adaptor_registry, Adaptor -from ..utils.utility import LazyImport, CpuInfo, singleton, Dequantize, dump_elapsed_time -from ..utils.utility import Statistics, GLOBAL_STATE, MODE, version1_lt_version2 -from ..utils import logger + +import numpy as np +import yaml + from ..conf.dotdict import deep_get from ..data.dataloaders.base_dataloader import BaseDataLoader -tf = LazyImport('tensorflow') +from ..utils import logger +from ..utils.utility import ( + GLOBAL_STATE, + MODE, + CpuInfo, + Dequantize, + LazyImport, + Statistics, + dump_elapsed_time, + singleton, + version1_lt_version2, +) +from .adaptor import Adaptor, adaptor_registry +from .query import QueryBackendCapability + +tf = LazyImport("tensorflow") + def _add_supported_quantized_objects(custom_objects): - """Map all the quantized objects.""" - from neural_compressor.adaptor.keras_utils.quantizer import Quantize, DeQuantize - from neural_compressor.adaptor.keras_utils.quantizer import FakeQuant - from neural_compressor.adaptor.keras_utils.conv2d import QConv2D - from neural_compressor.adaptor.keras_utils.depthwise_conv2d import QDepthwiseConv2D - from neural_compressor.adaptor.keras_utils.separable_conv2d import QSeparableConv2D - from neural_compressor.adaptor.keras_utils.dense import QDense - from neural_compressor.adaptor.keras_utils.pool2d import QMaxPool2D, QAvgPool2D - custom_objects["Quantize"] = Quantize - custom_objects["DeQuantize"] = DeQuantize - custom_objects["FakeQuant"] = FakeQuant - custom_objects["QConv2D"] = QConv2D - custom_objects["QDepthwiseConv2D"] = QDepthwiseConv2D - custom_objects["QSeparableConv2D"] = QSeparableConv2D - custom_objects["QDense"] = QDense - custom_objects["QMaxPool2D"] = QMaxPool2D - custom_objects["QAvgPool2D"] = QAvgPool2D - custom_objects["QMaxPooling2D"] = QMaxPool2D - custom_objects["QAveragePooling2D"] = QAvgPool2D - return custom_objects + """Map all the quantized objects.""" + from neural_compressor.adaptor.keras_utils.conv2d import QConv2D + from neural_compressor.adaptor.keras_utils.dense import QDense + from neural_compressor.adaptor.keras_utils.depthwise_conv2d import QDepthwiseConv2D + from neural_compressor.adaptor.keras_utils.pool2d import QAvgPool2D, QMaxPool2D + from neural_compressor.adaptor.keras_utils.quantizer import DeQuantize, FakeQuant, Quantize + from neural_compressor.adaptor.keras_utils.separable_conv2d import QSeparableConv2D + + custom_objects["Quantize"] = Quantize + custom_objects["DeQuantize"] = DeQuantize + custom_objects["FakeQuant"] = FakeQuant + custom_objects["QConv2D"] = QConv2D + custom_objects["QDepthwiseConv2D"] = QDepthwiseConv2D + custom_objects["QSeparableConv2D"] = QSeparableConv2D + custom_objects["QDense"] = QDense + custom_objects["QMaxPool2D"] = QMaxPool2D + custom_objects["QAvgPool2D"] = QAvgPool2D + custom_objects["QMaxPooling2D"] = QMaxPool2D + custom_objects["QAveragePooling2D"] = QAvgPool2D + return custom_objects + @adaptor_registry class KerasAdaptor(Adaptor): - '''The keras class of framework adaptor layer. 
+ """The keras class of framework adaptor layer.""" - ''' def __init__(self, framework_specific_info): super(KerasAdaptor, self).__init__(framework_specific_info) self.framework_specific_info = framework_specific_info - self.approach = deep_get(self.framework_specific_info, 'approach', False) - self.quantize_config = {'op_wise_config': {}} - self.device = self.framework_specific_info['device'] - self.backend = self.framework_specific_info['backend'] - #self.work_dir = os.path.abspath(self.framework_specific_info['workspace_path']) - self.recipes = deep_get(self.framework_specific_info, 'recipes', {}) - #os.makedirs(self.work_dir, exist_ok=True) - self.supported_op = ['Conv2D', 'Dense', 'SeparableConv2D', 'DepthwiseConv2D', 'AveragePooling2D', - 'MaxPooling2D', 'AvgPool2D', 'MaxPool2D'] + self.approach = deep_get(self.framework_specific_info, "approach", False) + self.quantize_config = {"op_wise_config": {}} + self.device = self.framework_specific_info["device"] + self.backend = self.framework_specific_info["backend"] + # self.work_dir = os.path.abspath(self.framework_specific_info['workspace_path']) + self.recipes = deep_get(self.framework_specific_info, "recipes", {}) + # os.makedirs(self.work_dir, exist_ok=True) + self.supported_op = [ + "Conv2D", + "Dense", + "SeparableConv2D", + "DepthwiseConv2D", + "AveragePooling2D", + "MaxPooling2D", + "AvgPool2D", + "MaxPool2D", + ] self.pre_optimized_object = None self.pre_optimized_model = None self.pre_optimizer_handle = None self.bf16_ops = [] self.fp32_ops = [] - self.query_handler = KerasQuery(local_config_file=os.path.join( - os.path.dirname(__file__), 'keras.yaml')) + self.query_handler = KerasQuery(local_config_file=os.path.join(os.path.dirname(__file__), "keras.yaml")) self.fp32_results = [] self.fp32_preds_as_label = False - self.benchmark = (GLOBAL_STATE.STATE == MODE.BENCHMARK) + self.benchmark = GLOBAL_STATE.STATE == MODE.BENCHMARK self.callbacks = [] self.optype_statistics = None @@ -91,52 +111,49 @@ def _check_itex(self): try: import intel_extension_for_tensorflow except: - raise ImportError("The Intel® Extension for TensorFlow is not installed. "\ - "Please install it to run models on ITEX backend") + raise ImportError( + "The Intel® Extension for TensorFlow is not installed. 
" + "Please install it to run models on ITEX backend" + ) def tuning_cfg_to_fw(self, tuning_cfg): - self.quantize_config['calib_iteration'] = tuning_cfg['calib_iteration'] - self.quantize_config['device'] = self.device - self.quantize_config['advance'] = deep_get(tuning_cfg, 'advance') + self.quantize_config["calib_iteration"] = tuning_cfg["calib_iteration"] + self.quantize_config["device"] = self.device + self.quantize_config["advance"] = deep_get(tuning_cfg, "advance") fp32_ops = [] bf16_ops = [] - bf16_type = set(self.query_handler.get_op_types_by_precision(precision='bf16')) - dispatched_op_names = [j[0] for j in tuning_cfg['op']] - invalid_op_names = [i for i in self.quantize_config['op_wise_config'] - if i not in dispatched_op_names] + bf16_type = set(self.query_handler.get_op_types_by_precision(precision="bf16")) + dispatched_op_names = [j[0] for j in tuning_cfg["op"]] + invalid_op_names = [i for i in self.quantize_config["op_wise_config"] if i not in dispatched_op_names] for op_name in invalid_op_names: - self.quantize_config['op_wise_config'].pop(op_name) + self.quantize_config["op_wise_config"].pop(op_name) - for each_op_info in tuning_cfg['op']: + for each_op_info in tuning_cfg["op"]: op_name = each_op_info[0] - if tuning_cfg['op'][each_op_info]['activation']['dtype'] == 'bf16': + if tuning_cfg["op"][each_op_info]["activation"]["dtype"] == "bf16": if each_op_info[1] in bf16_type: bf16_ops.append(op_name) continue - if tuning_cfg['op'][each_op_info]['activation']['dtype'] == 'fp32': - if op_name in self.quantize_config['op_wise_config']: - self.quantize_config['op_wise_config'].pop(op_name) + if tuning_cfg["op"][each_op_info]["activation"]["dtype"] == "fp32": + if op_name in self.quantize_config["op_wise_config"]: + self.quantize_config["op_wise_config"].pop(op_name) fp32_ops.append(op_name) continue is_perchannel = False bit = None - if 'weight' in tuning_cfg['op'][each_op_info]: - is_perchannel = tuning_cfg['op'][each_op_info]['weight'][ - 'granularity'] == 'per_channel' - #bit = tuning_cfg['op'][each_op_info]['weight']['bit'] + if "weight" in tuning_cfg["op"][each_op_info]: + is_perchannel = tuning_cfg["op"][each_op_info]["weight"]["granularity"] == "per_channel" + # bit = tuning_cfg['op'][each_op_info]['weight']['bit'] weight_bit = bit if bit else 7.0 - algorithm = tuning_cfg['op'][each_op_info]['activation']['algorithm'] + algorithm = tuning_cfg["op"][each_op_info]["activation"]["algorithm"] is_asymmetric = False - if 'activation' in tuning_cfg['op'][each_op_info]: - is_asymmetric = tuning_cfg['op'][each_op_info]['activation']['scheme'] == 'asym' - self.quantize_config['op_wise_config'][op_name] = (is_perchannel, - algorithm, - is_asymmetric, - weight_bit) + if "activation" in tuning_cfg["op"][each_op_info]: + is_asymmetric = tuning_cfg["op"][each_op_info]["activation"]["scheme"] == "asym" + self.quantize_config["op_wise_config"][op_name] = (is_perchannel, algorithm, is_asymmetric, weight_bit) self.bf16_ops = bf16_ops self.bf16_ops.pop(-1) self.fp32_ops = fp32_ops @@ -153,32 +170,35 @@ def _check_quantize_format(self, model): name_op_map = {} for idx, layer in enumerate(copy.deepcopy(fp32_layers)): - name_op_map[layer['config']['name']] = layer + name_op_map[layer["config"]["name"]] = layer for idx, layer in enumerate(copy.deepcopy(fp32_layers)): - layer_config = layer['config'] - if layer['class_name'] in self.supported_op: - if 'inbound_nodes' in layer: - check_layer = name_op_map[layer['inbound_nodes'][0][0][0]] + layer_config = layer["config"] + if layer["class_name"] in 
self.supported_op: + if "inbound_nodes" in layer: + check_layer = name_op_map[layer["inbound_nodes"][0][0][0]] else: check_layer = fp32_layers[idx - 1] - if check_layer['class_name'] in ['Activation'] and \ - check_layer['config']['activation'] in ['relu']: - self.conv_format[layer['config']['name']] = 'u8' + if check_layer["class_name"] in ["Activation"] and check_layer["config"]["activation"] in ["relu"]: + self.conv_format[layer["config"]["name"]] = "u8" else: - self.conv_format[layer['config']['name']] = 's8' + self.conv_format[layer["config"]["name"]] = "s8" return model def _fuse_bn(self, model): json_model = copy.deepcopy(json.loads(model.to_json())) config = json_model["config"] fp32_layers = config["layers"] - def fuse_conv_bn(conv_weight, bn_weight, conv_type='Conv2D', eps=1.0e-5): - assert conv_type in ['Conv2D', 'DepthwiseConv2D', 'SeparableConv2D'], \ - 'only support Conv2D, DepthwiseConv2D, SeparableConv2D...' + + def fuse_conv_bn(conv_weight, bn_weight, conv_type="Conv2D", eps=1.0e-5): + assert conv_type in [ + "Conv2D", + "DepthwiseConv2D", + "SeparableConv2D", + ], "only support Conv2D, DepthwiseConv2D, SeparableConv2D..." if len(bn_weight) > 3: - if conv_type == 'DepthwiseConv2D': - gamma = bn_weight[0].reshape(1, 1, bn_weight[0].shape[0], 1) + if conv_type == "DepthwiseConv2D": + gamma = bn_weight[0].reshape(1, 1, bn_weight[0].shape[0], 1) var = bn_weight[3].reshape(1, 1, bn_weight[3].shape[0], 1) else: gamma = bn_weight[0].reshape(1, 1, 1, bn_weight[0].shape[0]) @@ -186,10 +206,10 @@ def fuse_conv_bn(conv_weight, bn_weight, conv_type='Conv2D', eps=1.0e-5): beta = bn_weight[1] mean = bn_weight[2] else: - gamma = 1. + gamma = 1.0 beta = bn_weight[0] mean = bn_weight[1] - if conv_type == 'DepthwiseConv2D': + if conv_type == "DepthwiseConv2D": var = bn_weight[2].reshape(1, 1, bn_weight[2].shape[0], 1) else: var = bn_weight[2].reshape(1, 1, 1, bn_weight[2].shape[0]) @@ -197,11 +217,11 @@ def fuse_conv_bn(conv_weight, bn_weight, conv_type='Conv2D', eps=1.0e-5): if len(conv_weight) == 1: weight = conv_weight[0] bias = np.zeros_like(beta) - elif len(conv_weight) == 2 and conv_type == 'SeparableConv2D': + elif len(conv_weight) == 2 and conv_type == "SeparableConv2D": depth_weight = conv_weight[0] weight = conv_weight[1] bias = np.zeros_like(beta) - elif len(conv_weight) == 2 and conv_type != 'SeparableConv2D': + elif len(conv_weight) == 2 and conv_type != "SeparableConv2D": weight = conv_weight[0] bias = conv_weight[1] elif len(conv_weight) == 3: @@ -212,153 +232,163 @@ def fuse_conv_bn(conv_weight, bn_weight, conv_type='Conv2D', eps=1.0e-5): weight = weight * scale_value bias = beta + (bias - mean) * scale_value.reshape(-1) bias = bias.reshape(-1) - return [depth_weight, weight, bias] if conv_type == 'SeparableConv2D' \ - else [weight, bias] + return [depth_weight, weight, bias] if conv_type == "SeparableConv2D" else [weight, bias] node_map = {} for idx, layer in enumerate(copy.deepcopy(fp32_layers)): - layer_config = layer['config'] - if 'inbound_nodes' in layer: - node_map[layer['name']] = layer + layer_config = layer["config"] + if "inbound_nodes" in layer: + node_map[layer["name"]] = layer fuse_layers = [] fold_conv = [] for idx, layer in enumerate(copy.deepcopy(fp32_layers)): - layer_config = layer['config'] - if 'inbound_nodes' in layer: - if layer['class_name'] in ['BatchNormalization']: - bn_inbound_node = node_map[layer_config['name']]['inbound_nodes'][0][0] + layer_config = layer["config"] + if "inbound_nodes" in layer: + if layer["class_name"] in 
["BatchNormalization"]: + bn_inbound_node = node_map[layer_config["name"]]["inbound_nodes"][0][0] if bn_inbound_node[0] in self.conv_weights.keys(): conv_weight = self.conv_weights[bn_inbound_node[0]] conv_layer = node_map[bn_inbound_node[0]] - bn_weight = self.bn_weights[layer_config['name']] + bn_weight = self.bn_weights[layer_config["name"]] self.layer_weights[bn_inbound_node[0]] = fuse_conv_bn( - conv_weight, bn_weight, conv_layer['class_name'], layer['config']['epsilon']) + conv_weight, bn_weight, conv_layer["class_name"], layer["config"]["epsilon"] + ) fold_conv.append(bn_inbound_node[0]) else: fuse_layers.append(layer) - elif len(layer['inbound_nodes']): + elif len(layer["inbound_nodes"]): new_bound_nodes = [] # OpLambda node will have different bound node - if layer['class_name'] in ['TFOpLambda', 'SlicingOpLambda']: + if layer["class_name"] in ["TFOpLambda", "SlicingOpLambda"]: fuse_layers.append(layer) else: - for bound_node in layer['inbound_nodes'][0]: + for bound_node in layer["inbound_nodes"][0]: if bound_node[0] in self.bn_weights.keys(): - bn_inbound_node = node_map[bound_node[0]]['inbound_nodes'][0][0] + bn_inbound_node = node_map[bound_node[0]]["inbound_nodes"][0][0] if bn_inbound_node[0] in self.conv_weights.keys(): new_bound_nodes.append(bn_inbound_node) else: new_bound_nodes.append(bound_node) else: new_bound_nodes.append(bound_node) - layer['inbound_nodes'] = [new_bound_nodes] + layer["inbound_nodes"] = [new_bound_nodes] fuse_layers.append(layer) else: fuse_layers.append(layer) else: - if idx > 0 and layer['class_name'] in ['BatchNormalization'] and \ - fp32_layers[idx - 1]['class_name'] in ['Conv2D']: - conv_name = fp32_layers[idx - 1]['config']['name'] + if ( + idx > 0 + and layer["class_name"] in ["BatchNormalization"] + and fp32_layers[idx - 1]["class_name"] in ["Conv2D"] + ): + conv_name = fp32_layers[idx - 1]["config"]["name"] conv_weight = self.conv_weights[conv_name] - bn_weight = self.bn_weights[layer_config['name']] - conv_type = fp32_layers[idx - 1]['class_name'] + bn_weight = self.bn_weights[layer_config["name"]] + conv_type = fp32_layers[idx - 1]["class_name"] self.layer_weights[conv_name] = fuse_conv_bn( - conv_weight, bn_weight, conv_type, layer['config']['epsilon']) + conv_weight, bn_weight, conv_type, layer["config"]["epsilon"] + ) fold_conv.append(conv_name) else: fuse_layers.append(layer) # bn folding will have a shift bias for idx, layer in enumerate(fuse_layers): - layer_config = layer['config'] - if layer['class_name'] in ['Conv2D', 'DepthwiseConv2D', 'SeparableConv2D'] and \ - layer_config['name'] in fold_conv: - layer_config['use_bias'] = True - - json_model['config']['layers'] = fuse_layers + layer_config = layer["config"] + if ( + layer["class_name"] in ["Conv2D", "DepthwiseConv2D", "SeparableConv2D"] + and layer_config["name"] in fold_conv + ): + layer_config["use_bias"] = True + + json_model["config"]["layers"] = fuse_layers fused_model = self._restore_model_from_json(json_model) return fused_model @dump_elapsed_time("Pass quantize model") def quantize(self, tune_cfg, model, dataloader, q_func=None): - '''Execute the quantize process on the specified model. - - Args: - tune_cfg(dict): The chosen tuning configuration. - model (object): The model to do quantization. - dataloader(object): The dataloader used to load quantization dataset. - q_func (optional): training function for quantization aware training mode. - ''' + """Execute the quantize process on the specified model. + + Args: + tune_cfg(dict): The chosen tuning configuration. 
+ model (object): The model to do quantization. + dataloader(object): The dataloader used to load quantization dataset. + q_func (optional): training function for quantization aware training mode. + """ self.tuning_cfg_to_fw(tune_cfg) # just convert the input model to mixed_bfloat16 - if self.bf16_ops and not self.quantize_config['op_wise_config']: + if self.bf16_ops and not self.quantize_config["op_wise_config"]: converted_model = self.convert_bf16() return converted_model - - if self.backend == 'itex': + + if self.backend == "itex": self._check_itex() logger.debug("Dump quantization configurations:") logger.debug(self.quantize_config) - calib_sampling_size = tune_cfg.get('calib_sampling_size', 1) + calib_sampling_size = tune_cfg.get("calib_sampling_size", 1) if isinstance(dataloader, BaseDataLoader): batch_size = dataloader.batch_size for i in range(batch_size): if calib_sampling_size % (batch_size - i) == 0: calib_batch_size = batch_size - i if i != 0: # pragma: no cover - logger.warning("Reset `calibration.dataloader.batch_size` field " - "to {}".format(calib_batch_size) + - " to make sure the sampling_size is " - "divisible exactly by batch size") + logger.warning( + "Reset `calibration.dataloader.batch_size` field " + "to {}".format(calib_batch_size) + " to make sure the sampling_size is " + "divisible exactly by batch size" + ) break tmp_iterations = int(math.ceil(calib_sampling_size / calib_batch_size)) dataloader.batch(calib_batch_size) - self.quantize_config['calib_iteration'] = tmp_iterations + self.quantize_config["calib_iteration"] = tmp_iterations - else: # pragma: no cover - if hasattr(dataloader, 'batch_size') and \ - calib_sampling_size % dataloader.batch_size != 0: - iter = self.quantize_config['calib_iteration'] + else: # pragma: no cover + if hasattr(dataloader, "batch_size") and calib_sampling_size % dataloader.batch_size != 0: + iter = self.quantize_config["calib_iteration"] logger.warning( - "Please note that calibration sampling size {} " \ - "isn't divisible exactly by batch size {}. " \ - "So the real sampling size is {}.". - format(calib_sampling_size, dataloader.batch_size, - dataloader.batch_size * iter)) + "Please note that calibration sampling size {} " + "isn't divisible exactly by batch size {}. 
" + "So the real sampling size is {}.".format( + calib_sampling_size, dataloader.batch_size, dataloader.batch_size * iter + ) + ) q_layers = [] self.inbound_nodes_map = {} for idx, layer in enumerate(copy.deepcopy(self.fp32_layers)): - layer_config = layer["config"] - if layer["class_name"] in self.supported_op and \ - layer['config']['name'] in self.quantize_config['op_wise_config']: - op_config = self.quantize_config['op_wise_config'][layer['config']['name']] - mode = 'per_channel' if op_config[0] else 'per_tensor' - fake_q_name = 'fake_quant_' + str(idx) - fake_q_layer = {'class_name': 'FakeQuant', - 'name': fake_q_name, - 'T': self.conv_format[layer['config']['name']], - 'config': {'mode': 'per_tensor', 'name': fake_q_name}, - } - if 'inbound_nodes' in layer: - fake_q_layer['inbound_nodes'] = layer['inbound_nodes'] - layer['inbound_nodes'] = [[[fake_q_name, 0, 0, {}]]] - self.inbound_nodes_map[fake_q_name] = layer - - q_layers.append(fake_q_layer) - q_layers.append(layer) - else: - q_layers.append(layer) - + layer_config = layer["config"] + if ( + layer["class_name"] in self.supported_op + and layer["config"]["name"] in self.quantize_config["op_wise_config"] + ): + op_config = self.quantize_config["op_wise_config"][layer["config"]["name"]] + mode = "per_channel" if op_config[0] else "per_tensor" + fake_q_name = "fake_quant_" + str(idx) + fake_q_layer = { + "class_name": "FakeQuant", + "name": fake_q_name, + "T": self.conv_format[layer["config"]["name"]], + "config": {"mode": "per_tensor", "name": fake_q_name}, + } + if "inbound_nodes" in layer: + fake_q_layer["inbound_nodes"] = layer["inbound_nodes"] + layer["inbound_nodes"] = [[[fake_q_name, 0, 0, {}]]] + self.inbound_nodes_map[fake_q_name] = layer + + q_layers.append(fake_q_layer) + q_layers.append(layer) + else: + q_layers.append(layer) + json_model = copy.deepcopy(json.loads(self.pre_optimized_object.to_json())) - json_model['config']['layers'] = q_layers + json_model["config"]["layers"] = q_layers quantized_model = self._restore_model_from_json(json_model) - converted_model = self._calibrate(quantized_model, dataloader, - self.quantize_config['calib_iteration']) + converted_model = self._calibrate(quantized_model, dataloader, self.quantize_config["calib_iteration"]) from neural_compressor.model.keras_model import KerasModel + converted_model = KerasModel(converted_model) return converted_model @@ -372,18 +402,17 @@ def _calibrate(self, model, dataloader, calib_interation): config = json_model["config"] layers = config["layers"] for layer in layers: - if layer['class_name'] == 'FakeQuant': - min_value = layer['config']['min_value'] - max_value = layer['config']['max_value'] - if layer['config']['name'] not in results: - results[layer['config']['name']] = { - 'min': [min_value], 'max': [max_value]} + if layer["class_name"] == "FakeQuant": + min_value = layer["config"]["min_value"] + max_value = layer["config"]["max_value"] + if layer["config"]["name"] not in results: + results[layer["config"]["name"]] = {"min": [min_value], "max": [max_value]} else: - results[layer['config']['name']]['min'].append(min_value) - results[layer['config']['name']]['max'].append(max_value) - if idx + 1 == calib_interation: + results[layer["config"]["name"]]["min"].append(min_value) + results[layer["config"]["name"]]["max"].append(max_value) + if idx + 1 == calib_interation: break - + # insert the calibrated min/max to Q/DQ json_model = copy.deepcopy(json.loads(model.to_json())) config = json_model["config"] @@ -391,100 +420,105 @@ def _calibrate(self, 
model, dataloader, calib_interation): q_layers = [] # quantize_mode = self._check_quantize_mode(json_model) inbound_reverse_map = {} - for idx, layer in enumerate(layers): - layer_config = copy.deepcopy(layer['config']) - if layer['class_name'] == 'FakeQuant': - min_value = min(results[layer['config']['name']]['min']) - max_value = max(results[layer['config']['name']]['max']) - quantize_layer = {'class_name': 'Quantize', - 'name': 'quantize_' + str(idx), - 'config': {'min_range': min_value, - 'max_range': max_value, - 'T': layer_config['T'], - 'name': 'quantize_' + str(idx), - }} - dequantize_layer = {'class_name': 'DeQuantize', - 'name': 'dequantize_' + str(idx), - 'config': {'min_range': min_value, - 'max_range': max_value, - # 'mode': quantize_mode, - 'name': 'dequantize_' + str(idx), - }} - if 'inbound_nodes' in layer: - quantize_layer['inbound_nodes'] = layer['inbound_nodes'] - dequantize_layer['inbound_nodes'] = [[['quantize_' + str(idx), 0, 0, {}]]] - # find the conv/dense layer from fake quant map and + for idx, layer in enumerate(layers): + layer_config = copy.deepcopy(layer["config"]) + if layer["class_name"] == "FakeQuant": + min_value = min(results[layer["config"]["name"]]["min"]) + max_value = max(results[layer["config"]["name"]]["max"]) + quantize_layer = { + "class_name": "Quantize", + "name": "quantize_" + str(idx), + "config": { + "min_range": min_value, + "max_range": max_value, + "T": layer_config["T"], + "name": "quantize_" + str(idx), + }, + } + dequantize_layer = { + "class_name": "DeQuantize", + "name": "dequantize_" + str(idx), + "config": { + "min_range": min_value, + "max_range": max_value, + # 'mode': quantize_mode, + "name": "dequantize_" + str(idx), + }, + } + if "inbound_nodes" in layer: + quantize_layer["inbound_nodes"] = layer["inbound_nodes"] + dequantize_layer["inbound_nodes"] = [[["quantize_" + str(idx), 0, 0, {}]]] + # find the conv/dense layer from fake quant map and # change the conv/dense node inbound to dequantize - layer_name = self.inbound_nodes_map[layer['name']]['name'] - inbound_reverse_map[layer_name] = [[['dequantize_' + str(idx), 0, 0, {}]]] - + layer_name = self.inbound_nodes_map[layer["name"]]["name"] + inbound_reverse_map[layer_name] = [[["dequantize_" + str(idx), 0, 0, {}]]] + q_layers.append(quantize_layer) q_layers.append(dequantize_layer) - elif layer['class_name'] in self.supported_op and \ - layer['config']['name'] in self.quantize_config['op_wise_config']: + elif ( + layer["class_name"] in self.supported_op + and layer["config"]["name"] in self.quantize_config["op_wise_config"] + ): # index 0 is weight, index 1 is bias - q_layer_name = 'Q' + layer['class_name'] + q_layer_name = "Q" + layer["class_name"] # this is for inbounds search - q_name = layer['config']['name'] + q_name = layer["config"]["name"] # for layers that have weights - if layer['config']['name'] in self.layer_weights: - kernel = self.layer_weights[layer['config']['name']][0] - dim = list(range(0, kernel.ndim)) + if layer["config"]["name"] in self.layer_weights: + kernel = self.layer_weights[layer["config"]["name"]][0] + dim = list(range(0, kernel.ndim)) t_dim = [dim.pop(-1)] t_dim.extend(dim) channel_size = kernel.shape[-1] kernel_channel = kernel.transpose(t_dim).reshape(channel_size, -1) - layer_config['min_value'] = json.dumps(\ - np.min(kernel_channel, axis=1).tolist()) - layer_config['max_value'] = json.dumps(\ - np.max(kernel_channel, axis=1).tolist()) + layer_config["min_value"] = json.dumps(np.min(kernel_channel, axis=1).tolist()) + layer_config["max_value"] = 
json.dumps(np.max(kernel_channel, axis=1).tolist()) else: # default value, but never expected to be used # cause no kernel weights for this layer - layer_config['min_value'] = json.dumps([-10000]) - layer_config['max_value'] = json.dumps([10000]) - layer_config['name'] = q_name - q_layer = {'class_name': q_layer_name, - 'name': q_name, - 'config': layer_config} - if 'inbound_nodes' in layer: - q_layer['inbound_nodes'] = inbound_reverse_map[layer['name']] + layer_config["min_value"] = json.dumps([-10000]) + layer_config["max_value"] = json.dumps([10000]) + layer_config["name"] = q_name + q_layer = {"class_name": q_layer_name, "name": q_name, "config": layer_config} + if "inbound_nodes" in layer: + q_layer["inbound_nodes"] = inbound_reverse_map[layer["name"]] q_layers.append(q_layer) else: q_layers.append(layer) - json_model['config']['layers'] = q_layers + json_model["config"]["layers"] = q_layers quantized_model = self._restore_model_from_json(json_model) return quantized_model def convert_bf16(self): - '''Execute the BF16 conversion. - ''' - tf.keras.mixed_precision.set_global_policy('mixed_bfloat16') + """Execute the BF16 conversion.""" + tf.keras.mixed_precision.set_global_policy("mixed_bfloat16") json_model = copy.deepcopy(json.loads(self.pre_optimized_object.to_json())) - for layer in json_model['config']['layers']: - if layer['config']['name'] in self.bf16_ops: - layer['config']['dtype'] = 'mixed_bfloat16' - + for layer in json_model["config"]["layers"]: + if layer["config"]["name"] in self.bf16_ops: + layer["config"]["dtype"] = "mixed_bfloat16" + converted_model = self._restore_model_from_json(json_model) - tf.keras.mixed_precision.set_global_policy('float32') - + tf.keras.mixed_precision.set_global_policy("float32") + from neural_compressor.model.keras_model import KerasModel + converted_model = KerasModel(converted_model) return converted_model - #(TODO) choose the properly quantize mode + # (TODO) choose the properly quantize mode def _check_quantize_mode(self, json_model): config = json_model["config"] layers = config["layers"] - for idx, layer in enumerate(layers): - if 'ReLU' in layer['class_name']: - return 'MIN_FIRST' - return 'SCALED' + for idx, layer in enumerate(layers): + if "ReLU" in layer["class_name"]: + return "MIN_FIRST" + return "SCALED" def _restore_model_from_json(self, json_model): from tensorflow.keras.models import model_from_json + custom_objects = {} # We need to keep a dictionary of custom objects as our quantized library # is not recognized by keras. @@ -502,31 +536,39 @@ def _set_weights(self, qmodel, layer_weights): qlayer.set_weights(layer_weights[qlayer.name]) else: hit_layer = False - for sub_layer in qlayer.submodules: + for sub_layer in qlayer.submodules: if sub_layer.name in layer_weights: qlayer.set_weights(layer_weights[sub_layer.name]) hit_layer = True break if not hit_layer: - raise ValueError('Can not match the module weights....') + raise ValueError("Can not match the module weights....") return qmodel @dump_elapsed_time(customized_msg="Model inference") - def evaluate(self, model, dataloader, postprocess=None, - metrics=None, measurer=None, iteration=-1, - tensorboard=False, fp32_baseline=False): - '''The function is used to run evaluation on validation dataset. - - Args: - model (object): The model to do calibration. - dataloader (generator): generate the data and labels. - postprocess (object, optional): process the result from the model - metric (object, optional): Depends on model category. Defaults to None. 
- measurer (object, optional): for precise benchmark measurement. - iteration(int, optional): control steps of mini-batch - tensorboard (boolean, optional): for tensorboard inspect tensor. - fp32_baseline (boolen, optional): only for compare_label=False pipeline - ''' + def evaluate( + self, + model, + dataloader, + postprocess=None, + metrics=None, + measurer=None, + iteration=-1, + tensorboard=False, + fp32_baseline=False, + ): + """The function is used to run evaluation on validation dataset. + + Args: + model (object): The model to evaluate. + dataloader (generator): generates the data and labels. + postprocess (object, optional): process the result from the model + metrics (list, optional): Depends on model category. Defaults to None. + measurer (object, optional): for precise benchmark measurement. + iteration (int, optional): control steps of mini-batch + tensorboard (boolean, optional): for tensorboard inspect tensor. + fp32_baseline (boolean, optional): only for compare_label=False pipeline + """ # use keras object keras_model = model.model logger.info("Start to evaluate the Keras model.") @@ -541,15 +583,15 @@ def evaluate(self, model, dataloader, postprocess=None, predictions = keras_model.predict_on_batch(inputs) if self.fp32_preds_as_label: - self.fp32_results.append(predictions) if fp32_baseline else \ - results.append(predictions) + self.fp32_results.append(predictions) if fp32_baseline else results.append(predictions) if postprocess is not None: predictions, labels = postprocess((predictions, labels)) if metrics: for metric in metrics: - if not hasattr(metric, "compare_label") or \ - (hasattr(metric, "compare_label") and metric.compare_label): + if not hasattr(metric, "compare_label") or ( + hasattr(metric, "compare_label") and metric.compare_label + ): metric.update(predictions, labels) if idx + 1 == iteration: break @@ -559,22 +601,22 @@ def evaluate(self, model, dataloader, postprocess=None, return acc if not isinstance(acc, list) or len(acc) > 1 else acc[0] def query_fw_capability(self, model): - '''The function is used to return framework tuning capability. - - Args: - model (object): The model to query quantization tuning capability. - ''' - fp32_config = {'weight': {'dtype': 'fp32'}, 'activation': {'dtype': 'fp32'}} - bf16_config = {'weight': {'dtype': 'bf16'}, 'activation': {'dtype': 'bf16'}} - int8_type = self.query_handler.get_op_types_by_precision(precision='int8') + """The function is used to return framework tuning capability. + + Args: + model (object): The model to query quantization tuning capability.
+ """ + fp32_config = {"weight": {"dtype": "fp32"}, "activation": {"dtype": "fp32"}} + bf16_config = {"weight": {"dtype": "bf16"}, "activation": {"dtype": "bf16"}} + int8_type = self.query_handler.get_op_types_by_precision(precision="int8") op_capability = self.query_handler.get_quantization_capability() - conv_config = copy.deepcopy(op_capability['int8']['Conv2D']) - conv_config = copy.deepcopy(op_capability['int8']['SeparableConv2D']) - conv_config = copy.deepcopy(op_capability['int8']['DepthwiseConv2D']) - dense_config = copy.deepcopy(op_capability['int8']['Dense']) - maxpool_config = copy.deepcopy(op_capability['int8']['MaxPooling2D']) - avgpool_config = copy.deepcopy(op_capability['int8']['AveragePooling2D']) - other_config = copy.deepcopy(op_capability['int8']['default']) + conv_config = copy.deepcopy(op_capability["int8"]["Conv2D"]) + conv_config = copy.deepcopy(op_capability["int8"]["SeparableConv2D"]) + conv_config = copy.deepcopy(op_capability["int8"]["DepthwiseConv2D"]) + dense_config = copy.deepcopy(op_capability["int8"]["Dense"]) + maxpool_config = copy.deepcopy(op_capability["int8"]["MaxPooling2D"]) + avgpool_config = copy.deepcopy(op_capability["int8"]["AveragePooling2D"]) + other_config = copy.deepcopy(op_capability["int8"]["default"]) # # get fp32 layer weights keras_object = model._model_object @@ -583,9 +625,11 @@ def query_fw_capability(self, model): self.layer_weights = {} for layer in keras_object.layers: if layer.get_weights(): - if isinstance(layer, tf.keras.layers.Conv2D) or \ - isinstance(layer, tf.keras.layers.DepthwiseConv2D) or \ - isinstance(layer, tf.keras.layers.SeparableConv2D): + if ( + isinstance(layer, tf.keras.layers.Conv2D) + or isinstance(layer, tf.keras.layers.DepthwiseConv2D) + or isinstance(layer, tf.keras.layers.SeparableConv2D) + ): self.conv_weights[layer.name] = copy.deepcopy(layer.get_weights()) elif isinstance(layer, tf.keras.layers.BatchNormalization): self.bn_weights[layer.name] = copy.deepcopy(layer.get_weights()) @@ -593,6 +637,7 @@ def query_fw_capability(self, model): self.pre_optimized_object = self._pre_optimize(keras_object) from neural_compressor.model.keras_model import KerasModel + self.pre_optimized_model = KerasModel(self.pre_optimized_object) json_model = copy.deepcopy(json.loads(self.pre_optimized_object.to_json())) config = json_model["config"] @@ -600,22 +645,22 @@ def query_fw_capability(self, model): quantizable_op_details = OrderedDict() for details in self.fp32_layers: - node_op = details['class_name'] - node_name = details['config']['name'] - if node_op == 'Conv2D': + node_op = details["class_name"] + node_name = details["config"]["name"] + if node_op == "Conv2D": quantizable_op_details[(node_name, node_op)] = [conv_config, bf16_config, fp32_config] - elif node_op == 'Dense': + elif node_op == "Dense": quantizable_op_details[(node_name, node_op)] = [dense_config, bf16_config, fp32_config] - elif node_op in {'AveragePooling2D', 'AvgPool2D'}: + elif node_op in {"AveragePooling2D", "AvgPool2D"}: quantizable_op_details[(node_name, node_op)] = [avgpool_config, bf16_config, fp32_config] - elif node_op in {'MaxPooling2D', 'MaxPool2D'}: + elif node_op in {"MaxPooling2D", "MaxPool2D"}: quantizable_op_details[(node_name, node_op)] = [maxpool_config, bf16_config, fp32_config] else: quantizable_op_details[(node_name, node_op)] = [bf16_config, fp32_config] capability = { - 'opwise': copy.deepcopy(quantizable_op_details), - 'optypewise': self.get_optype_wise_ability(quantizable_op_details), + "opwise": 
copy.deepcopy(quantizable_op_details), + "optypewise": self.get_optype_wise_ability(quantizable_op_details), } logger.debug("Dump framework quantization capability:") logger.debug(capability) @@ -624,6 +669,7 @@ def query_fw_capability(self, model): def get_optype_wise_ability(self, quantizable_op_details): """Get the op type wise capability by generating the union value of each op type. + Returns: [string dict]: the key is op type while the value is the detail configurations of activation and weight for this op type. @@ -631,143 +677,143 @@ def get_optype_wise_ability(self, quantizable_op_details): res = OrderedDict() for op in quantizable_op_details: if op[1] not in res: - res[op[1]] = {'activation': quantizable_op_details[op][0]['activation']} - if 'weight' in quantizable_op_details[op][0]: - res[op[1]]['weight'] = quantizable_op_details[op][0]['weight'] + res[op[1]] = {"activation": quantizable_op_details[op][0]["activation"]} + if "weight" in quantizable_op_details[op][0]: + res[op[1]]["weight"] = quantizable_op_details[op][0]["weight"] return res - def inspect_tensor(self, model, dataloader, op_list=[], iteration_list=[], - inspect_type='activation', save_to_disk=False): - '''The function is used by tune strategy class for dumping tensor info. - - Args: - model (object): The model to inspect. - dataloader (object): The dataloader used to feed into. - op_list (list): The op name in the fp32 model for dumpping. - iteration_list (list): The iteration list containing iterations to dump. - inspect_type (str): The valid value are 'weight', 'activation', 'all'. - save_to_disk (bool): Save to disk or memory. - - Return: - Numpy Array Dict - { - 'weight': { - 'node0_name': {'weight0_name': numpy.array, 'bias0_name': numpy.array, ...}, - 'node1_name': {'weight1_name': numpy.array, 'bias1_name': numpy.array, ...}, - ... - }, - 'activation': [ - # iter 0 - { - 'node0_name': {'output0_name': numpy.array, 'output1_name': numpy.array, ...} - 'node1_name': {'output1_name': numpy.array, 'output1_name': numpy.array, ...} - ... - }, - # iter 1 - ... - ] - } - ''' - assert inspect_type in ['weight', 'activation', 'all'], \ - 'Inspect type only support weight, activation or all' - from keras import backend as K # pylint: disable=E0401 + def inspect_tensor( + self, model, dataloader, op_list=[], iteration_list=[], inspect_type="activation", save_to_disk=False + ): + """The function is used by tune strategy class for dumping tensor info. + + Args: + model (object): The model to inspect. + dataloader (object): The dataloader used to feed into. + op_list (list): The op name in the fp32 model for dumpping. + iteration_list (list): The iteration list containing iterations to dump. + inspect_type (str): The valid value are 'weight', 'activation', 'all'. + save_to_disk (bool): Save to disk or memory. + + Return: + Numpy Array Dict + { + 'weight': { + 'node0_name': {'weight0_name': numpy.array, 'bias0_name': numpy.array, ...}, + 'node1_name': {'weight1_name': numpy.array, 'bias1_name': numpy.array, ...}, + ... + }, + 'activation': [ + # iter 0 + { + 'node0_name': {'output0_name': numpy.array, 'output1_name': numpy.array, ...} + 'node1_name': {'output1_name': numpy.array, 'output1_name': numpy.array, ...} + ... + }, + # iter 1 + ... 
+ ] + } + """ + assert inspect_type in ["weight", "activation", "all"], "Inspect type only support weight, activation or all" + from keras import backend as K # pylint: disable=E0401 + tensor_out = {} inp = model.input outputs = [(layer.name, layer.output) for layer in model.layers] outputs = [(name, out) for (name, out) in outputs if name in op_list] if len(op_list) else outputs - if inspect_type == 'weight' or inspect_type == 'all': + if inspect_type == "weight" or inspect_type == "all": weights = [(layer.name, layer.get_weights()) for layer in model.layers if layer.get_weights()] weights = [(name, wei) for (name, wei) in weights if name in op_list] if len(op_list) else weights - tensor_out['weight'] = weights + tensor_out["weight"] = weights functors = [(name, K.function([inp], [out])) for (name, out) in outputs] iterations = max(iteration_list) if iteration_list is not None else -1 observer_dict = {} ret = {} - if inspect_type == 'activation' or inspect_type == 'all': + if inspect_type == "activation" or inspect_type == "all": activation_list = [] for idx, (inputs, labels) in enumerate(dataloader): layer_outs = [(name, func([inputs])) for (name, func) in functors] iter_map = {} - for (name, out_list) in layer_outs: - iter_map[name] = dict([(name+':'+str(i), out) for i, out in enumerate(out_list)]) + for name, out_list in layer_outs: + iter_map[name] = dict([(name + ":" + str(i), out) for i, out in enumerate(out_list)]) activation_list.append(iter_map) if idx == iterations: break - tensor_out['activation'] = [acti for idx, acti in enumerate(activation_list) if idx in iteration_list] + tensor_out["activation"] = [acti for idx, acti in enumerate(activation_list) if idx in iteration_list] return tensor_out def set_tensor(self, model, tensor_dict): - '''The function is used by tune strategy class for setting tensor back to model. - - Args: - model (object): The model to set tensor. Usually it is quantized model. - tensor_dict (dict): The tensor dict to set. Note the numpy array contains float - value, adaptor layer has the responsibility to quantize to - int8 or int32 to set into the quantized model if needed. - The dict format is something like: - { - 'weight0_name': numpy.array, - 'bias0_name': numpy.array, - ... - } - ''' + """The function is used by tune strategy class for setting tensor back to model. + + Args: + model (object): The model to set tensor. Usually it is quantized model. + tensor_dict (dict): The tensor dict to set. Note the numpy array contains float + value, adaptor layer has the responsibility to quantize to + int8 or int32 to set into the quantized model if needed. + The dict format is something like: + { + 'weight0_name': numpy.array, + 'bias0_name': numpy.array, + ... + } + """ pass def quantize_input(self, model): - ''' quantize the model to be able to take quantized input + """Quantize the model to be able to take quantized input. - Args: - model (object): The model to quantize input + Args: + model (object): The model to quantize input - Return: - model (object): The quantized input model - scale (float): The scale for dataloader to generate quantized input - ''' - return model, 1. + Return: + model (object): The quantized input model + scale (float): The scale for dataloader to generate quantized input + """ + return model, 1.0 def _pre_eval_hook(self, model, *args, **kwargs): - '''The function is used to do some preprocession before evaluation phase. + """The function is used to do some preprocession before evaluation phase. 
Return: model - ''' + """ return model def _post_eval_hook(self, model, *args, **kwargs): - '''The function is used to do some post process after complete evaluation. - ''' + """The function is used to do some post process after complete evaluation.""" pass def save(self, model, path): - '''The function is used by tune strategy class for saving model. + """The function is used by tune strategy class for saving model. - Args: - model (object): The model to saved. - path (string): The path where to save. - ''' + Args: + model (object): The model to saved. + path (string): The path where to save. + """ model.save(path) def convert(self, model, source, destinatin): - '''The function is used to convert a source model format to another. + """The function is used to convert a source model format to another. - Args: - model (neural_compressor.model): base model to be converted. - source (string): The source model format. - destination (string): The destination model format. - ''' + Args: + model (neural_compressor.model): base model to be converted. + source (string): The source model format. + destination (string): The destination model format. + """ pass def _pre_hook_for_hvd(self, dataloader=None): """Pre hook for Horovod.""" import horovod.tensorflow as hvd + self.hvd = hvd self.hvd.init() @dump_elapsed_time(customized_msg="Model training") - def train(self, model, dataloader, optimizer_tuple, - criterion_tuple, hooks, postprocess, **kwargs): + def train(self, model, dataloader, optimizer_tuple, criterion_tuple, hooks, postprocess, **kwargs): """Model training API. Args: @@ -784,40 +830,46 @@ def train(self, model, dataloader, optimizer_tuple, """ # check model is savedmodel or not import tensorflow as tf + from neural_compressor.model.tensorflow_model import get_model_type + tf.random.set_seed(1) self.model_type = get_model_type(model._model) optimizer = optimizer_tuple[0](**optimizer_tuple[1]) criterion = criterion_tuple[0](**criterion_tuple[1]) - start_epochs = kwargs['kwargs'].get('start_epoch', None) - end_epochs = kwargs['kwargs'].get('end_epoch', None) - epochs = kwargs['kwargs'].get('epoch', None) - iters = kwargs['kwargs'].get('iteration', None) - callbacks = kwargs['kwargs'].get('callbacks', None) - execution_mode = kwargs['kwargs'].get('execution_mode', None) - distributed = getattr(dataloader, 'distributed', False) + start_epochs = kwargs["kwargs"].get("start_epoch", None) + end_epochs = kwargs["kwargs"].get("end_epoch", None) + epochs = kwargs["kwargs"].get("epoch", None) + iters = kwargs["kwargs"].get("iteration", None) + callbacks = kwargs["kwargs"].get("callbacks", None) + execution_mode = kwargs["kwargs"].get("execution_mode", None) + distributed = getattr(dataloader, "distributed", False) if isinstance(model._model, tf.keras.Model): input_model = model._model else: input_model = tf.keras.models.load_model(model._model) # hooks = callbacks['tf_pruning'](model, input_model, hooks) - hooks['on_train_begin']() # on_train_begin hook + hooks["on_train_begin"]() # on_train_begin hook train_loss_results = [] if distributed: try: len_dataloader = len(dataloader) except: - logger.info("The length of the distributed training dataloader is unknown." - "When the iteration of training dataloader in each process is " - "inconsistent, an error may occur.") + logger.info( + "The length of the distributed training dataloader is unknown." + "When the iteration of training dataloader in each process is " + "inconsistent, an error may occur." 
+ ) else: list_len_dataloader = self.hvd.allgather_object(len_dataloader) if self.hvd.rank() == 0: - for i in range(len(list_len_dataloader)-1): - if list_len_dataloader[i] != list_len_dataloader[i+1]: - raise AttributeError("The traning dataloader's iteration is" - "different between processes, please reset dataloader's batch_size.") + for i in range(len(list_len_dataloader) - 1): + if list_len_dataloader[i] != list_len_dataloader[i + 1]: + raise AttributeError( + "The training dataloader's iteration is " + "different between processes, please reset dataloader's batch_size." + ) def training_step(x, y, first_batch): with tf.GradientTape() as tape: @@ -827,42 +879,45 @@ def training_step(x, y, first_batch): tape = self.hvd.DistributedGradientTape(tape) if distributed else tape # Get gradient - grads = tape.gradient(loss_value, input_model.trainable_variables) # pylint: disable=no-member + grads = tape.gradient(loss_value, input_model.trainable_variables) # pylint: disable=no-member # Optimize the model - optimizer.apply_gradients(zip(grads, input_model.trainable_variables)) # pylint: disable=no-member + optimizer.apply_gradients(zip(grads, input_model.trainable_variables)) # pylint: disable=no-member if distributed and first_batch: self.hvd.broadcast_variables(input_model.variables, root_rank=0) self.hvd.broadcast_variables(optimizer.variables(), root_rank=0) return loss_value - training_step = training_step if execution_mode=='eager' else tf.function(training_step) + training_step = training_step if execution_mode == "eager" else tf.function(training_step) if start_epochs is not None and end_epochs is not None: epochs = end_epochs - start_epochs - + for epoch in range(epochs): cnt = 0 epoch_loss_avg = tf.keras.metrics.Mean() # Training loop for iter, data in enumerate(dataloader): x, y = postprocess(data) if postprocess is not None else data - hooks['on_step_begin'](iter) # on_step_begin hook + hooks["on_step_begin"](iter) # on_step_begin hook cnt += 1 - loss_value = training_step(x, y, iter==0) + loss_value = training_step(x, y, iter == 0) # Track progress epoch_loss_avg.update_state(loss_value) # Add current batch loss - hooks['on_before_optimizer_step']() - hooks['on_after_optimizer_step']() + hooks["on_before_optimizer_step"]() + hooks["on_after_optimizer_step"]() if iters is not None and cnt >= iters: break model._sess = None # End epoch train_loss_results.append(epoch_loss_avg.result()) if distributed: - logger.info("Epoch-{:03d} training on rank {!s} have been done."
\ - .format(epoch+1, self.hvd.allgather_object(self.hvd.rank()))) - logger.info("Epoch {:03d}: Loss: {:.3f}".format(epoch+1, epoch_loss_avg.result())) - - hooks['on_train_end']() # on_train_end hook + logger.info( + "Epoch-{:03d} training on rank {!s} has been done.".format( + epoch + 1, self.hvd.allgather_object(self.hvd.rank()) + ) + ) + logger.info("Epoch {:03d}: Loss: {:.3f}".format(epoch + 1, epoch_loss_avg.result())) + + hooks["on_train_end"]() # on_train_end hook model._sess = None if distributed: @@ -873,8 +928,7 @@ def training_step(x, y, first_batch): else: input_model.save(model._model) rank_list = self.hvd.allgather_object(self.hvd.rank()) - logger.info(f"rank 0 has saved the pruned model to '{model._model}'," - f"all ranks {rank_list} ready.") + logger.info(f"rank 0 has saved the pruned model to '{model._model}', " f"all ranks {rank_list} ready.") else: if isinstance(model._model, tf.keras.Model): model._model = input_model @@ -898,8 +952,9 @@ def _one_shot_query(self): except Exception as e: logger.info("Fail to parse {} due to {}.".format(self.cfg, str(e))) self.cur_config = None - raise ValueError("Please check if the format of {} follows Neural Compressor yaml schema.". - format(self.cfg)) + raise ValueError( + "Please check if the format of {} follows Neural Compressor yaml schema.".format(self.cfg) + ) def _get_specified_version_cfg(self, data): """Get the configuration for the current runtime. @@ -914,10 +969,10 @@ def _get_specified_version_cfg(self, data): """ default_config = None for sub_data in data: - if sub_data['version']['name'] == self.version: + if sub_data["version"]["name"] == self.version: return sub_data - if sub_data['version']['name'] == 'default': + if sub_data["version"]["name"] == "default": default_config = sub_data return default_config @@ -928,7 +983,7 @@ def get_version(self): Returns: [string]: version string. """ - return self.cur_config['version']['name'] + return self.cur_config["version"]["name"] def get_precisions(self): """Get supported precisions for current backend. @@ -936,7 +991,7 @@ def get_precisions(self): Returns: [string list]: the precisions' name. """ - return self.cur_config['precisions']['names'] + return self.cur_config["precisions"]["names"] def get_op_types(self): """Get the supported op types by all precisions. @@ -945,7 +1000,7 @@ def get_op_types(self): Returns: [dictionary list]: A list composed of dictionary which key is precision and value is the op types. """ - return self.cur_config['ops'] + return self.cur_config["ops"] def get_quantization_capability(self): """Get the supported op types' quantization capability. @@ -954,10 +1009,10 @@ def get_quantization_capability(self): Returns: [dictionary list]: A list composed of dictionary which key is precision and value is a dict that describes all op types' quantization capability. """ - return self.cur_config['capabilities'] + return self.cur_config["capabilities"] def get_op_types_by_precision(self, precision): - """Get op types per precision + """Get op types per precision. Args: precision (string): precision name Returns: [string list]: A list composed of op type.
""" - assert precision in list(self.cur_config['ops'].keys()) - return self.cur_config['ops'][precision] + assert precision in list(self.cur_config["ops"].keys()) + return self.cur_config["ops"][precision] diff --git a/neural_compressor/adaptor/keras.yaml b/neural_compressor/adaptor/keras.yaml index f41e826b13e..055d3a28530 100644 --- a/neural_compressor/adaptor/keras.yaml +++ b/neural_compressor/adaptor/keras.yaml @@ -16,33 +16,33 @@ - version: name: 'default' - + precisions: &common_precisions names: int8, fp32 valid_mixed_precisions: [] - + ops: &common_ops int8: ['Conv2D', 'SeparableConv2D', 'DepthwiseConv2D', 'Dense', 'AveragePooling2D', 'MaxPooling2D', 'AvgPool2D', 'MaxPool2D'] - bf16: ['Dense', 'Conv1D', 'Conv2D', 'Conv3D', 'SeparableConv1D', 'SeparableConv2D', 'Conv1DTranspose', - 'Conv2DTranspose', 'Conv3DTranspose', 'DepthwiseConv2D', 'AveragePooling2D', 'MaxPooling2D', - 'AvgPool2D', 'MaxPool2D', 'MaxPooling1D', 'MaxPooling3D', 'AveragePooling1D', 'AveragePooling3D', - 'GlobalMaxPooling1D', 'GlobalMaxPooling2D', 'GlobalMaxPooling3D', 'GlobalAveragePooling1D', - 'GlobalAveragePooling2D', 'GlobalAveragePooling3D','SimpleRNN', 'TimeDistributed', 'ConvLSTM1D', - 'ConvLSTM2D', 'ConvLSTM3D', 'TextVectorization', 'Discretization', 'CategoryEncoding', 'Hashing', - 'StringLookup', 'IntegerLookup', 'Resizing', 'Rescaling', 'CenterCrop', 'RandomCrop', 'RandomFlip', - 'RandomTranslation', 'Activation', 'RandomRotation', 'RandomZoom', 'RandomHeight', 'RandomWidth', - 'RandomContrast', 'RandomBrightness', 'Normalization', 'BatchNormalization', 'LayerNormalization', - 'UnitNormalization', 'GroupNormalization', 'Dropout', 'SpatialDropout1D', 'SpatialDropout2D', - 'SpatialDropout3D', 'GaussianDropout', 'GaussianNoise', 'ActivityRegularization', 'AlphaDropout', - 'MultiHeadAttention', 'Attention', 'AdditiveAttention', 'Reshape', 'Flatten', 'RepeatVector', - 'Permute', 'Cropping1D', 'Cropping2D', 'Cropping3D', 'UpSampling1D', 'UpSampling2D', 'UpSampling3D', - 'ZeroPadding1D', 'ZeroPadding2D', 'ZeroPadding3D', 'Concatenate', 'Average', 'Maximum', 'Minimum', - 'Add', 'Subtract', 'Multiply', 'Dot', 'LocallyConnected1D', 'LocallyConnected2D', 'Embedding', + bf16: ['Dense', 'Conv1D', 'Conv2D', 'Conv3D', 'SeparableConv1D', 'SeparableConv2D', 'Conv1DTranspose', + 'Conv2DTranspose', 'Conv3DTranspose', 'DepthwiseConv2D', 'AveragePooling2D', 'MaxPooling2D', + 'AvgPool2D', 'MaxPool2D', 'MaxPooling1D', 'MaxPooling3D', 'AveragePooling1D', 'AveragePooling3D', + 'GlobalMaxPooling1D', 'GlobalMaxPooling2D', 'GlobalMaxPooling3D', 'GlobalAveragePooling1D', + 'GlobalAveragePooling2D', 'GlobalAveragePooling3D','SimpleRNN', 'TimeDistributed', 'ConvLSTM1D', + 'ConvLSTM2D', 'ConvLSTM3D', 'TextVectorization', 'Discretization', 'CategoryEncoding', 'Hashing', + 'StringLookup', 'IntegerLookup', 'Resizing', 'Rescaling', 'CenterCrop', 'RandomCrop', 'RandomFlip', + 'RandomTranslation', 'Activation', 'RandomRotation', 'RandomZoom', 'RandomHeight', 'RandomWidth', + 'RandomContrast', 'RandomBrightness', 'Normalization', 'BatchNormalization', 'LayerNormalization', + 'UnitNormalization', 'GroupNormalization', 'Dropout', 'SpatialDropout1D', 'SpatialDropout2D', + 'SpatialDropout3D', 'GaussianDropout', 'GaussianNoise', 'ActivityRegularization', 'AlphaDropout', + 'MultiHeadAttention', 'Attention', 'AdditiveAttention', 'Reshape', 'Flatten', 'RepeatVector', + 'Permute', 'Cropping1D', 'Cropping2D', 'Cropping3D', 'UpSampling1D', 'UpSampling2D', 'UpSampling3D', + 'ZeroPadding1D', 'ZeroPadding2D', 'ZeroPadding3D', 'Concatenate', 'Average', 
'Maximum', 'Minimum', + 'Add', 'Subtract', 'Multiply', 'Dot', 'LocallyConnected1D', 'LocallyConnected2D', 'Embedding', 'Masking', 'Lambda', 'ReLU', 'Softmax', 'LeakyReLU', 'PReLU', 'ELU', 'ThresholdedReLU' ] fp32: ['*'] # '*' means all op types - + capabilities: &common_capabilities int8: { 'Conv2D': { diff --git a/neural_compressor/adaptor/keras_utils/__init__.py b/neural_compressor/adaptor/keras_utils/__init__.py index ed04d17bdbe..369707c0ef6 100644 --- a/neural_compressor/adaptor/keras_utils/__init__.py +++ b/neural_compressor/adaptor/keras_utils/__init__.py @@ -14,4 +14,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - diff --git a/neural_compressor/adaptor/keras_utils/conv2d.py b/neural_compressor/adaptor/keras_utils/conv2d.py index 3e768aa7c72..d1b72a196eb 100644 --- a/neural_compressor/adaptor/keras_utils/conv2d.py +++ b/neural_compressor/adaptor/keras_utils/conv2d.py @@ -16,67 +16,94 @@ # limitations under the License. import json -import tensorflow as tf -from tensorflow.keras import activations -from tensorflow.keras import constraints -from tensorflow.keras import initializers -from tensorflow.keras import regularizers +import tensorflow as tf from tensorflow import quantization +from tensorflow.keras import activations, constraints, initializers, regularizers from neural_compressor.adaptor.tf_utils.util import version1_gte_version2 -if version1_gte_version2(tf.__version__, '2.13.0'): - from keras.src.layers.convolutional.base_conv import Conv # pylint: disable=E0401 + +if version1_gte_version2(tf.__version__, "2.13.0"): + from keras.src.layers.convolutional.base_conv import Conv # pylint: disable=E0401 else: - from keras.layers.convolutional.base_conv import Conv # pylint: disable=E0401 + from keras.layers.convolutional.base_conv import Conv # pylint: disable=E0401 + class QConv2D(Conv): - def __init__(self, filters, kernel_size, strides=(1, 1), padding='valid', - data_format=None, dilation_rate=(1, 1), groups=1, activation=None, - use_bias=True, kernel_initializer='glorot_uniform', - bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, - activity_regularizer=None, kernel_constraint=None, bias_constraint=None, - min_value=-10000, max_value=10000, **kwargs): - super(QConv2D, self).__init__(rank=2, filters=filters, kernel_size=kernel_size, - strides=strides, padding=padding, data_format=data_format, - dilation_rate=dilation_rate, groups=groups, - activation=activations.get(activation), - use_bias=use_bias, kernel_initializer=initializers.get(kernel_initializer), - bias_initializer=initializers.get(bias_initializer), - kernel_regularizer=regularizers.get(kernel_regularizer), - bias_regularizer=regularizers.get(bias_regularizer), - activity_regularizer=regularizers.get(activity_regularizer), - kernel_constraint=constraints.get(kernel_constraint), - bias_constraint=constraints.get(bias_constraint), **kwargs) + def __init__( + self, + filters, + kernel_size, + strides=(1, 1), + padding="valid", + data_format=None, + dilation_rate=(1, 1), + groups=1, + activation=None, + use_bias=True, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + min_value=-10000, + max_value=10000, + **kwargs + ): + super(QConv2D, self).__init__( + rank=2, + filters=filters, + kernel_size=kernel_size, + 
strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + groups=groups, + activation=activations.get(activation), + use_bias=use_bias, + kernel_initializer=initializers.get(kernel_initializer), + bias_initializer=initializers.get(bias_initializer), + kernel_regularizer=regularizers.get(kernel_regularizer), + bias_regularizer=regularizers.get(bias_regularizer), + activity_regularizer=regularizers.get(activity_regularizer), + kernel_constraint=constraints.get(kernel_constraint), + bias_constraint=constraints.get(bias_constraint), + **kwargs + ) self.min_value = json.loads(min_value) self.max_value = json.loads(max_value) def call(self, inputs): - # add the Q/DQ here - kernel, _, _ = quantization.quantize(self.kernel, self.min_value, - self.max_value, tf.qint8, - axis=3, mode='SCALED') - kernel = quantization.dequantize(kernel, self.min_value, - self.max_value, axis=3, mode='SCALED',) - outputs = tf.keras.backend.conv2d( - inputs, - kernel, - strides=self.strides, - padding=self.padding, - data_format=self.data_format, - dilation_rate=self.dilation_rate) + # add the Q/DQ here + kernel, _, _ = quantization.quantize( + self.kernel, self.min_value, self.max_value, tf.qint8, axis=3, mode="SCALED" + ) + kernel = quantization.dequantize( + kernel, + self.min_value, + self.max_value, + axis=3, + mode="SCALED", + ) + outputs = tf.keras.backend.conv2d( + inputs, + kernel, + strides=self.strides, + padding=self.padding, + data_format=self.data_format, + dilation_rate=self.dilation_rate, + ) - if self.use_bias: - outputs = tf.keras.backend.bias_add( - outputs, self.bias, data_format=self.data_format) + if self.use_bias: + outputs = tf.keras.backend.bias_add(outputs, self.bias, data_format=self.data_format) - if self.activation is not None: - return self.activation(outputs) + if self.activation is not None: + return self.activation(outputs) - return outputs + return outputs @classmethod def from_config(cls, config): return cls(**config) - - diff --git a/neural_compressor/adaptor/keras_utils/dense.py b/neural_compressor/adaptor/keras_utils/dense.py index cf5a6bf70d4..b97e9759b70 100644 --- a/neural_compressor/adaptor/keras_utils/dense.py +++ b/neural_compressor/adaptor/keras_utils/dense.py @@ -16,57 +16,67 @@ # limitations under the License. 
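For readers following the QConv2D change above, the per-output-channel kernel fake-quantization it performs can be reproduced with plain TensorFlow ops. This is only an illustrative sketch outside the patch; the kernel shape and the way the ranges are derived here are assumptions for the example, not code from the repository.

import numpy as np
import tensorflow as tf

# A made-up HWIO Conv2D kernel: 3x3 spatial, 16 input channels, 32 filters.
kernel = tf.constant(np.random.randn(3, 3, 16, 32).astype(np.float32))

# One min/max pair per output channel (axis=3), the same granularity QConv2D uses.
per_channel = tf.reshape(tf.transpose(kernel, [3, 0, 1, 2]), [32, -1])
min_value = tf.reduce_min(per_channel, axis=1)
max_value = tf.reduce_max(per_channel, axis=1)

# Quantize the kernel to int8 and dequantize it right back, mirroring the Q/DQ pair in call().
q_kernel, _, _ = tf.quantization.quantize(kernel, min_value, max_value, tf.qint8, axis=3, mode="SCALED")
dq_kernel = tf.quantization.dequantize(q_kernel, min_value, max_value, axis=3, mode="SCALED")

# The round-trip error stays small, which is what makes the fake-quantized model usable for tuning.
print(float(tf.reduce_max(tf.abs(kernel - dq_kernel))))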
import json + import tensorflow as tf -from tensorflow.keras import activations -from tensorflow.keras import backend -from tensorflow.keras import constraints -from tensorflow.keras import initializers -from tensorflow.keras import regularizers -from tensorflow.keras.layers import Dense from tensorflow import quantization +from tensorflow.keras import activations, backend, constraints, initializers, regularizers +from tensorflow.keras.layers import Dense + class QDense(Dense): - def __init__(self, - units, - activation=None, - use_bias=True, - kernel_initializer='glorot_uniform', - bias_initializer='zeros', - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - min_value=-10000, - max_value=10000, - **kwargs): - super(QDense, self).__init__( - units=units, - activation=activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - bias_initializer=bias_initializer, - kernel_regularizer=kernel_regularizer, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - kernel_constraint=kernel_constraint, - bias_constraint=bias_constraint, - **kwargs) - self.min_value = json.loads(min_value) - self.max_value = json.loads(max_value) + def __init__( + self, + units, + activation=None, + use_bias=True, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + min_value=-10000, + max_value=10000, + **kwargs + ): + super(QDense, self).__init__( + units=units, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + bias_constraint=bias_constraint, + **kwargs + ) + self.min_value = json.loads(min_value) + self.max_value = json.loads(max_value) def call(self, inputs): - # add the Q/DQ here - kernel, _, _ = quantization.quantize(self.kernel, self.min_value, - self.max_value, tf.qint8, - axis=1, mode='SCALED',) - kernel = quantization.dequantize(kernel, self.min_value, - self.max_value, axis=1, mode='SCALED',) - outputs = tf.keras.backend.dot(inputs, kernel) - - if self.use_bias: - outputs = tf.keras.backend.bias_add(outputs, self.bias) - if self.activation is not None: - outputs = self.activation(outputs) - return outputs + # add the Q/DQ here + kernel, _, _ = quantization.quantize( + self.kernel, + self.min_value, + self.max_value, + tf.qint8, + axis=1, + mode="SCALED", + ) + kernel = quantization.dequantize( + kernel, + self.min_value, + self.max_value, + axis=1, + mode="SCALED", + ) + outputs = tf.keras.backend.dot(inputs, kernel) + if self.use_bias: + outputs = tf.keras.backend.bias_add(outputs, self.bias) + if self.activation is not None: + outputs = self.activation(outputs) + return outputs diff --git a/neural_compressor/adaptor/keras_utils/depthwise_conv2d.py b/neural_compressor/adaptor/keras_utils/depthwise_conv2d.py index 91438e5edff..d22c0588c0e 100644 --- a/neural_compressor/adaptor/keras_utils/depthwise_conv2d.py +++ b/neural_compressor/adaptor/keras_utils/depthwise_conv2d.py @@ -16,21 +16,20 @@ # limitations under the License. 
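As a companion to the QDense change above, the snippet below sketches how per-unit ranges for a Dense kernel can be computed and serialized the way _calibrate does before it instantiates QDense. The kernel shape is made up for illustration; only the json.dumps/json.loads convention comes from the patched code.

import json

import numpy as np

# A made-up Dense kernel of shape (input_dim, units).
kernel = np.random.randn(128, 10).astype(np.float32)

# One min/max per output unit (the last axis), matching QDense's axis=1 kernel quantization.
per_unit = kernel.transpose(1, 0).reshape(10, -1)
min_value = json.dumps(np.min(per_unit, axis=1).tolist())
max_value = json.dumps(np.max(per_unit, axis=1).tolist())

# QDense(..., min_value=min_value, max_value=max_value) expects these JSON strings,
# since its __init__ immediately calls json.loads on both arguments.
print(min_value)
print(max_value)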
import json -import tensorflow as tf -from tensorflow.keras import activations -from tensorflow.keras import constraints -from tensorflow.keras import initializers -from tensorflow.keras import regularizers +import tensorflow as tf from tensorflow import quantization +from tensorflow.keras import activations, constraints, initializers, regularizers from neural_compressor.adaptor.tf_utils.util import version1_gte_version2 -if version1_gte_version2(tf.__version__, '2.13.0'): - from keras.src.utils import tf_utils, conv_utils # pylint: disable=E0401 - from keras.src.layers.convolutional.base_depthwise_conv import DepthwiseConv # pylint: disable=E0401 + +if version1_gte_version2(tf.__version__, "2.13.0"): + from keras.src.layers.convolutional.base_depthwise_conv import DepthwiseConv # pylint: disable=E0401 + from keras.src.utils import conv_utils, tf_utils # pylint: disable=E0401 else: - from keras.utils import tf_utils, conv_utils # pylint: disable=E0401 - from keras.layers.convolutional.base_depthwise_conv import DepthwiseConv # pylint: disable=E0401 + from keras.layers.convolutional.base_depthwise_conv import DepthwiseConv # pylint: disable=E0401 + from keras.utils import conv_utils, tf_utils # pylint: disable=E0401 + class QDepthwiseConv2D(DepthwiseConv): def __init__( @@ -77,34 +76,38 @@ def __init__( self.max_value = json.loads(max_value) def call(self, inputs): - # add the Q/DQ here - kernel, _, _ = quantization.quantize(self.depthwise_kernel, self.min_value, - self.max_value, tf.qint8, - axis=3, mode='SCALED') - kernel = quantization.dequantize(kernel, self.min_value, - self.max_value, axis=3, mode='SCALED',) - outputs = tf.keras.backend.depthwise_conv2d( - inputs, - kernel, - strides=self.strides, - padding=self.padding, - data_format=self.data_format, - dilation_rate=self.dilation_rate) + # add the Q/DQ here + kernel, _, _ = quantization.quantize( + self.depthwise_kernel, self.min_value, self.max_value, tf.qint8, axis=3, mode="SCALED" + ) + kernel = quantization.dequantize( + kernel, + self.min_value, + self.max_value, + axis=3, + mode="SCALED", + ) + outputs = tf.keras.backend.depthwise_conv2d( + inputs, + kernel, + strides=self.strides, + padding=self.padding, + data_format=self.data_format, + dilation_rate=self.dilation_rate, + ) - if self.use_bias: - outputs = tf.keras.backend.bias_add( - outputs, self.bias, data_format=self.data_format) + if self.use_bias: + outputs = tf.keras.backend.bias_add(outputs, self.bias, data_format=self.data_format) - if self.activation is not None: - return self.activation(outputs) + if self.activation is not None: + return self.activation(outputs) - return outputs + return outputs @classmethod def from_config(cls, config): return cls(**config) - @tf_utils.shape_type_conversion def compute_output_shape(self, input_shape): if self.data_format == "channels_first": diff --git a/neural_compressor/adaptor/keras_utils/pool2d.py b/neural_compressor/adaptor/keras_utils/pool2d.py index f7fb6c53235..409c16b9305 100644 --- a/neural_compressor/adaptor/keras_utils/pool2d.py +++ b/neural_compressor/adaptor/keras_utils/pool2d.py @@ -16,49 +16,44 @@ # limitations under the License. 
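Stepping back from the formatting changes, the fuse_conv_bn helper reformatted earlier in this patch folds a BatchNormalization layer into the preceding convolution. A small NumPy sketch of that arithmetic for the plain Conv2D case is given below; all shapes and values are invented for illustration, and scale_value is the standard gamma / sqrt(var + eps) factor.

import numpy as np

eps = 1e-5
weight = np.random.randn(3, 3, 16, 32).astype(np.float32)  # HWIO Conv2D kernel
bias = np.random.randn(32).astype(np.float32)
gamma, beta, mean, var = (np.random.randn(32).astype(np.float32) for _ in range(4))
var = np.abs(var)  # variances are non-negative

# scale_value broadcasts over the output-channel axis for Conv2D: shape (1, 1, 1, C).
scale_value = (gamma / np.sqrt(var + eps)).reshape(1, 1, 1, 32)

# Folded parameters satisfy:
#   conv(x, folded_weight) + folded_bias
#     == gamma * (conv(x, weight) + bias - mean) / sqrt(var + eps) + beta
folded_weight = weight * scale_value
folded_bias = beta + (bias - mean) * scale_value.reshape(-1)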
import json + import tensorflow as tf -from tensorflow.keras import activations -from tensorflow.keras import backend -from tensorflow.keras import constraints -from tensorflow.keras import initializers -from tensorflow.keras import regularizers -from tensorflow.keras.layers import MaxPooling2D -from tensorflow.keras.layers import AveragePooling2D from tensorflow import quantization +from tensorflow.keras import activations, backend, constraints, initializers, regularizers +from tensorflow.keras.layers import AveragePooling2D, MaxPooling2D + class QAvgPool2D(AveragePooling2D): - def __init__(self, - pool_size=(2, 2), - strides=None, - padding="valid", - data_format=None, - min_value=-10000, - max_value=10000, - **kwargs): - super(QAvgPool2D, self).__init__( - pool_size=pool_size, - strides=strides, - padding=padding, - data_format=data_format, - **kwargs) - self.min_value = json.loads(min_value) - self.max_value = json.loads(max_value) + def __init__( + self, + pool_size=(2, 2), + strides=None, + padding="valid", + data_format=None, + min_value=-10000, + max_value=10000, + **kwargs + ): + super(QAvgPool2D, self).__init__( + pool_size=pool_size, strides=strides, padding=padding, data_format=data_format, **kwargs + ) + self.min_value = json.loads(min_value) + self.max_value = json.loads(max_value) class QMaxPool2D(MaxPooling2D): - def __init__(self, - pool_size=(2, 2), - strides=None, - padding="valid", - data_format=None, - min_value=-10000, - max_value=10000, - **kwargs): - super(QMaxPool2D, self).__init__( - pool_size=pool_size, - strides=strides, - padding=padding, - data_format=data_format, - **kwargs) - self.min_value = json.loads(min_value) - self.max_value = json.loads(max_value) + def __init__( + self, + pool_size=(2, 2), + strides=None, + padding="valid", + data_format=None, + min_value=-10000, + max_value=10000, + **kwargs + ): + super(QMaxPool2D, self).__init__( + pool_size=pool_size, strides=strides, padding=padding, data_format=data_format, **kwargs + ) + self.min_value = json.loads(min_value) + self.max_value = json.loads(max_value) diff --git a/neural_compressor/adaptor/keras_utils/quantizer.py b/neural_compressor/adaptor/keras_utils/quantizer.py index 78f0f9aa24b..b395870b48f 100644 --- a/neural_compressor/adaptor/keras_utils/quantizer.py +++ b/neural_compressor/adaptor/keras_utils/quantizer.py @@ -19,17 +19,18 @@ import tensorflow as tf from tensorflow.keras.layers import Layer + class FakeQuant(Layer): - def __init__(self, mode='per_tensor', T='s8', **kwargs): + def __init__(self, mode="per_tensor", T="s8", **kwargs): super(FakeQuant, self).__init__(**kwargs) self.mode = mode self.T = T - self.axis = 1 if mode == 'per_channel' else 0 + self.axis = 1 if mode == "per_channel" else 0 self.min_value = tf.constant(np.finfo(np.float32).max, dtype=tf.float32) self.max_value = tf.constant(np.finfo(np.float32).min, dtype=tf.float32) def call(self, inputs): - if self.mode == 'per_tensor': + if self.mode == "per_tensor": self.min_value = tf.math.reduce_min(inputs) self.max_value = tf.math.reduce_max(inputs) else: @@ -40,20 +41,31 @@ def call(self, inputs): @classmethod def from_config(cls, config): return cls(**config) - + def get_config(self): - return {'mode': self.mode, - 'min_value': self.min_value.numpy(), - 'max_value': self.max_value.numpy(), - 'T': self.T, - 'name': self.name} + return { + "mode": self.mode, + "min_value": self.min_value.numpy(), + "max_value": self.max_value.numpy(), + "T": self.T, + "name": self.name, + } + class Quantize(Layer): - def __init__(self, 
min_range, max_range, T='s8', mode='SCALED', - round_mode='HALF_AWAY_FROM_ZERO', narrow_range=False, - axis=None, **kwargs): + def __init__( + self, + min_range, + max_range, + T="s8", + mode="SCALED", + round_mode="HALF_AWAY_FROM_ZERO", + narrow_range=False, + axis=None, + **kwargs + ): super(Quantize, self).__init__(**kwargs) - T_map = {'s8': tf.qint8, 'u8': tf.quint8} + T_map = {"s8": tf.qint8, "u8": tf.quint8} self.min_range = float(min_range) self.max_range = float(max_range) self.T = T_map[T] @@ -63,16 +75,28 @@ def __init__(self, min_range, max_range, T='s8', mode='SCALED', self.axis = axis def call(self, inputs): - outputs, _, _ = tf.quantization.quantize(inputs, self.min_range, - self.max_range, self.T, - mode=self.mode, round_mode=self.round_mode, - narrow_range=self.narrow_range, axis=self.axis) + outputs, _, _ = tf.quantization.quantize( + inputs, + self.min_range, + self.max_range, + self.T, + mode=self.mode, + round_mode=self.round_mode, + narrow_range=self.narrow_range, + axis=self.axis, + ) return outputs def get_config(self): - return {'min_range': self.min_range, 'max_range': self.max_range, - 'T': self.T, 'mode': self.mode, 'round_mode': self.round_mode, - 'narrow': self.narrow_range, 'axis': self.axis} + return { + "min_range": self.min_range, + "max_range": self.max_range, + "T": self.T, + "mode": self.mode, + "round_mode": self.round_mode, + "narrow": self.narrow_range, + "axis": self.axis, + } @classmethod def from_config(cls, config): @@ -80,8 +104,7 @@ def from_config(cls, config): class DeQuantize(Layer): - def __init__(self, min_range, max_range, mode='SCALED', - narrow_range=False, axis=None, **kwargs): + def __init__(self, min_range, max_range, mode="SCALED", narrow_range=False, axis=None, **kwargs): super(DeQuantize, self).__init__(**kwargs) self.min_range = min_range self.max_range = max_range @@ -90,13 +113,24 @@ def __init__(self, min_range, max_range, mode='SCALED', self.axis = axis def call(self, inputs): - return tf.quantization.dequantize(inputs, float(self.min_range), - float(self.max_range), mode=self.mode, - narrow_range=self.narrow_range, axis=self.axis) + return tf.quantization.dequantize( + inputs, + float(self.min_range), + float(self.max_range), + mode=self.mode, + narrow_range=self.narrow_range, + axis=self.axis, + ) + def get_config(self): - return {'min_range': self.min_range, 'max_range': self.max_range, - 'mode': self.mode, 'narrow': self.narrow_range, 'axis': self.axis, - 'dtype': self.dtype} + return { + "min_range": self.min_range, + "max_range": self.max_range, + "mode": self.mode, + "narrow": self.narrow_range, + "axis": self.axis, + "dtype": self.dtype, + } @classmethod def from_config(cls, config): diff --git a/neural_compressor/adaptor/keras_utils/separable_conv2d.py b/neural_compressor/adaptor/keras_utils/separable_conv2d.py index 4fb363f4300..36013412e67 100644 --- a/neural_compressor/adaptor/keras_utils/separable_conv2d.py +++ b/neural_compressor/adaptor/keras_utils/separable_conv2d.py @@ -16,20 +16,20 @@ # limitations under the License. 
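The FakeQuant/Quantize/DeQuantize layers reformatted above are what the Keras adaptor splices into a model around quantizable ops. A usage sketch, assuming neural_compressor is importable and with ranges hard-coded rather than taken from the calibration pass:

    import numpy as np
    import tensorflow as tf

    from neural_compressor.adaptor.keras_utils.quantizer import DeQuantize, Quantize

    x = tf.constant(np.random.randn(1, 8).astype(np.float32))
    q = Quantize(min_range=-4.0, max_range=4.0, T="s8")(x)   # float32 -> tf.qint8
    y = DeQuantize(min_range=-4.0, max_range=4.0)(q)         # tf.qint8 -> float32
    print(q.dtype, y.dtype)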
import json + import tensorflow as tf -from tensorflow.keras import activations -from tensorflow.keras import constraints -from tensorflow.keras import initializers -from tensorflow.keras import regularizers from tensorflow import quantization +from tensorflow.keras import activations, constraints, initializers, regularizers from neural_compressor.adaptor.tf_utils.util import version1_gte_version2 -if version1_gte_version2(tf.__version__, '2.13.0'): - from keras.src.utils import conv_utils # pylint: disable=E0401 - from keras.src.layers.convolutional.base_separable_conv import SeparableConv # pylint: disable=E0401 + +if version1_gte_version2(tf.__version__, "2.13.0"): + from keras.src.layers.convolutional.base_separable_conv import SeparableConv # pylint: disable=E0401 + from keras.src.utils import conv_utils # pylint: disable=E0401 else: - from keras.utils import conv_utils # pylint: disable=E0401 - from keras.layers.convolutional.base_separable_conv import SeparableConv # pylint: disable=E0401 + from keras.layers.convolutional.base_separable_conv import SeparableConv # pylint: disable=E0401 + from keras.utils import conv_utils # pylint: disable=E0401 + class QSeparableConv2D(SeparableConv): def __init__( @@ -85,40 +85,40 @@ def __init__( self.max_value = json.loads(max_value) def call(self, inputs): - if self.data_format == "channels_last": - strides = (1,) + self.strides + (1,) - else: - strides = (1, 1) + self.strides - # (TODO) it's ugly that we can't get the point_wise min/max here - depthwise_kernel, _, _ = quantization.quantize(self.depthwise_kernel, - self.min_value, self.max_value, tf.qint8, - axis=3, mode='SCALED') - depthwise_kernel = quantization.dequantize(depthwise_kernel, self.min_value, - self.max_value, axis=3, mode='SCALED',) + if self.data_format == "channels_last": + strides = (1,) + self.strides + (1,) + else: + strides = (1, 1) + self.strides + # (TODO) it's ugly that we can't get the point_wise min/max here + depthwise_kernel, _, _ = quantization.quantize( + self.depthwise_kernel, self.min_value, self.max_value, tf.qint8, axis=3, mode="SCALED" + ) + depthwise_kernel = quantization.dequantize( + depthwise_kernel, + self.min_value, + self.max_value, + axis=3, + mode="SCALED", + ) - outputs = tf.compat.v1.nn.separable_conv2d( - inputs, - depthwise_kernel, - self.pointwise_kernel, - strides=strides, - padding=self.padding.upper(), - rate=self.dilation_rate, - data_format=conv_utils.convert_data_format( - self.data_format, ndim=4 - ), - ) + outputs = tf.compat.v1.nn.separable_conv2d( + inputs, + depthwise_kernel, + self.pointwise_kernel, + strides=strides, + padding=self.padding.upper(), + rate=self.dilation_rate, + data_format=conv_utils.convert_data_format(self.data_format, ndim=4), + ) - if self.use_bias: - outputs = tf.keras.backend.bias_add( - outputs, self.bias, data_format=self.data_format) + if self.use_bias: + outputs = tf.keras.backend.bias_add(outputs, self.bias, data_format=self.data_format) - if self.activation is not None: - return self.activation(outputs) + if self.activation is not None: + return self.activation(outputs) - return outputs + return outputs @classmethod def from_config(cls, config): return cls(**config) - - diff --git a/neural_compressor/adaptor/mxnet.py b/neural_compressor/adaptor/mxnet.py index bf368651353..e9acf094239 100644 --- a/neural_compressor/adaptor/mxnet.py +++ b/neural_compressor/adaptor/mxnet.py @@ -15,19 +15,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
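Both QDepthwiseConv2D and QSeparableConv2D above quantize the depthwise kernel per channel (axis=3). The same pattern in isolation, with ranges derived on the fly instead of read from calibration results:

    import tensorflow as tf

    kernel = tf.random.normal([3, 3, 16, 2])                 # (kh, kw, in_ch, multiplier)
    max_abs = tf.reduce_max(tf.abs(kernel), axis=[0, 1, 2])  # one range per axis-3 slice

    q, _, _ = tf.quantization.quantize(
        kernel, -max_abs, max_abs, tf.qint8, axis=3, mode="SCALED"
    )
    dq = tf.quantization.dequantize(q, -max_abs, max_abs, axis=3, mode="SCALED")
    print(dq.shape)  # (3, 3, 16, 2), float32 with per-channel int8 rounding error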
+import logging +import math import os +from collections import OrderedDict +from copy import deepcopy + import yaml -import logging -from neural_compressor.adaptor.adaptor import adaptor_registry, Adaptor -from neural_compressor.adaptor.query import QueryBackendCapability -from neural_compressor.utils.utility import (LazyImport, GLOBAL_STATE, MODE, CpuInfo, - dump_elapsed_time, singleton) +from neural_compressor.adaptor.adaptor import Adaptor, adaptor_registry from neural_compressor.adaptor.mxnet_utils.util import * -from collections import OrderedDict +from neural_compressor.adaptor.query import QueryBackendCapability from neural_compressor.data.dataloaders.base_dataloader import BaseDataLoader -from copy import deepcopy -import math +from neural_compressor.utils.utility import GLOBAL_STATE, MODE, CpuInfo, LazyImport, dump_elapsed_time, singleton mx = LazyImport("mxnet") logger = logging.getLogger("neural_compressor") @@ -43,21 +43,19 @@ class MxNetAdaptor(Adaptor): def __init__(self, framework_specific_info): super(MxNetAdaptor, self).__init__(framework_specific_info) - assert check_mx_version('1.6.0'), \ - "Need MXNet version >= 1.6.0, but got version: %s" % (mx.__version__) + assert check_mx_version("1.6.0"), "Need MXNet version >= 1.6.0, but got version: %s" % (mx.__version__) self.pre_optimized_model = None self.quantizable_nodes = [] self._qtensor_to_tensor = {} self._tensor_to_node = {} self.qdataloader = framework_specific_info.get("q_dataloader") - self.query_handler = MXNetQuery(local_config_file=os.path.join( - os.path.dirname(__file__), "mxnet.yaml")) + self.query_handler = MXNetQuery(local_config_file=os.path.join(os.path.dirname(__file__), "mxnet.yaml")) - self.ctx = mx.cpu() if framework_specific_info['device'] == 'cpu' else None - self.benchmark = (GLOBAL_STATE.STATE == MODE.BENCHMARK) + self.ctx = mx.cpu() if framework_specific_info["device"] == "cpu" else None + self.benchmark = GLOBAL_STATE.STATE == MODE.BENCHMARK self.optype_statistics = None - assert self.ctx is not None, 'Unsupported device' + assert self.ctx is not None, "Unsupported device" @dump_elapsed_time("Pass quantize model") def quantize(self, tune_cfg, nc_model, dataloader, q_func=None): @@ -85,27 +83,27 @@ def calib_func(tmp_tune_cfg, dataloader): sym_model, dataloader = prepare_model_data(nc_model, self.ctx, dataloader) qsym_model, calib_tensors = quantize_sym_model(sym_model, self.ctx, quant_cfg) - calib_data = self._collect_thresholds(sym_model, dataloader, calib_tensors, - calib_cfg, calib_cache) + calib_data = self._collect_thresholds(sym_model, dataloader, calib_tensors, calib_cfg, calib_cache) qsym_model = calib_model(qsym_model, calib_data, calib_cfg) qsym_model = fuse(qsym_model, self.ctx) # post-quantization fusion - if len(amp_cfg['excluded_sym_names']) < len(self.quantizable_nodes): + if len(amp_cfg["excluded_sym_names"]) < len(self.quantizable_nodes): qsym_model = amp_convert(qsym_model, dataloader.input_desc, amp_cfg) q_nc_model = make_nc_model(nc_model, qsym_model, self.ctx, dataloader.input_desc) - q_nc_model.calib_cache['last'] = calib_data.th_dict + q_nc_model.calib_cache["last"] = calib_data.th_dict q_nc_model.q_config = { - 'mxnet_version': mx.__version__, - 'amp_cfg': amp_cfg, - 'quant_cfg': quant_cfg, - 'calib_cfg': calib_cfg, - 'th_dict': calib_data.th_dict, - 'input_desc': dataloader.input_desc, - 'framework_specific_info': {'device': self.ctx.device_type}} + "mxnet_version": mx.__version__, + "amp_cfg": amp_cfg, + "quant_cfg": quant_cfg, + "calib_cfg": calib_cfg, + "th_dict": 
calib_data.th_dict, + "input_desc": dataloader.input_desc, + "framework_specific_info": {"device": self.ctx.device_type}, + } return q_nc_model - calib_sampling_size = tune_cfg.get('calib_sampling_size', 1) + calib_sampling_size = tune_cfg.get("calib_sampling_size", 1) if isinstance(dataloader, BaseDataLoader): batch_size = dataloader.batch_size try: @@ -113,34 +111,33 @@ def calib_func(tmp_tune_cfg, dataloader): if calib_sampling_size % (batch_size - i) == 0: calib_batch_size = batch_size - i if i != 0: - logger.warning("Reset `calibration.dataloader.batch_size` field " - "to {}".format(calib_batch_size) + - " to make sure the sampling_size is " - "divisible exactly by batch size") + logger.warning( + "Reset `calibration.dataloader.batch_size` field " + "to {}".format(calib_batch_size) + " to make sure the sampling_size is " + "divisible exactly by batch size" + ) break tmp_iterations = int(math.ceil(calib_sampling_size / calib_batch_size)) tmp_tune_cfg = deepcopy(tune_cfg) - tmp_tune_cfg['calib_iteration'] = tmp_iterations + tmp_tune_cfg["calib_iteration"] = tmp_iterations dataloader.batch(calib_batch_size) return calib_func(tmp_tune_cfg, dataloader) except Exception: # pragma: no cover - logger.warning( - "Fail to forward with batch size={}, set to {} now.". - format(batch_size, 1)) + logger.warning("Fail to forward with batch size={}, set to {} now.".format(batch_size, 1)) tmp_tune_cfg = deepcopy(tune_cfg) - tmp_tune_cfg['calib_iteration'] = calib_sampling_size + tmp_tune_cfg["calib_iteration"] = calib_sampling_size dataloader.batch(1) return calib_func(tmp_tune_cfg, dataloader) else: - if hasattr(dataloader, 'batch_size') and \ - calib_sampling_size % dataloader.batch_size != 0: - iter = tune_cfg['calib_iteration'] + if hasattr(dataloader, "batch_size") and calib_sampling_size % dataloader.batch_size != 0: + iter = tune_cfg["calib_iteration"] logger.warning( "Please note that calibration sampling size {} " "isn't divisible exactly by batch size {}. " - "So the real sampling size is {}.". - format(calib_sampling_size, dataloader.batch_size, - dataloader.batch_size * iter)) + "So the real sampling size is {}.".format( + calib_sampling_size, dataloader.batch_size, dataloader.batch_size * iter + ) + ) return calib_func(tune_cfg, dataloader) def _collect_thresholds(self, sym_model, calib_data, calib_tensors, calib_cfg, calib_cache): @@ -157,41 +154,51 @@ def _collect_thresholds(self, sym_model, calib_data, calib_tensors, calib_cfg, c Returns: (CalibResult): The results of calibration (pair of thresholds for each tensor). 
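The BaseDataLoader branch above searches for the largest batch size that divides calib_sampling_size exactly before re-batching the calibration dataloader. The arithmetic on its own, with example numbers not taken from the patch:

    import math

    calib_sampling_size, batch_size = 100, 32
    for i in range(batch_size):
        if calib_sampling_size % (batch_size - i) == 0:
            calib_batch_size = batch_size - i
            break
    iterations = int(math.ceil(calib_sampling_size / calib_batch_size))
    print(calib_batch_size, iterations)   # 25 4 -> four calibration iterations of 25 samples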
""" - assert calib_cfg['calib_mode'] == 'naive', \ - '`calib_mode` must be set to `naive`, for `collector.min_max_dict` to be used' - - if calib_cache.get('batches', -1) != calib_cfg['batches']: - calib_cache['batches'] = calib_cfg['batches'] - calib_cache['kl'] = {} - calib_cache['minmax'] = {} - - cache_kl = calib_cache['kl'] - cache_minmax = calib_cache['minmax'] - tensors_kl, tensors_minmax = distribute_calib_tensors(calib_tensors, calib_cfg, - self._tensor_to_node) + assert ( + calib_cfg["calib_mode"] == "naive" + ), "`calib_mode` must be set to `naive`, for `collector.min_max_dict` to be used" + + if calib_cache.get("batches", -1) != calib_cfg["batches"]: + calib_cache["batches"] = calib_cfg["batches"] + calib_cache["kl"] = {} + calib_cache["minmax"] = {} + + cache_kl = calib_cache["kl"] + cache_minmax = calib_cache["minmax"] + tensors_kl, tensors_minmax = distribute_calib_tensors(calib_tensors, calib_cfg, self._tensor_to_node) to_collect_kl = tensors_kl - set(cache_kl.keys()) to_collect_minmax = tensors_minmax - set(cache_minmax.keys()) collector = CalibCollector(to_collect_kl, to_collect_minmax) if len(to_collect_kl) + len(to_collect_minmax) > 0: + def b_filter(): - for _ in range(calib_cfg['batches']): + for _ in range(calib_cfg["batches"]): yield True logger.info("Start to collect tensors of the FP32 model.") - batches = run_forward(sym_model, self.ctx, calib_data, b_filter(), - collector, collector.pre_batch, collector.post_batch) + batches = run_forward( + sym_model, self.ctx, calib_data, b_filter(), collector, collector.pre_batch, collector.post_batch + ) logger.info("Get collected tensors of the FP32 model from {} batches.".format(batches)) if len(collector.include_tensors_kl) > 0: - cache_kl.update(collector.calc_kl_th_dict(calib_cfg['quantized_dtype'])) + cache_kl.update(collector.calc_kl_th_dict(calib_cfg["quantized_dtype"])) cache_minmax.update(collector.min_max_dict) return CalibData(cache_kl, cache_minmax, tensors_kl, tensors_minmax) - def evaluate(self, nc_model, data_x, postprocess=None, - metrics=None, measurer=None, iteration=-1, - tensorboard=False, fp32_baseline=False): + def evaluate( + self, + nc_model, + data_x, + postprocess=None, + metrics=None, + measurer=None, + iteration=-1, + tensorboard=False, + fp32_baseline=False, + ): """The function is used to run evaluation on validation dataset. Args: @@ -207,6 +214,7 @@ def evaluate(self, nc_model, data_x, postprocess=None, Returns: acc: evaluate result. """ + def b_filter(): if iteration == -1: while True: @@ -237,24 +245,19 @@ def post_batch(net, batch, outs): if isinstance(data_x, BaseDataLoader) and not self.benchmark: try: sym_model, dataloader = prepare_model_data(nc_model, self.ctx, data_x) - run_forward(sym_model, self.ctx, dataloader, b_filter(), - pre_batch=pre_batch, post_batch=post_batch) + run_forward(sym_model, self.ctx, dataloader, b_filter(), pre_batch=pre_batch, post_batch=post_batch) except Exception: # pragma: no cover - logger.warning( - "Fail to forward with batch size={}, set to {} now.". 
- format(data_x.batch_size, 1)) + logger.warning("Fail to forward with batch size={}, set to {} now.".format(data_x.batch_size, 1)) data_x.batch(1) sym_model, dataloader = prepare_model_data(nc_model, self.ctx, data_x) - run_forward(sym_model, self.ctx, dataloader, b_filter(), - pre_batch=pre_batch, post_batch=post_batch) + run_forward(sym_model, self.ctx, dataloader, b_filter(), pre_batch=pre_batch, post_batch=post_batch) else: sym_model, dataloader = prepare_model_data(nc_model, self.ctx, data_x) - run_forward(sym_model, self.ctx, dataloader, b_filter(), - pre_batch=pre_batch, post_batch=post_batch) + run_forward(sym_model, self.ctx, dataloader, b_filter(), pre_batch=pre_batch, post_batch=post_batch) acc = [metric.result() for metric in metrics] if metrics is not None else 0 return acc if not isinstance(acc, list) or len(acc) > 1 else acc[0] - @dump_elapsed_time('Query quantizable operators') + @dump_elapsed_time("Query quantizable operators") def query_fw_capability(self, nc_model): """Query MXNet quantization capability on the model/op level with the specific model. @@ -269,21 +272,22 @@ def query_fw_capability(self, nc_model): self.pre_optimized_model = sym_model self.quantizable_nodes, self._tensor_to_node, all_op_nodes = query_quantizable_nodes( - sym_model, self.ctx, self.qdataloader) + sym_model, self.ctx, self.qdataloader + ) - config = self.query_handler.get_quantization_capability()['int8'] - bf16_config = self.query_handler.get_quantization_capability().get('bf16', {}) + config = self.query_handler.get_quantization_capability()["int8"] + bf16_config = self.query_handler.get_quantization_capability().get("bf16", {}) valid_precisions = self.query_handler.get_mixed_precision_combination() - use_bf16 = ('bf16' in valid_precisions and CpuInfo().bf16) or os.getenv('FORCE_BF16') == '1' + use_bf16 = ("bf16" in valid_precisions and CpuInfo().bf16) or os.getenv("FORCE_BF16") == "1" if use_bf16: config = combine_capabilities(config, bf16_config) op_type_wise = OrderedDict() op_wise = OrderedDict() for node in self.quantizable_nodes: - op_capability = config.get(node['type'], config['default']) - op_type_wise.setdefault(node['type'], op_capability) - op_wise.setdefault((node['name'], node['type']), op_capability) + op_capability = config.get(node["type"], config["default"]) + op_type_wise.setdefault(node["type"], op_capability) + op_wise.setdefault((node["name"], node["type"]), op_capability) if use_bf16: for node_name, op_name in all_op_nodes.items(): @@ -297,7 +301,7 @@ def query_fw_capability(self, nc_model): for key, cfg in op_wise.items(): op_wise[key] = deepcopy(cfg) - return {'optypewise': op_type_wise, 'opwise': op_wise} + return {"optypewise": op_type_wise, "opwise": op_wise} def _inspect_tensor(self, nc_model, data_x, node_list=[], iteration_list=[]): def b_filter(): @@ -307,22 +311,29 @@ def b_filter(): yield True i = 1 while len(iteration_set) > 0: - run = (i in iteration_list) + run = i in iteration_list iteration_set -= {i} i += 1 yield run sym_model, dataloader = prepare_model_data(nc_model, self.ctx, data_x) collector = TensorCollector(node_list, self._qtensor_to_tensor, self._tensor_to_node) - num_batches = run_forward(sym_model, self.ctx, dataloader, b_filter(), - collector, pre_batch=collector.pre_batch) + num_batches = run_forward(sym_model, self.ctx, dataloader, b_filter(), collector, pre_batch=collector.pre_batch) logger.debug("Inspect batches at {}.".format(num_batches)) self._qtensor_to_tensor = collector.qtensor_to_tensor return collector.tensors_dicts - def 
inspect_tensor(self, nc_model, data_x, op_list=[], iteration_list=[], - inspect_type='activation', save_to_disk=False, - save_path = None, quantization_cfg = None): + def inspect_tensor( + self, + nc_model, + data_x, + op_list=[], + iteration_list=[], + inspect_type="activation", + save_to_disk=False, + save_path=None, + quantization_cfg=None, + ): """The function is used by tune strategy class for dumping tensor info. Args: @@ -334,7 +345,7 @@ def inspect_tensor(self, nc_model, data_x, op_list=[], iteration_list=[], Returns: dict: includes tensor dicts """ - if inspect_type not in ['all', 'activation']: + if inspect_type not in ["all", "activation"]: raise NotImplementedError() tensor_dict_list = self._inspect_tensor(nc_model, data_x, op_list, iteration_list) @@ -344,68 +355,71 @@ def inspect_tensor(self, nc_model, data_x, op_list=[], iteration_list=[], tensor_dict[key][tensor_name] = tensor # discard is_quantized if is_quantized: assert tensor.dtype in QUANTIZATION_DTYPES - assert 'last' in nc_model.calib_cache - min_th, max_th = nc_model.calib_cache['last'][tensor_name] + assert "last" in nc_model.calib_cache + min_th, max_th = nc_model.calib_cache["last"][tensor_name] tensor_dict[key][tensor_name] = mx.nd.contrib.dequantize( tensor, min_range=mx.nd.array([min_th]).squeeze(), max_range=mx.nd.array([max_th]).squeeze(), - out_type='float32') + out_type="float32", + ) tensor_dict[key][tensor_name] = tensor_dict[key][tensor_name].asnumpy() # transform to format expected by neural_compressor (assume only 1 tensor for now) node = key - assert len(tensors) == 1, 'Multiple tensors from a single node are not supported' + assert len(tensors) == 1, "Multiple tensors from a single node are not supported" tensor = list(tensor_dict[key].values())[0] tensor_dict[key] = {node: tensor} - return {'activation': tensor_dict_list} + return {"activation": tensor_dict_list} def recover_tuned_model(self, nc_model, q_config): """Execute the recover process on the specified model. 
- Args: - tune_cfg (dict): quantization configuration - nc_model (object): fp32 model - q_config (dict): recover configuration + Args: + tune_cfg (dict): quantization configuration + nc_model (object): fp32 model + q_config (dict): recover configuration - Returns: - MXNetModel: the quantized model + Returns: + MXNetModel: the quantized model """ - if q_config['mxnet_version'] != mx.__version__: # pragma: no cover - logger.warning('Attempting to recover a model generated with a different ' - 'version of MXNet ({})'.format(q_config['mxnet_version'])) + if q_config["mxnet_version"] != mx.__version__: # pragma: no cover + logger.warning( + "Attempting to recover a model generated with a different " + "version of MXNet ({})".format(q_config["mxnet_version"]) + ) - sym_model = prepare_model(nc_model, self.ctx, q_config['input_desc']) - qsym_model, calib_tensors = quantize_sym_model(sym_model, self.ctx, q_config['quant_cfg']) + sym_model = prepare_model(nc_model, self.ctx, q_config["input_desc"]) + qsym_model, calib_tensors = quantize_sym_model(sym_model, self.ctx, q_config["quant_cfg"]) calib_data = CalibData() - calib_data.th_dict = q_config['th_dict'] + calib_data.th_dict = q_config["th_dict"] assert set(calib_tensors).issubset(calib_data.th_dict.keys()) - qsym_model = calib_model(qsym_model, calib_data, q_config['calib_cfg']) + qsym_model = calib_model(qsym_model, calib_data, q_config["calib_cfg"]) qsym_model = fuse(qsym_model, self.ctx) # post-quantization fusion - q_nc_model = make_nc_model(nc_model, qsym_model, self.ctx, q_config['input_desc']) - q_nc_model.calib_cache['last'] = q_config['th_dict'] + q_nc_model = make_nc_model(nc_model, qsym_model, self.ctx, q_config["input_desc"]) + q_nc_model.calib_cache["last"] = q_config["th_dict"] q_nc_model.q_config = q_config return q_nc_model def set_tensor(self, model, tensor_dict): - '''The function is used by tune strategy class for setting tensor back to model. - - Args: - model (object): The model to set tensor. Usually it is quantized model. - tensor_dict (dict): The tensor dict to set. Note the numpy array contains float - value, adaptor layer has the responsibility to quantize to - int8 or int32 to set into the quantized model if needed. - The dict format is something like: - { - 'weight0_name': numpy.array, - 'bias0_name': numpy.array, - ... - } - ''' + """The function is used by tune strategy class for setting tensor back to model. + + Args: + model (object): The model to set tensor. Usually it is quantized model. + tensor_dict (dict): The tensor dict to set. Note the numpy array contains float + value, adaptor layer has the responsibility to quantize to + int8 or int32 to set into the quantized model if needed. + The dict format is something like: + { + 'weight0_name': numpy.array, + 'bias0_name': numpy.array, + ... + } + """ raise NotImplementedError def save(self, model, path): @@ -429,14 +443,16 @@ def _one_shot_query(self): except Exception as e: # pragma: no cover logger.info("Fail to parse {} due to {}.".format(self.cfg, str(e))) self.cur_config = None - raise ValueError("Please check if the format of {} follows Neural Compressor yaml schema.". 
- format(self.cfg)) + raise ValueError( + "Please check if the format of {} follows Neural Compressor yaml schema.".format(self.cfg) + ) self._update_cfg_with_usr_definition() def _update_cfg_with_usr_definition(self): from neural_compressor.conf.pythonic_config import mxnet_config + if mxnet_config.precisions is not None: - self.cur_config['precisions']['names'] = ','.join(mxnet_config.precisions) + self.cur_config["precisions"]["names"] = ",".join(mxnet_config.precisions) def _get_specified_version_cfg(self, data): """Get the configuration for the current runtime. @@ -451,48 +467,46 @@ def _get_specified_version_cfg(self, data): """ default_config = None for sub_data in data: - if sub_data['version']['name'] == self.version: + if sub_data["version"]["name"] == self.version: return sub_data - if sub_data['version']['name'] == 'default': + if sub_data["version"]["name"] == "default": default_config = sub_data return default_config def get_version(self): - """Get the current backend's version string. - """ - return deepcopy(self.cur_config['version']['name']) + """Get the current backend's version string.""" + return deepcopy(self.cur_config["version"]["name"]) def get_precisions(self): - """Get the supported low precisions, e.g ['int8', 'bf16'] - """ - return deepcopy(self.cur_config['precisions']['names']) + """Get the supported low precisions, e.g ['int8', 'bf16']""" + return deepcopy(self.cur_config["precisions"]["names"]) def get_op_types(self): """Get the op types for specific backend per low precision. - e.g {'1.6.0': {'int8': ['Conv2D', 'fully_connected']}} + + e.g {'1.6.0': {'int8': ['Conv2D', 'fully_connected']}} """ - return deepcopy(self.cur_config['ops']) + return deepcopy(self.cur_config["ops"]) def get_fuse_patterns(self): - """Get the fusion patterns for specified op type for every specific precision - - """ - return deepcopy(self.cur_config['patterns']) + """Get the fusion patterns for specified op type for every specific precision.""" + return deepcopy(self.cur_config["patterns"]) def get_quantization_capability(self): """Get the quantization capability of low precision op types. - e.g, granularity, scheme and etc. + e.g, granularity, scheme and etc. """ - return deepcopy(self.cur_config['capabilities']) + return deepcopy(self.cur_config["capabilities"]) def get_mixed_precision_combination(self): """Get the valid precision combination base on hardware and user' config. - e.g['fp32', 'bf16', 'int8'] + + e.g['fp32', 'bf16', 'int8'] """ - if self.cur_config['precisions']['valid_mixed_precisions']: - return [i.strip() for i in self.cur_config['precisions']['valid_mixed_precisions']] + if self.cur_config["precisions"]["valid_mixed_precisions"]: + return [i.strip() for i in self.cur_config["precisions"]["valid_mixed_precisions"]] - return [i.strip() for i in self.get_precisions().split(',')] + return [i.strip() for i in self.get_precisions().split(",")] diff --git a/neural_compressor/adaptor/mxnet.yaml b/neural_compressor/adaptor/mxnet.yaml index 186d0019abb..3d1f7ad5b69 100644 --- a/neural_compressor/adaptor/mxnet.yaml +++ b/neural_compressor/adaptor/mxnet.yaml @@ -17,8 +17,8 @@ - version: name: 'default' - # (MXNet-specific): - # 'int8' for this version specifies quantization in general (both int8 and uint8), as + # (MXNet-specific): + # 'int8' for this version specifies quantization in general (both int8 and uint8), as # quantization dtype is selected automatically for each quantized node according to the # calibration results: min_value < 0 => int8; min_value >= 0 => uint8. 
# 'int8' here means 'auto' in MXNet @@ -84,7 +84,7 @@ 'granularity': ['per_tensor'], 'algorithm': ['minmax', 'kl']} } - ], + ], 'Flatten': &capability_flatten [ {'activation': { 'dtype': ['int8'], @@ -233,7 +233,7 @@ patterns: &common_patterns # (MXNet-specific): - # fusion patterns are hardcoded in the framework + # fusion patterns are hardcoded in the framework fp32: [ # conv + bn # conv + act + sum diff --git a/neural_compressor/adaptor/mxnet_utils/__init__.py b/neural_compressor/adaptor/mxnet_utils/__init__.py index f675bc24bec..99dfa0175fe 100644 --- a/neural_compressor/adaptor/mxnet_utils/__init__.py +++ b/neural_compressor/adaptor/mxnet_utils/__init__.py @@ -1,4 +1,4 @@ -"""mxnet util init.""" +"""Mxnet util init.""" #!/usr/bin/env python # -*- coding: utf-8 -*- # @@ -15,4 +15,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - diff --git a/neural_compressor/adaptor/mxnet_utils/util.py b/neural_compressor/adaptor/mxnet_utils/util.py index d5cda8e0b15..58e58e6c903 100644 --- a/neural_compressor/adaptor/mxnet_utils/util.py +++ b/neural_compressor/adaptor/mxnet_utils/util.py @@ -1,4 +1,4 @@ -"""mxnet util module.""" +"""Mxnet util module.""" #!/usr/bin/env python # -*- coding: utf-8 -*- # @@ -16,31 +16,33 @@ # See the License for the specific language governing permissions and # limitations under the License. +import ctypes +import json import os import re -import json -import ctypes -import numpy as np - from enum import Enum from tempfile import TemporaryDirectory -from neural_compressor.utils.utility import LazyImport + +import numpy as np + from neural_compressor.model.mxnet_model import MXNetModel as NCModel +from neural_compressor.utils.utility import LazyImport mx = LazyImport("mxnet") -QUANTIZE_OP_NAME = 'quantize_output' -QUANTIZE_OP_NAMES = ['_contrib_quantize_v2'] -QUANTIZE_DEFAULT_ALGORITHM = 'minmax' -QUANTIZE_NODE_POSTFIX = '_quantize' -QUANTIZED_NODE_PREFIX = 'quantized_' +QUANTIZE_OP_NAME = "quantize_output" +QUANTIZE_OP_NAMES = ["_contrib_quantize_v2"] +QUANTIZE_DEFAULT_ALGORITHM = "minmax" +QUANTIZE_NODE_POSTFIX = "_quantize" +QUANTIZED_NODE_PREFIX = "quantized_" QUANTIZATION_DTYPES = [np.int8, np.uint8] -NULL_OP_NAMES = ['null'] +NULL_OP_NAMES = ["null"] class OpType(Enum): """Enum op types.""" + NORMAL = 0 QUANTIZE = 1 QUANTIZED = 2 @@ -83,8 +85,8 @@ def check_mx_version(version): Returns: boolean: True if mx.__version__ >= version, else False. """ - d1 = re.split(r'\.', mx.__version__) - d2 = re.split(r'\.', version) + d1 = re.split(r"\.", mx.__version__) + d2 = re.split(r"\.", version) d1 = [int(d1[i]) for i in range(len(d1))] d2 = [int(d2[i]) for i in range(len(d2))] return d1 >= d2 @@ -145,7 +147,7 @@ def fuse(sym_model, ctx): symnet, args, auxs = sym_model framework = get_framework_name(ctx) if framework is not None: - if check_mx_version('2.0.0'): + if check_mx_version("2.0.0"): symnet = symnet.optimize_for(framework) else: symnet = symnet.get_backend_symbol(framework) @@ -161,11 +163,11 @@ def get_framework_name(ctx): Returns: str: framework name. 
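check_mx_version above compares dotted version strings by turning them into integer lists, so plain list comparison orders "1.10" after "1.9" correctly. A hypothetical helper with the same idea:

    import re

    def check_version_gte(installed: str, required: str) -> bool:
        # Same idea as check_mx_version(): compare [major, minor, patch] as ints.
        d1 = [int(x) for x in re.split(r"\.", installed)]
        d2 = [int(x) for x in re.split(r"\.", required)]
        return d1 >= d2

    print(check_version_gte("1.9.1", "1.6.0"))   # True
    print(check_version_gte("1.10.0", "1.9.0"))  # True: 10 > 9 as an int, not as a string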
""" - if 'cpu' in ctx.device_type: - if check_mx_version('2.0.0'): - return 'ONEDNN_QUANTIZE' + if "cpu" in ctx.device_type: + if check_mx_version("2.0.0"): + return "ONEDNN_QUANTIZE" else: - return 'MKLDNN_QUANTIZE' + return "MKLDNN_QUANTIZE" return None @@ -203,13 +205,13 @@ def prepare_model(nc_model, ctx, input_desc): model_x.hybridize(static_alloc=False, static_shape=False) model_x(*create_data_example(ctx, input_desc)) with TemporaryDirectory() as tmpdirname: - prefix = os.path.join(tmpdirname, 'tmp') + prefix = os.path.join(tmpdirname, "tmp") model_x.export(prefix, epoch=0, remove_amp_cast=False) sym_model = mx.model.load_checkpoint(prefix, 0) elif isinstance(model_x, tuple) and isinstance(model_x[0], mx.symbol.Symbol): sym_model = model_x else: - raise TypeError('Wrong model type') + raise TypeError("Wrong model type") if not is_model_quantized(sym_model): sym_model = fuse(sym_model, ctx) return _match_array_semantics(sym_model) @@ -240,7 +242,7 @@ def _match_array_semantics(sym_model): Returns: tuple: symbol model (symnet, args, auxs). """ - if check_mx_version('2.0.0') and mx.util.is_np_array(): + if check_mx_version("2.0.0") and mx.util.is_np_array(): symnet, args, auxs = sym_model symnet = symnet.as_np_ndarray() for k, v in args.items(): @@ -270,7 +272,7 @@ def prepare_dataloader(nc_model, ctx, data_x): dataloader = data_x if isinstance(dataloader, mx.io.DataIter): dataloader = DataIterLoader(dataloader) - assert isiterable(dataloader), 'Dataloader must be iterable (mx.gluon.data.DataLoader-like)' + assert isiterable(dataloader), "Dataloader must be iterable (mx.gluon.data.DataLoader-like)" model_x = nc_model.model if isinstance(model_x, mx.gluon.HybridBlock): @@ -286,14 +288,14 @@ def prepare_dataloader(nc_model, ctx, data_x): else: break inputs, _ = model_x._cached_graph - input_desc = [mx.io.DataDesc(name=i.name, shape=d.shape, dtype=d.dtype) - for i, d in zip(inputs, data)] + input_desc = [mx.io.DataDesc(name=i.name, shape=d.shape, dtype=d.dtype) for i, d in zip(inputs, data)] elif isinstance(model_x, tuple) and isinstance(model_x[0], mx.symbol.Symbol): - assert hasattr(data_x, 'provide_data'), \ - 'Dataloader must provide data information (mx.io.DataDesc for each input)' + assert hasattr( + data_x, "provide_data" + ), "Dataloader must provide data information (mx.io.DataDesc for each input)" input_desc = data_x.provide_data else: - raise TypeError('Wrong model type') + raise TypeError("Wrong model type") return DataLoaderWrap(dataloader, input_desc) @@ -344,13 +346,15 @@ def _dequantize_sym_name(sym_name, check_list=None): op_type = OpType.NORMAL if sym_name.endswith(QUANTIZE_NODE_POSTFIX): op_type = OpType.QUANTIZE - name = sym_name[:-len(QUANTIZE_NODE_POSTFIX)] + name = sym_name[: -len(QUANTIZE_NODE_POSTFIX)] elif sym_name.startswith(QUANTIZED_NODE_PREFIX): op_type = OpType.QUANTIZED - name = sym_name[len(QUANTIZED_NODE_PREFIX):] - assert check_list is None or name in check_list, \ - 'name of the quantized symbol must be in the following format: ' \ - '"{}_". Symbol: {}'.format(QUANTIZED_NODE_PREFIX, name) + name = sym_name[len(QUANTIZED_NODE_PREFIX) :] + assert ( + check_list is None or name in check_list + ), "name of the quantized symbol must be in the following format: " '"{}_". 
Symbol: {}'.format( + QUANTIZED_NODE_PREFIX, name + ) return (name, op_type) @@ -368,12 +372,11 @@ def query_quantizable_nodes(sym_model, ctx, dataloader): assert isinstance(dataloader, DataLoaderWrap) symnet = sym_model[0] - nodes_ops = {n['name']: n['op'].lower() for n in json.loads(symnet.tojson())['nodes']} + nodes_ops = {n["name"]: n["op"].lower() for n in json.loads(symnet.tojson())["nodes"]} - qmodel, calib_tensors = quantize_sym_model(sym_model, ctx, {'quantized_dtype': 'auto', - 'quantize_mode': 'smart'}) + qmodel, calib_tensors = quantize_sym_model(sym_model, ctx, {"quantized_dtype": "auto", "quantize_mode": "smart"}) qsymnet = qmodel[0] - qnodes_ops = {n['name']: n['op'].lower() for n in json.loads(qsymnet.tojson())['nodes']} + qnodes_ops = {n["name"]: n["op"].lower() for n in json.loads(qsymnet.tojson())["nodes"]} # collect fp32 tensors collector = NameCollector() @@ -385,7 +388,7 @@ def query_quantizable_nodes(sym_model, ctx, dataloader): nodes = set(nodes_ops.keys()) for tensor in tensors: node = _tensor_to_node(tensor, nodes) - if node != '': + if node != "": tensor_to_node[tensor] = node elif tensor in calib_tensors: tensor_to_node[tensor] = tensor @@ -398,16 +401,17 @@ def query_quantizable_nodes(sym_model, ctx, dataloader): continue sym_name, op_type = _dequantize_sym_name(qsym.name, nodes_ops.keys()) node_name = _tensor_to_node(sym_name, nodes_ops.keys()) - node_name = sym_name if node_name == '' else node_name - assert qnodes_ops[qsym.name] not in QUANTIZE_OP_NAMES or (op_type == OpType.QUANTIZE), \ - 'Quantize node was not recognised properly. Node name: "{}"'.format(node_name) + node_name = sym_name if node_name == "" else node_name + assert qnodes_ops[qsym.name] not in QUANTIZE_OP_NAMES or ( + op_type == OpType.QUANTIZE + ), 'Quantize node was not recognised properly. 
Node name: "{}"'.format(node_name) if node_name in calib_nodes: if op_type == OpType.QUANTIZE: quantizable[node_name] = QUANTIZE_OP_NAME elif op_type == OpType.QUANTIZED: quantizable[node_name] = nodes_ops[node_name] - quantizable_nodes = [{'name': name, 'type': op} for (name, op) in quantizable.items()] + quantizable_nodes = [{"name": name, "type": op} for (name, op) in quantizable.items()] op_nodes = {k: v for k, v in nodes_ops.items() if v not in NULL_OP_NAMES} return quantizable_nodes, tensor_to_node, op_nodes @@ -425,15 +429,15 @@ def quantize_sym_model(sym_model, ctx, qconfig): assert isinstance(sym_model, tuple) and isinstance(sym_model[0], mx.symbol.Symbol) symnet, args, auxs = sym_model - if not check_mx_version('1.7.0'): - qconfig.pop('quantize_granularity', None) + if not check_mx_version("1.7.0"): + qconfig.pop("quantize_granularity", None) - arguments = {'sym': symnet, 'offline_params': list(args.keys())} + arguments = {"sym": symnet, "offline_params": list(args.keys())} arguments.update(qconfig) - if check_mx_version('2.0.0'): - arguments['device'] = ctx + if check_mx_version("2.0.0"): + arguments["device"] = ctx else: - arguments['ctx'] = ctx + arguments["ctx"] = ctx qsymnet, calib_tensors = mx.contrib.quantization._quantize_symbol(**arguments) # args = mx.contrib.quantization._quantize_params(qsymnet, args, {}) return ((qsymnet, args, auxs), calib_tensors) @@ -446,17 +450,16 @@ def _tensor_to_node(tensor, nodes): """ assert len(nodes) > 0, "`nodes` cannot be empty" - PATTERNS = {'', - '_output[0-9]*$', - '_[0-9]+$'} + PATTERNS = {"", "_output[0-9]*$", "_[0-9]+$"} mapping = [] for pattern in PATTERNS: - node = re.sub(pattern, '', tensor) + node = re.sub(pattern, "", tensor) if node in nodes and node not in mapping: mapping.append(node) - assert len(mapping) == 1, 'Tensor matched to more than one node. ' \ - 'Tensor: {}, matched: {}'.format(tensor, mapping) - return mapping[0] if len(mapping) > 0 else '' + assert len(mapping) == 1, "Tensor matched to more than one node. " "Tensor: {}, matched: {}".format( + tensor, mapping + ) + return mapping[0] if len(mapping) > 0 else "" def _qtensor_to_tensor(qtensor, tensors): @@ -468,27 +471,30 @@ def _qtensor_to_tensor(qtensor, tensors): """ assert len(tensors) > 0, "`tensors` cannot be empty" - PATTERNS = {'quantize': '', - '_quantize_output0': '', - '_quantize_0': '', - '_0_quantize_output0': '_output', - '_0_quantize_0': '_output', - '_([0-9]+)_quantize_output0': '_output\g<1>', - '_([0-9]+)_quantize_0': '_output\g<1>', - 'quantized_': ''} + PATTERNS = { + "quantize": "", + "_quantize_output0": "", + "_quantize_0": "", + "_0_quantize_output0": "_output", + "_0_quantize_0": "_output", + "_([0-9]+)_quantize_output0": "_output\g<1>", + "_([0-9]+)_quantize_0": "_output\g<1>", + "quantized_": "", + } mapping = [] for pattern, repl in PATTERNS.items(): tensor = re.sub(pattern, repl, qtensor) if tensor in tensors and tensor not in mapping: mapping.append(tensor) - assert len(mapping) == 1, \ - 'Quantized tensor matched more than one fp32 tensor. ' \ - 'Quantized tensor: {}, matched: {}'.format(qtensor, mapping) - return mapping[0] if len(mapping) > 0 else '' + assert ( + len(mapping) == 1 + ), "Quantized tensor matched more than one fp32 tensor. 
" "Quantized tensor: {}, matched: {}".format( + qtensor, mapping + ) + return mapping[0] if len(mapping) > 0 else "" -def run_forward(sym_model, ctx, dataloader, b_filter, - collector=None, pre_batch=None, post_batch=None): +def run_forward(sym_model, ctx, dataloader, b_filter, collector=None, pre_batch=None, post_batch=None): """Run forward propagation on the model. Args: @@ -503,10 +509,9 @@ def run_forward(sym_model, ctx, dataloader, b_filter, int: batch count. """ assert isinstance(dataloader, DataLoaderWrap) - assert collector is None or (hasattr(collector, 'collect_gluon') and - hasattr(collector, 'collect_module')) + assert collector is None or (hasattr(collector, "collect_gluon") and hasattr(collector, "collect_module")) - if check_mx_version('2.0.0'): + if check_mx_version("2.0.0"): sym_block = make_symbol_block(sym_model, ctx, dataloader.input_desc) if collector is not None: sym_block.register_op_hook(collector.collect_gluon, monitor_all=True) @@ -514,8 +519,7 @@ def run_forward(sym_model, ctx, dataloader, b_filter, else: mod = make_module(sym_model, ctx, dataloader.input_desc) if collector is not None: - mod._exec_group.execs[0].set_monitor_callback( - collector.collect_module, monitor_all=True) + mod._exec_group.execs[0].set_monitor_callback(collector.collect_module, monitor_all=True) return _module_forward(mod, dataloader, b_filter, pre_batch, post_batch) @@ -536,13 +540,12 @@ def make_symbol_block(sym_model, ctx, input_desc): sym_block = mx.gluon.SymbolBlock(symnet, inputs) param_dict = args param_dict.update(auxs) - if check_mx_version('2.0.0'): - sym_block.load_dict(param_dict, cast_dtype=True, dtype_source='saved', allow_missing=True) + if check_mx_version("2.0.0"): + sym_block.load_dict(param_dict, cast_dtype=True, dtype_source="saved", allow_missing=True) else: # params = {'arg:' + name: param for name, param in args.items()} # params.update({'aux:' + name: param for name, param in auxs.items()}) - sym_block.collect_params().load_dict(param_dict, ctx=ctx, cast_dtype=True, - dtype_source='saved') + sym_block.collect_params().load_dict(param_dict, ctx=ctx, cast_dtype=True, dtype_source="saved") return sym_block @@ -555,8 +558,8 @@ def _gluon_forward(net, ctx, dataloader, b_filter, pre_batch=None, post_batch=No batch_num += 1 batch = ensure_list(batch) batch = [ndarray_to_device(d, ctx) for d in batch] - data = batch[:len(dataloader.input_desc)] - label = batch[len(dataloader.input_desc):] + data = batch[: len(dataloader.input_desc)] + label = batch[len(dataloader.input_desc) :] if pre_batch is not None: pre_batch(net, (data, label)) @@ -581,10 +584,7 @@ def make_module(sym_model, ctx, input_desc): assert isinstance(sym_model, tuple) and isinstance(sym_model[0], mx.symbol.Symbol) symnet, args, auxs = sym_model - mod = mx.module.module.Module(symbol=symnet, - data_names=[d.name for d in input_desc], - label_names=None, - context=ctx) + mod = mx.module.module.Module(symbol=symnet, data_names=[d.name for d in input_desc], label_names=None, context=ctx) mod.bind(input_desc, for_training=False) mod.set_params(args, auxs, allow_missing=True) return mod @@ -597,8 +597,8 @@ def _module_forward(module, dataloader, b_filter, pre_batch=None, post_batch=Non if not run: continue batch_num += 1 - data = batch[:len(dataloader.input_desc)] - label = batch[len(dataloader.input_desc):] + data = batch[: len(dataloader.input_desc)] + label = batch[len(dataloader.input_desc) :] if pre_batch is not None: pre_batch(module, (data, label)) @@ -627,35 +627,34 @@ def parse_tune_config(tune_cfg, 
quantizable_nodes): amp_excluded_nodes = set() for op in quantizable_nodes: - cfg = tune_cfg['op'][(op['name'], op['type'])]['activation'] - if cfg['dtype'] not in ['bf16']: - amp_excluded_nodes.add(op['name']) - if cfg['dtype'] not in ['int8']: - excluded_symbols.append(op['name']) + cfg = tune_cfg["op"][(op["name"], op["type"])]["activation"] + if cfg["dtype"] not in ["bf16"]: + amp_excluded_nodes.add(op["name"]) + if cfg["dtype"] not in ["int8"]: + excluded_symbols.append(op["name"]) # config for quantize node, that might be added after this node # (to quantize its output) - cfg['algorithm'] = QUANTIZE_DEFAULT_ALGORITHM + cfg["algorithm"] = QUANTIZE_DEFAULT_ALGORITHM - if cfg['algorithm'] == 'kl': - calib_kl_nodes.add(op['name']) + if cfg["algorithm"] == "kl": + calib_kl_nodes.add(op["name"]) else: - calib_minmax_nodes.add(op['name']) + calib_minmax_nodes.add(op["name"]) assert len(calib_kl_nodes & calib_minmax_nodes) == 0 - quant_cfg = {'excluded_symbols': excluded_symbols, - 'quantized_dtype': 'auto', - 'quantize_mode': 'smart'} - if check_mx_version('1.7.0'): - quant_cfg['quantize_granularity'] = 'tensor-wise' + quant_cfg = {"excluded_symbols": excluded_symbols, "quantized_dtype": "auto", "quantize_mode": "smart"} + if check_mx_version("1.7.0"): + quant_cfg["quantize_granularity"] = "tensor-wise" - calib_cfg = {'quantized_dtype': quant_cfg['quantized_dtype'], - 'batches': tune_cfg['calib_iteration'], - 'calib_mode': 'naive', - 'calib_kl_nodes': calib_kl_nodes, - 'calib_minmax_nodes': calib_minmax_nodes} + calib_cfg = { + "quantized_dtype": quant_cfg["quantized_dtype"], + "batches": tune_cfg["calib_iteration"], + "calib_mode": "naive", + "calib_kl_nodes": calib_kl_nodes, + "calib_minmax_nodes": calib_minmax_nodes, + } - amp_cfg = {'target_dtype': 'bfloat16', - 'excluded_sym_names': amp_excluded_nodes} + amp_cfg = {"target_dtype": "bfloat16", "excluded_sym_names": amp_excluded_nodes} return quant_cfg, calib_cfg, amp_cfg @@ -675,17 +674,20 @@ def distribute_calib_tensors(calib_tensors, calib_cfg, tensor_to_node): kl_tensors = {} minmax_tensors = {} for cl in calib_tensors: - assert cl in tensor_to_node, '`calib_tensors` entry matched no node. Entry: {}'.format(cl) + assert cl in tensor_to_node, "`calib_tensors` entry matched no node. Entry: {}".format(cl) node = tensor_to_node[cl] - if node in calib_cfg['calib_kl_nodes']: + if node in calib_cfg["calib_kl_nodes"]: kl_tensors[cl] = node - if node in calib_cfg['calib_minmax_nodes']: + if node in calib_cfg["calib_minmax_nodes"]: minmax_tensors[cl] = node kl_tensors = set(kl_tensors.keys()) minmax_tensors = set(minmax_tensors.keys()) - assert len(kl_tensors & minmax_tensors) == 0, 'same `calib_tensors` entries matched both kl ' \ - 'and minmax nodes. Entries: {}'.format(kl_tensors & minmax_tensors) + assert ( + len(kl_tensors & minmax_tensors) == 0 + ), "same `calib_tensors` entries matched both kl " "and minmax nodes. 
Entries: {}".format( + kl_tensors & minmax_tensors + ) # `rest` are the nodes that require callibration because of some node being excluded # for example: input -> quantize -> conv_1 -> pooling -> conv_2 @@ -693,7 +695,7 @@ def distribute_calib_tensors(calib_tensors, calib_cfg, tensor_to_node): # when conv_1 is excluded, pooling output requires callibration (as it is input of a quantized # node): input -> conv_1 -> pooling -> quantize -> conv_2 rest = calib_tensors - (kl_tensors | minmax_tensors) - minmax_tensors |= rest # assign them to the minmax algorithm by default + minmax_tensors |= rest # assign them to the minmax algorithm by default return (kl_tensors, minmax_tensors) @@ -712,19 +714,19 @@ def calib_model(qsym_model, calib_data, calib_cfg): assert isinstance(qsym_model, tuple) and isinstance(qsym_model[0], mx.symbol.Symbol) qsymnet, qargs, auxs = qsym_model - if check_mx_version('2.0.0'): - return mx.contrib.quantization.calib_graph( - qsymnet, qargs, auxs, calib_data, calib_cfg['calib_mode']) + if check_mx_version("2.0.0"): + return mx.contrib.quantization.calib_graph(qsymnet, qargs, auxs, calib_data, calib_cfg["calib_mode"]) else: return mx.contrib.quantization.calib_graph( - qsymnet, qargs, auxs, calib_data, calib_cfg['calib_mode'], - quantized_dtype=calib_cfg['quantized_dtype']) + qsymnet, qargs, auxs, calib_data, calib_cfg["calib_mode"], quantized_dtype=calib_cfg["quantized_dtype"] + ) def amp_convert(sym_model, input_desc, amp_cfg): """Convert model to support amp.""" - assert check_mx_version('2.0.0'), 'AMP is supported since MXNet 2.0. This error is due to ' \ - 'an error in the configuration file.' + assert check_mx_version("2.0.0"), ( + "AMP is supported since MXNet 2.0. This error is due to " "an error in the configuration file." 
+ ) from mxnet import amp input_dtypes = {i.name: i.dtype for i in input_desc} @@ -733,6 +735,7 @@ def amp_convert(sym_model, input_desc, amp_cfg): class DataLoaderWrap: """DataLoader Wrap.""" + def __init__(self, dataloader, input_desc): """Initialize.""" self.dataloader = dataloader @@ -751,6 +754,7 @@ def __next__(self): class DataIterLoader: """DataIterLoader.""" + def __init__(self, data_iter): """Initialize.""" self.data_iter = data_iter @@ -768,6 +772,7 @@ def __next__(self): class CollectorBase: """Collector Base class.""" + def collect_gluon(self, name, _, arr): """Collect by gluon api.""" raise NotImplementedError() @@ -777,7 +782,7 @@ def collect_module(self, name, arr): name = mx.base.py_str(name) handle = ctypes.cast(arr, mx.base.NDArrayHandle) arr = mx.nd.NDArray(handle, writable=False) - self.collect_gluon(name, '', arr) + self.collect_gluon(name, "", arr) def pre_batch(self, m, b): """Function to call prior to batch inference.""" @@ -790,6 +795,7 @@ def post_batch(self, m, b, o): class CalibCollector(CollectorBase): """Collect the calibration thresholds depending on the algorithm set.""" + def __init__(self, include_tensors_kl, include_tensors_minmax, num_bins=8001): """Initialize.""" self.min_max_dict = {} @@ -801,9 +807,9 @@ def __init__(self, include_tensors_kl, include_tensors_minmax, num_bins=8001): def collect_gluon(self, name, _, arr): """Collect by gluon api.""" if name in self.include_tensors_kl: - alg = 'kl' + alg = "kl" elif name in self.include_tensors_minmax: - alg = 'minmax' + alg = "minmax" else: return @@ -813,16 +819,14 @@ def collect_gluon(self, name, _, arr): # minmax (always) if name in self.min_max_dict: cur_min_max = self.min_max_dict[name] - self.min_max_dict[name] = (min(cur_min_max[0], min_range), - max(cur_min_max[1], max_range)) + self.min_max_dict[name] = (min(cur_min_max[0], min_range), max(cur_min_max[1], max_range)) else: self.min_max_dict[name] = (min_range, max_range) - if alg == 'kl': # histogram only when kl is specified + if alg == "kl": # histogram only when kl is specified arr = arr.asnumpy() if name in self.hist_dict: - self.hist_dict[name] = self._combine_histogram(self.hist_dict[name], arr, - min_range, max_range, th) + self.hist_dict[name] = self._combine_histogram(self.hist_dict[name], arr, min_range, max_range, th) else: hist, hist_edges = np.histogram(arr, bins=self.num_bins, range=(-th, th)) self.hist_dict[name] = (hist, hist_edges, min_range, max_range, th) @@ -830,27 +834,31 @@ def collect_gluon(self, name, _, arr): @staticmethod def _combine_histogram(old_hist, arr, new_min, new_max, new_th): """Combine histogram.""" - if check_mx_version('2.0.0'): + if check_mx_version("2.0.0"): return mx.contrib.quantization._LayerHistogramCollector.combine_histogram( - old_hist, arr, new_min, new_max, new_th) + old_hist, arr, new_min, new_max, new_th + ) else: - return mx.contrib.quantization.combine_histogram(old_hist, arr, new_min, - new_max, new_th) + return mx.contrib.quantization.combine_histogram(old_hist, arr, new_min, new_max, new_th) def calc_kl_th_dict(self, quantized_dtype): """Calculation kl thresholds.""" if len(self.hist_dict) > 0: - if check_mx_version('2.0.0'): + if check_mx_version("2.0.0"): return mx.contrib.quantization._LayerHistogramCollector.get_optimal_thresholds( - self.hist_dict, quantized_dtype) + self.hist_dict, quantized_dtype + ) else: - return mx.contrib.quantization._get_optimal_thresholds( - self.hist_dict, quantized_dtype) + return mx.contrib.quantization._get_optimal_thresholds(self.hist_dict, 
quantized_dtype) return {} class TensorCollector(CollectorBase): - """Tensors collector. Builds up qtensor_to_tensor mapping.""" + """Tensors collector. + + Builds up qtensor_to_tensor mapping. + """ + def __init__(self, include_nodes, qtensor_to_tensor, tensor_to_node): """Initialize.""" self.tensors_dicts = [] @@ -859,7 +867,7 @@ def __init__(self, include_nodes, qtensor_to_tensor, tensor_to_node): self.tensor_to_node = tensor_to_node rest = set(self.include_nodes) - set(self.tensor_to_node.values()) - assert len(rest) == 0, 'Unexpected tensors set to be collected: {}'.format(rest) + assert len(rest) == 0, "Unexpected tensors set to be collected: {}".format(rest) def collect_gluon(self, name, _, arr): """Collect by gluon api.""" @@ -870,7 +878,7 @@ def collect_gluon(self, name, _, arr): else: qname, name = name, _qtensor_to_tensor(name, self.tensor_to_node) self.qtensor_to_tensor[qname] = name - if name == '': + if name == "": return is_quantized = arr.dtype in QUANTIZATION_DTYPES @@ -885,6 +893,7 @@ def pre_batch(self, m, b): class NameCollector(CollectorBase): """Name collector.""" + def __init__(self): """Initialize.""" self.names = [] @@ -896,6 +905,7 @@ def collect_gluon(self, name, _, arr): class CalibData: """Calibration data class.""" + def __init__(self, cache_kl={}, cache_minmax={}, tensors_kl=[], tensors_minmax=[]): """Initialize.""" self.th_dict = {} diff --git a/neural_compressor/adaptor/onnxrt.py b/neural_compressor/adaptor/onnxrt.py index e28f7213272..6640aabaaaf 100644 --- a/neural_compressor/adaptor/onnxrt.py +++ b/neural_compressor/adaptor/onnxrt.py @@ -16,28 +16,26 @@ # limitations under the License. # pylint: disable=no-member -import os import copy import logging +import math +import os +import re +import sys from collections import OrderedDict from collections.abc import KeysView -import yaml +from importlib.util import find_spec +from typing import Dict + import numpy as np +import yaml from packaging.version import Version -from importlib.util import find_spec -from neural_compressor.adaptor.adaptor import adaptor_registry, Adaptor + +from neural_compressor.adaptor.adaptor import Adaptor, adaptor_registry +from neural_compressor.adaptor.ox_utils.util import ONNXRT_BACKENDS, PROVIDERS, to_numpy from neural_compressor.adaptor.query import QueryBackendCapability -from neural_compressor.adaptor.ox_utils.util import PROVIDERS, ONNXRT_BACKENDS -from neural_compressor.utils.utility import LazyImport, dump_elapsed_time, \ - GLOBAL_STATE, MODE -from neural_compressor.utils.utility import Statistics from neural_compressor.data.dataloaders.base_dataloader import BaseDataLoader -from neural_compressor.utils.utility import CpuInfo -from neural_compressor.adaptor.ox_utils.util import to_numpy -import math -import sys -import re -from typing import Dict +from neural_compressor.utils.utility import GLOBAL_STATE, MODE, CpuInfo, LazyImport, Statistics, dump_elapsed_time onnx = LazyImport("onnx") ort = LazyImport("onnxruntime") @@ -47,6 +45,7 @@ logger = logging.getLogger("neural_compressor") + @adaptor_registry class ONNXRUNTIMEAdaptor(Adaptor): """The ONNXRT adaptor layer, do onnx-rt quantization, calibration, inspect layer tensors. 
@@ -67,82 +66,80 @@ def __init__(self, framework_specific_info): self._check_backend_available(framework_specific_info["backend"]) self.backend = PROVIDERS[framework_specific_info["backend"]] self.performance_only = framework_specific_info.get("performance_only", False) - self.use_bf16 = framework_specific_info.get("use_bf16", False) and \ - self.backend in ort.get_available_providers() + self.use_bf16 = framework_specific_info.get("use_bf16", False) and self.backend in ort.get_available_providers() self.use_fp16 = framework_specific_info.get("use_fp16", False) # get quantization format according to framework_specific_info - if (not self.dynamic and "format" in framework_specific_info and \ - framework_specific_info["format"].lower() == 'qdq') or \ - self.backend == 'TensorrtExecutionProvider': + if ( + not self.dynamic + and "format" in framework_specific_info + and framework_specific_info["format"].lower() == "qdq" + ) or self.backend == "TensorrtExecutionProvider": self.format = "qdq" else: if not self.dynamic: self.format = "qlinearops" else: self.format = "integerops" - if "format" in framework_specific_info and \ - framework_specific_info["format"].lower() == 'qdq': + if "format" in framework_specific_info and framework_specific_info["format"].lower() == "qdq": logger.warning("Dynamic approach doesn't support QDQ format.") - + # get quantization config file according to backend config_file = None - if self.backend == 'CPUExecutionProvider': - config_file = 'onnxrt.yaml' - elif self.backend == 'TensorrtExecutionProvider': - config_file = 'onnxrt_trt.yaml' - elif self.backend == 'CUDAExecutionProvider': - config_file = 'onnxrt_cuda.yaml' - elif self.backend == 'DnnlExecutionProvider': - config_file = 'onnxrt_dnnl.yaml' - elif self.backend == 'DmlExecutionProvider': - config_file = 'onnxrt_dml.yaml' - else: # pragma: no cover - assert False, "{} provider is not supported in current environment, " \ - "supported providers: {}".format(self.backend, - [provider for provider in PROVIDERS.values()]) + if self.backend == "CPUExecutionProvider": + config_file = "onnxrt.yaml" + elif self.backend == "TensorrtExecutionProvider": + config_file = "onnxrt_trt.yaml" + elif self.backend == "CUDAExecutionProvider": + config_file = "onnxrt_cuda.yaml" + elif self.backend == "DnnlExecutionProvider": + config_file = "onnxrt_dnnl.yaml" + elif self.backend == "DmlExecutionProvider": + config_file = "onnxrt_dml.yaml" + else: # pragma: no cover + assert False, "{} provider is not supported in current environment, " "supported providers: {}".format( + self.backend, [provider for provider in PROVIDERS.values()] + ) self.query_handler_ext = None - if framework_specific_info["approach"] == 'post_training_auto_quant' and \ - self.format != "integerops": - # if approach is post_training_auto_quant, + if framework_specific_info["approach"] == "post_training_auto_quant" and self.format != "integerops": + # if approach is post_training_auto_quant, # both static and dynamic quantization will be performed self.query_handler = ONNXRTQuery( - static=True, - format=self.format, - local_config_file=os.path.join(os.path.dirname(__file__), config_file)) + static=True, format=self.format, local_config_file=os.path.join(os.path.dirname(__file__), config_file) + ) self.query_handler_ext = ONNXRTQuery( - dynamic=True, - format=self.format, - local_config_file=os.path.join(os.path.dirname(__file__), config_file)) + dynamic=True, format=self.format, local_config_file=os.path.join(os.path.dirname(__file__), config_file) + ) else: 
self.query_handler = ONNXRTQuery( - dynamic=self.dynamic, - static=self.static, + dynamic=self.dynamic, + static=self.static, format=self.format, - local_config_file=os.path.join(os.path.dirname(__file__), config_file)) + local_config_file=os.path.join(os.path.dirname(__file__), config_file), + ) self.work_space = framework_specific_info["workspace_path"] - self.reduce_range = framework_specific_info["reduce_range"] if \ - "reduce_range" in framework_specific_info else not CpuInfo().vnni - self.benchmark = (GLOBAL_STATE.STATE == MODE.BENCHMARK) + self.reduce_range = ( + framework_specific_info["reduce_range"] if "reduce_range" in framework_specific_info else not CpuInfo().vnni + ) + self.benchmark = GLOBAL_STATE.STATE == MODE.BENCHMARK os.makedirs(self.work_space, exist_ok=True) self.pre_optimized_model = None self.smooth_quant_model = None self.quantizable_op_types = [] for precision in self.query_handler.get_precisions(): - if precision != 'fp32': - if self.device == 'cpu' and precision == 'fp16': + if precision != "fp32": + if self.device == "cpu" and precision == "fp16": continue - self.quantizable_op_types += \ - self.query_handler.get_op_types_by_precision(precision=precision) - - if self.backend == 'TensorrtExecutionProvider': - self.recipes['add_qdq_pair_to_weight'] = True - self.recipes['dedicated_qdq_pair'] = True - self.recipes['graph_optimization_level'] = 'DISABLE_ALL' - self.recipes['optypes_to_exclude_output_quant'] = ['Conv', 'Gemm', 'Add', 'MatMul'] + self.quantizable_op_types += self.query_handler.get_op_types_by_precision(precision=precision) + + if self.backend == "TensorrtExecutionProvider": + self.recipes["add_qdq_pair_to_weight"] = True + self.recipes["dedicated_qdq_pair"] = True + self.recipes["graph_optimization_level"] = "DISABLE_ALL" + self.recipes["optypes_to_exclude_output_quant"] = ["Conv", "Gemm", "Add", "MatMul"] self.static = True self.dynamic = False @@ -150,8 +147,8 @@ def __init__(self, framework_specific_info): self.fp32_results = [] self.fp32_preds_as_label = False - self.quantize_config = {} # adaptor should know current configs at any time - self.quantize_params = {} # adaptor should know current params at any time + self.quantize_config = {} # adaptor should know current configs at any time + self.quantize_params = {} # adaptor should know current params at any time self.min_max = None self.optype_statistics = None @@ -160,9 +157,18 @@ def __init__(self, framework_specific_info): self.sq = None self.cur_sq_args = {} - def smooth_quant(self, model, dataloader, iterations, alpha=0.5, folding=True, - percentile=99.999, op_types=['MatMul', 'Gemm', 'Conv', 'FusedConv'], - scales_per_op=True, record_max_info=False): + def smooth_quant( + self, + model, + dataloader, + iterations, + alpha=0.5, + folding=True, + percentile=99.999, + op_types=["MatMul", "Gemm", "Conv", "FusedConv"], + scales_per_op=True, + record_max_info=False, + ): """Get augmented model with smooth quant. 
Args: @@ -186,13 +192,13 @@ def smooth_quant(self, model, dataloader, iterations, alpha=0.5, folding=True, from .ox_utils.smooth_quant import ORTSmoothQuant # set params to cur_sq_args - self.cur_sq_args['alpha'] = alpha - self.cur_sq_args['folding'] = folding - self.cur_sq_args['percentile'] = percentile - self.cur_sq_args['op_types'] = op_types - self.cur_sq_args['scales_per_op'] = scales_per_op - self.cur_sq_args['calib_iter'] = iterations - + self.cur_sq_args["alpha"] = alpha + self.cur_sq_args["folding"] = folding + self.cur_sq_args["percentile"] = percentile + self.cur_sq_args["op_types"] = op_types + self.cur_sq_args["scales_per_op"] = scales_per_op + self.cur_sq_args["calib_iter"] = iterations + # pre-optimization self._pre_optimize(model) @@ -204,15 +210,17 @@ def smooth_quant(self, model, dataloader, iterations, alpha=0.5, folding=True, # TODO double-check the smooth_quant_model and pre_optimized_model to make sure there no two fp32 model replicas self.pre_optimized_model = self.smooth_quant_model return self.smooth_quant_model - + def _need_smooth_quant(self, tune_cfg) -> bool: """Check the model needs smooth quant or not.""" - recipe_cfgs = tune_cfg.get('recipe_cfgs', None) - if recipe_cfgs and recipe_cfgs.get('smooth_quant', False) \ - and recipe_cfgs['smooth_quant_args'].get('alpha', None): + recipe_cfgs = tune_cfg.get("recipe_cfgs", None) + if ( + recipe_cfgs + and recipe_cfgs.get("smooth_quant", False) + and recipe_cfgs["smooth_quant_args"].get("alpha", None) + ): # update alpha according to tune_cfg - self.cur_sq_args['alpha'] = \ - tune_cfg['recipe_cfgs']['smooth_quant_args']['alpha'] + self.cur_sq_args["alpha"] = tune_cfg["recipe_cfgs"]["smooth_quant_args"]["alpha"] return True else: return False @@ -231,35 +239,40 @@ def quantize(self, tune_cfg, model, data_loader, q_func=None): Returns: (dict): quantized model - """ + """ assert q_func is None, "quantization aware training has not been supported on ONNXRUNTIME" if self.smooth_quant_model is not None and model.is_smoothquant_model(): model = self.smooth_quant_model elif self.pre_optimized_model is not None: model = self.pre_optimized_model ort_version = Version(ort.__version__) - if ort_version < ONNXRT152_VERSION: # pragma: no cover + if ort_version < ONNXRT152_VERSION: # pragma: no cover logger.warning("Quantize input needs onnxruntime 1.5.2 or newer.") return model - if model.model.opset_import[0].version < 11: # pragma: no cover + if model.model.opset_import[0].version < 11: # pragma: no cover logger.warning("Quantize input needs model opset 11 or newer.") - if self.backend == 'DnnlExecutionProvider' and \ - any([i.domain in ['', 'ai.onnx'] and \ - i.version < 15 for i in model.model.opset_import]): # pragma: no cover + if self.backend == "DnnlExecutionProvider" and any( + [i.domain in ["", "ai.onnx"] and i.version < 15 for i in model.model.opset_import] + ): # pragma: no cover from onnx import version_converter + from neural_compressor.model.onnx_model import ONNXModel + try: model = self._rename_node(ONNXModel(version_converter.convert_version(model.model, 15))) except: - logging.warning("Fail to upgrade model opset_import to >= 15, "\ - "please upgrate it manually to run with bf16 data type") + logging.warning( + "Fail to upgrade model opset_import to >= 15, " + "please upgrate it manually to run with bf16 data type" + ) exit(0) - + from neural_compressor.adaptor.ox_utils.util import QuantizationMode + if self.format == "qlinearops": format = QuantizationMode.QLinearOps elif self.format == "qdq": - assert 
ort_version >= ONNXRT170_VERSION, 'QDQ mode needs onnxruntime1.7.0 or newer' + assert ort_version >= ONNXRT170_VERSION, "QDQ mode needs onnxruntime1.7.0 or newer" format = "qdq" else: format = QuantizationMode.IntegerOps @@ -273,30 +286,29 @@ def quantize(self, tune_cfg, model, data_loader, q_func=None): try: tmp_model = copy.deepcopy(model) except Exception as e: # pragma: no cover - logger.warning("Fail to deep copy the model due to {}, inplace is used now.".format( - repr(e))) + logger.warning("Fail to deep copy the model due to {}, inplace is used now.".format(repr(e))) tmp_model = model - + # smooth quant the model if needed if self._need_smooth_quant(tune_cfg) and not tmp_model.is_smoothquant_model(): self.sq.model = tmp_model self.sq.record_max_info = False tmp_model = self.sq.transform(**self.cur_sq_args) - iterations = tune_cfg.get('calib_iteration', 1) - calib_sampling_size = tune_cfg.get('calib_sampling_size', 1) + iterations = tune_cfg.get("calib_iteration", 1) + calib_sampling_size = tune_cfg.get("calib_sampling_size", 1) if not self.dynamic: calib_iterations = self._reset_calib_iter(data_loader, calib_sampling_size, iterations) - quantize_params = self._get_quantize_params(tmp_model, data_loader, \ - quantize_config, calib_iterations) + quantize_params = self._get_quantize_params(tmp_model, data_loader, quantize_config, calib_iterations) else: quantize_params = None self.quantize_params = quantize_params - from neural_compressor.adaptor.ox_utils.quantizer import Quantizer from neural_compressor import options + from neural_compressor.adaptor.ox_utils.quantizer import Quantizer - quantizer = Quantizer(tmp_model, + quantizer = Quantizer( + tmp_model, quantize_config, format, self.static, @@ -304,40 +316,42 @@ def quantize(self, tune_cfg, model, data_loader, q_func=None): self.quantizable_op_types, self.query_handler.get_fallback_list(), self.reduce_range, - options.onnxrt.qdq_setting.AddQDQPairToWeight if \ - 'add_qdq_pair_to_weight' not in self.recipes else \ - self.recipes.get('add_qdq_pair_to_weight', False), - options.onnxrt.qdq_setting.OpTypesToExcludeOutputQuantizatioin if \ - 'optypes_to_exclude_output_quant' not in self.recipes else \ - self.recipes.get('optypes_to_exclude_output_quant', []), - options.onnxrt.qdq_setting.DedicatedQDQPair if \ - 'dedicated_qdq_pair' not in self.recipes else \ - self.recipes.get('dedicated_qdq_pair', False), - self.backend) + options.onnxrt.qdq_setting.AddQDQPairToWeight + if "add_qdq_pair_to_weight" not in self.recipes + else self.recipes.get("add_qdq_pair_to_weight", False), + options.onnxrt.qdq_setting.OpTypesToExcludeOutputQuantizatioin + if "optypes_to_exclude_output_quant" not in self.recipes + else self.recipes.get("optypes_to_exclude_output_quant", []), + options.onnxrt.qdq_setting.DedicatedQDQPair + if "dedicated_qdq_pair" not in self.recipes + else self.recipes.get("dedicated_qdq_pair", False), + self.backend, + ) quantizer.quantize_model() tmp_model.q_config = self._generate_qconfig(model.model, tune_cfg, quantize_params) tmp_model.model = quantizer.model.model - self.quantize_config = quantize_config # update so other methods can know current configs + self.quantize_config = quantize_config # update so other methods can know current configs self._dump_model_op_stats(tmp_model) tmp_model.topological_sort() return tmp_model - + def _check_backend_available(self, backend): """Check backend is available or not.""" if backend not in PROVIDERS: - assert False, "'{}' backend is not supported, " \ - "supported backends include 
{}".format(backend, \ - [provider for provider in PROVIDERS.keys()]) - - if backend in ["onnxrt_trt_ep", "onnxrt_cuda_ep"] and \ - self.device != "gpu": + assert False, "'{}' backend is not supported, " "supported backends include {}".format( + backend, [provider for provider in PROVIDERS.keys()] + ) + + if backend in ["onnxrt_trt_ep", "onnxrt_cuda_ep"] and self.device != "gpu": logger.warning("Backend `{}` requires a GPU device. Reset device to 'gpu'.".format(backend)) self.device = "gpu" ep = PROVIDERS[backend] if ep not in ort.get_available_providers(): - logger.warning("Specified provider '{}' is not in available provider names. "\ - "Fallback to available providers: '{}'".format(ep, ", ".join(ort.get_available_providers()))) + logger.warning( + "Specified provider '{}' is not in available provider names. " + "Fallback to available providers: '{}'".format(ep, ", ".join(ort.get_available_providers())) + ) def _reset_calib_iter(self, data_loader, cfg_calib_sampling_size, cfg_calib_iter): """Check and reset calibration iterations according to calib_sampleing_size and dataloader batch_size.""" @@ -348,40 +362,42 @@ def _reset_calib_iter(self, data_loader, cfg_calib_sampling_size, cfg_calib_iter if cfg_calib_sampling_size % (batch_size - i) == 0: calib_batch_size = batch_size - i if i != 0: # pragma: no cover - logger.warning("Reset `calibration.dataloader.batch_size` field " - "to {}".format(calib_batch_size) + - " to make sure the sampling_size is " - "divisible exactly by batch size") + logger.warning( + "Reset `calibration.dataloader.batch_size` field " + "to {}".format(calib_batch_size) + " to make sure the sampling_size is " + "divisible exactly by batch size" + ) break tmp_iterations = int(math.ceil(cfg_calib_sampling_size / calib_batch_size)) data_loader.batch(calib_batch_size) calib_iterations = tmp_iterations except Exception as e: # pragma: no cover - if 'Got invalid dimensions for input' in str(e): - logger.warning("Please set sampling_size to a multiple of {}".format( - str(e).partition('Expected: ')[2].partition('\n')[0])) + if "Got invalid dimensions for input" in str(e): + logger.warning( + "Please set sampling_size to a multiple of {}".format( + str(e).partition("Expected: ")[2].partition("\n")[0] + ) + ) exit(0) - logger.warning( - "Fail to forward with batch size={}, set to {} now.". - format(batch_size, 1)) + logger.warning("Fail to forward with batch size={}, set to {} now.".format(batch_size, 1)) data_loader.batch(1) calib_iterations = cfg_calib_sampling_size else: # pragma: no cover - if hasattr(data_loader, 'batch_size') and \ - cfg_calib_sampling_size % data_loader.batch_size != 0: + if hasattr(data_loader, "batch_size") and cfg_calib_sampling_size % data_loader.batch_size != 0: logger.warning( - "Please note that calibration sampling size {} " \ - "isn't divisible exactly by batch size {}. " \ - "So the real sampling size is {}.". - format(cfg_calib_sampling_size, data_loader.batch_size, - data_loader.batch_size * cfg_calib_iter)) + "Please note that calibration sampling size {} " + "isn't divisible exactly by batch size {}. 
" + "So the real sampling size is {}.".format( + cfg_calib_sampling_size, data_loader.batch_size, data_loader.batch_size * cfg_calib_iter + ) + ) calib_iterations = cfg_calib_iter return calib_iterations def _generate_qconfig(self, model, tune_cfg, quantize_params): tune_cfg = copy.deepcopy(tune_cfg) for node in model.graph.node: - if (node.name, node.op_type) not in tune_cfg['op']: + if (node.name, node.op_type) not in tune_cfg["op"]: continue scale_info = {} if quantize_params: @@ -391,17 +407,16 @@ def _generate_qconfig(self, model, tune_cfg, quantize_params): for output_name in node.output: if output_name in quantize_params: scale_info[output_name] = quantize_params[output_name] - tune_cfg['op'][(node.name, node.op_type)]['scale_info'] = scale_info + tune_cfg["op"][(node.name, node.op_type)]["scale_info"] = scale_info fwk_info = {} - fwk_info['approach'] = "post_training_static_quant" if self.static else \ - "post_training_dynamic_quant" - fwk_info['format'] = self.format - fwk_info['backend'] = ONNXRT_BACKENDS[self.backend] - fwk_info['workspace_path'] = self.work_space - fwk_info['recipes'] = self.recipes - fwk_info['domain'] = self.domain - fwk_info['device'] = self.device - tune_cfg['framework_specific_info'] = fwk_info + fwk_info["approach"] = "post_training_static_quant" if self.static else "post_training_dynamic_quant" + fwk_info["format"] = self.format + fwk_info["backend"] = ONNXRT_BACKENDS[self.backend] + fwk_info["workspace_path"] = self.work_space + fwk_info["recipes"] = self.recipes + fwk_info["domain"] = self.domain + fwk_info["device"] = self.device + tune_cfg["framework_specific_info"] = fwk_info return tune_cfg @dump_elapsed_time("Pass recover model") @@ -418,26 +433,29 @@ def recover(self, model, q_config): self._pre_optimize(model) model = self.pre_optimized_model ort_version = Version(ort.__version__) - if ort_version < ONNXRT152_VERSION: # pragma: no cover + if ort_version < ONNXRT152_VERSION: # pragma: no cover logger.warning("Quantize input needs onnxruntime 1.5.2 or newer.") return model - if model.model.opset_import[0].version < 11: # pragma: no cover + if model.model.opset_import[0].version < 11: # pragma: no cover logger.warning("Quantize input needs model opset 11 or newer.") from neural_compressor.adaptor.ox_utils.util import QuantizationMode + if self.format in ["qlinearops"]: format = QuantizationMode.QLinearOps elif self.format == "qdq": - assert ort_version >= ONNXRT170_VERSION, 'QDQ mode needs onnxruntime1.7.0 or newer' + assert ort_version >= ONNXRT170_VERSION, "QDQ mode needs onnxruntime1.7.0 or newer" format = self.format else: format = QuantizationMode.IntegerOps - from neural_compressor.adaptor.ox_utils.quantizer import Quantizer from neural_compressor import options + from neural_compressor.adaptor.ox_utils.quantizer import Quantizer + self.quantizable_ops = self._query_quantizable_ops(model.model) quantize_params, tune_cfg = self._parse_qconfig(q_config) quantize_config = self._cfg_to_quantize_config(tune_cfg) - quantizer = Quantizer(model.model, + quantizer = Quantizer( + model.model, quantize_config, format, self.static, @@ -445,16 +463,17 @@ def recover(self, model, q_config): self.quantizable_op_types, self.query_handler.get_fallback_list(), self.reduce_range, - options.onnxrt.qdq_setting.AddQDQPairToWeight if \ - not options.onnxrt.qdq_setting.AddQDQPairToWeight else \ - self.recipes.get('add_qdq_pair_to_weight', False), - options.onnxrt.qdq_setting.OpTypesToExcludeOutputQuantizatioin if \ - 
options.onnxrt.qdq_setting.OpTypesToExcludeOutputQuantizatioin is not None else \ - self.recipes.get('optypes_to_exclude_output_quant', []), - options.onnxrt.qdq_setting.DedicatedQDQPair if \ - not options.onnxrt.qdq_setting.DedicatedQDQPair else \ - self.recipes.get('dedicated_qdq_pair', False)) - + options.onnxrt.qdq_setting.AddQDQPairToWeight + if not options.onnxrt.qdq_setting.AddQDQPairToWeight + else self.recipes.get("add_qdq_pair_to_weight", False), + options.onnxrt.qdq_setting.OpTypesToExcludeOutputQuantizatioin + if options.onnxrt.qdq_setting.OpTypesToExcludeOutputQuantizatioin is not None + else self.recipes.get("optypes_to_exclude_output_quant", []), + options.onnxrt.qdq_setting.DedicatedQDQPair + if not options.onnxrt.qdq_setting.DedicatedQDQPair + else self.recipes.get("dedicated_qdq_pair", False), + ) + quantizer.quantize_model() model.model = quantizer.model.model model.topological_sort() @@ -464,17 +483,17 @@ def _parse_qconfig(self, q_config): quantize_params = {} tune_cfg = {} for k, v in q_config.items(): - if k == 'op': - tune_cfg['op'] = {} + if k == "op": + tune_cfg["op"] = {} for op_name_type, op_info in v.items(): node_dict = {} for info_name, info_content in op_info.items(): - if info_name != 'scale_info': + if info_name != "scale_info": node_dict[info_name] = info_content else: for tensor_name, param in info_content.items(): quantize_params[tensor_name] = param - tune_cfg['op'][op_name_type] = node_dict + tune_cfg["op"][op_name_type] = node_dict else: tune_cfg[k] = v if len(quantize_params) == 0: @@ -484,21 +503,21 @@ def _parse_qconfig(self, q_config): def _dump_model_op_stats(self, model): fp32_op_list = [] for precision in self.query_handler.get_precisions(): - if precision != 'fp32': + if precision != "fp32": fp32_op_list += self.query_handler.get_op_types_by_precision(precision=precision) qdq_ops = ["QuantizeLinear", "DequantizeLinear", "DynamicQuantizeLinear"] res = {} for op_type in fp32_op_list: - res[op_type] = {'INT8':0, 'BF16': 0, 'FP16': 0, 'FP32':0} + res[op_type] = {"INT8": 0, "BF16": 0, "FP16": 0, "FP32": 0} for op_type in qdq_ops: - res[op_type] = {'INT8':0, 'BF16': 0, 'FP16': 0, 'FP32':0} + res[op_type] = {"INT8": 0, "BF16": 0, "FP16": 0, "FP32": 0} for node in model.model.graph.node: - if node.name.endswith('_quant'): - if node.op_type.startswith('QLinear'): - origin_op_type = node.op_type.split('QLinear')[-1] + if node.name.endswith("_quant"): + if node.op_type.startswith("QLinear"): + origin_op_type = node.op_type.split("QLinear")[-1] else: - origin_op_type = node.op_type.split('Integer')[0] + origin_op_type = node.op_type.split("Integer")[0] if origin_op_type in ["QAttention", "QGemm"]: origin_op_type = origin_op_type[1:] @@ -506,91 +525,107 @@ def _dump_model_op_stats(self, model): origin_op_type = "LSTM" elif origin_op_type == "QEmbedLayerNormalization": origin_op_type = "EmbedLayerNormalization" - res[origin_op_type]['INT8'] += 1 + res[origin_op_type]["INT8"] += 1 elif node.op_type in qdq_ops: - res[node.op_type]['INT8'] += 1 + res[node.op_type]["INT8"] += 1 elif node.op_type in fp32_op_list and node.name in self.quantize_config: if self.quantize_config[node.name] not in self.query_handler.get_fallback_list(): - res[node.op_type]['FP32'] += 1 + res[node.op_type]["FP32"] += 1 else: res[node.op_type][self.quantize_config[node.name].upper()] += 1 elif node.op_type in res: - res[node.op_type]['FP32'] += 1 - - field_names=["Op Type", "Total", "INT8", "BF16", "FP16", "FP32"] - output_data = [[ - op_type, sum(res[op_type].values()), - 
res[op_type]['INT8'], res[op_type]['BF16'], - res[op_type]['FP16'], res[op_type]['FP32']] - for op_type in res.keys()] - - Statistics(output_data, - header='Mixed Precision Statistics', - field_names=field_names).print_stat() + res[node.op_type]["FP32"] += 1 + + field_names = ["Op Type", "Total", "INT8", "BF16", "FP16", "FP32"] + output_data = [ + [ + op_type, + sum(res[op_type].values()), + res[op_type]["INT8"], + res[op_type]["BF16"], + res[op_type]["FP16"], + res[op_type]["FP32"], + ] + for op_type in res.keys() + ] + + Statistics(output_data, header="Mixed Precision Statistics", field_names=field_names).print_stat() self.optype_statistics = field_names, output_data def _get_quantize_params(self, model, data_loader, quantize_config, iterations): from neural_compressor.adaptor.ox_utils.calibration import ONNXRTAugment from neural_compressor.model.onnx_model import ONNXModel + if not isinstance(model, ONNXModel): model = ONNXModel(model) - black_nodes = [node for node in quantize_config if quantize_config[node]=='fp32'] - white_nodes = [node for node in quantize_config if quantize_config[node]!='fp32'] - - augment = ONNXRTAugment(model, \ - data_loader, self.quantizable_op_types, \ - black_nodes=black_nodes, white_nodes=white_nodes, \ - iterations=list(range(0, iterations)), \ - backend=self.backend, reduce_range=self.reduce_range) + black_nodes = [node for node in quantize_config if quantize_config[node] == "fp32"] + white_nodes = [node for node in quantize_config if quantize_config[node] != "fp32"] + + augment = ONNXRTAugment( + model, + data_loader, + self.quantizable_op_types, + black_nodes=black_nodes, + white_nodes=white_nodes, + iterations=list(range(0, iterations)), + backend=self.backend, + reduce_range=self.reduce_range, + ) self.min_max = augment.dump_minmax(quantize_config) quantize_params = augment.dump_calibration(quantize_config, min_max=self.min_max) return quantize_params - def inspect_tensor(self, model, dataloader, op_list=[], - iteration_list=[], - inspect_type='activation', - save_to_disk=False, - save_path=None, - quantization_cfg=None): - '''The function is used by tune strategy class for dumping tensor info. 
- ''' + def inspect_tensor( + self, + model, + dataloader, + op_list=[], + iteration_list=[], + inspect_type="activation", + save_to_disk=False, + save_path=None, + quantization_cfg=None, + ): + """The function is used by tune strategy class for dumping tensor info.""" from neural_compressor.adaptor.ox_utils.calibration import ONNXRTAugment from neural_compressor.model.onnx_model import ONNXModel from neural_compressor.utils.utility import dump_data_to_local + if not isinstance(model, ONNXModel): model = ONNXModel(model) if len(op_list) > 0 and isinstance(op_list, KeysView): op_list = [item[0] for item in op_list] - augment = ONNXRTAugment(model, dataloader, [], \ - iterations=iteration_list, - white_nodes=op_list, - backend=self.backend) - tensors = augment.dump_tensor(activation=(inspect_type!='weight'), - weight=(inspect_type!='activation'), - format=self.format) + augment = ONNXRTAugment( + model, dataloader, [], iterations=iteration_list, white_nodes=op_list, backend=self.backend + ) + tensors = augment.dump_tensor( + activation=(inspect_type != "weight"), weight=(inspect_type != "activation"), format=self.format + ) if save_to_disk: if not save_path: save_path = self.work_space - dump_data_to_local(tensors, save_path, 'inspect_result.pkl') + dump_data_to_local(tensors, save_path, "inspect_result.pkl") return tensors def set_tensor(self, model, tensor_dict): from onnx import numpy_helper + + from neural_compressor.adaptor.ox_utils.util import quantize_data_per_channel, quantize_data_with_scale_zero from neural_compressor.model.onnx_model import ONNXModel - from neural_compressor.adaptor.ox_utils.util import quantize_data_with_scale_zero - from neural_compressor.adaptor.ox_utils.util import quantize_data_per_channel + if not isinstance(model, ONNXModel): model = ONNXModel(model) - assert "QuantizeLinear" in [node.op_type for node in model.model.graph.node], \ - 'adaptor.set_tensor only accept int8 model' + assert "QuantizeLinear" in [ + node.op_type for node in model.model.graph.node + ], "adaptor.set_tensor only accept int8 model" input_name_to_nodes = model.input_name_to_nodes for tensor_name, tensor_value in tensor_dict.items(): - if not tensor_name.endswith('_quantized'): - tensor_name += '_quantized' + if not tensor_name.endswith("_quantized"): + tensor_name += "_quantized" not_filter = False scale_tensor, zo_tensor = model.get_scale_zero(tensor_name) if scale_tensor is None or zo_tensor is None: @@ -598,41 +633,39 @@ def set_tensor(self, model, tensor_dict): else: scale_value = numpy_helper.to_array(scale_tensor) zo_value = numpy_helper.to_array(zo_tensor) - assert len(input_name_to_nodes[tensor_name]) == 1, \ - 'quantized filter weight should be input of only one node' - node = input_name_to_nodes[tensor_name][0] #TBD only for conv bias - node_name = node.name.replace('_quant', '') + assert ( + len(input_name_to_nodes[tensor_name]) == 1 + ), "quantized filter weight should be input of only one node" + node = input_name_to_nodes[tensor_name][0] # TBD only for conv bias + node_name = node.name.replace("_quant", "") assert node_name in self.quantize_config - q_type = self.quantize_config[node_name]['weight']['dtype'] + q_type = self.quantize_config[node_name]["weight"]["dtype"] if not_filter: new_tensor_value = self._requantize_bias(model, tensor_name, tensor_value) - elif self.quantize_config[node_name]['weight']['granularity'] == 'per_tensor': + elif self.quantize_config[node_name]["weight"]["granularity"] == "per_tensor": new_tensor_value = quantize_data_with_scale_zero( - 
tensor_value, - q_type, - self.quantize_config[node_name]['weight']['scheme'], - scale_value, - zo_value) - elif (Version(ort.__version__) >= ONNXRT112_VERSION and \ - model.model.opset_import[0].version < 13) and \ - len(scale_tensor.dims) in [1, 2]: - logger.warning("Skip setting per-channel quantized tensor {}, please " \ - "use onnxruntime < 1.12.0 or upgrade model opset version to 13 or " \ - "higher".format(tensor_name)) + tensor_value, q_type, self.quantize_config[node_name]["weight"]["scheme"], scale_value, zo_value + ) + elif (Version(ort.__version__) >= ONNXRT112_VERSION and model.model.opset_import[0].version < 13) and len( + scale_tensor.dims + ) in [1, 2]: + logger.warning( + "Skip setting per-channel quantized tensor {}, please " + "use onnxruntime < 1.12.0 or upgrade model opset version to 13 or " + "higher".format(tensor_name) + ) return model else: new_tensor_value = quantize_data_per_channel( - tensor_value, - q_type, - self.quantize_config[node_name]['weight']['scheme'], - scale_value, - zo_value) + tensor_value, q_type, self.quantize_config[node_name]["weight"]["scheme"], scale_value, zo_value + ) model.set_initializer(tensor_name, new_tensor_value) return model def _requantize_bias(self, model, bias_name, bias_data): - ''' helper function to requantize bias, borrowed from onnx_quantizer ''' + """Helper function to requantize bias, borrowed from onnx_quantizer.""" from onnx import numpy_helper + node = model.input_name_to_nodes[bias_name][0] input_scale_name = node.input[1] input_scale = numpy_helper.to_array(model.get_initializer(input_scale_name)) @@ -662,17 +695,17 @@ def _detect_domain(self, model): if len(obj) > 0: is_nlp = True break - + # 2. according to input - # typically, NLP models have multiple inputs, + # typically, NLP models have multiple inputs, # and the dimension of each input is usually 2 (batch_size, max_seq_len) if not model.is_large_model: sess = ort.InferenceSession(model.model.SerializeToString(), providers=ort.get_available_providers()) - elif model.model_path is not None: # pragma: no cover + elif model.model_path is not None: # pragma: no cover sess = ort.InferenceSession(model.model_path, providers=ort.get_available_providers()) - else: # pragma: no cover + else: # pragma: no cover assert False, "Please use model path instead of onnx model object to quantize." - input_shape_lens = [len(input.shape) for input in sess.get_inputs()] + input_shape_lens = [len(input.shape) for input in sess.get_inputs()] if len(input_shape_lens) > 1 and all(shape_len == 2 for shape_len in input_shape_lens): is_nlp = True @@ -683,12 +716,14 @@ def _detect_domain(self, model): # 4. according to LSTM/Attention optype op_types = [node.op_type for node in model.model.graph.node] - if "LSTM" in op_types or 'Attention' in op_types: + if "LSTM" in op_types or "Attention" in op_types: is_nlp = True - logger.warning("The model is automatically detected as {} model. " + logger.warning( + "The model is automatically detected as {} model. 
" "You can use 'domain' argument in 'PostTrainingQuantConfig' " - "to overwrite it".format("an NLP" if is_nlp else "a non-NLP")) + "to overwrite it".format("an NLP" if is_nlp else "a non-NLP") + ) return is_nlp def _pre_optimize(self, model, level=1): @@ -698,57 +733,59 @@ def _pre_optimize(self, model, level=1): logger.info("Pre-optimization already done, return it directly.") return self.pre_optimized_model from neural_compressor import options - from neural_compressor.adaptor.ox_utils.util import \ - remove_init_from_model_input, split_shared_bias + from neural_compressor.adaptor.ox_utils.util import remove_init_from_model_input, split_shared_bias + remove_init_from_model_input(model) sess_options = ort.SessionOptions() optimization_levels = { - 'DISABLE_ALL': ort.GraphOptimizationLevel.ORT_DISABLE_ALL, - 'ENABLE_BASIC': ort.GraphOptimizationLevel.ORT_ENABLE_BASIC, - 'ENABLE_EXTENDED': ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED, - 'ENABLE_ALL': ort.GraphOptimizationLevel.ORT_ENABLE_ALL} + "DISABLE_ALL": ort.GraphOptimizationLevel.ORT_DISABLE_ALL, + "ENABLE_BASIC": ort.GraphOptimizationLevel.ORT_ENABLE_BASIC, + "ENABLE_EXTENDED": ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED, + "ENABLE_ALL": ort.GraphOptimizationLevel.ORT_ENABLE_ALL, + } if not isinstance(self.query_handler.get_graph_optimization(), list): level = self.query_handler.get_graph_optimization() elif options.onnxrt.graph_optimization.level is not None: level = options.onnxrt.graph_optimization.level - elif self.recipes.get('graph_optimization_level', None) is not None: - level = self.recipes['graph_optimization_level'] + elif self.recipes.get("graph_optimization_level", None) is not None: + level = self.recipes["graph_optimization_level"] else: if self.domain == "auto" and self._detect_domain(model): - self.domain = 'nlp' - level = 'ENABLE_EXTENDED' if self.domain == 'nlp' else 'ENABLE_BASIC' - logger.warning("Graph optimization level is automatically set to {}. " - "You can use 'recipe' argument in 'PostTrainingQuantConfig'" - "to overwrite it".format(level)) + self.domain = "nlp" + level = "ENABLE_EXTENDED" if self.domain == "nlp" else "ENABLE_BASIC" + logger.warning( + "Graph optimization level is automatically set to {}. 
" + "You can use 'recipe' argument in 'PostTrainingQuantConfig'" + "to overwrite it".format(level) + ) sess_options.graph_optimization_level = optimization_levels[level] - sess_options.optimized_model_filepath = os.path.join(self.work_space, \ - "Optimized_model.onnx") - if sys.version_info < (3,11) and find_spec('onnxruntime_extensions'): # pragma: no cover + sess_options.optimized_model_filepath = os.path.join(self.work_space, "Optimized_model.onnx") + if sys.version_info < (3, 11) and find_spec("onnxruntime_extensions"): # pragma: no cover from onnxruntime_extensions import get_library_path + sess_options.register_custom_ops_library(get_library_path()) if not model.is_large_model: - ort.InferenceSession(model.model.SerializeToString(), - sess_options, - providers=['CPUExecutionProvider']) - elif model.model_path is not None: # pragma: no cover - ort.InferenceSession(model.model_path, - sess_options, - providers=['CPUExecutionProvider']) - else: # pragma: no cover - logger.warning('Please use model path instead of onnx model object to quantize') + ort.InferenceSession(model.model.SerializeToString(), sess_options, providers=["CPUExecutionProvider"]) + elif model.model_path is not None: # pragma: no cover + ort.InferenceSession(model.model_path, sess_options, providers=["CPUExecutionProvider"]) + else: # pragma: no cover + logger.warning("Please use model path instead of onnx model object to quantize") tmp_model = onnx.load(sess_options.optimized_model_filepath, load_external_data=False) - if model.is_large_model: # pragma: no cover + if model.is_large_model: # pragma: no cover from onnx.external_data_helper import load_external_data_for_model + load_external_data_for_model(tmp_model, os.path.split(model.model_path)[0]) model.model_path = sess_options.optimized_model_filepath - model.model = self._replace_gemm_with_matmul(tmp_model).model if \ - options.onnxrt.graph_optimization.gemm2matmul and self.recipes.get('gemm_to_matmul', True) else \ - tmp_model + model.model = ( + self._replace_gemm_with_matmul(tmp_model).model + if options.onnxrt.graph_optimization.gemm2matmul and self.recipes.get("gemm_to_matmul", True) + else tmp_model + ) model = self._rename_node(model) model = self._revert_fusedconv(model) - if self.backend == 'TensorrtExecutionProvider': + if self.backend == "TensorrtExecutionProvider": model = self._revert_conv_add_fusion(model) model = split_shared_bias(model) model.topological_sort() @@ -756,11 +793,13 @@ def _pre_optimize(self, model, level=1): def _revert_conv_add_fusion(self, model): from onnx import numpy_helper + from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg + add_nodes = [] remove_nodes = [] for node in model.model.graph.node: - if node.op_type == 'Conv' and len(node.input) == 3: + if node.op_type == "Conv" and len(node.input) == 3: bias_tensor = model.get_initializer(node.input[2]) bias_array = numpy_helper.to_array(bias_tensor).reshape((-1, 1, 1)) model.remove_initializer(bias_tensor) @@ -769,16 +808,8 @@ def _revert_conv_add_fusion(self, model): activation_params = None for attr in node.attribute: kwargs.update(attribute_to_kwarg(attr)) - conv = onnx.helper.make_node( - 'Conv', - node.input[0:2], - [node.name + '_revert'], - node.name, **kwargs) - add = onnx.helper.make_node( - 'Add', - [conv.output[0], node.input[2]], - node.output, - node.name + '_add') + conv = onnx.helper.make_node("Conv", node.input[0:2], [node.name + "_revert"], node.name, **kwargs) + add = onnx.helper.make_node("Add", [conv.output[0], node.input[2]], node.output, 
node.name + "_add") add_nodes.extend([conv, add]) model.remove_nodes(remove_nodes) @@ -787,29 +818,31 @@ def _revert_conv_add_fusion(self, model): return model def _revert_fusedconv(self, model): - from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg from onnx import onnx_pb as onnx_proto + + from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg + new_nodes = [] remove_nodes = [] for node in model.model.graph.node: - if node.op_type == 'FusedConv': + if node.op_type == "FusedConv": kwargs = {} activation_params = None for attr in node.attribute: - if attr.name == 'activation': - activation_type = attr.s.decode('utf-8') - elif attr.name == 'activation_params': + if attr.name == "activation": + activation_type = attr.s.decode("utf-8") + elif attr.name == "activation_params": continue else: kwargs.update(attribute_to_kwarg(attr)) - if activation_type in ['Relu', 'Clip']: + if activation_type in ["Relu", "Clip"]: continue - conv = onnx.helper.make_node( - 'Conv', node.input, [node.name], node.name.split('fused ')[-1], **kwargs) + conv = onnx.helper.make_node("Conv", node.input, [node.name], node.name.split("fused ")[-1], **kwargs) activation_input = conv.output - activation = onnx.helper.make_node(activation_type, - conv.output, node.output, '_'.join((conv.name, activation_type))) + activation = onnx.helper.make_node( + activation_type, conv.output, node.output, "_".join((conv.name, activation_type)) + ) new_nodes.extend([conv, activation]) remove_nodes.append(node) model.model.graph.node.extend(new_nodes) @@ -822,45 +855,51 @@ def _rename_node(self, model_wrapper): model = model_wrapper.model node_names = [i.name for i in model.graph.node] if len(set(node_names)) < len(node_names): - logger.warning("This model has nodes with the same name, please check" \ - "renamed_model.onnx in workspace_path (default is nc_workspace)" \ - "for newly generated node name") + logger.warning( + "This model has nodes with the same name, please check" + "renamed_model.onnx in workspace_path (default is nc_workspace)" + "for newly generated node name" + ) for idx, node in enumerate(model.graph.node): if node_names.count(node.name) > 1: - node.name = node.op_type + '_nc_rename_' + str(idx) + node.name = node.op_type + "_nc_rename_" + str(idx) if model_wrapper.is_large_model: - onnx.save(model, - os.path.join(self.work_space, "renamed_model.onnx"), - save_as_external_data=True, - all_tensors_to_one_file=True, - location="weights.pb", - convert_attribute=False) + onnx.save( + model, + os.path.join(self.work_space, "renamed_model.onnx"), + save_as_external_data=True, + all_tensors_to_one_file=True, + location="weights.pb", + convert_attribute=False, + ) else: - onnx.save(model, os.path.join(self.work_space, "renamed_model.onnx")) + onnx.save(model, os.path.join(self.work_space, "renamed_model.onnx")) return model_wrapper @staticmethod def _replace_gemm_with_matmul(model): new_nodes = [] from onnx import numpy_helper + from neural_compressor.model.onnx_model import ONNXModel + if not isinstance(model, ONNXModel): model = ONNXModel(model) for node in model.nodes(): - if node.op_type == 'Gemm': + if node.op_type == "Gemm": alpha = 1.0 beta = 1.0 transA = 0 transB = 0 for attr in node.attribute: - if attr.name == 'alpha': + if attr.name == "alpha": alpha = onnx.helper.get_attribute_value(attr) - elif attr.name == 'beta': + elif attr.name == "beta": beta = onnx.helper.get_attribute_value(attr) - elif attr.name == 'transA': + elif attr.name == "transA": transA = 
onnx.helper.get_attribute_value(attr) - elif attr.name == 'transB': + elif attr.name == "transB": transB = onnx.helper.get_attribute_value(attr) if alpha == 1.0 and beta == 1.0 and transA == 0: inputB = node.input[1] @@ -874,7 +913,7 @@ def _replace_gemm_with_matmul(model): model.remove_initializer(B) model.add_initializer(B_trans) - #TBD this is for onnx model zoo, which are all in old IR version + # TBD this is for onnx model zoo, which are all in old IR version if model.model.ir_version < 4: for input in model.model.graph.input: if input.name == B_trans.name: @@ -882,24 +921,27 @@ def _replace_gemm_with_matmul(model): dim.dim_value = B_array.T.shape[i] else: - inputB += '_Transposed' - transpose_node = onnx.helper.make_node('Transpose', - inputs=[node.input[1]], - outputs=[inputB], - name=node.name+'_Transpose') + inputB += "_Transposed" + transpose_node = onnx.helper.make_node( + "Transpose", inputs=[node.input[1]], outputs=[inputB], name=node.name + "_Transpose" + ) new_nodes.append(transpose_node) - matmul_node = onnx.helper.make_node('MatMul', - inputs=[node.input[0], inputB], - outputs=[node.output[0] + ('_MatMul' if len(node.input)>2 else '')], - name=node.name + '_MatMul') + matmul_node = onnx.helper.make_node( + "MatMul", + inputs=[node.input[0], inputB], + outputs=[node.output[0] + ("_MatMul" if len(node.input) > 2 else "")], + name=node.name + "_MatMul", + ) new_nodes.append(matmul_node) if len(node.input) > 2: - add_node = onnx.helper.make_node('Add', - inputs=[node.output[0] + '_MatMul', node.input[2]], + add_node = onnx.helper.make_node( + "Add", + inputs=[node.output[0] + "_MatMul", node.input[2]], outputs=node.output, - name=node.name + '_Add') + name=node.name + "_Add", + ) new_nodes.append(add_node) # unsupported @@ -910,7 +952,7 @@ def _replace_gemm_with_matmul(model): else: new_nodes.append(node) - model.graph().ClearField('node') + model.graph().ClearField("node") model.graph().node.extend(new_nodes) return model @@ -928,19 +970,27 @@ def query_fw_capability(self, model): # optype_wise and op_wise capability self._pre_optimize(model) recipes_ops = {} - recipes_ops['first_conv_or_matmul_quantization'] = [] - recipes_ops['last_conv_or_matmul_quantization'] = [] - recipes_ops['pre_post_process_quantization'] = [] - exclude_first_quantizable_op = True if 'first_conv_or_matmul_quantization' in \ - self.recipes and not self.recipes['first_conv_or_matmul_quantization'] \ + recipes_ops["first_conv_or_matmul_quantization"] = [] + recipes_ops["last_conv_or_matmul_quantization"] = [] + recipes_ops["pre_post_process_quantization"] = [] + exclude_first_quantizable_op = ( + True + if "first_conv_or_matmul_quantization" in self.recipes + and not self.recipes["first_conv_or_matmul_quantization"] else False - exclude_last_quantizable_op = True if 'last_conv_or_matmul_quantization' in \ - self.recipes and not self.recipes['last_conv_or_matmul_quantization'] \ + ) + exclude_last_quantizable_op = ( + True + if "last_conv_or_matmul_quantization" in self.recipes + and not self.recipes["last_conv_or_matmul_quantization"] else False - exclude_pre_post_process = True if 'pre_post_process_quantization' in \ - self.recipes and not self.recipes['pre_post_process_quantization'] \ + ) + exclude_pre_post_process = ( + True + if "pre_post_process_quantization" in self.recipes and not self.recipes["pre_post_process_quantization"] else False - + ) + quantizable_optype = set([i.op_type for i in self.pre_optimized_model.nodes()]) optype_wise = OrderedDict() op_wise = OrderedDict() @@ -950,46 +1000,50 
@@ def query_fw_capability(self, model): precisions = query.get_precisions() for precision in precisions: - if precision == 'fp16' and not self.use_fp16: + if precision == "fp16" and not self.use_fp16: continue - if precision == 'bf16' and \ - (not self.use_bf16 or (not CpuInfo().bf16 and os.getenv('FORCE_BF16') != '1')): + if precision == "bf16" and ( + not self.use_bf16 or (not CpuInfo().bf16 and os.getenv("FORCE_BF16") != "1") + ): continue - elif precision == 'weight_only_integer': + elif precision == "weight_only_integer": continue # get supported optype for target precision - optypes = query.get_op_types_by_precision(precision) if \ - query.get_op_types_by_precision(precision) != ['*'] else \ - optype_wise.keys() - - configs = query.get_quantization_capability()[precision] if \ - precision in query.get_quantization_capability() else \ - {'default': {'weight': {'dtype': precision}, 'activation': {'dtype': precision}}} - - if self.backend == 'TensorrtExecutionProvider' and \ - precision not in query.get_fallback_list(): - optypes.append('Add') - + optypes = ( + query.get_op_types_by_precision(precision) + if query.get_op_types_by_precision(precision) != ["*"] + else optype_wise.keys() + ) + + configs = ( + query.get_quantization_capability()[precision] + if precision in query.get_quantization_capability() + else {"default": {"weight": {"dtype": precision}, "activation": {"dtype": precision}}} + ) + + if self.backend == "TensorrtExecutionProvider" and precision not in query.get_fallback_list(): + optypes.append("Add") + for op in optypes: if op not in quantizable_optype: continue if op not in configs: - if 'default' in configs: - op_capability = copy.deepcopy(configs['default']) + if "default" in configs: + op_capability = copy.deepcopy(configs["default"]) else: continue else: op_capability = copy.deepcopy(configs[op]) - if precision in ['int8', 'uint8']: + if precision in ["int8", "uint8"]: if self.static: - op_capability['activation']['quant_mode'] = 'static' + op_capability["activation"]["quant_mode"] = "static" elif self.dynamic: - op_capability['activation']['quant_mode'] = 'dynamic' - elif query == self.query_handler: # query static capability for auto - op_capability['activation']['quant_mode'] = 'static' - elif query == self.query_handler_ext: # query dynamic capability for auto - op_capability['activation']['quant_mode'] = 'dynamic' + op_capability["activation"]["quant_mode"] = "dynamic" + elif query == self.query_handler: # query static capability for auto + op_capability["activation"]["quant_mode"] = "static" + elif query == self.query_handler_ext: # query dynamic capability for auto + op_capability["activation"]["quant_mode"] = "dynamic" if op not in optype_wise.keys(): optype_wise[op] = [op_capability] @@ -1004,7 +1058,7 @@ def query_fw_capability(self, model): all_conv_matmul = [] attention_matmul = [] for _, node in enumerate(self.pre_optimized_model.nodes()): - if node.op_type in ['Conv', 'MatMul', 'Attention']: + if node.op_type in ["Conv", "MatMul", "Attention"]: # get first Conv or MatMul node if len(first_quantizable_node) == 0: first_quantizable_node.append(node) @@ -1015,62 +1069,64 @@ def query_fw_capability(self, model): last_quantizable_node.append(node) all_conv_matmul.append(node) - if node.op_type != 'Conv': + if node.op_type != "Conv": attention_matmul.append(node) - + if len(first_quantizable_node) != 0: - recipes_ops['first_conv_or_matmul_quantization'] = [(first_quantizable_node[0].name, - first_quantizable_node[0].op_type)] + 
recipes_ops["first_conv_or_matmul_quantization"] = [ + (first_quantizable_node[0].name, first_quantizable_node[0].op_type) + ] if len(last_quantizable_node) != 0: - recipes_ops['last_conv_or_matmul_quantization'] = [(last_quantizable_node[0].name, - last_quantizable_node[0].op_type)] - - + recipes_ops["last_conv_or_matmul_quantization"] = [ + (last_quantizable_node[0].name, last_quantizable_node[0].op_type) + ] + ffn_matmul = [] attention_matmul_optype = [node.op_type for node in attention_matmul] # find matmul ops in feed forward network (FFN) structure whitch mainly in transfomers based NLP models - if len(attention_matmul) > 0 and 'Attention' in attention_matmul_optype: + if len(attention_matmul) > 0 and "Attention" in attention_matmul_optype: # model is optimized and Attention is fused, # index of Attention is used as split to find FFN MatMul - first_attention_index = attention_matmul_optype.index('Attention') + first_attention_index = attention_matmul_optype.index("Attention") attention_matmul_optype = attention_matmul_optype[first_attention_index:] attention_matmul = attention_matmul[first_attention_index:] - attention_index = list(np.where(np.array(attention_matmul_optype) == 'Attention')[0]) + attention_index = list(np.where(np.array(attention_matmul_optype) == "Attention")[0]) block_len = attention_index[1] - attention_index[0] if len(attention_index) > 2 else 4 for idx in range(len(attention_index)): if idx != len(attention_index) - 1: index = attention_index[idx + 1] if index - 2 >= 0 and index - 1 >= 0: - ffn_matmul.append([attention_matmul[index - 2], - attention_matmul[index - 1]]) + ffn_matmul.append([attention_matmul[index - 2], attention_matmul[index - 1]]) else: index = attention_index[idx] - if index + block_len - 2 < len(attention_matmul) and \ - index + block_len - 1 < len(attention_matmul): - ffn_matmul.append([attention_matmul[index + block_len - 2], - attention_matmul[index + block_len - 1]]) + if index + block_len - 2 < len(attention_matmul) and index + block_len - 1 < len(attention_matmul): + ffn_matmul.append( + [attention_matmul[index + block_len - 2], attention_matmul[index + block_len - 1]] + ) else: - # model is not optimized or Attention isn't fused, + # model is not optimized or Attention isn't fused, # query MatMul, key MatMul and value MatMul are used as split to find FFN MatMul qkv = self.pre_optimized_model.find_qkv_in_attention(find_all=True) if len(qkv) != 0: attention_starts = [nodes[0] for nodes in qkv] - attention_index = [np.where(np.array([n.name for n in attention_matmul]) \ - == attention_start)[0].tolist()[0] \ - for attention_start in attention_starts] + attention_index = [ + np.where(np.array([n.name for n in attention_matmul]) == attention_start)[0].tolist()[0] + for attention_start in attention_starts + ] block_len = attention_index[1] - attention_index[0] if len(attention_index) > 2 else 4 for idx in range(len(attention_index)): if idx != len(attention_index) - 1: index = attention_index[idx + 1] if index - 2 >= 0 and index - 1 >= 0: - ffn_matmul.append([attention_matmul[index - 2], - attention_matmul[index - 1]]) + ffn_matmul.append([attention_matmul[index - 2], attention_matmul[index - 1]]) else: index = attention_index[idx] - if index + block_len - 2 < len(attention_matmul) and \ - index + block_len - 1 < len(attention_matmul): - ffn_matmul.append([attention_matmul[index + block_len - 2], - attention_matmul[index + block_len - 1]]) + if index + block_len - 2 < len(attention_matmul) and index + block_len - 1 < len( + attention_matmul + 
): + ffn_matmul.append( + [attention_matmul[index + block_len - 2], attention_matmul[index + block_len - 1]] + ) block_wise = [] for block in reversed(ffn_matmul): @@ -1082,31 +1138,34 @@ def query_fw_capability(self, model): for _, node in enumerate(self.pre_optimized_model.nodes()): # for TRT EP, only insert Q/DQ to inputs of Add nodes followed by ReduceMean - if node.op_type == 'Add' and self.backend == 'TensorrtExecutionProvider': + if node.op_type == "Add" and self.backend == "TensorrtExecutionProvider": children = self.pre_optimized_model.get_children(node) - if 'ReduceMean' not in [i.op_type for i in children]: - op_wise.update({(node.name, node.op_type): - [{'weight': {'dtype': 'fp32'}, 'activation': {'dtype': 'fp32'}}]}) + if "ReduceMean" not in [i.op_type for i in children]: + op_wise.update( + {(node.name, node.op_type): [{"weight": {"dtype": "fp32"}, "activation": {"dtype": "fp32"}}]} + ) continue if node.op_type in optype_wise: - if (exclude_first_quantizable_op and node in first_quantizable_node) \ - or (exclude_last_quantizable_op and node in last_quantizable_node): + if (exclude_first_quantizable_op and node in first_quantizable_node) or ( + exclude_last_quantizable_op and node in last_quantizable_node + ): tmp_cfg = copy.deepcopy(optype_wise[node.op_type]) - tmp_cfg = list(filter(lambda x:'quant_mode' not in x['activation'], tmp_cfg)) + tmp_cfg = list(filter(lambda x: "quant_mode" not in x["activation"], tmp_cfg)) op_wise.update({(node.name, node.op_type): tmp_cfg}) continue - op_wise.update( - {(node.name, node.op_type): copy.deepcopy(optype_wise[node.op_type])}) + op_wise.update({(node.name, node.op_type): copy.deepcopy(optype_wise[node.op_type])}) # only when first and last quantizable nodes are found and they are not the same, # fallback pre/postprocess ops - if len(first_quantizable_node) != 0 and \ - len(last_quantizable_node) != 0 and \ - first_quantizable_node[0].name != last_quantizable_node[0].name: + if ( + len(first_quantizable_node) != 0 + and len(last_quantizable_node) != 0 + and first_quantizable_node[0].name != last_quantizable_node[0].name + ): # get backbone nodes from collections import deque - + # get nodes between first quantizable node and last quantizable node backbone_queue = deque(last_quantizable_node) backbone_nodes = self.pre_optimized_model.get_nodes_chain(backbone_queue, first_quantizable_node) @@ -1116,30 +1175,31 @@ def query_fw_capability(self, model): for conv_or_matmul in all_conv_matmul: if conv_or_matmul.name not in backbone_nodes: backbone_queue_extra.append(conv_or_matmul) - backbone_nodes = self.pre_optimized_model.get_nodes_chain(backbone_queue_extra, - first_quantizable_node, backbone_nodes) + backbone_nodes = self.pre_optimized_model.get_nodes_chain( + backbone_queue_extra, first_quantizable_node, backbone_nodes + ) backbone_nodes += [i.name for i in first_quantizable_node] - + for _, node in enumerate(self.pre_optimized_model.nodes()): if node.name not in backbone_nodes and node.op_type in optype_wise: - recipes_ops['pre_post_process_quantization'].append((node.name, node.op_type)) + recipes_ops["pre_post_process_quantization"].append((node.name, node.op_type)) if exclude_pre_post_process: for _, node in enumerate(self.pre_optimized_model.nodes()): if node.op_type in optype_wise: # nodes not in backbone are not quantized if node.name not in backbone_nodes: tmp_cfg = copy.deepcopy(optype_wise[node.op_type]) - tmp_cfg = list(filter(lambda x:'quant_mode' not in x['activation'], tmp_cfg)) + tmp_cfg = list(filter(lambda x: 
"quant_mode" not in x["activation"], tmp_cfg)) op_wise.update({(node.name, node.op_type): tmp_cfg}) continue if (node.name, node.op_type) in op_wise: op_wise.update( - {(node.name, node.op_type): copy.deepcopy(op_wise[(node.name, node.op_type)])}) - else: # pragma: no cover - op_wise.update( - {(node.name, node.op_type): copy.deepcopy(optype_wise[node.op_type])}) + {(node.name, node.op_type): copy.deepcopy(op_wise[(node.name, node.op_type)])} + ) + else: # pragma: no cover + op_wise.update({(node.name, node.op_type): copy.deepcopy(optype_wise[node.op_type])}) - return {'optypewise': optype_wise, 'opwise': op_wise, 'recipes_ops': recipes_ops, 'block_wise': block_wise} + return {"optypewise": optype_wise, "opwise": op_wise, "recipes_ops": recipes_ops, "block_wise": block_wise} def _optypewise_filter_for_qdq(self, optype_wise): """Filter optypes that don't support per_channel in QDQ format. @@ -1150,54 +1210,52 @@ def _optypewise_filter_for_qdq(self, optype_wise): dict: filtered optype and quantization config """ supported_perchannel_optypes = { - '1.6.0': ['Conv', 'Gather'], - '1.7.0': ['Conv', 'Gather'], - '1.8.0': ['Conv', 'Gather'], - '1.9.0': ['Conv', 'Gather'], - '1.10.0': ['Conv', 'Gather', 'MatMul'], - '1.11.0': ['Conv', 'Gather', 'MatMul', 'Gemm'], - '1.12.0': ['Conv', 'Gather', 'MatMul', 'Gemm']} + "1.6.0": ["Conv", "Gather"], + "1.7.0": ["Conv", "Gather"], + "1.8.0": ["Conv", "Gather"], + "1.9.0": ["Conv", "Gather"], + "1.10.0": ["Conv", "Gather", "MatMul"], + "1.11.0": ["Conv", "Gather", "MatMul", "Gemm"], + "1.12.0": ["Conv", "Gather", "MatMul", "Gemm"], + } specific_cfg_version = self.query_handler.get_specific_cfg_version() if Version(specific_cfg_version) > ONNXRT112_VERSION: - specific_cfg_version = '1.12.0' + specific_cfg_version = "1.12.0" for optype, caps in optype_wise.items(): if optype not in supported_perchannel_optypes[specific_cfg_version]: for cap in caps: - if 'mode' in cap and \ - cap['mode'] == 'QDQ' and \ - 'per_channel' in cap['weight']['granularity']: - cap['weight']['granularity'].remove('per_channel') + if "mode" in cap and cap["mode"] == "QDQ" and "per_channel" in cap["weight"]["granularity"]: + cap["weight"]["granularity"].remove("per_channel") return optype_wise def _cfg_to_quantize_config(self, tune_cfg): quantize_config = {} - quantize_config['calib_iteration'] = tune_cfg['calib_iteration'] - granularity = 'per_tensor' - algorithm = 'minmax' + quantize_config["calib_iteration"] = tune_cfg["calib_iteration"] + granularity = "per_tensor" + algorithm = "minmax" from onnx import onnx_pb as onnx_proto + for _, op in enumerate(self.quantizable_ops): - if (op.name, op.op_type) not in tune_cfg['op']: + if (op.name, op.op_type) not in tune_cfg["op"]: continue - if tune_cfg['op'][(op.name, op.op_type)]['activation']['dtype'] in \ - self.query_handler.get_fallback_list(): - quantize_config[op.name] = \ - tune_cfg['op'][(op.name, op.op_type)]['activation']['dtype'] + if tune_cfg["op"][(op.name, op.op_type)]["activation"]["dtype"] in self.query_handler.get_fallback_list(): + quantize_config[op.name] = tune_cfg["op"][(op.name, op.op_type)]["activation"]["dtype"] else: - node_config = copy.deepcopy(tune_cfg['op'][(op.name, op.op_type)]) - for tensor, config in tune_cfg['op'][(op.name, op.op_type)].items(): - if 'granularity' not in config: - node_config[tensor]['granularity'] = granularity - if 'algorithm' not in config: - node_config[tensor]['algorithm'] = algorithm - if config['dtype'] == "int8": - node_config[tensor]['dtype'] = onnx_proto.TensorProto.INT8 - if 
'scheme' not in config: - node_config[tensor]['scheme'] = 'sym' + node_config = copy.deepcopy(tune_cfg["op"][(op.name, op.op_type)]) + for tensor, config in tune_cfg["op"][(op.name, op.op_type)].items(): + if "granularity" not in config: + node_config[tensor]["granularity"] = granularity + if "algorithm" not in config: + node_config[tensor]["algorithm"] = algorithm + if config["dtype"] == "int8": + node_config[tensor]["dtype"] = onnx_proto.TensorProto.INT8 + if "scheme" not in config: + node_config[tensor]["scheme"] = "sym" else: - node_config[tensor]['dtype'] = onnx_proto.TensorProto.UINT8 - if 'scheme' not in config: - node_config[tensor]['scheme'] = 'asym' + node_config[tensor]["dtype"] = onnx_proto.TensorProto.UINT8 + if "scheme" not in config: + node_config[tensor]["scheme"] = "asym" quantize_config[op.name] = node_config return quantize_config @@ -1210,13 +1268,21 @@ def _query_quantizable_ops(self, model): return self.quantizable_ops def _query_quantizable_op_types(self): - quantizable_op_types = self.query_handler.get_op_types_by_precision(precision='int8') + quantizable_op_types = self.query_handler.get_op_types_by_precision(precision="int8") return quantizable_op_types - def evaluate(self, input_graph, dataloader, postprocess=None, - metrics=None, measurer=None, iteration=-1, - tensorboard=False, fp32_baseline=False): - """The function is for evaluation if no given eval func + def evaluate( + self, + input_graph, + dataloader, + postprocess=None, + metrics=None, + measurer=None, + iteration=-1, + tensorboard=False, + fp32_baseline=False, + ): + """The function is for evaluation if no given eval func. Args: input_graph : onnx model for evaluation @@ -1231,38 +1297,42 @@ def evaluate(self, input_graph, dataloader, postprocess=None, Returns: (float) evaluation results. acc, f1 e.g. 
""" - if input_graph.is_large_model: # pragma: no cover - onnx.save_model(input_graph.model, - self.work_space + 'eval.onnx', - save_as_external_data=True, - all_tensors_to_one_file=True, - location="weights.pb", - convert_attribute=False) + if input_graph.is_large_model: # pragma: no cover + onnx.save_model( + input_graph.model, + self.work_space + "eval.onnx", + save_as_external_data=True, + all_tensors_to_one_file=True, + location="weights.pb", + convert_attribute=False, + ) sess_options = ort.SessionOptions() - if self.backend == 'TensorrtExecutionProvider': + if self.backend == "TensorrtExecutionProvider": from neural_compressor.adaptor.ox_utils.util import trt_env_setup + trt_env_setup(input_graph.model) - sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL + sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL if measurer: # https://github.com/microsoft/onnxruntime/issues/7347 - cores_per_instance = int(os.environ.get('CORES_PER_INSTANCE')) + cores_per_instance = int(os.environ.get("CORES_PER_INSTANCE")) assert cores_per_instance > 0, "benchmark cores_per_instance should greater than 0" sess_options.intra_op_num_threads = cores_per_instance - if sys.version_info < (3,11) and find_spec('onnxruntime_extensions'): # pragma: no cover + if sys.version_info < (3, 11) and find_spec("onnxruntime_extensions"): # pragma: no cover from onnxruntime_extensions import get_library_path + sess_options.register_custom_ops_library(get_library_path()) - session = ort.InferenceSession(self.work_space + 'eval.onnx', - sess_options, - providers=[self.backend]) if input_graph.is_large_model else \ - ort.InferenceSession(input_graph.model.SerializeToString(), - sess_options, - providers=[self.backend]) + session = ( + ort.InferenceSession(self.work_space + "eval.onnx", sess_options, providers=[self.backend]) + if input_graph.is_large_model + else ort.InferenceSession(input_graph.model.SerializeToString(), sess_options, providers=[self.backend]) + ) results = [] if metrics: for metric in metrics: metric.reset() - self.fp32_preds_as_label = any([hasattr(metric, "compare_label") and \ - not metric.compare_label for metric in metrics]) + self.fp32_preds_as_label = any( + [hasattr(metric, "compare_label") and not metric.compare_label for metric in metrics] + ) len_inputs = len(session.get_inputs()) inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] @@ -1280,8 +1350,7 @@ def eval_func(dataloader): else: ort_inputs.update({inputs_names[0]: to_numpy(inputs)}) else: - assert len_inputs == len(inputs), \ - 'number of input tensors must align with graph inputs' + assert len_inputs == len(inputs), "number of input tensors must align with graph inputs" if isinstance(inputs, dict): for name, input in inputs.items(): @@ -1297,15 +1366,15 @@ def eval_func(dataloader): predictions = session.run(None, ort_inputs) if self.fp32_preds_as_label: - self.fp32_results.append(predictions) if fp32_baseline else \ - results.append(predictions) + self.fp32_results.append(predictions) if fp32_baseline else results.append(predictions) if postprocess is not None: predictions, labels = postprocess((predictions, labels)) if metrics: for metric in metrics: - if not hasattr(metric, "compare_label") or \ - (hasattr(metric, "compare_label") and metric.compare_label): + if not hasattr(metric, "compare_label") or ( + hasattr(metric, "compare_label") and metric.compare_label + ): metric.update(predictions, labels) if idx + 1 == iteration: break @@ -1314,9 +1383,7 
@@ def eval_func(dataloader): try: eval_func(dataloader) except Exception: # pragma: no cover - logger.warning( - "Fail to forward with batch size={}, set to {} now.". - format(dataloader.batch_size, 1)) + logger.warning("Fail to forward with batch size={}, set to {} now.".format(dataloader.batch_size, 1)) dataloader.batch(1) eval_func(dataloader) else: # pragma: no cover @@ -1324,6 +1391,7 @@ def eval_func(dataloader): if self.fp32_preds_as_label: from neural_compressor.adaptor.ox_utils.util import collate_preds + if fp32_baseline: results = collate_preds(self.fp32_results) reference = results @@ -1338,20 +1406,41 @@ def eval_func(dataloader): return acc if not isinstance(acc, list) or len(acc) > 1 else acc[0] def diagnosis_helper(self, fp32_model, int8_model, tune_cfg=None, save_path=None): - from neural_compressor.utils.utility import dump_data_to_local from neural_compressor.adaptor.ox_utils.util import find_by_name + from neural_compressor.utils.utility import dump_data_to_local + if self.format == "qlinearops": - supported_optype = ['Conv', 'MatMul', 'Concat', 'Attention', 'FusedConv', - 'Add', 'Mul', 'LeakyRelu', 'Sigmoid', 'GlobalAveragePool', 'AveragePool'] + supported_optype = [ + "Conv", + "MatMul", + "Concat", + "Attention", + "FusedConv", + "Add", + "Mul", + "LeakyRelu", + "Sigmoid", + "GlobalAveragePool", + "AveragePool", + ] elif self.format == "qdq": - supported_optype = ['Conv', 'MatMul', 'Concat', 'Attention', 'FusedConv', - 'LeakyRelu', 'Sigmoid', 'GlobalAveragePool', 'AveragePool'] + supported_optype = [ + "Conv", + "MatMul", + "Concat", + "Attention", + "FusedConv", + "LeakyRelu", + "Sigmoid", + "GlobalAveragePool", + "AveragePool", + ] else: - supported_optype = ['Conv', 'MatMul', 'Attention', 'LSTM'] + supported_optype = ["Conv", "MatMul", "Attention", "LSTM"] inspect_node_list = [] int8_node_names = [i.name for i in int8_model.nodes()] for node in fp32_model.nodes(): - if node.op_type in supported_optype and node.name + '_quant' in int8_node_names: + if node.op_type in supported_optype and node.name + "_quant" in int8_node_names: inspect_node_list.append(node.name) filtered_params = {} @@ -1359,15 +1448,16 @@ def diagnosis_helper(self, fp32_model, int8_model, tune_cfg=None, save_path=None for node_name in inspect_node_list: node = find_by_name(node_name, fp32_model.nodes()) filtered_params[node_name] = { - 'min': np.array(self.min_max[node.output[0]][0], dtype=np.float32), - 'max': np.array(self.min_max[node.output[0]][1], dtype=np.float32)} + "min": np.array(self.min_max[node.output[0]][0], dtype=np.float32), + "max": np.array(self.min_max[node.output[0]][1], dtype=np.float32), + } if save_path: - dump_data_to_local(filtered_params, save_path, 'activation_min_max.pkl') - dump_data_to_local(tune_cfg, save_path, 'cfg.pkl') + dump_data_to_local(filtered_params, save_path, "activation_min_max.pkl") + dump_data_to_local(tune_cfg, save_path, "cfg.pkl") return inspect_node_list, tune_cfg def save(self, model, path): - """ save model + """Save model. Args: model (ModelProto): model to save @@ -1384,18 +1474,19 @@ def get_output_op_names(self, qmodel): logger.debug(f"output op names: {output_op_names}") return output_op_names - def calculate_op_sensitivity(self, model, dataloader, tune_cfg, output_op_names, - confidence_batches, fallback=True, requantize_cfgs=None): + def calculate_op_sensitivity( + self, model, dataloader, tune_cfg, output_op_names, confidence_batches, fallback=True, requantize_cfgs=None + ): """Compute the op sensitivity. 
- - The sensitivity metric is the mse between the output of the last quantized op of + + The sensitivity metric is the mse between the output of the last quantized op of the quantized model and the output of its corresponding op in the fp32 model. - + 1. Backup the tune cfg 2. Fallback each int8 op and compute its mse if use fallback (with 'fallback == True'), or re-quantize each fp32 op(fallen back in the previous stage) and compute its MSE if not. 3. Sorted op name list according to its MSE - + Args: fp32_model: The fp32 model. dataloader: the dataloader with full dataset. @@ -1408,22 +1499,27 @@ def calculate_op_sensitivity(self, model, dataloader, tune_cfg, output_op_names, """ from copy import deepcopy - fp32_op_cfg = {'activation': {'dtype': 'fp32', 'quant_mode': 'fp32'}, - 'weight': {'dtype': 'fp32'}} + fp32_op_cfg = {"activation": {"dtype": "fp32", "quant_mode": "fp32"}, "weight": {"dtype": "fp32"}} if fallback: - ops_list = [op for op, config in tune_cfg['op'].items() - if config['activation']['quant_mode'] in ('static', 'dynamic')] - replace_cfgs = {op : fp32_op_cfg for op in tune_cfg['op']} + ops_list = [ + op + for op, config in tune_cfg["op"].items() + if config["activation"]["quant_mode"] in ("static", "dynamic") + ] + replace_cfgs = {op: fp32_op_cfg for op in tune_cfg["op"]} else: - ops_list = [op for op, config in tune_cfg['op'].items() - if config['activation']['quant_mode'] == 'fp32' and op in requantize_cfgs] + ops_list = [ + op + for op, config in tune_cfg["op"].items() + if config["activation"]["quant_mode"] == "fp32" and op in requantize_cfgs + ] replace_cfgs = requantize_cfgs # Step2. compute mse mse_result = self._get_mse_order( - model, deepcopy(tune_cfg), replace_cfgs, ops_list, dataloader, - output_op_names, confidence_batches) + model, deepcopy(tune_cfg), replace_cfgs, ops_list, dataloader, output_op_names, confidence_batches + ) # Step3. 
sort mse_order = [op for op, _ in sorted(mse_result.items(), key=lambda i: i[1])] @@ -1432,24 +1528,27 @@ def calculate_op_sensitivity(self, model, dataloader, tune_cfg, output_op_names, logger.debug(f"{op}: {mse_result[op]}") return mse_order - def _get_mse_order(self, fp32_model, tune_cfg, replace_cfgs, ops_lst, dataloader, - output_op_names, confidence_batches): + def _get_mse_order( + self, fp32_model, tune_cfg, replace_cfgs, ops_lst, dataloader, output_op_names, confidence_batches + ): """Compute MSE.""" - op_cfg = tune_cfg['op'] + op_cfg = tune_cfg["op"] mse_result = {} - + fp32_output = self._inference_model_on_batches( - fp32_model, tune_cfg, dataloader, output_op_names, confidence_batches) + fp32_model, tune_cfg, dataloader, output_op_names, confidence_batches + ) for op in ops_lst: # backup and set replace tuning config - backup_cfg = op_cfg[op] + backup_cfg = op_cfg[op] op_cfg[op] = replace_cfgs[op] # quantize and inference the model q_model = self.quantize(tune_cfg, fp32_model, dataloader) q_output = self._inference_model_on_batches( - q_model, tune_cfg, dataloader, output_op_names, confidence_batches) + q_model, tune_cfg, dataloader, output_op_names, confidence_batches + ) mse_result[op] = self._calculate_mse(fp32_output, q_output) @@ -1465,16 +1564,16 @@ def _calculate_mse(self, fp32_output, q_output): result.append(np.square(i - j).mean()) return np.array(result).mean() - def _inference_model_on_batches(self, model, tune_cfg, dataloader, - output_op_name, iterations): + def _inference_model_on_batches(self, model, tune_cfg, dataloader, output_op_name, iterations): """Inference model on batches.""" ort_inputs = {} predictions = [] - session = ort.InferenceSession(self.work_space + 'eval.onnx', - providers=[self.backend]) if model.is_large_model else \ - ort.InferenceSession(model.model.SerializeToString(), - providers=[self.backend]) + session = ( + ort.InferenceSession(self.work_space + "eval.onnx", providers=[self.backend]) + if model.is_large_model + else ort.InferenceSession(model.model.SerializeToString(), providers=[self.backend]) + ) inputs_names = [i.name for i in session.get_inputs()] len_inputs = len(session.get_inputs()) for idx, (inputs, _) in enumerate(dataloader): @@ -1488,8 +1587,7 @@ def _inference_model_on_batches(self, model, tune_cfg, dataloader, else: ort_inputs.update({inputs_names[0]: to_numpy(inputs)}) else: - assert len_inputs == len(inputs), \ - 'number of input tensors must align with graph inputs' + assert len_inputs == len(inputs), "number of input tensors must align with graph inputs" if isinstance(inputs, dict): for name, input in inputs.items(): @@ -1500,6 +1598,7 @@ def _inference_model_on_batches(self, model, tune_cfg, dataloader, predictions.extend(session.run(None, ort_inputs)) return predictions + @adaptor_registry class ONNXRT_WeightOnlyAdaptor(ONNXRUNTIMEAdaptor): """The ONNXRT adaptor layer, do onnx-rt quantization, calibration, inspect layer tensors. 
@@ -1528,9 +1627,8 @@ def quantize(self, tune_cfg, model, data_loader, q_func=None): """ assert q_func is None, "quantization aware training has not been supported on ONNXRUNTIME" for precision in self.query_handler.get_precisions(): - if precision == 'weight_only_integer': - self.quantizable_op_types += \ - self.query_handler.get_op_types_by_precision(precision=precision) + if precision == "weight_only_integer": + self.quantizable_op_types += self.query_handler.get_op_types_by_precision(precision=precision) self.quantizable_ops = self._query_quantizable_ops(model.model) quant_config = self._cfg_to_quantize_config(tune_cfg) @@ -1538,37 +1636,34 @@ def quantize(self, tune_cfg, model, data_loader, q_func=None): if "GPTQ" in algos: from neural_compressor.adaptor.ox_utils.weight_only import gptq_quantize - percdamp = self.recipes.get('gptq_args', {}).get('percdamp', 0.01) - blocksize = self.recipes.get('gptq_args', {}).get('blocksize', 128) - actorder = self.recipes.get('gptq_args', {}).get('actorder', False) - mse = self.recipes.get('gptq_args', {}).get('mse', False) - perchannel = self.recipes.get('gptq_args', {}).get('perchannel', True) - calib_sampling_size = tune_cfg.get('calib_sampling_size', 1) - model = gptq_quantize(model, - quant_config, - data_loader, - calib_sampling_size, - percdamp=percdamp, - blocksize=blocksize, - actorder=actorder, - mse=mse, - perchannel=perchannel) + percdamp = self.recipes.get("gptq_args", {}).get("percdamp", 0.01) + blocksize = self.recipes.get("gptq_args", {}).get("blocksize", 128) + actorder = self.recipes.get("gptq_args", {}).get("actorder", False) + mse = self.recipes.get("gptq_args", {}).get("mse", False) + perchannel = self.recipes.get("gptq_args", {}).get("perchannel", True) + calib_sampling_size = tune_cfg.get("calib_sampling_size", 1) + model = gptq_quantize( + model, + quant_config, + data_loader, + calib_sampling_size, + percdamp=percdamp, + blocksize=blocksize, + actorder=actorder, + mse=mse, + perchannel=perchannel, + ) if "AWQ" in algos: from neural_compressor.adaptor.ox_utils.weight_only import awq_quantize - auto_scale = self.recipes.get('awq_args', {}).get('auto_scale', True) - mse_range = self.recipes.get('awq_args', {}).get('mse_range', True) - n_blocks = self.recipes.get('awq_args', {}).get('n_blocks', 5) - calib_sampling_size = tune_cfg.get('calib_sampling_size', 1) - model = awq_quantize(model, - quant_config, - data_loader, - calib_sampling_size, - auto_scale, - mse_range, - n_blocks) + auto_scale = self.recipes.get("awq_args", {}).get("auto_scale", True) + mse_range = self.recipes.get("awq_args", {}).get("mse_range", True) + n_blocks = self.recipes.get("awq_args", {}).get("n_blocks", 5) + calib_sampling_size = tune_cfg.get("calib_sampling_size", 1) + model = awq_quantize(model, quant_config, data_loader, calib_sampling_size, auto_scale, mse_range, n_blocks) elif "RTN" in algos: from neural_compressor.adaptor.ox_utils.weight_only import rtn_quantize + model = rtn_quantize(model, quant_config) model.q_config = copy.deepcopy(quant_config) self._dump_model_op_stats(model, tune_cfg) @@ -1579,28 +1674,28 @@ def _dump_model_op_stats(self, model, tune_cfg): res = {} # collect all dtype info and build empty results with existing op_type dtype_set = set() - for op, config in tune_cfg['op'].items(): + for op, config in tune_cfg["op"].items(): op_type = op[1] - if not config['weight']['dtype'] == 'fp32': - num_bits = config['weight']['bits'] - group_size = config['weight']['group_size'] + if not config["weight"]["dtype"] == "fp32": + num_bits = 
config["weight"]["bits"] + group_size = config["weight"]["group_size"] dtype_str = "A32W{}G{}".format(num_bits, group_size) dtype_set.add(dtype_str) - dtype_set.add('FP32') + dtype_set.add("FP32") dtype_list = list(dtype_set) dtype_list.sort() - for op, config in tune_cfg['op'].items(): + for op, config in tune_cfg["op"].items(): op_type = op[1] if op_type not in res.keys(): res[op_type] = {dtype: 0 for dtype in dtype_list} # fill in results with op_type and dtype - for op, config in tune_cfg['op'].items(): - if config['weight']['dtype'] == 'fp32': - res[op_type]['FP32'] += 1 + for op, config in tune_cfg["op"].items(): + if config["weight"]["dtype"] == "fp32": + res[op_type]["FP32"] += 1 else: - num_bits = config['weight']['bits'] - group_size = config['weight']['group_size'] + num_bits = config["weight"]["bits"] + group_size = config["weight"]["group_size"] dtype_str = "A32W{}G{}".format(num_bits, group_size) res[op_type][dtype_str] += 1 @@ -1613,24 +1708,20 @@ def _dump_model_op_stats(self, model, tune_cfg): field_results.extend([res[op_type][dtype] for dtype in dtype_list]) output_data.append(field_results) - Statistics(output_data, - header='Mixed Precision Statistics', - field_names=field_names).print_stat() + Statistics(output_data, header="Mixed Precision Statistics", field_names=field_names).print_stat() self.optype_statistics = field_names, output_data def _cfg_to_quantize_config(self, tune_cfg): quantize_config = {} - quantize_config['calib_iteration'] = tune_cfg['calib_iteration'] + quantize_config["calib_iteration"] = tune_cfg["calib_iteration"] for _, op in enumerate(self.quantizable_ops): - if (op.name, op.op_type) not in tune_cfg['op']: + if (op.name, op.op_type) not in tune_cfg["op"]: continue - if tune_cfg['op'][(op.name, op.op_type)]['weight']['dtype'] in \ - self.query_handler.get_fallback_list(): - quantize_config[op.name] = \ - tune_cfg['op'][(op.name, op.op_type)]['weight']['dtype'] + if tune_cfg["op"][(op.name, op.op_type)]["weight"]["dtype"] in self.query_handler.get_fallback_list(): + quantize_config[op.name] = tune_cfg["op"][(op.name, op.op_type)]["weight"]["dtype"] else: - quantize_config[op.name] = copy.deepcopy(tune_cfg['op'][(op.name, op.op_type)]) + quantize_config[op.name] = copy.deepcopy(tune_cfg["op"][(op.name, op.op_type)]) return quantize_config @@ -1656,43 +1747,47 @@ def query_fw_capability(self, model): precisions = query.get_precisions() for precision in precisions: - if precision != 'weight_only_integer': + if precision != "weight_only_integer": continue # get supported optype for target precision - optypes = query.get_op_types_by_precision(precision) if \ - query.get_op_types_by_precision(precision) != ['*'] else \ - optype_wise.keys() - - configs = query.get_quantization_capability()[precision] if \ - precision in query.get_quantization_capability() else \ - {'default': {'weight': {'dtype': precision}, 'activation': {'dtype': precision}}} + optypes = ( + query.get_op_types_by_precision(precision) + if query.get_op_types_by_precision(precision) != ["*"] + else optype_wise.keys() + ) + + configs = ( + query.get_quantization_capability()[precision] + if precision in query.get_quantization_capability() + else {"default": {"weight": {"dtype": precision}, "activation": {"dtype": precision}}} + ) for op in optypes: if op not in quantizable_optype: continue if op not in configs: - if 'default' in configs: - op_capability = copy.deepcopy(configs['default']) + if "default" in configs: + op_capability = copy.deepcopy(configs["default"]) else: continue else: 
op_capability = copy.deepcopy(configs[op]) - op_capability['activation']['quant_mode'] = 'weight_only' + op_capability["activation"]["quant_mode"] = "weight_only" if op not in optype_wise.keys(): optype_wise[op] = [op_capability] elif op_capability not in optype_wise[op]: optype_wise[op].append(op_capability) for node in self.pre_optimized_model.nodes(): - if node.op_type in ['MatMul', 'Attention'] and model.get_initializer(node.input[1]) is None: + if node.op_type in ["MatMul", "Attention"] and model.get_initializer(node.input[1]) is None: op_wise.update( - {(node.name, node.op_type): [{'weight': {'dtype': 'fp32'}, 'activation': {'dtype': 'fp32'}}]}) + {(node.name, node.op_type): [{"weight": {"dtype": "fp32"}, "activation": {"dtype": "fp32"}}]} + ) continue if node.op_type in optype_wise: - op_wise.update( - {(node.name, node.op_type): copy.deepcopy(optype_wise[node.op_type])}) + op_wise.update({(node.name, node.op_type): copy.deepcopy(optype_wise[node.op_type])}) - return {'optypewise': optype_wise, 'opwise': op_wise, 'recipes_ops': {}, 'block_wise': []} + return {"optypewise": optype_wise, "opwise": op_wise, "recipes_ops": {}, "block_wise": []} @adaptor_registry @@ -1706,6 +1801,7 @@ class ONNXRT_QLinearOpsAdaptor(ONNXRUNTIMEAdaptor): def __init__(self, framework_specific_info): super().__init__(framework_specific_info) + @adaptor_registry class ONNXRT_IntegerOpsAdaptor(ONNXRUNTIMEAdaptor): """The ONNXRT adaptor layer, do onnx-rt quantization, calibration, inspect layer tensors. @@ -1717,6 +1813,7 @@ class ONNXRT_IntegerOpsAdaptor(ONNXRUNTIMEAdaptor): def __init__(self, framework_specific_info): super().__init__(framework_specific_info) + @adaptor_registry class ONNXRT_QDQAdaptor(ONNXRUNTIMEAdaptor): """The ONNXRT adaptor layer, do onnx-rt quantization, calibration, inspect layer tensors. @@ -1728,12 +1825,12 @@ class ONNXRT_QDQAdaptor(ONNXRUNTIMEAdaptor): def __init__(self, framework_specific_info): super().__init__(framework_specific_info) -class ONNXRTQuery(QueryBackendCapability): +class ONNXRTQuery(QueryBackendCapability): def __init__(self, dynamic=False, static=False, format=None, local_config_file=None): super().__init__() self.version = ort.__version__ - self.config_version = '1.6.0' + self.config_version = "1.6.0" self.dynamic = dynamic self.static = static self.format = format @@ -1746,22 +1843,23 @@ def _one_shot_query(self): content = yaml.safe_load(f) try: self.cur_config = self._get_specified_version_cfg(content) - except Exception as e: # pragma: no cover + except Exception as e: # pragma: no cover logger.info("Fail to parse {} due to {}.".format(self.cfg, str(e))) self.cur_config = None - raise ValueError("Please check if the format of {} follows Neural Compressor yaml schema.". 
- format(self.cfg)) + raise ValueError( + "Please check if the format of {} follows Neural Compressor yaml schema.".format(self.cfg) + ) self._update_cfg_with_usr_definition() def _update_cfg_with_usr_definition(self): from neural_compressor.conf.pythonic_config import onnxruntime_config + if onnxruntime_config.graph_optimization_level is not None: - self.cur_config['graph_optimization']['level'] = \ - onnxruntime_config.graph_optimization_level + self.cur_config["graph_optimization"]["level"] = onnxruntime_config.graph_optimization_level if onnxruntime_config.precisions is not None: - self.cur_config['precisions']['names'] = ','.join(onnxruntime_config.precisions) + self.cur_config["precisions"]["names"] = ",".join(onnxruntime_config.precisions) - def _get_specified_version_cfg(self, data): # pragma: no cover + def _get_specified_version_cfg(self, data): # pragma: no cover """Get the configuration for the current runtime. If there's no matched configuration in the input yaml, we'll use the `default` field of yaml. @@ -1773,6 +1871,7 @@ def _get_specified_version_cfg(self, data): # pragma: no cover [dictionary]: the content for specific version. """ from functools import cmp_to_key + version_config = None def _compare(version1, version2): @@ -1785,15 +1884,16 @@ def _compare(version1, version2): extended_cfgs = [] for sub_data in data: - if 'default' in sub_data['version']['name']: - assert version_config == None, "Only one default config " \ - "is allowed in framework yaml file." + if "default" in sub_data["version"]["name"]: + assert version_config is None, "Only one default config " "is allowed in framework yaml file." version_config = sub_data - versions = sub_data['version']['name'] if \ - isinstance(sub_data['version']['name'], list) else \ - [sub_data['version']['name']] + versions = ( + sub_data["version"]["name"] + if isinstance(sub_data["version"]["name"], list) + else [sub_data["version"]["name"]] + ) for version in versions: - if version != 'default': + if version != "default": extended_cfgs.append((version, sub_data)) extended_cfgs = sorted(extended_cfgs, key=cmp_to_key(_compare), reverse=True) @@ -1805,68 +1905,74 @@ def _compare(version1, version2): # generate specified version config according to quantization approach and format config = {} - config['capabilities'] = {} + config["capabilities"] = {} for k, v in version_config.items(): - if k == 'version': - config['version'] = v - elif k == 'recipes': - config['graph_optimization'] = v['graph_optimization'] + if k == "version": + config["version"] = v + elif k == "recipes": + config["graph_optimization"] = v["graph_optimization"] else: - if self.static and 'static' in v: - config['capabilities'].update({k: {node_op: node_config - for node_op, node_config in v['static'].items() - if 'mode' in node_config and \ - self.format.split('ops')[0].lower() in \ - [mode.lower() for mode in node_config['mode']]}}) - elif self.dynamic and 'dynamic' in v: - config['capabilities'].update({k: v['dynamic']}) - elif k == 'weight_only_integer': - config['capabilities'].update({k: v}) - - # generate other config content including precisions and ops - precisions = list(version_config.keys() - {'version', 'recipes'}) - if 'fp32' not in precisions: - precisions.append('fp32') - config['precisions'] = {'names': ','.join(precisions)} + if self.static and "static" in v: + config["capabilities"].update( + { + k: { + node_op: node_config + for node_op, node_config in v["static"].items() + if "mode" in node_config + and 
self.format.split("ops")[0].lower() + in [mode.lower() for mode in node_config["mode"]] + } + } + ) + elif self.dynamic and "dynamic" in v: + config["capabilities"].update({k: v["dynamic"]}) + elif k == "weight_only_integer": + config["capabilities"].update({k: v}) + + # generate other config content including precisions and ops + precisions = list(version_config.keys() - {"version", "recipes"}) + if "fp32" not in precisions: + precisions.append("fp32") + config["precisions"] = {"names": ",".join(precisions)} op_types = {} for precision in precisions: - if precision in config['capabilities']: - op_types[precision] = [op_type for op_type in config['capabilities'][precision].keys()] + if precision in config["capabilities"]: + op_types[precision] = [op_type for op_type in config["capabilities"][precision].keys()] elif precision in version_config: op_types[precision] = version_config[precision] - for precision, precision_config in config['capabilities'].items(): + for precision, precision_config in config["capabilities"].items(): op_types[precision] = [op_type for op_type in precision_config.keys()] - if 'fp32' not in op_types: - op_types['fp32'] = ['*'] - config['ops'] = op_types + if "fp32" not in op_types: + op_types["fp32"] = ["*"] + config["ops"] = op_types return config - def get_version(self): # pragma: no cover + def get_version(self): # pragma: no cover """Get the current backend version infomation. Returns: [string]: version string. """ - return self.cur_config['version']['name'] + return self.cur_config["version"]["name"] - def get_precisions(self): # pragma: no cover + def get_precisions(self): # pragma: no cover """Get supported precisions for current backend. Returns: [string list]: the precisions' name. """ - return [i.strip() for i in self.cur_config['precisions']['names'].split(',')] + return [i.strip() for i in self.cur_config["precisions"]["names"].split(",")] - def get_op_types(self): # pragma: no cover + def get_op_types(self): # pragma: no cover """Get the supported op types by all precisions. Returns: [dictionary list]: A list composed of dictionary which key is precision and value is the op types. """ - return self.cur_config['ops'] + return self.cur_config["ops"] def get_quantization_capability(self): """Get the supported op types' quantization capability. @@ -1875,10 +1981,10 @@ def get_quantization_capability(self): [dictionary list]: A list composed of dictionary which key is precision and value is a dict that describes all op types' quantization capability. """ - return self.cur_config['capabilities'] + return self.cur_config["capabilities"] def get_op_types_by_precision(self, precision): - """Get op types per precision + """Get op types per precision. Args: precision (string): precision name @@ -1886,20 +1992,20 @@ def get_op_types_by_precision(self, precision): Returns: [string list]: A list composed of op type. 
""" - #assert precision in list(self.cur_config['ops'].keys()) - if precision in list(self.cur_config['ops'].keys()): - return self.cur_config['ops'][precision] + # assert precision in list(self.cur_config['ops'].keys()) + if precision in list(self.cur_config["ops"].keys()): + return self.cur_config["ops"][precision] else: return [] def get_graph_optimization(self): - """ Get onnxruntime graph optimization level""" - level = self.cur_config['graph_optimization']['level'] + """Get onnxruntime graph optimization level.""" + level = self.cur_config["graph_optimization"]["level"] return level def get_fallback_list(self): """Get fallback list.""" - return list(self.cur_config['ops'].keys() - self.cur_config['capabilities'].keys()) + return list(self.cur_config["ops"].keys() - self.cur_config["capabilities"].keys()) def get_specific_cfg_version(self): """Get version of the specific config.""" diff --git a/neural_compressor/adaptor/onnxrt.yaml b/neural_compressor/adaptor/onnxrt.yaml index 8e967d6d65c..36bbf3069d3 100644 --- a/neural_compressor/adaptor/onnxrt.yaml +++ b/neural_compressor/adaptor/onnxrt.yaml @@ -65,7 +65,7 @@ 'scheme': ['asym'], 'granularity': ['per_channel', 'per_tensor'], 'algorithm': ['minmax'] - }, + }, 'activation': *uint8_asym_pertensor_minmax, 'mode': ['QDQ', 'QLinear'] }, @@ -75,7 +75,7 @@ 'scheme': ['sym'], 'granularity': ['per_tensor'], 'algorithm': ['minmax'] - }, + }, 'activation': *uint8_asym_pertensor, 'mode': ['QDQ', 'QLinear'] }, @@ -89,7 +89,7 @@ 'activation': *uint8_asym_pertensor_minmax, 'mode': ['QLinear'] }, - 'Relu': *default_static_qlinear_qdq_minmax, + 'Relu': *default_static_qlinear_qdq_minmax, 'Clip': *default_static_qlinear_qdq_minmax, 'LeakyRelu': *default_static_qlinear_qdq_minmax, 'Sigmoid': *default_static_qlinear_qdq_minmax, @@ -156,7 +156,7 @@ 'Split': *default_static_qlinear_qdq_minmax, 'Add': *default_static_qlinear, }, - 'dynamic': *ref_1_6_dynamic + 'dynamic': *ref_1_6_dynamic } recipes: <<: *default_optimization @@ -417,7 +417,7 @@ 'Abs': *default_static_qlinear_qdq_minmax, 'Shrink': *default_static_qlinear_qdq_minmax, 'Sign': *default_static_qlinear_qdq_minmax, - 'Flatten': *default_static_qlinear_qdq_minmax, + 'Flatten': *default_static_qlinear_qdq_minmax, 'Expand': *default_static_qlinear_qdq_minmax, 'Slice': *default_static_qlinear_qdq_minmax, 'Mod': *default_static_qlinear_qdq_minmax, diff --git a/neural_compressor/adaptor/onnxrt_cuda.yaml b/neural_compressor/adaptor/onnxrt_cuda.yaml index 36ccc604be2..562fe410758 100644 --- a/neural_compressor/adaptor/onnxrt_cuda.yaml +++ b/neural_compressor/adaptor/onnxrt_cuda.yaml @@ -45,7 +45,7 @@ 'scheme': ['asym'], 'granularity': ['per_channel', 'per_tensor'], 'algorithm': ['minmax'] - }, + }, 'activation': *uint8_asym_pertensor_minmax, 'mode': ['QDQ', 'QLinear'] }, @@ -55,7 +55,7 @@ 'scheme': ['sym'], 'granularity': ['per_tensor'], 'algorithm': ['minmax'] - }, + }, 'activation': *uint8_asym_pertensor_minmax, 'mode': ['QDQ', 'QLinear'] }, @@ -69,7 +69,7 @@ 'activation': *uint8_asym_pertensor_minmax, 'mode': ['QLinear'] }, - 'Relu': *default_static_qlinear_qdq, + 'Relu': *default_static_qlinear_qdq, 'Clip': *default_static_qlinear_qdq, 'LeakyRelu': *default_static_qlinear_qdq, 'Sigmoid': *default_static_qlinear_qdq, @@ -145,7 +145,7 @@ 'Split': *default_static_qlinear_qdq, 'Add': *default_static_qlinear, }, - 'dynamic': *ref_1_6_dynamic + 'dynamic': *ref_1_6_dynamic } fp16: *common_fp16 bf16: *common_bf16 @@ -410,7 +410,7 @@ 'Abs': *default_static_qlinear_qdq, 'Shrink': *default_static_qlinear_qdq, 
'Sign': *default_static_qlinear_qdq, - 'Flatten': *default_static_qlinear_qdq, + 'Flatten': *default_static_qlinear_qdq, 'Expand': *default_static_qlinear_qdq, 'Slice': *default_static_qlinear_qdq, 'Mod': *default_static_qlinear_qdq, diff --git a/neural_compressor/adaptor/onnxrt_dml.yaml b/neural_compressor/adaptor/onnxrt_dml.yaml index 9040692d881..126e89572f4 100644 --- a/neural_compressor/adaptor/onnxrt_dml.yaml +++ b/neural_compressor/adaptor/onnxrt_dml.yaml @@ -44,7 +44,7 @@ 'activation': *uint8_asym_pertensor_minmax, 'mode': ['QLinear'] }, - 'Relu': *default_static_qlinear_qdq, + 'Relu': *default_static_qlinear_qdq, 'Clip': *default_static_qlinear_qdq, 'MaxPool': *default_static_qlinear_qdq, 'Add': *default_static_qlinear, @@ -54,7 +54,7 @@ 'Softmax', 'Gemm', 'MatMul', 'Conv', 'Concat', 'Upsample', 'Pow', 'Sqrt', 'DepthToSpace', 'Clip', 'BatchNormalization', 'Transpose', 'Softmax', 'AveragePool', 'Squeeze', 'MaxPool', 'Relu', 'Concat'] - + recipes: &default_optimization graph_optimization: # from onnxruntime graph_optimization_level level: ['DISABLE_ALL', 'ENABLE_BASIC', 'ENABLE_EXTENDED', 'ENABLE_ALL'] diff --git a/neural_compressor/adaptor/onnxrt_dnnl.yaml b/neural_compressor/adaptor/onnxrt_dnnl.yaml index 2d2d718130c..1abc8ab1029 100644 --- a/neural_compressor/adaptor/onnxrt_dnnl.yaml +++ b/neural_compressor/adaptor/onnxrt_dnnl.yaml @@ -45,7 +45,7 @@ 'scheme': ['asym'], 'granularity': ['per_channel', 'per_tensor'], 'algorithm': ['minmax'] - }, + }, 'activation': *uint8_asym_pertensor_minmax, 'mode': ['QDQ', 'QLinear'] }, @@ -55,7 +55,7 @@ 'scheme': ['sym'], 'granularity': ['per_tensor'], 'algorithm': ['minmax'] - }, + }, 'activation': *uint8_asym_pertensor_minmax, 'mode': ['QDQ', 'QLinear'] }, @@ -69,7 +69,7 @@ 'activation': *uint8_asym_pertensor_minmax, 'mode': ['QLinear'] }, - 'Relu': *default_static_qlinear_qdq, + 'Relu': *default_static_qlinear_qdq, 'Clip': *default_static_qlinear_qdq, 'LeakyRelu': *default_static_qlinear_qdq, 'Sigmoid': *default_static_qlinear_qdq, @@ -135,7 +135,7 @@ 'Split': *default_static_qlinear_qdq, 'Add': *default_static_qlinear, }, - 'dynamic': *ref_1_6_dynamic + 'dynamic': *ref_1_6_dynamic } recipes: <<: *default_optimization diff --git a/neural_compressor/adaptor/onnxrt_trt.yaml b/neural_compressor/adaptor/onnxrt_trt.yaml index d63545c006e..1c64b14d6e2 100644 --- a/neural_compressor/adaptor/onnxrt_trt.yaml +++ b/neural_compressor/adaptor/onnxrt_trt.yaml @@ -138,4 +138,4 @@ name: 'default' int8: *ref_1_7 recipes: - <<: *default_optimization \ No newline at end of file + <<: *default_optimization diff --git a/neural_compressor/adaptor/ox_utils/operators/__init__.py b/neural_compressor/adaptor/ox_utils/operators/__init__.py index 90713a2162f..2c5d05e8c32 100644 --- a/neural_compressor/adaptor/ox_utils/operators/__init__.py +++ b/neural_compressor/adaptor/ox_utils/operators/__init__.py @@ -23,7 +23,7 @@ modules = glob.glob(join(dirname(__file__), "*.py")) for f in modules: - if isfile(f) and not f.startswith('__') and not f.endswith('__init__.py'): + if isfile(f) and not f.startswith("__") and not f.endswith("__init__.py"): __import__(basename(f)[:-3], globals(), locals(), level=1) __all__ = ["OPERATORS", "QOPERATORS"] diff --git a/neural_compressor/adaptor/ox_utils/operators/activation.py b/neural_compressor/adaptor/ox_utils/operators/activation.py index 59697f44810..81a1e687f18 100644 --- a/neural_compressor/adaptor/ox_utils/operators/activation.py +++ b/neural_compressor/adaptor/ox_utils/operators/activation.py @@ -17,9 +17,11 @@ """Activation 
operator.""" import onnx -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry + +from neural_compressor.adaptor.ox_utils.operators.ops import Operator, QOperator, op_registry, qop_registry from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain + @op_registry(op_types="LeakyRelu, Sigmoid") class ActivationOperator(Operator): """Activation operator.""" @@ -35,7 +37,7 @@ def quantize_check(self): if not data_found: return False return True - + def quantize(self): """Do quantizaion.""" node = self.node @@ -45,11 +47,10 @@ def quantize(self): def convert_check(self, convert_format): """Check if conversion can be done.""" node = self.node - assert convert_format in ['static'], \ - "convert format for {} should be in ['static']".format(node.op_type) - + assert convert_format in ["static"], "convert format for {} should be in ['static']".format(node.op_type) + children = self.quantizer.model.get_children(node) - if len(children) == 0 or not node.name.endswith('_quant'): + if len(children) == 0 or not node.name.endswith("_quant"): return False return True @@ -66,21 +67,22 @@ def convert(self, convert_format): qlinear_activation_output = child.output[0] kwargs = {} - for attribute in node.attribute: # pragma: no cover + for attribute in node.attribute: # pragma: no cover kwargs.update(attribute_to_kwarg(attribute)) kwargs["domain"] = ms_domain qlinear_activation_node = onnx.helper.make_node( - "QLinear" + node.op_type, inputs, - [qlinear_activation_output], node.name, **kwargs) + "QLinear" + node.op_type, inputs, [qlinear_activation_output], node.name, **kwargs + ) self.quantizer.new_nodes.append(qlinear_activation_node) self.quantizer.remove_nodes.extend([parent, child, node]) + @op_registry(op_types="Relu, Clip") class RemovableActivationOperator(Operator): """Removable activation operator.""" - + def __init__(self, onnx_quantizer, onnx_node): """Initialization.""" super(RemovableActivationOperator, self).__init__(onnx_quantizer, onnx_node) @@ -91,7 +93,7 @@ def quantize_check(self): if node.input[0] not in self.quantizer.quantized_value_map: return False return True - + def quantize(self): """Do quantization.""" node = self.node @@ -101,9 +103,11 @@ def quantize(self): self.quantizer.model.replace_input_of_all_nodes(node.output[0], node.input[0]) self.quantizer.remove_nodes.append(node) + @qop_registry(op_types="QLinearLeakyRelu, QLinearSigmoid") class QActivationOperator(QOperator): """INT8 activation operator in QOperator format.""" + def __init__(self, onnx_node, children, initializers): """Initialization.""" super().__init__(onnx_node, children, initializers) @@ -115,31 +119,28 @@ def convert(self): inits = [] # input dq in_dq = onnx.helper.make_node( - 'DequantizeLinear', - node.input[:3], - [node.name + '_in_dequant'], - node.name + '_in_dequant') - inputs = [node.name + '_in_dequant'] + "DequantizeLinear", node.input[:3], [node.name + "_in_dequant"], node.name + "_in_dequant" + ) + inputs = [node.name + "_in_dequant"] add_nodes.append(in_dq) # output q out_q = onnx.helper.make_node( - 'QuantizeLinear', - [node.name + '_out', node.input[3], node.input[4]], - node.output, - node.name + '_out_quant') - outputs = [node.name + '_out'] + "QuantizeLinear", [node.name + "_out", node.input[3], node.input[4]], node.output, node.name + "_out_quant" + ) + outputs = [node.name + "_out"] add_nodes.append(out_q) kwargs = {} - for attribute in node.attribute: # pragma: no cover + for attribute in node.attribute: # pragma: 
no cover kwargs.update(attribute_to_kwarg(attribute)) activation_node = onnx.helper.make_node( - node.op_type.split('QLinear')[-1], inputs, - outputs, node.name + '_convert', **kwargs) + node.op_type.split("QLinear")[-1], inputs, outputs, node.name + "_convert", **kwargs + ) add_nodes.append(activation_node) return True, add_nodes, inits + @op_registry(op_types="Softmax, BiasGelu, Elu, Exp, FastGelu, Gelu, Softplus, Tanh") class Float16ActivationOperator(Operator): """Float16 Activation operator.""" diff --git a/neural_compressor/adaptor/ox_utils/operators/argmax.py b/neural_compressor/adaptor/ox_utils/operators/argmax.py index 841b8bb967c..392d3bf83c6 100644 --- a/neural_compressor/adaptor/ox_utils/operators/argmax.py +++ b/neural_compressor/adaptor/ox_utils/operators/argmax.py @@ -16,7 +16,8 @@ # limitations under the License. """ArgMax operator.""" -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry +from neural_compressor.adaptor.ox_utils.operators.ops import Operator, QOperator, op_registry, qop_registry + @op_registry(op_types="ArgMax") class ArgMaxOperator(Operator): @@ -29,21 +30,22 @@ def __init__(self, onnx_quantizer, onnx_node): def convert_check(self, convert_format): """Check if conversion can be done.""" node = self.node - assert convert_format in ['static'], \ - "convert format for {} should be in ['static']".format(node.op_type) + assert convert_format in ["static"], "convert format for {} should be in ['static']".format(node.op_type) return True def convert(self, convert_format): """Convert to quantized format.""" node = self.node - origin_name = node.input[0].split('_argmax_node')[0] + origin_name = node.input[0].split("_argmax_node")[0] if origin_name in self.quantizer.quantized_value_map: - node.name = node.name + '_quant' + node.name = node.name + "_quant" + @qop_registry(op_types="ArgMax") class QArgMaxOperator(QOperator): """INT8 ArgMax operator.""" + def __init__(self, onnx_node, children, initializers): """Initialization.""" super().__init__(onnx_node, children, initializers) diff --git a/neural_compressor/adaptor/ox_utils/operators/attention.py b/neural_compressor/adaptor/ox_utils/operators/attention.py index 8886bf32f09..33065314f18 100644 --- a/neural_compressor/adaptor/ox_utils/operators/attention.py +++ b/neural_compressor/adaptor/ox_utils/operators/attention.py @@ -17,8 +17,10 @@ """Attention operator.""" import onnx -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, qop_registry, QOperator -from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain, find_by_name + +from neural_compressor.adaptor.ox_utils.operators.ops import Operator, QOperator, op_registry, qop_registry +from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, find_by_name, ms_domain + @op_registry(op_types="Attention") class AttentionOperator(Operator): @@ -37,10 +39,12 @@ def quantize(self): def convert_check(self, convert_format): """Check if conversion can be done.""" node = self.node - assert convert_format in ['dynamic', 'static'], \ - "convert format for {} should be in ['dynamic', 'static']".format(node.op_type) - - if not node.name.endswith('_quant'): + assert convert_format in [ + "dynamic", + "static", + ], "convert format for {} should be in ['dynamic', 'static']".format(node.op_type) + + if not node.name.endswith("_quant"): return False return True @@ -52,16 +56,16 @@ def convert(self, convert_format): scale = [] zp = [] for parent in parents[:2]: - 
if parent.op_type == 'DequantizeLinear': + if parent.op_type == "DequantizeLinear": quantized_name.append(parent.input[0]) scale.append(parent.input[1]) zp.append(parent.input[2]) self.quantizer.remove_nodes.append(parent) - elif parent.op_type == 'DynamicQuantizeLinear': + elif parent.op_type == "DynamicQuantizeLinear": quantized_name.append(parent.output[0]) scale.append(parent.output[1]) zp.append(parent.output[2]) - + inputs = [] inputs.extend(quantized_name) inputs.append(node.input[2]) @@ -72,15 +76,15 @@ def convert(self, convert_format): inputs.append(node.input[4]) kwargs = {} - for attribute in node.attribute: # pragma: no cover + for attribute in node.attribute: # pragma: no cover kwargs.update(attribute_to_kwarg(attribute)) kwargs["domain"] = ms_domain - qattention_node = onnx.helper.make_node("QAttention", inputs, node.output, - node.name, **kwargs) + qattention_node = onnx.helper.make_node("QAttention", inputs, node.output, node.name, **kwargs) self.quantizer.new_nodes.append(qattention_node) self.quantizer.remove_nodes.append(node) + @qop_registry(op_types="QAttention") class QAttentionOperator(QOperator): """QAttention operator.""" @@ -99,31 +103,28 @@ def convert(self): return False, add_nodes, inits # input dq in_dq1 = onnx.helper.make_node( - 'DequantizeLinear', + "DequantizeLinear", [node.input[0], node.input[3], node.input[6]], - [node.name + '_in_dequant1'], - node.name + '_in_dequant1') - + [node.name + "_in_dequant1"], + node.name + "_in_dequant1", + ) + in_dq2 = onnx.helper.make_node( - 'DequantizeLinear', + "DequantizeLinear", [node.input[1], node.input[4], node.input[7]], - [node.name + '_in_dequant2'], - node.name + '_in_dequant2') - inputs = [node.name + '_in_dequant1', - node.name + '_in_dequant2', - node.input[2], - node.input[5]] - + [node.name + "_in_dequant2"], + node.name + "_in_dequant2", + ) + inputs = [node.name + "_in_dequant1", node.name + "_in_dequant2", node.input[2], node.input[5]] + add_nodes.extend([in_dq1, in_dq2]) outputs = node.output kwargs = {} - for attribute in node.attribute: # pragma: no cover + for attribute in node.attribute: # pragma: no cover kwargs.update(attribute_to_kwarg(attribute)) kwargs["domain"] = ms_domain - binary_node = onnx.helper.make_node( - 'Attention', inputs, - outputs, node.name + '_convert', **kwargs) + binary_node = onnx.helper.make_node("Attention", inputs, outputs, node.name + "_convert", **kwargs) add_nodes.append(binary_node) - return True, add_nodes, inits \ No newline at end of file + return True, add_nodes, inits diff --git a/neural_compressor/adaptor/ox_utils/operators/binary_op.py b/neural_compressor/adaptor/ox_utils/operators/binary_op.py index e14956d2562..33f51a0ebff 100644 --- a/neural_compressor/adaptor/ox_utils/operators/binary_op.py +++ b/neural_compressor/adaptor/ox_utils/operators/binary_op.py @@ -17,13 +17,15 @@ """Binary operator.""" import onnx -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry + +from neural_compressor.adaptor.ox_utils.operators.ops import Operator, QOperator, op_registry, qop_registry from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain + @op_registry(op_types="Add, Mul") class BinaryOperator(Operator): """Binary operator.""" - + def __init__(self, onnx_quantizer, onnx_node): """Initialization.""" super(BinaryOperator, self).__init__(onnx_quantizer, onnx_node) @@ -34,7 +36,7 @@ def quantize_check(self): data_found, _, _, _, _ = self.quantizer._get_quantization_params(node.output[0]) if 
not data_found: return False - if self.quantizer.backend == 'TensorrtExecutionProvider': + if self.quantizer.backend == "TensorrtExecutionProvider": return True if not all([self.quantizer.is_valid_quantize_weight(i) for i in node.input]): return False @@ -44,18 +46,17 @@ def quantize(self): """Do quantizaion.""" node = self.node self.quantizer.quantize_inputs(node, initializer_use_weight_qType=False) - if not self.disable_qdq_for_node_output or self.quantizer.mode != 'qdq': + if not self.disable_qdq_for_node_output or self.quantizer.mode != "qdq": self.quantizer.quantize_outputs(node) node.name = node.name + "_quant" def convert_check(self, convert_format): """Check if conversion can be done.""" node = self.node - assert convert_format in ['static'], \ - "convert format for {} should be in ['static']".format(node.op_type) + assert convert_format in ["static"], "convert format for {} should be in ['static']".format(node.op_type) children = self.quantizer.model.get_children(node) - if len(children) == 0 or not node.name.endswith('_quant'): + if len(children) == 0 or not node.name.endswith("_quant"): return False return True @@ -68,7 +69,7 @@ def convert(self, convert_format): qlinear_binary_math_output = child.output[0] kwargs = {} - for attribute in node.attribute: # pragma: no cover + for attribute in node.attribute: # pragma: no cover kwargs.update(attribute_to_kwarg(attribute)) kwargs["domain"] = ms_domain @@ -77,21 +78,20 @@ def convert(self, convert_format): qlinear_binary_math_inputs.extend(parent.input) qlinear_binary_math_inputs.extend(child.input[1:]) - qlinear_binary_math_node = onnx.helper.make_node("QLinear" + node.op_type, - qlinear_binary_math_inputs, - [qlinear_binary_math_output], - node.name, - **kwargs) + qlinear_binary_math_node = onnx.helper.make_node( + "QLinear" + node.op_type, qlinear_binary_math_inputs, [qlinear_binary_math_output], node.name, **kwargs + ) self.quantizer.new_nodes += [qlinear_binary_math_node] self.quantizer.remove_nodes.extend(parents) self.quantizer.remove_nodes.append(child) self.quantizer.remove_nodes.append(node) + @op_registry(op_types="Mod") class BinaryDirect8BitOperator(Operator): """Binary operator.""" - + def __init__(self, onnx_quantizer, onnx_node): """Initialization.""" super(BinaryDirect8BitOperator, self).__init__(onnx_quantizer, onnx_node) @@ -104,25 +104,24 @@ def quantize_check(self): return False if not all([self.quantizer.is_valid_quantize_weight(i) for i in node.input]): return False - + return True def quantize(self): """Do quantizaion.""" node = self.node self.quantizer.quantize_inputs(node, initializer_use_weight_qType=False) - if not self.disable_qdq_for_node_output or self.quantizer.mode != 'qdq': + if not self.disable_qdq_for_node_output or self.quantizer.mode != "qdq": self.quantizer.quantize_outputs(node) node.name = node.name + "_quant" def convert_check(self, convert_format): """Check if conversion can be done.""" node = self.node - assert convert_format in ['static'], \ - "convert format for {} should be in ['static']".format(node.op_type) + assert convert_format in ["static"], "convert format for {} should be in ['static']".format(node.op_type) children = self.quantizer.model.get_children(node) - if len(children) == 0 or not node.name.endswith('_quant'): + if len(children) == 0 or not node.name.endswith("_quant"): return False return True @@ -131,18 +130,19 @@ def convert(self, convert_format): node = self.node parents = self.quantizer.model.get_parents(node) children = self.quantizer.model.get_children(node) - if 
any([i.op_type == 'DequantizeLinear' for i in parents]) and \ - any([i.op_type == 'QuantizeLinear' for i in children]): + if any([i.op_type == "DequantizeLinear" for i in parents]) and any( + [i.op_type == "QuantizeLinear" for i in children] + ): for idx, parent in enumerate(parents): - if parent.op_type == 'DequantizeLinear': + if parent.op_type == "DequantizeLinear": self.node.input[idx] = parent.input[0] self.quantizer.remove_nodes.append(parent) for child in children: - if child.op_type == 'QuantizeLinear': + if child.op_type == "QuantizeLinear": self.quantizer.remove_nodes.append(child) - self.quantizer.model.replace_input_of_all_nodes( - child.output[0], node.output[0] + '_quantized') - node.output[0] = node.output[0] + '_quantized' + self.quantizer.model.replace_input_of_all_nodes(child.output[0], node.output[0] + "_quantized") + node.output[0] = node.output[0] + "_quantized" + @qop_registry(op_types="QLinearAdd, QLinearMul") class QBinaryOperator(QOperator): @@ -159,38 +159,33 @@ def convert(self): inits = [] # input dq in_dq1 = onnx.helper.make_node( - 'DequantizeLinear', - node.input[:3], - [node.name + '_in_dequant1'], - node.name + '_in_dequant1') - + "DequantizeLinear", node.input[:3], [node.name + "_in_dequant1"], node.name + "_in_dequant1" + ) + in_dq2 = onnx.helper.make_node( - 'DequantizeLinear', - node.input[3:6], - [node.name + '_in_dequant2'], - node.name + '_in_dequant2') - inputs = [node.name + '_in_dequant1', node.name + '_in_dequant2'] - + "DequantizeLinear", node.input[3:6], [node.name + "_in_dequant2"], node.name + "_in_dequant2" + ) + inputs = [node.name + "_in_dequant1", node.name + "_in_dequant2"] + add_nodes.extend([in_dq1, in_dq2]) # output q out_q = onnx.helper.make_node( - 'QuantizeLinear', - [node.name + '_out', node.input[6], node.input[7]], - node.output, - node.name + '_out_quant') - outputs = [node.name + '_out'] + "QuantizeLinear", [node.name + "_out", node.input[6], node.input[7]], node.output, node.name + "_out_quant" + ) + outputs = [node.name + "_out"] add_nodes.append(out_q) kwargs = {} - for attribute in node.attribute: # pragma: no cover + for attribute in node.attribute: # pragma: no cover kwargs.update(attribute_to_kwarg(attribute)) binary_node = onnx.helper.make_node( - node.op_type.split('QLinear')[-1], inputs, - outputs, node.name + '_convert', **kwargs) + node.op_type.split("QLinear")[-1], inputs, outputs, node.name + "_convert", **kwargs + ) add_nodes.append(binary_node) return True, add_nodes, inits + @op_registry(op_types="Sum, Sub, Div, Pow, Equal, Greater, GreaterOrEqual, Less, LessOrEqual") class Float16BinaryOperator(Operator): """Float16 Binary operator.""" diff --git a/neural_compressor/adaptor/ox_utils/operators/concat.py b/neural_compressor/adaptor/ox_utils/operators/concat.py index c5879a76066..846fb3150cb 100644 --- a/neural_compressor/adaptor/ox_utils/operators/concat.py +++ b/neural_compressor/adaptor/ox_utils/operators/concat.py @@ -17,9 +17,11 @@ """Concat Operator.""" import onnx -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry + +from neural_compressor.adaptor.ox_utils.operators.ops import Operator, QOperator, op_registry, qop_registry from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain + @op_registry(op_types="Concat") class ConcatOperator(Operator): """Concat Operator.""" @@ -31,74 +33,70 @@ def __init__(self, onnx_quantizer, onnx_node): def quantize_check(self): """Check if quantizaion can be done.""" node = self.node - if 
len(node.input) == 1: # pragma: no cover + if len(node.input) == 1: # pragma: no cover return False inits = [i.name for i in self.quantizer.model.initializer()] - if all([inp not in self.quantizer.quantized_value_map and inp not in inits \ - for inp in node.input]) or \ - not all([inp in self.quantizer.quantized_value_map or inp in inits \ - for inp in node.input]): + if all([inp not in self.quantizer.quantized_value_map and inp not in inits for inp in node.input]) or not all( + [inp in self.quantizer.quantized_value_map or inp in inits for inp in node.input] + ): return False return True def quantize(self): """Do quantizaion.""" node = self.node - inits = [i.name for i in self.quantizer.model.initializer()] + inits = [i.name for i in self.quantizer.model.initializer()] for idx, inp in enumerate(node.input): initializer_use_weight_qType = inp not in inits self.quantizer.quantize_inputs(node, [idx], initializer_use_weight_qType) - if not self.disable_qdq_for_node_output or self.quantizer.mode != 'qdq': + if not self.disable_qdq_for_node_output or self.quantizer.mode != "qdq": self.quantizer.quantize_outputs(node) node.name = node.name + "_quant" def convert_check(self, convert_format): """Check if conversion can be done.""" node = self.node - assert convert_format in ['static'], \ - "convert format for {} should be in ['static']".format(node.op_type) - + assert convert_format in ["static"], "convert format for {} should be in ['static']".format(node.op_type) + parents = self.quantizer.model.get_parents(node) children = self.quantizer.model.get_children(node) - if len(children) == 0 or len(parents) == 0 or not node.name.endswith('_quant'): + if len(children) == 0 or len(parents) == 0 or not node.name.endswith("_quant"): return False return True def convert(self, convert_format): """Convert to QOperator format.""" node = self.node - + parents = self.quantizer.model.get_parents(node) children = self.quantizer.model.get_children(node) - - if all([i.op_type == 'DequantizeLinear' for i in parents]) and \ - any([i.op_type == 'QuantizeLinear' for i in children]): + + if all([i.op_type == "DequantizeLinear" for i in parents]) and any( + [i.op_type == "QuantizeLinear" for i in children] + ): inputs = [] - inputs.extend([i for i in children if i.op_type == 'QuantizeLinear'][0].input[1:]) + inputs.extend([i for i in children if i.op_type == "QuantizeLinear"][0].input[1:]) for parent in parents: inputs.extend(parent.input) self.quantizer.remove_nodes.append(parent) for child in children: - if child.op_type == 'QuantizeLinear': + if child.op_type == "QuantizeLinear": self.quantizer.remove_nodes.append(child) - self.quantizer.model.replace_input_of_all_nodes( - child.output[0], node.output[0] + '_quantized') - + self.quantizer.model.replace_input_of_all_nodes(child.output[0], node.output[0] + "_quantized") + kwargs = {} for attribute in node.attribute: kwargs.update(attribute_to_kwarg(attribute)) kwargs["domain"] = ms_domain - qlconcat_node = onnx.helper.make_node("QLinearConcat", - inputs, - [node.output[0] + "_quantized"], - node.name, - **kwargs) + qlconcat_node = onnx.helper.make_node( + "QLinearConcat", inputs, [node.output[0] + "_quantized"], node.name, **kwargs + ) self.quantizer.new_nodes += [qlconcat_node] self.quantizer.remove_nodes.append(node) - - def cast(self): # pragma: no cover + + def cast(self): # pragma: no cover """Cast node.""" node = self.node cast_tensor = [i.tensor_name for i in self.quantizer.new_value_info.values()] @@ -107,6 +105,7 @@ def cast(self): # pragma: no cover 
self.quantizer.cast_inputs(self.node, self.dtype) self.quantizer.cast_outputs(self.node, self.dtype) + @qop_registry(op_types="QLinearConcat") class QConcatOperator(QOperator): """QConcat Operator.""" @@ -124,28 +123,25 @@ def convert(self): # input dq for i in range(int((len(node.input) - 2) / 3)): in_dq = onnx.helper.make_node( - 'DequantizeLinear', - node.input[2 + i*3 : 2 + (i+1)*3], - [node.name + '_in_dequant_' + str(i)], - node.name + '_in_dequant_' + str(i)) - inputs.append(node.name + '_in_dequant_' + str(i)) + "DequantizeLinear", + node.input[2 + i * 3 : 2 + (i + 1) * 3], + [node.name + "_in_dequant_" + str(i)], + node.name + "_in_dequant_" + str(i), + ) + inputs.append(node.name + "_in_dequant_" + str(i)) add_nodes.append(in_dq) # output q out_q = onnx.helper.make_node( - 'QuantizeLinear', - [node.name + '_out', node.input[0], node.input[1]], - node.output, - node.name + '_out_quant') - outputs = [node.name + '_out'] + "QuantizeLinear", [node.name + "_out", node.input[0], node.input[1]], node.output, node.name + "_out_quant" + ) + outputs = [node.name + "_out"] add_nodes.append(out_q) kwargs = {} - for attribute in node.attribute: # pragma: no cover + for attribute in node.attribute: # pragma: no cover kwargs.update(attribute_to_kwarg(attribute)) - concat_node = onnx.helper.make_node( - 'Concat', inputs, - outputs, node.name + '_convert', **kwargs) + concat_node = onnx.helper.make_node("Concat", inputs, outputs, node.name + "_convert", **kwargs) add_nodes.append(concat_node) return True, add_nodes, inits diff --git a/neural_compressor/adaptor/ox_utils/operators/conv.py b/neural_compressor/adaptor/ox_utils/operators/conv.py index cbe5d13b722..821b180ed55 100644 --- a/neural_compressor/adaptor/ox_utils/operators/conv.py +++ b/neural_compressor/adaptor/ox_utils/operators/conv.py @@ -19,12 +19,15 @@ import onnx from onnx import onnx_pb as onnx_proto -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry -from neural_compressor.adaptor.ox_utils.util import find_by_name, attribute_to_kwarg + +from neural_compressor.adaptor.ox_utils.operators.ops import Operator, QOperator, op_registry, qop_registry +from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, find_by_name + @op_registry(op_types="Conv, FusedConv") class ConvOperator(Operator): """Conv Operator.""" + def __init__(self, onnx_quantizer, onnx_node): """Initialization.""" super(ConvOperator, self).__init__(onnx_quantizer, onnx_node) @@ -35,9 +38,9 @@ def quantize(self): if node.op_type == "FusedConv": kwargs = {} for attribute in node.attribute: - if attribute.name == 'activation' and attribute.s in [b'Relu', b'Clip']: + if attribute.name == "activation" and attribute.s in [b"Relu", b"Clip"]: continue - if attribute.name == 'activation_params': + if attribute.name == "activation_params": continue kwargs.update(attribute_to_kwarg(attribute)) conv = onnx.helper.make_node("Conv", node.input, node.output, node.name, **kwargs) @@ -46,14 +49,13 @@ def quantize(self): self.quantizer.quantize_inputs(node, [0]) if self.per_channel: - self.quantizer.quantize_weights_per_channel(node, [1], - self.weight_dtype, self.weight_scheme, 0) + self.quantizer.quantize_weights_per_channel(node, [1], self.weight_dtype, self.weight_scheme, 0) else: self.quantizer.quantize_inputs(node, [1]) - if not self.disable_qdq_for_node_output or self.quantizer.mode != 'qdq': + if not self.disable_qdq_for_node_output or self.quantizer.mode != "qdq": self.quantizer.quantize_outputs(node) - + if 
len(node.input) == 3: self.quantizer.quantize_bias_tensor(node) @@ -62,17 +64,18 @@ def quantize(self): def convert_check(self, convert_format): """Check if conversion can be done.""" node = self.node - assert convert_format in ['dynamic', 'static'], \ - 'convert format for {} should be in [dynamic, static]'.format(node.op_type) + assert convert_format in ["dynamic", "static"], "convert format for {} should be in [dynamic, static]".format( + node.op_type + ) return True def convert(self, convert_format): """Convert to QOperator format.""" node = self.node - if convert_format == 'dynamic': + if convert_format == "dynamic": inputs = [] parents = self.quantizer.model.get_parents(node) - if parents[0].op_type == 'QuantizeLinear': + if parents[0].op_type == "QuantizeLinear": inputs.append(parents[0].output[0]) inputs.append(parents[1].input[0]) inputs.append(parents[0].input[2]) @@ -96,29 +99,29 @@ def convert(self, convert_format): kwargs = {} for attribute in node.attribute: - if attribute.name == 'activation' and attribute.s in [b'Relu', b'Clip']: # pragma: no cover + if attribute.name == "activation" and attribute.s in [b"Relu", b"Clip"]: # pragma: no cover continue - if attribute.name == 'activation_params': # pragma: no cover + if attribute.name == "activation_params": # pragma: no cover continue kwargs.update(attribute_to_kwarg(attribute)) - conv_integer_node = onnx.helper.make_node("ConvInteger", - inputs, - [conv_integer_output], - node.name, **kwargs) + conv_integer_node = onnx.helper.make_node("ConvInteger", inputs, [conv_integer_output], node.name, **kwargs) self.quantizer.new_nodes.append(conv_integer_node) # Add bias add nodes if bias_present: - conv_integer_output = self.quantizer.get_bias_add_nodes(node, - parents[1].input[0], - conv_integer_output, - quantized_bias_name) + conv_integer_output = self.quantizer.get_bias_add_nodes( + node, parents[1].input[0], conv_integer_output, quantized_bias_name + ) # Add cast operation to cast convInteger output to float. cast_op_output = conv_integer_output + "_cast_output" - cast_node = onnx.helper.make_node("Cast", [conv_integer_output], [cast_op_output], - conv_integer_output + "_cast", - to=onnx_proto.TensorProto.FLOAT) + cast_node = onnx.helper.make_node( + "Cast", + [conv_integer_output], + [cast_op_output], + conv_integer_output + "_cast", + to=onnx_proto.TensorProto.FLOAT, + ) self.quantizer.new_nodes.append(cast_node) # Add mul operation to multiply scales of two inputs. @@ -126,8 +129,9 @@ def convert(self, convert_format): scales_mul_node = find_by_name(scales_mul_op, self.quantizer.new_nodes) if scales_mul_node is None: - scales_mul_node = onnx.helper.make_node("Mul", [scale_0, scale_1], - [scales_mul_op + ":0"], scales_mul_op) + scales_mul_node = onnx.helper.make_node( + "Mul", [scale_0, scale_1], [scales_mul_op + ":0"], scales_mul_op + ) self.quantizer.new_nodes.append(scales_mul_node) scales_mul_op_output = scales_mul_node.output[0] @@ -135,13 +139,17 @@ def convert(self, convert_format): # Add mul operation to multiply mul_scales_op result with output of ConvInteger # and make the output of this node the same as output of original conv node. 
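 # (A sketch of why this rescaling step is correct, under the usual linear-quantization
 #  assumption x ~ x_scale * (x_q - x_zp) and w ~ w_scale * (w_q - w_zp):
 #  ConvInteger already subtracts the zero points and accumulates in int32, so
 #      Conv(x, w) ~ x_scale * w_scale * ConvInteger(x_q, w_q, x_zp, w_zp);
 #  the Cast above moves the int32 accumulator to float, and the Mul built below applies
 #  the combined scale, recovering the float output of the original Conv node.)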
output_scale_mul_op = node.name + "_output_scale_mul" - self.quantizer.new_nodes.append(onnx.helper.make_node("Mul", - [cast_op_output, scales_mul_op_output], [node.output[0]], output_scale_mul_op)) + self.quantizer.new_nodes.append( + onnx.helper.make_node( + "Mul", [cast_op_output, scales_mul_op_output], [node.output[0]], output_scale_mul_op + ) + ) self.quantizer.remove_nodes.extend(parents[1:]) - self.quantizer.remove_nodes.append(node) - elif convert_format == 'static': - if len(self.quantizer.model.get_children(node)) == 0 or \ - not node.name.endswith('_quant'): # pragma: no cover + self.quantizer.remove_nodes.append(node) + elif convert_format == "static": + if len(self.quantizer.model.get_children(node)) == 0 or not node.name.endswith( + "_quant" + ): # pragma: no cover return parents = self.quantizer.model.get_parents(node) child = self.quantizer.model.get_children(node)[0] @@ -156,20 +164,21 @@ def convert(self, convert_format): kwargs = {} for attribute in node.attribute: - if attribute.name == 'activation' and attribute.s in [b'Relu', b'Clip']: # pragma: no cover + if attribute.name == "activation" and attribute.s in [b"Relu", b"Clip"]: # pragma: no cover continue - if attribute.name == 'activation_params': # pragma: no cover + if attribute.name == "activation_params": # pragma: no cover continue kwargs.update(attribute_to_kwarg(attribute)) - qlinear_conv_node = onnx.helper.make_node("QLinearConv", qlinear_conv_inputs, - [qlinear_conv_output], - node.name, **kwargs) + qlinear_conv_node = onnx.helper.make_node( + "QLinearConv", qlinear_conv_inputs, [qlinear_conv_output], node.name, **kwargs + ) self.quantizer.new_nodes.append(qlinear_conv_node) self.quantizer.remove_nodes.extend(parents) self.quantizer.remove_nodes.append(child) self.quantizer.remove_nodes.append(node) + @qop_registry(op_types="QLinearConv") class QConvOperator(QOperator): """QLinearConv Operator.""" @@ -185,60 +194,52 @@ def convert(self): inits = [] # input dq in_dq1 = onnx.helper.make_node( - 'DequantizeLinear', - node.input[:3], - [node.name + '_in_dequant1'], - node.name + '_in_dequant1') + "DequantizeLinear", node.input[:3], [node.name + "_in_dequant1"], node.name + "_in_dequant1" + ) in_dq2 = onnx.helper.make_node( - 'DequantizeLinear', - node.input[3:6], - [node.name + '_in_dequant2'], - node.name + '_in_dequant2') - + "DequantizeLinear", node.input[3:6], [node.name + "_in_dequant2"], node.name + "_in_dequant2" + ) + add_nodes.extend([in_dq1, in_dq2]) - inputs = [node.name + '_in_dequant1', node.name + '_in_dequant2'] + inputs = [node.name + "_in_dequant1", node.name + "_in_dequant2"] if len(node.input) == 9: import numpy as np - input_scale = onnx.numpy_helper.to_array( - find_by_name(node.input[1], self.initializers)) - weight_scale = onnx.numpy_helper.to_array( - find_by_name(node.input[4], self.initializers)) + + input_scale = onnx.numpy_helper.to_array(find_by_name(node.input[1], self.initializers)) + weight_scale = onnx.numpy_helper.to_array(find_by_name(node.input[4], self.initializers)) bias_scale = input_scale * weight_scale # update scale initializer bias_scale_data = np.asarray(bias_scale, dtype=np.float32).reshape(-1) - bias_scale_initializer = onnx.numpy_helper.from_array(bias_scale_data, - node.input[8] + '_scale') + bias_scale_initializer = onnx.numpy_helper.from_array(bias_scale_data, node.input[8] + "_scale") inits.extend([bias_scale_initializer]) - + # update zero initializer bias_zp_data = np.zeros(bias_scale.shape, dtype=np.int32).reshape(-1) - bias_zp_initializer = 
onnx.numpy_helper.from_array( - bias_zp_data, node.input[8] + '_zero_point') + bias_zp_initializer = onnx.numpy_helper.from_array(bias_zp_data, node.input[8] + "_zero_point") inits.extend([bias_zp_initializer]) in_dq3 = onnx.helper.make_node( - 'DequantizeLinear', + "DequantizeLinear", [node.input[8], bias_scale_initializer.name, bias_zp_initializer.name], - [node.name + '_in_dequant3'], - node.name + '_in_dequant3') + [node.name + "_in_dequant3"], + node.name + "_in_dequant3", + ) inputs.append(in_dq3.name) add_nodes.append(in_dq3) # output q out_q = onnx.helper.make_node( - 'QuantizeLinear', - [node.name + '_out', node.input[6], node.input[7]], - node.output, - node.name + '_out_quant') - outputs = [node.name + '_out'] + "QuantizeLinear", [node.name + "_out", node.input[6], node.input[7]], node.output, node.name + "_out_quant" + ) + outputs = [node.name + "_out"] add_nodes.append(out_q) kwargs = {} - for attribute in node.attribute: # pragma: no cover + for attribute in node.attribute: # pragma: no cover kwargs.update(attribute_to_kwarg(attribute)) binary_node = onnx.helper.make_node( - node.op_type.split('QLinear')[-1], inputs, - outputs, node.name + '_convert', **kwargs) + node.op_type.split("QLinear")[-1], inputs, outputs, node.name + "_convert", **kwargs + ) add_nodes.append(binary_node) return True, add_nodes, inits diff --git a/neural_compressor/adaptor/ox_utils/operators/direct_q8.py b/neural_compressor/adaptor/ox_utils/operators/direct_q8.py index a4b4c5bea6a..f40a154070e 100644 --- a/neural_compressor/adaptor/ox_utils/operators/direct_q8.py +++ b/neural_compressor/adaptor/ox_utils/operators/direct_q8.py @@ -16,11 +16,12 @@ # limitations under the License. """Direct8Bit Operator.""" -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, qop_registry, QOperator +from neural_compressor.adaptor.ox_utils.operators.ops import Operator, QOperator, op_registry, qop_registry -@op_registry(op_types="Reshape, Transpose, Squeeze, Unsqueeze, Flatten, Expand, Slice, " \ - "SpaceToDepth, DepthToSpace, Upsample") +@op_registry( + op_types="Reshape, Transpose, Squeeze, Unsqueeze, Flatten, Expand, Slice, " "SpaceToDepth, DepthToSpace, Upsample" +) class Direct8BitOperator(Operator): """Direct8Bit Operator.""" @@ -34,49 +35,47 @@ def quantize_check(self): if not self.quantizer.is_valid_quantize_weight(node.input[0]): return False return True - + def quantize(self): """Do quantizaion.""" node = self.node self.quantizer.quantize_inputs(self.node, [0], direct_int8=True) - if not self.disable_qdq_for_node_output or self.quantizer.mode != 'qdq': + if not self.disable_qdq_for_node_output or self.quantizer.mode != "qdq": self.quantizer.quantize_outputs(self.node, direct_int8=True) node.name = node.name + "_quant" def convert_check(self, convert_format): """Check if conversion can be done.""" node = self.node - assert convert_format in ['static'], \ - "convert format for {} should be in ['static']".format(node.op_type) - + assert convert_format in ["static"], "convert format for {} should be in ['static']".format(node.op_type) + parents = self.quantizer.model.get_parents(node) children = self.quantizer.model.get_children(node) - if (len(children) == 0 and len(parents) == 0) or \ - not node.name.endswith('_quant'): + if (len(children) == 0 and len(parents) == 0) or not node.name.endswith("_quant"): return False return True def convert(self, convert_format): """Convert to QOperator format.""" node = self.node - + parents = self.quantizer.model.get_parents(node) children = 
self.quantizer.model.get_children(node) - if any([i.op_type == 'DequantizeLinear' for i in parents]) and \ - any([i.op_type == 'QuantizeLinear' for i in children]): + if any([i.op_type == "DequantizeLinear" for i in parents]) and any( + [i.op_type == "QuantizeLinear" for i in children] + ): for parent in parents: - if parent.op_type == 'DequantizeLinear': + if parent.op_type == "DequantizeLinear": self.node.input[0] = parent.input[0] self.quantizer.remove_nodes.append(parents[0]) break for child in children: - if child.op_type == 'QuantizeLinear': + if child.op_type == "QuantizeLinear": self.quantizer.remove_nodes.append(child) - self.quantizer.model.replace_input_of_all_nodes( - child.output[0], node.output[0] + '_quantized') - node.output[0] = node.output[0] + '_quantized' - - def cast(self): # pragma: no cover + self.quantizer.model.replace_input_of_all_nodes(child.output[0], node.output[0] + "_quantized") + node.output[0] = node.output[0] + "_quantized" + + def cast(self): # pragma: no cover """Cast node.""" node = self.node if node.input[0] not in [i.tensor_name for i in self.quantizer.new_value_info.values()]: @@ -84,6 +83,7 @@ def cast(self): # pragma: no cover self.quantizer.cast_inputs(self.node, self.dtype, [0]) self.quantizer.cast_outputs(self.node, self.dtype) + @qop_registry(op_types="Reshape, Transpose, Squeeze, Unsqueeze") class QDirectOperator(QOperator): """QDirect Operator.""" diff --git a/neural_compressor/adaptor/ox_utils/operators/embed_layernorm.py b/neural_compressor/adaptor/ox_utils/operators/embed_layernorm.py index 6270b8c7e9f..cd9f416a227 100644 --- a/neural_compressor/adaptor/ox_utils/operators/embed_layernorm.py +++ b/neural_compressor/adaptor/ox_utils/operators/embed_layernorm.py @@ -17,9 +17,11 @@ """EmbedLayerNormalization Operator.""" import onnx -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry + +from neural_compressor.adaptor.ox_utils.operators.ops import Operator, QOperator, op_registry, qop_registry from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain + @op_registry(op_types="EmbedLayerNormalization") class EmbedLayerNormalizationOperator(Operator): """EmbedLayerNormalization Operator.""" @@ -37,10 +39,12 @@ def quantize(self): def convert_check(self, convert_format): """Check if conversion can be done.""" node = self.node - assert convert_format in ['dynamic', 'static'], \ - "convert format for {} should be in ['dynamic', 'static']".format(node.op_type) - - if not node.name.endswith('_quant'): + assert convert_format in [ + "dynamic", + "static", + ], "convert format for {} should be in ['dynamic', 'static']".format(node.op_type) + + if not node.name.endswith("_quant"): return False return True @@ -48,8 +52,7 @@ def convert(self, convert_format): """Convert to QOperator format.""" node = self.node - parents = [i for i in self.quantizer.model.get_parents(node) \ - if i.op_type == 'DequantizeLinear'] + parents = [i for i in self.quantizer.model.get_parents(node) if i.op_type == "DequantizeLinear"] inputs = [] # 'input_ids' inputs.extend([node.input[0]]) @@ -65,19 +68,20 @@ def convert(self, convert_format): inputs.append(parent.input[1]) for parent in parents: inputs.append(parent.input[2]) - + kwargs = {} - for attribute in node.attribute: # pragma: no cover + for attribute in node.attribute: # pragma: no cover kwargs.update(attribute_to_kwarg(attribute)) kwargs["domain"] = ms_domain - qembed_layer_norm_node = onnx.helper.make_node("QEmbedLayerNormalization", - 
inputs, node.output, - node.name, **kwargs) + qembed_layer_norm_node = onnx.helper.make_node( + "QEmbedLayerNormalization", inputs, node.output, node.name, **kwargs + ) self.quantizer.new_nodes.append(qembed_layer_norm_node) self.quantizer.remove_nodes.extend(parents) self.quantizer.remove_nodes.append(node) + @qop_registry(op_types="QEmbedLayerNormalization") class QEmbedLayerNormalizationOperator(QOperator): """QEmbedLayerNormalization Operator.""" @@ -95,22 +99,23 @@ def convert(self): # input dq for i in range(5): in_dq = onnx.helper.make_node( - 'DequantizeLinear', - [node.input[2+i], node.input[-10+i], node.input[-5+i]], - [node.name + '_in_dequant_' + str(i)], - node.name + '_in_dequant_' + str(i)) - inputs.append(node.name + '_in_dequant_' + str(i)) + "DequantizeLinear", + [node.input[2 + i], node.input[-10 + i], node.input[-5 + i]], + [node.name + "_in_dequant_" + str(i)], + node.name + "_in_dequant_" + str(i), + ) + inputs.append(node.name + "_in_dequant_" + str(i)) add_nodes.append(in_dq) if len(node.input) > 17: inputs.append(node.input[7]) outputs = node.output kwargs = {} - for attribute in node.attribute: # pragma: no cover + for attribute in node.attribute: # pragma: no cover kwargs.update(attribute_to_kwarg(attribute)) binary_node = onnx.helper.make_node( - 'EmbedLayerNormalization', inputs, - outputs, node.name + '_convert', **kwargs) + "EmbedLayerNormalization", inputs, outputs, node.name + "_convert", **kwargs + ) add_nodes.append(binary_node) - return True, add_nodes, inits \ No newline at end of file + return True, add_nodes, inits diff --git a/neural_compressor/adaptor/ox_utils/operators/gather.py b/neural_compressor/adaptor/ox_utils/operators/gather.py index 26c0a789a2e..74360cd32af 100644 --- a/neural_compressor/adaptor/ox_utils/operators/gather.py +++ b/neural_compressor/adaptor/ox_utils/operators/gather.py @@ -17,9 +17,11 @@ """Gather Operator.""" import onnx -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry + +from neural_compressor.adaptor.ox_utils.operators.ops import Operator, QOperator, op_registry, qop_registry from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg + @op_registry(op_types="Gather") class GatherOperator(Operator): """Gather Operator.""" @@ -39,32 +41,35 @@ def quantize(self): """Do quantizaion.""" node = self.node self.quantizer.quantize_inputs(node, [0]) - if not self.disable_qdq_for_node_output or self.quantizer != 'qdq': + if not self.disable_qdq_for_node_output or self.quantizer != "qdq": self.quantizer.quantize_outputs(node) node.name = node.name + "_quant" def convert_check(self, convert_format): """Check if conversion can be done.""" node = self.node - assert convert_format in ['dynamic', 'static'], \ - "convert format for {} should be in ['dynamic', 'static']".format(node.op_type) + assert convert_format in [ + "dynamic", + "static", + ], "convert format for {} should be in ['dynamic', 'static']".format(node.op_type) parents = self.quantizer.model.get_parents(node) children = self.quantizer.model.get_children(node) - if len(children) == 0 or len(parents) == 0 or not node.name.endswith('_quant'): + if len(children) == 0 or len(parents) == 0 or not node.name.endswith("_quant"): return False - + return True def convert(self, convert_format): """Convert to QOperator format.""" node = self.node - + parents = self.quantizer.model.get_parents(node) children = self.quantizer.model.get_children(node) - if any([i.op_type == 'DequantizeLinear' for i in parents]): + if 
any([i.op_type == "DequantizeLinear" for i in parents]): from onnx import numpy_helper + inputs = [] inputs.append(parents[0].input[0]) inputs.append(node.input[1]) @@ -72,48 +77,45 @@ def convert(self, convert_format): gather_new_output = node.output[0] + "_quantized" kwargs = {} - for attribute in node.attribute: # pragma: no cover + for attribute in node.attribute: # pragma: no cover kwargs.update(attribute_to_kwarg(attribute)) - gather_node = onnx.helper.make_node("Gather", - inputs, - [gather_new_output], - node.name, - **kwargs) + gather_node = onnx.helper.make_node("Gather", inputs, [gather_new_output], node.name, **kwargs) self.quantizer.new_nodes.append(gather_node) - if any([i.op_type != 'QuantizeLinear' for i in children]): # pragma: no cover + if any([i.op_type != "QuantizeLinear" for i in children]): # pragma: no cover dq_inputs = [] dq_inputs.append(gather_new_output) dq_inputs.extend(parents[0].input[1:]) - dq_node = onnx.helper.make_node("DequantizeLinear", - dq_inputs, - [node.output[0]], - node.name + '_DequantizeLinear') + dq_node = onnx.helper.make_node( + "DequantizeLinear", dq_inputs, [node.output[0]], node.name + "_DequantizeLinear" + ) self.quantizer.new_nodes.append(dq_node) - - out_scale = 1. + + out_scale = 1.0 out_zp = 0 for child in children: - if child.op_type == 'QuantizeLinear': + if child.op_type == "QuantizeLinear": out_scale = numpy_helper.to_array(self.quantizer.model.get_initializer(child.input[1])) out_zp = numpy_helper.to_array(self.quantizer.model.get_initializer(child.input[2])) self.quantizer.remove_nodes.append(child) for n in self.quantizer.model.get_children(child): - self.quantizer.model.replace_node_input(n, - child.output[0], gather_new_output) - + self.quantizer.model.replace_node_input(n, child.output[0], gather_new_output) + # int8 weight will be recalculated for the first time - if any([child.op_type == 'QuantizeLinear' for child in children]) and \ - self.quantizer.model.get_initializer(parents[0].input[0]) is not None and \ - parents[0].input[0] not in self.quantizer.recalculate_quantized_value: + if ( + any([child.op_type == "QuantizeLinear" for child in children]) + and self.quantizer.model.get_initializer(parents[0].input[0]) is not None + and parents[0].input[0] not in self.quantizer.recalculate_quantized_value + ): int8_tensor = numpy_helper.to_array(self.quantizer.model.get_initializer(parents[0].input[0])) in_scale = numpy_helper.to_array(self.quantizer.model.get_initializer(parents[0].input[1])) in_zp = numpy_helper.to_array(self.quantizer.model.get_initializer(parents[0].input[2])) - new_int8_tensor = (((int8_tensor.astype('float32') - in_zp) * in_scale) / out_scale).round() + out_zp + new_int8_tensor = (((int8_tensor.astype("float32") - in_zp) * in_scale) / out_scale).round() + out_zp self.quantizer.model.set_initializer(parents[0].input[0], new_int8_tensor.astype(int8_tensor.dtype)) self.quantizer.recalculate_quantized_value.append(parents[0].input[0]) self.quantizer.remove_nodes.extend([node, parents[0]]) - + + @qop_registry(op_types="Gather") class QGatherOperator(QOperator): """QGather Operator.""" diff --git a/neural_compressor/adaptor/ox_utils/operators/gavgpool.py b/neural_compressor/adaptor/ox_utils/operators/gavgpool.py index 47fc80d8ef9..acbd19a76a8 100644 --- a/neural_compressor/adaptor/ox_utils/operators/gavgpool.py +++ b/neural_compressor/adaptor/ox_utils/operators/gavgpool.py @@ -17,9 +17,11 @@ """GlobalAveragePool Operator.""" import onnx -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, 
Operator, QOperator, qop_registry + +from neural_compressor.adaptor.ox_utils.operators.ops import Operator, QOperator, op_registry, qop_registry from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain + @op_registry(op_types="GlobalAveragePool") class GlobalAveragePoolOperator(Operator): """GlobalAveragePool Operator.""" @@ -31,18 +33,17 @@ def __init__(self, onnx_quantizer, onnx_node): def convert_check(self, convert_format): """Check if conversion can be done.""" node = self.node - assert convert_format in ['static'], \ - "convert format for {} should be in ['static']".format(node.op_type) - + assert convert_format in ["static"], "convert format for {} should be in ['static']".format(node.op_type) + children = self.quantizer.model.get_children(node) - if len(children) == 0: # pragma: no cover + if len(children) == 0: # pragma: no cover return False return True def convert(self, convert_format): """Convert to QOperator format.""" node = self.node - + parent = self.quantizer.model.get_parents(node)[0] child = self.quantizer.model.get_children(node)[0] @@ -55,16 +56,13 @@ def convert(self, convert_format): inputs = parent.input inputs.extend(child.input[1:]) - qnode = onnx.helper.make_node( - "QLinear" + node.op_type, - inputs, - child.output, - node.name + '_quant', **kwargs) + qnode = onnx.helper.make_node("QLinear" + node.op_type, inputs, child.output, node.name + "_quant", **kwargs) self.quantizer.new_nodes += [qnode] self.quantizer.remove_nodes.append(child) self.quantizer.remove_nodes.append(parent) self.quantizer.remove_nodes.append(node) - + + @qop_registry(op_types="QLinearGlobalAveragePool") class QGlobalAveragePoolOperator(QOperator): """QLinearGlobalAveragePool Operator.""" @@ -80,24 +78,18 @@ def convert(self): inits = [] # input dq in_dq = onnx.helper.make_node( - 'DequantizeLinear', - node.input[:3], - [node.name + '_in_dequant'], - node.name + '_in_dequant') - inputs = [node.name + '_in_dequant'] + "DequantizeLinear", node.input[:3], [node.name + "_in_dequant"], node.name + "_in_dequant" + ) + inputs = [node.name + "_in_dequant"] add_nodes.append(in_dq) # output q out_q = onnx.helper.make_node( - 'QuantizeLinear', - [node.name + '_out', node.input[3], node.input[4]], - node.output, - node.name + '_out_quant') - outputs = [node.name + '_out'] + "QuantizeLinear", [node.name + "_out", node.input[3], node.input[4]], node.output, node.name + "_out_quant" + ) + outputs = [node.name + "_out"] add_nodes.append(out_q) kwargs = {} - activation_node = onnx.helper.make_node( - 'GlobalAveragePool', inputs, - outputs, node.name + '_convert', **kwargs) + activation_node = onnx.helper.make_node("GlobalAveragePool", inputs, outputs, node.name + "_convert", **kwargs) add_nodes.append(activation_node) return True, add_nodes, inits diff --git a/neural_compressor/adaptor/ox_utils/operators/gemm.py b/neural_compressor/adaptor/ox_utils/operators/gemm.py index fdc2ab7af2f..346c99e9161 100644 --- a/neural_compressor/adaptor/ox_utils/operators/gemm.py +++ b/neural_compressor/adaptor/ox_utils/operators/gemm.py @@ -17,9 +17,10 @@ """Gemm Operator.""" import onnx -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry -from neural_compressor.adaptor.ox_utils.util import find_by_name, ms_domain, \ - attribute_to_kwarg, is_B_transposed + +from neural_compressor.adaptor.ox_utils.operators.ops import Operator, QOperator, op_registry, qop_registry +from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, 
find_by_name, is_B_transposed, ms_domain + @op_registry(op_types="Gemm") class GemmOperator(Operator): @@ -28,16 +29,18 @@ class GemmOperator(Operator): def __init__(self, onnx_quantizer, onnx_node): """Initialization.""" super(GemmOperator, self).__init__(onnx_quantizer, onnx_node) - + def quantize_check(self): """Check if quantizaion can be done.""" node = self.node - if len(node.input) == 3 and \ - not find_by_name(node.input[2], self.quantizer.model.initializer()): + if len(node.input) == 3 and not find_by_name(node.input[2], self.quantizer.model.initializer()): from neural_compressor.utils import logger - logger.warning("Bias of Gemm node '{}' is not constant. " \ - "Exclude this node can get better performance.".format(node.name)) - if self.quantizer.mode != 'qdq': + + logger.warning( + "Bias of Gemm node '{}' is not constant. " + "Exclude this node can get better performance.".format(node.name) + ) + if self.quantizer.mode != "qdq": return False return True @@ -46,37 +49,36 @@ def quantize(self): node = self.node self.quantizer.quantize_inputs(node, [0]) if self.per_channel and find_by_name(node.input[1], self.quantizer.model.initializer()): - self.quantizer.quantize_weights_per_channel(node, [1], - self.weight_dtype, self.weight_scheme, 0 if is_B_transposed(node) else 1) + self.quantizer.quantize_weights_per_channel( + node, [1], self.weight_dtype, self.weight_scheme, 0 if is_B_transposed(node) else 1 + ) else: self.quantizer.quantize_inputs(node, [1]) - if len(node.input) == 3 and \ - find_by_name(node.input[2], self.quantizer.model.initializer()): + if len(node.input) == 3 and find_by_name(node.input[2], self.quantizer.model.initializer()): self.quantizer.quantize_bias_tensor(node) beta_attribute = [attr for attr in node.attribute if attr.name == "beta"] if len(beta_attribute): beta_attribute[0].f = 1.0 - if not self.disable_qdq_for_node_output or self.quantizer.mode != 'qdq': + if not self.disable_qdq_for_node_output or self.quantizer.mode != "qdq": self.quantizer.quantize_outputs(node) node.name = node.name + "_quant" def convert_check(self, convert_format): """Check if conversion can be done.""" node = self.node - assert convert_format in ['static'], \ - "convert format for {} should be in ['static']".format(node.op_type) - + assert convert_format in ["static"], "convert format for {} should be in ['static']".format(node.op_type) + children = self.quantizer.model.get_children(node) - if len(children) == 0 or not node.name.endswith('_quant'): + if len(children) == 0 or not node.name.endswith("_quant"): return False return True def convert(self, convert_format): """Convert to QOperator format.""" node = self.node - + parents = self.quantizer.model.get_parents(node) child = self.quantizer.model.get_children(node)[0] qgemm_output = child.output[0] @@ -92,14 +94,14 @@ def convert(self, convert_format): kwargs.update(attribute_to_kwarg(attribute)) kwargs["domain"] = ms_domain - qgemm_node = onnx.helper.make_node("QGemm", - qgemm_inputs, [qgemm_output], node.name, **kwargs) + qgemm_node = onnx.helper.make_node("QGemm", qgemm_inputs, [qgemm_output], node.name, **kwargs) self.quantizer.new_nodes.append(qgemm_node) self.quantizer.remove_nodes.extend(parents) self.quantizer.remove_nodes.append(child) self.quantizer.remove_nodes.append(node) - + + @qop_registry(op_types="QGemm") class QGemmOperator(QOperator): """QGemm Operator.""" @@ -111,64 +113,53 @@ def __init__(self, onnx_node, children, initializers): def convert(self): """Convert to QDQ format.""" import numpy as np + node = 
self.node add_nodes = [] inits = [] - input_scale = onnx.numpy_helper.to_array( - find_by_name(node.input[1], self.initializers)) - weight_scale = onnx.numpy_helper.to_array( - find_by_name(node.input[4], self.initializers)) + input_scale = onnx.numpy_helper.to_array(find_by_name(node.input[1], self.initializers)) + weight_scale = onnx.numpy_helper.to_array(find_by_name(node.input[4], self.initializers)) bias_scale = input_scale * weight_scale # input dq in_dq1 = onnx.helper.make_node( - 'DequantizeLinear', - node.input[:3], - [node.name + '_in_dequant1'], - node.name + '_in_dequant1') - + "DequantizeLinear", node.input[:3], [node.name + "_in_dequant1"], node.name + "_in_dequant1" + ) in_dq2 = onnx.helper.make_node( - 'DequantizeLinear', - node.input[3:6], - [node.name + '_in_dequant2'], - node.name + '_in_dequant2') + "DequantizeLinear", node.input[3:6], [node.name + "_in_dequant2"], node.name + "_in_dequant2" + ) # update scale initializer bias_scale_data = np.asarray(bias_scale, dtype=np.float32).reshape(-1) - bias_scale_initializer = onnx.numpy_helper.from_array(bias_scale_data, - node.input[6] + '_scale') + bias_scale_initializer = onnx.numpy_helper.from_array(bias_scale_data, node.input[6] + "_scale") inits.extend([bias_scale_initializer]) - + # update zero initializer bias_zp_data = np.zeros(bias_scale.shape, dtype=np.int32).reshape(-1) - bias_zp_initializer = onnx.numpy_helper.from_array( - bias_zp_data, node.input[6] + '_zero_point') + bias_zp_initializer = onnx.numpy_helper.from_array(bias_zp_data, node.input[6] + "_zero_point") inits.extend([bias_zp_initializer]) in_dq3 = onnx.helper.make_node( - 'DequantizeLinear', + "DequantizeLinear", [node.input[8], bias_scale_initializer.name, bias_zp_initializer.name], - [node.name + '_in_dequant3']) - + [node.name + "_in_dequant3"], + ) + inputs = [in_dq1.name, in_dq2.name, in_dq3.name] add_nodes.extend([in_dq1, in_dq2, in_dq3]) # output q out_q = onnx.helper.make_node( - 'QuantizeLinear', - [node.name + '_out', node.input[6], node.input[7]], - node.output, - node.name + '_out_quant') - outputs = [node.name + '_out'] + "QuantizeLinear", [node.name + "_out", node.input[6], node.input[7]], node.output, node.name + "_out_quant" + ) + outputs = [node.name + "_out"] add_nodes.append(out_q) kwargs = {} - for attribute in node.attribute: # pragma: no cover + for attribute in node.attribute: # pragma: no cover kwargs.update(attribute_to_kwarg(attribute)) - gemm_node = onnx.helper.make_node( - 'Gemm', inputs, - outputs, node.name + '_convert', **kwargs) + gemm_node = onnx.helper.make_node("Gemm", inputs, outputs, node.name + "_convert", **kwargs) add_nodes.append(gemm_node) - return True, add_nodes, inits \ No newline at end of file + return True, add_nodes, inits diff --git a/neural_compressor/adaptor/ox_utils/operators/lstm.py b/neural_compressor/adaptor/ox_utils/operators/lstm.py index 82d0a0701e9..9e35139f209 100644 --- a/neural_compressor/adaptor/ox_utils/operators/lstm.py +++ b/neural_compressor/adaptor/ox_utils/operators/lstm.py @@ -16,10 +16,12 @@ # limitations under the License. 
"""LSTM Operator.""" -import onnx -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator -from neural_compressor.adaptor.ox_utils.util import ms_domain, attribute_to_kwarg import numpy +import onnx + +from neural_compressor.adaptor.ox_utils.operators.ops import Operator, op_registry +from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain + @op_registry(op_types="LSTM") class LSTMOperator(Operator): @@ -32,22 +34,22 @@ def __init__(self, onnx_quantizer, onnx_node): def quantize(self): """Do quantizaion.""" return - + def convert_check(self, convert_format): """Check if conversion can be done.""" node = self.node - assert convert_format in ['dynamic'], \ - "convert format for {} should be in ['dynamic']".format(node.op_type) - - if (not self.quantizer.is_valid_quantize_weight(node.input[1]) or - not self.quantizer.is_valid_quantize_weight(node.input[2])): # pragma: no cover + assert convert_format in ["dynamic"], "convert format for {} should be in ['dynamic']".format(node.op_type) + + if not self.quantizer.is_valid_quantize_weight(node.input[1]) or not self.quantizer.is_valid_quantize_weight( + node.input[2] + ): # pragma: no cover return False model = self.quantizer.model W = model.get_initializer(node.input[1]) R = model.get_initializer(node.input[2]) - if (len(W.dims) != 3 or len(R.dims) != 3): # pragma: no cover + if len(W.dims) != 3 or len(R.dims) != 3: # pragma: no cover return False return True @@ -59,20 +61,22 @@ def convert(self, convert_format): model = self.quantizer.model W = model.get_initializer(self.node.input[1]) R = model.get_initializer(self.node.input[2]) - + [W_num_dir, W_4_hidden_size, W_input_size] = W.dims [R_num_dir, R_4_hidden_size, R_hidden_size] = R.dims - if self.per_channel: # pragma: no cover + if self.per_channel: # pragma: no cover del W.dims[0] del R.dims[0] W.dims[0] = W_num_dir * W_4_hidden_size R.dims[0] = R_num_dir * R_4_hidden_size - quant_input_weight_tuple = self.quantizer.quantize_weight_per_channel(node.input[1], - self.weight_dtype, self.weight_scheme, 0) - quant_recurrent_weight_tuple = self.quantizer.quantize_weight_per_channel(node.input[2], - self.weight_dtype, self.weight_scheme, 0) + quant_input_weight_tuple = self.quantizer.quantize_weight_per_channel( + node.input[1], self.weight_dtype, self.weight_scheme, 0 + ) + quant_recurrent_weight_tuple = self.quantizer.quantize_weight_per_channel( + node.input[2], self.weight_dtype, self.weight_scheme, 0 + ) W_quant_weight = model.get_initializer(quant_input_weight_tuple[0]) R_quant_weight = model.get_initializer(quant_recurrent_weight_tuple[0]) @@ -86,10 +90,8 @@ def convert(self, convert_format): W_quant_array = numpy.transpose(W_quant_array, (0, 2, 1)) R_quant_array = numpy.transpose(R_quant_array, (0, 2, 1)) - W_quant_tranposed = onnx.numpy_helper.from_array(W_quant_array, \ - quant_input_weight_tuple[0]) - R_quant_tranposed = onnx.numpy_helper.from_array(R_quant_array, - quant_recurrent_weight_tuple[0]) + W_quant_tranposed = onnx.numpy_helper.from_array(W_quant_array, quant_input_weight_tuple[0]) + R_quant_tranposed = onnx.numpy_helper.from_array(R_quant_array, quant_recurrent_weight_tuple[0]) model.remove_initializers([W_quant_weight, R_quant_weight]) model.add_initializer(W_quant_tranposed) @@ -100,7 +102,7 @@ def convert(self, convert_format): W_quant_scale = model.get_initializer(quant_input_weight_tuple[2]) R_quant_scale = model.get_initializer(quant_recurrent_weight_tuple[2]) - if self.per_channel: # pragma: no cover + if 
self.per_channel: # pragma: no cover W_quant_zp.dims[:] = [W_num_dir, W_4_hidden_size] R_quant_zp.dims[:] = [R_num_dir, R_4_hidden_size] W_quant_scale.dims[:] = [W_num_dir, W_4_hidden_size] @@ -115,18 +117,21 @@ def convert(self, convert_format): inputs.extend([node.input[5] if input_len > 5 else ""]) inputs.extend([node.input[6] if input_len > 6 else ""]) inputs.extend([node.input[7] if input_len > 7 else ""]) - inputs.extend([quant_input_weight_tuple[2], - quant_input_weight_tuple[1], - quant_recurrent_weight_tuple[2], - quant_recurrent_weight_tuple[1]]) - + inputs.extend( + [ + quant_input_weight_tuple[2], + quant_input_weight_tuple[1], + quant_recurrent_weight_tuple[2], + quant_recurrent_weight_tuple[1], + ] + ) + kwargs = {} for attribute in node.attribute: kwargs.update(attribute_to_kwarg(attribute)) kwargs["domain"] = ms_domain quant_lstm_name = node.name + "_quant" - quant_lstm_node = onnx.helper.make_node("DynamicQuantizeLSTM", - inputs, node.output, quant_lstm_name, **kwargs) + quant_lstm_node = onnx.helper.make_node("DynamicQuantizeLSTM", inputs, node.output, quant_lstm_name, **kwargs) self.quantizer.remove_nodes.append(node) - self.quantizer.new_nodes.append(quant_lstm_node) \ No newline at end of file + self.quantizer.new_nodes.append(quant_lstm_node) diff --git a/neural_compressor/adaptor/ox_utils/operators/matmul.py b/neural_compressor/adaptor/ox_utils/operators/matmul.py index e77c8d60b81..5181657a0bb 100644 --- a/neural_compressor/adaptor/ox_utils/operators/matmul.py +++ b/neural_compressor/adaptor/ox_utils/operators/matmul.py @@ -17,10 +17,12 @@ """MatMul Operator.""" import onnx -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry -from neural_compressor.adaptor.ox_utils.util import find_by_name, attribute_to_kwarg from onnx import onnx_pb as onnx_proto +from neural_compressor.adaptor.ox_utils.operators.ops import Operator, QOperator, op_registry, qop_registry +from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, find_by_name + + @op_registry(op_types="MatMul") class MatMulOperator(Operator): """MatMul Operator.""" @@ -44,8 +46,7 @@ def quantize(self): node = self.node self.quantizer.quantize_inputs(node, [0]) if self.per_channel and find_by_name(node.input[1], self.quantizer.model.initializer()): - self.quantizer.quantize_weights_per_channel(node, [1], - self.weight_dtype, self.weight_scheme, 1) + self.quantizer.quantize_weights_per_channel(node, [1], self.weight_dtype, self.weight_scheme, 1) else: self.quantizer.quantize_inputs(node, [1]) @@ -56,9 +57,11 @@ def quantize(self): def convert_check(self, convert_format): """Check if conversion can be done.""" node = self.node - assert convert_format in ['dynamic', 'static'], \ - "convert format for {} should be in ['dynamic', 'static']".format(node.op_type) - if not node.name.endswith('_quant'): + assert convert_format in [ + "dynamic", + "static", + ], "convert format for {} should be in ['dynamic', 'static']".format(node.op_type) + if not node.name.endswith("_quant"): return False return True @@ -66,7 +69,7 @@ def convert(self, convert_format): """Convert to QOperator format.""" node = self.node - if convert_format == 'dynamic': + if convert_format == "dynamic": parents = self.quantizer.model.get_parents(node) inputs = [] @@ -74,11 +77,11 @@ def convert(self, convert_format): scale = [] zp = [] for parent in parents: - if parent.op_type == 'DequantizeLinear': + if parent.op_type == "DequantizeLinear": quantized_name.append(parent.input[0]) else: 
quantized_name.append(parent.output[0]) - if parent.op_type == 'DynamicQuantizeLinear': + if parent.op_type == "DynamicQuantizeLinear": scale.append(parent.output[1]) zp.append(parent.output[2]) else: @@ -87,16 +90,18 @@ def convert(self, convert_format): inputs.extend(quantized_name) inputs.extend(zp) matmul_integer_output = node.output[0] + "_output_quantized" - matmul_integer_node = onnx.helper.make_node("MatMulInteger", - inputs, - [matmul_integer_output], node.name) + matmul_integer_node = onnx.helper.make_node("MatMulInteger", inputs, [matmul_integer_output], node.name) self.quantizer.new_nodes.append(matmul_integer_node) # Add cast operation to cast matmulInteger output to float. cast_op_output = matmul_integer_output + "_cast_output" - cast_node = onnx.helper.make_node("Cast", [matmul_integer_output], [cast_op_output], - matmul_integer_output + "_cast", - to=onnx_proto.TensorProto.FLOAT) + cast_node = onnx.helper.make_node( + "Cast", + [matmul_integer_output], + [cast_op_output], + matmul_integer_output + "_cast", + to=onnx_proto.TensorProto.FLOAT, + ) self.quantizer.new_nodes.append(cast_node) # Add mul operation to multiply scales of two inputs. @@ -104,8 +109,9 @@ def convert(self, convert_format): scales_mul_node = find_by_name(scales_mul_op, self.quantizer.new_nodes) if scales_mul_node is None: - scales_mul_node = onnx.helper.make_node("Mul", [scale[0], scale[1]], - [scales_mul_op + ":0"], scales_mul_op) + scales_mul_node = onnx.helper.make_node( + "Mul", [scale[0], scale[1]], [scales_mul_op + ":0"], scales_mul_op + ) self.quantizer.new_nodes.append(scales_mul_node) scales_mul_op_output = scales_mul_node.output[0] @@ -114,41 +120,42 @@ def convert(self, convert_format): # and make the output of this node the same as output of original matmul node. 
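 # (Same identity as in the Conv case: with zero points handled by MatMulInteger,
 #  MatMul(x, w) ~ x_scale * w_scale * MatMulInteger(x_q, w_q). A small worked example
 #  with illustrative values and zero points of 0:
 #      x_q = [10, -3], x_scale = 0.02;  w_q = [[5], [7]], w_scale = 0.1
 #      MatMulInteger -> 10*5 + (-3)*7 = 29;  29 * (0.02 * 0.1) = 0.058,
 #  which matches (x_q * 0.02) @ (w_q * 0.1) = [0.2, -0.06] @ [[0.5], [0.7]] = 0.058.)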
output_scale_mul_op = node.name + "_output_scale_mul" self.quantizer.new_nodes.append( - onnx.helper.make_node("Mul", [cast_op_output, scales_mul_op_output], - [node.output[0]], output_scale_mul_op)) - if parents[1].op_type == 'DequantizeLinear': + onnx.helper.make_node( + "Mul", [cast_op_output, scales_mul_op_output], [node.output[0]], output_scale_mul_op + ) + ) + if parents[1].op_type == "DequantizeLinear": self.quantizer.remove_nodes.append(parents[1]) self.quantizer.remove_nodes.append(node) - elif convert_format == 'static': + elif convert_format == "static": parents = self.quantizer.model.get_parents(node) - if len(self.quantizer.model.get_children(node)) == 0 or \ - not node.name.endswith('_quant'): # pragma: no cover + if len(self.quantizer.model.get_children(node)) == 0 or not node.name.endswith( + "_quant" + ): # pragma: no cover return qlinear_matmul_inputs = [] if self.disable_qdq_for_node_output: for i in range(len(parents[0].input)): qlinear_matmul_inputs.extend([parent.input[i] for parent in parents]) - qlinear_matmul_node = onnx.helper.make_node("MatMulIntegerToFloat", - qlinear_matmul_inputs, - node.output, - node.name, - domain='com.microsoft') + qlinear_matmul_node = onnx.helper.make_node( + "MatMulIntegerToFloat", qlinear_matmul_inputs, node.output, node.name, domain="com.microsoft" + ) else: child = self.quantizer.model.get_children(node)[0] qlinear_matmul_output = child.output[0] for parent in parents: qlinear_matmul_inputs.extend(parent.input) qlinear_matmul_inputs.extend(child.input[1:]) - qlinear_matmul_node = onnx.helper.make_node("QLinearMatMul", - qlinear_matmul_inputs, - [qlinear_matmul_output], - node.name) + qlinear_matmul_node = onnx.helper.make_node( + "QLinearMatMul", qlinear_matmul_inputs, [qlinear_matmul_output], node.name + ) self.quantizer.remove_nodes.append(child) self.quantizer.new_nodes.append(qlinear_matmul_node) self.quantizer.remove_nodes.extend(parents) self.quantizer.remove_nodes.append(node) - + + @qop_registry(op_types="QLinearMatMul") class QMatMulOperator(QOperator): """QLinearMatMul Operator.""" @@ -164,38 +171,31 @@ def convert(self): inits = [] # input dq in_dq1 = onnx.helper.make_node( - 'DequantizeLinear', - node.input[:3], - [node.name + '_in_dequant1'], - node.name + '_in_dequant1') + "DequantizeLinear", node.input[:3], [node.name + "_in_dequant1"], node.name + "_in_dequant1" + ) in_dq2 = onnx.helper.make_node( - 'DequantizeLinear', - node.input[3:6], - [node.name + '_in_dequant2'], - node.name + '_in_dequant2') - inputs = [node.name + '_in_dequant1', node.name + '_in_dequant2'] - + "DequantizeLinear", node.input[3:6], [node.name + "_in_dequant2"], node.name + "_in_dequant2" + ) + inputs = [node.name + "_in_dequant1", node.name + "_in_dequant2"] + add_nodes.extend([in_dq1, in_dq2]) # output q out_q = onnx.helper.make_node( - 'QuantizeLinear', - [node.name + '_out', node.input[6], node.input[7]], - node.output, - node.name + '_out_quant') - outputs = [node.name + '_out'] + "QuantizeLinear", [node.name + "_out", node.input[6], node.input[7]], node.output, node.name + "_out_quant" + ) + outputs = [node.name + "_out"] add_nodes.append(out_q) kwargs = {} - for attribute in node.attribute: # pragma: no cover + for attribute in node.attribute: # pragma: no cover kwargs.update(attribute_to_kwarg(attribute)) - matmul_node = onnx.helper.make_node( - 'MatMul', inputs, - outputs, node.name + '_convert', **kwargs) + matmul_node = onnx.helper.make_node("MatMul", inputs, outputs, node.name + "_convert", **kwargs) add_nodes.append(matmul_node) 
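(The convert() above follows the same QOperator-to-QDQ recipe used by the other QLinear
operators in this patch: dequantize both quantized inputs, run the float op, re-quantize
the output. A minimal, self-contained sketch of that recipe, with hypothetical node and
tensor names and assuming a standard onnx installation:)

    from onnx import helper

    def qlinear_binary_to_qdq(node):
        """Expand a two-input QLinear* node into DequantizeLinear -> float op -> QuantizeLinear."""
        # inputs[0:3] = (a, a_scale, a_zero_point), inputs[3:6] = (b, b_scale, b_zero_point),
        # inputs[6:8] = (y_scale, y_zero_point), matching the QLinear* operator layout.
        dq_a = helper.make_node("DequantizeLinear", node.input[0:3], [node.name + "_a"], node.name + "_dq_a")
        dq_b = helper.make_node("DequantizeLinear", node.input[3:6], [node.name + "_b"], node.name + "_dq_b")
        float_op = helper.make_node(
            node.op_type.split("QLinear")[-1],  # e.g. QLinearMatMul -> MatMul
            [dq_a.output[0], dq_b.output[0]],
            [node.name + "_out"],
            node.name + "_float",
        )
        q = helper.make_node(
            "QuantizeLinear", [node.name + "_out", node.input[6], node.input[7]], node.output, node.name + "_q"
        )
        return [dq_a, dq_b, float_op, q]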
return True, add_nodes, inits + @op_registry(op_types="FusedMatMul") class FusedMatMulOperator(Operator): """FusedMatMul Operator.""" diff --git a/neural_compressor/adaptor/ox_utils/operators/maxpool.py b/neural_compressor/adaptor/ox_utils/operators/maxpool.py index 7a08f3c64e5..a037a5fcfbd 100644 --- a/neural_compressor/adaptor/ox_utils/operators/maxpool.py +++ b/neural_compressor/adaptor/ox_utils/operators/maxpool.py @@ -16,7 +16,8 @@ # limitations under the License. """MaxPool Operator.""" -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry +from neural_compressor.adaptor.ox_utils.operators.ops import Operator, QOperator, op_registry, qop_registry + @op_registry(op_types="MaxPool") class MaxPoolOperator(Operator): @@ -25,35 +26,34 @@ class MaxPoolOperator(Operator): def __init__(self, onnx_quantizer, onnx_node): """Initialization.""" super(MaxPoolOperator, self).__init__(onnx_quantizer, onnx_node) - + def quantize_check(self): """Check if quantizaion can be done.""" node = self.node # if opset version is less than 12, just no change - if self.quantizer.opset_version < 12: # pragma: no cover + if self.quantizer.opset_version < 12: # pragma: no cover return False - if not self.quantizer.is_valid_quantize_weight(node.input[0]): # pragma: no cover + if not self.quantizer.is_valid_quantize_weight(node.input[0]): # pragma: no cover return False - + return True def quantize(self): """Do quantizaion.""" node = self.node self.quantizer.quantize_inputs(self.node, direct_int8=True) - if not self.disable_qdq_for_node_output or self.quantizer.mode != 'qdq': + if not self.disable_qdq_for_node_output or self.quantizer.mode != "qdq": self.quantizer.quantize_outputs(self.node, direct_int8=True) node.name = node.name + "_quant" def convert_check(self, convert_format): """Check if conversion can be done.""" node = self.node - assert convert_format in ['static'], \ - "convert format for {} should be in ['static']".format(node.op_type) + assert convert_format in ["static"], "convert format for {} should be in ['static']".format(node.op_type) children = self.quantizer.model.get_children(node) - if len(children) == 0 or not node.name.endswith('_quant'): # pragma: no cover + if len(children) == 0 or not node.name.endswith("_quant"): # pragma: no cover return False return True @@ -62,20 +62,21 @@ def convert(self, convert_format): node = self.node parent = self.quantizer.model.get_parents(node)[0] children = self.quantizer.model.get_children(node) - if parent.op_type != 'DequantizeLinear' or \ - all([i.op_type != 'QuantizeLinear' for i in children]): # pragma: no cover + if parent.op_type != "DequantizeLinear" or all( + [i.op_type != "QuantizeLinear" for i in children] + ): # pragma: no cover return node.input[0] = parent.input[0] - node.output[0] = node.output[0].replace('_QuantizeInput', '_quantized') + node.output[0] = node.output[0].replace("_QuantizeInput", "_quantized") for child in children: - if child.op_type == 'QuantizeLinear': + if child.op_type == "QuantizeLinear": self.quantizer.remove_nodes.append(child) for n in self.quantizer.model.get_children(child): - self.quantizer.model.replace_node_input(n, - child.output[0], node.output[0]) + self.quantizer.model.replace_node_input(n, child.output[0], node.output[0]) self.quantizer.remove_nodes.append(parent) + @qop_registry(op_types="MaxPool") class QMaxPoolOperator(QOperator): """QMaxPool Operator.""" diff --git a/neural_compressor/adaptor/ox_utils/operators/norm.py 
b/neural_compressor/adaptor/ox_utils/operators/norm.py index 3b5db5bb7fb..ddb2a4efdbe 100644 --- a/neural_compressor/adaptor/ox_utils/operators/norm.py +++ b/neural_compressor/adaptor/ox_utils/operators/norm.py @@ -17,9 +17,11 @@ """Normalization Operator.""" import onnx -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry + +from neural_compressor.adaptor.ox_utils.operators.ops import Operator, QOperator, op_registry, qop_registry from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain + @op_registry(op_types="BatchNormalization") class BatchNormalizationOperator(Operator): """BatchNormalization Operator.""" @@ -30,12 +32,13 @@ def __init__(self, onnx_quantizer, onnx_node): def cast(self): """Cast node.""" - if self.dtype == 'bf16': + if self.dtype == "bf16": self.quantizer.cast_inputs(self.node, self.dtype, [0]) else: self.quantizer.cast_inputs(self.node, self.dtype) self.quantizer.cast_outputs(self.node, self.dtype) + @op_registry(op_types="LayerNormalization") class NormalizationOperator(Operator): """Normalization Operator.""" diff --git a/neural_compressor/adaptor/ox_utils/operators/ops.py b/neural_compressor/adaptor/ox_utils/operators/ops.py index 8cb24ad89cd..690d9f4014d 100644 --- a/neural_compressor/adaptor/ox_utils/operators/ops.py +++ b/neural_compressor/adaptor/ox_utils/operators/ops.py @@ -16,39 +16,59 @@ # limitations under the License. """Base Operator.""" -from neural_compressor.utils.utility import LazyImport from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg -onnx = LazyImport('onnx') +from neural_compressor.utils.utility import LazyImport + +onnx = LazyImport("onnx") OPERATORS = {} -QOPERATORS= {} +QOPERATORS = {} + def op_registry(op_types): """The class decorator used to register all Operator subclasses.""" + def decorator_op(cls): assert cls.__name__.endswith( - 'Operator'), "The name of subclass of Operator should end with \'Operator\' substring." - if cls.__name__[:-len('Operator')] in OPERATORS: # pragma: no cover - raise ValueError('Cannot have two operators with the same name.') - for single_op_type in [op_type.strip() for op_type in op_types.split(',')]: + "Operator" + ), "The name of subclass of Operator should end with 'Operator' substring." + if cls.__name__[: -len("Operator")] in OPERATORS: # pragma: no cover + raise ValueError("Cannot have two operators with the same name.") + for single_op_type in [op_type.strip() for op_type in op_types.split(",")]: OPERATORS[single_op_type] = cls return cls + return decorator_op + def qop_registry(op_types): """The class decorator used to register all QOperator subclasses.""" + def decorator_op(cls): assert cls.__name__.endswith( - 'Operator'), "The name of subclass of QOperator should end with \'Operator\' substring." - if cls.__name__[:-len('Operator')] in QOPERATORS: # pragma: no cover - raise ValueError('Cannot have two operators with the same name.') - for single_op_type in [op_type.strip() for op_type in op_types.split(',')]: - if single_op_type.startswith('QLinear') or \ - single_op_type in ['QGemm', 'QAttention', 'QEmbedLayerNormalization', 'ArgMax', - 'Reshape', 'Transpose', 'Squeeze', 'Unsqueeze', 'Gather', - 'MaxPool', 'Pad', 'Resize', 'Split']: + "Operator" + ), "The name of subclass of QOperator should end with 'Operator' substring." 
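(For orientation, a usage sketch of the OPERATORS/QOPERATORS registries defined above,
using a hypothetical operator class; the real registrations are the @op_registry and
@qop_registry decorators on the classes in this directory:)

    from neural_compressor.adaptor.ox_utils.operators.ops import OPERATORS, Operator, op_registry

    @op_registry(op_types="DemoRelu, DemoGelu")
    class DemoReluOperator(Operator):
        """Hypothetical subclass, shown only to illustrate registration."""

    # Each comma-separated op type now maps to the decorated class, which the quantizer
    # looks up by the node's op_type string:
    assert OPERATORS["DemoRelu"] is DemoReluOperator
    assert OPERATORS["DemoGelu"] is DemoReluOperator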
+ if cls.__name__[: -len("Operator")] in QOPERATORS: # pragma: no cover + raise ValueError("Cannot have two operators with the same name.") + for single_op_type in [op_type.strip() for op_type in op_types.split(",")]: + if single_op_type.startswith("QLinear") or single_op_type in [ + "QGemm", + "QAttention", + "QEmbedLayerNormalization", + "ArgMax", + "Reshape", + "Transpose", + "Squeeze", + "Unsqueeze", + "Gather", + "MaxPool", + "Pad", + "Resize", + "Split", + ]: QOPERATORS[single_op_type] = cls return cls + return decorator_op @@ -61,30 +81,25 @@ def __init__(self, onnx_quantizer, onnx_node): self.node = onnx_node if self.node.name in self.quantizer.config: self.dtype = self.quantizer.config[self.node.name] - self.disable_qdq_for_node_output = True if onnx_node.op_type in \ - onnx_quantizer.op_types_to_exclude_output_quantization else False + self.disable_qdq_for_node_output = ( + True if onnx_node.op_type in onnx_quantizer.op_types_to_exclude_output_quantization else False + ) self.per_channel = False - self.algorithm = 'minmax' - self.weight_scheme = 'sym' + self.algorithm = "minmax" + self.weight_scheme = "sym" self.weight_dtype = None self.activation_dtype = None - self.activation_scheme = 'asym' + self.activation_scheme = "asym" if self.node.name in self.quantizer.config: if self.quantizer.config[self.node.name] not in self.quantizer.fallback_list: - if 'weight' in self.quantizer.config[self.node.name].keys(): - self.per_channel = self.quantizer.config[self.node.name]\ - ['weight']['granularity'] == 'per_channel' - self.algorithm = self.quantizer.config[self.node.name]\ - ['weight']['algorithm'] - self.weight_scheme = self.quantizer.config[self.node.name]\ - ['weight']['scheme'] - self.weight_dtype = self.quantizer.config[self.node.name]\ - ['weight']['dtype'] - if 'activation' in self.quantizer.config[self.node.name].keys(): - self.activation_dtype = self.quantizer.config[self.node.name]\ - ['activation']['dtype'] - self.activation_scheme = self.quantizer.config[self.node.name]\ - ['activation']['scheme'] + if "weight" in self.quantizer.config[self.node.name].keys(): + self.per_channel = self.quantizer.config[self.node.name]["weight"]["granularity"] == "per_channel" + self.algorithm = self.quantizer.config[self.node.name]["weight"]["algorithm"] + self.weight_scheme = self.quantizer.config[self.node.name]["weight"]["scheme"] + self.weight_dtype = self.quantizer.config[self.node.name]["weight"]["dtype"] + if "activation" in self.quantizer.config[self.node.name].keys(): + self.activation_dtype = self.quantizer.config[self.node.name]["activation"]["dtype"] + self.activation_scheme = self.quantizer.config[self.node.name]["activation"]["scheme"] def quantize_check(self): """Check if quantizaion can be done.""" @@ -94,7 +109,7 @@ def quantize(self): """Do quantizaion.""" node = self.node self.quantizer.quantize_inputs(node) - if not self.disable_qdq_for_node_output or self.quantizer.mode != 'qdq': + if not self.disable_qdq_for_node_output or self.quantizer.mode != "qdq": self.quantizer.quantize_outputs(node) def convert_check(self, convert_format): @@ -105,11 +120,12 @@ def convert(self, convert_format): """Convert to QOperator format.""" return - def cast(self): # pragma: no cover + def cast(self): # pragma: no cover """Cast node.""" self.quantizer.cast_inputs(self.node, self.dtype) self.quantizer.cast_outputs(self.node, self.dtype) + class QOperator(object): """Base QOperator.""" @@ -118,10 +134,20 @@ def __init__(self, onnx_node, children, initializers): self.node = onnx_node 
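(A sketch of the per-node quantizer.config entry that Operator.__init__ above consumes;
the keys mirror what the code reads, while the concrete node name and dtype values here
are only illustrative:)

    example_config = {
        "Conv_0_quant": {
            "weight": {"granularity": "per_channel", "algorithm": "minmax", "scheme": "sym", "dtype": "int8"},
            "activation": {"scheme": "asym", "dtype": "uint8"},
        }
    }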
self.children = children self.initializers = initializers - self.qop_list = ['QGemm', 'QAttention', 'QEmbedLayerNormalization', - 'QLinearLeakyRelu', 'QLinearSigmoid', 'QLinearAdd','QLinearMul', - 'QLinearConcat', 'QLinearConv', 'QLinearGlobalAveragePool', - 'QLinearMatMul', 'QLinearAveragePool'] + self.qop_list = [ + "QGemm", + "QAttention", + "QEmbedLayerNormalization", + "QLinearLeakyRelu", + "QLinearSigmoid", + "QLinearAdd", + "QLinearMul", + "QLinearConcat", + "QLinearConv", + "QLinearGlobalAveragePool", + "QLinearMatMul", + "QLinearAveragePool", + ] def convert(self): """Convert to QDQ format.""" diff --git a/neural_compressor/adaptor/ox_utils/operators/pad.py b/neural_compressor/adaptor/ox_utils/operators/pad.py index 63d76b39a2f..1f98536225a 100644 --- a/neural_compressor/adaptor/ox_utils/operators/pad.py +++ b/neural_compressor/adaptor/ox_utils/operators/pad.py @@ -17,9 +17,11 @@ """Pad Operator.""" import onnx -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry + +from neural_compressor.adaptor.ox_utils.operators.ops import Operator, QOperator, op_registry, qop_registry from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, quantize_nparray + @op_registry(op_types="Pad") class PadOperator(Operator): """Pad Operator.""" @@ -31,7 +33,7 @@ def __init__(self, onnx_quantizer, onnx_node): def quantize_check(self): """Check if quantizaion can be done.""" # if opset version is less than 11, just no change - if self.quantizer.opset_version < 11: # pragma: no cover + if self.quantizer.opset_version < 11: # pragma: no cover return False return True @@ -39,25 +41,24 @@ def quantize(self): """Do quantizaion.""" node = self.node self.quantizer.quantize_inputs(node, [0]) - if not self.disable_qdq_for_node_output or self.quantizer.mode != 'qdq': + if not self.disable_qdq_for_node_output or self.quantizer.mode != "qdq": self.quantizer.quantize_outputs(node) node.name = node.name + "_quant" def convert_check(self, convert_format): """Check if conversion can be done.""" node = self.node - assert convert_format in ['static'], \ - "convert format for {} should be in ['static']".format(node.op_type) + assert convert_format in ["static"], "convert format for {} should be in ['static']".format(node.op_type) children = self.quantizer.model.get_children(node) - if len(children) == 0 or not node.name.endswith('_quant'): # pragma: no cover + if len(children) == 0 or not node.name.endswith("_quant"): # pragma: no cover return False return True def convert(self, convert_format): """Convert to QOperator format.""" node = self.node - + parent = self.quantizer.model.get_parents(node)[0] child = self.quantizer.model.get_children(node)[0] @@ -66,11 +67,10 @@ def convert(self, convert_format): kv = attribute_to_kwarg(attribute) kwargs.update(kv) - if 'mode' not in kwargs or kwargs['mode'] == b'constant': + if "mode" not in kwargs or kwargs["mode"] == b"constant": if len(node.input) > 2: # There is 3rd input 'constant_value' zp_tensor = self.quantizer.model.get_initializer(parent.input[2]) - scale_tensor = \ - self.quantizer.model.get_initializer(parent.input[1]) + scale_tensor = self.quantizer.model.get_initializer(parent.input[1]) padding_constant_initializer = self.quantizer.model.get_initializer(node.input[2]) if padding_constant_initializer is not None: @@ -78,21 +78,21 @@ def convert(self, convert_format): zp_value = zp_array.item() if zp_array.ndim == 0 else zp_array[0] scale_array = onnx.numpy_helper.to_array(scale_tensor) scale_value 
= scale_array.item() if scale_array.ndim == 0 else scale_array[0] - padding_constant_array = \ - onnx.numpy_helper.to_array(padding_constant_initializer) + padding_constant_array = onnx.numpy_helper.to_array(padding_constant_initializer) quantized_padding_constant_array = quantize_nparray( - self.weight_dtype, - padding_constant_array, scale_value, zp_value) + self.weight_dtype, padding_constant_array, scale_value, zp_value + ) quantized_padding_constant_name = node.input[2] + "_quantized" quantized_padding_constant_initializer = onnx.numpy_helper.from_array( - quantized_padding_constant_array, quantized_padding_constant_name) + quantized_padding_constant_array, quantized_padding_constant_name + ) # Suppose this padding constant initializer only used by the node self.quantizer.model.remove_initializer(padding_constant_initializer) self.quantizer.model.add_initializer(quantized_padding_constant_initializer) node.input[2] = quantized_padding_constant_name else: self.quantizer.quantize_inputs(node, [2], False) - node.input[2] = node.input[2] + '_DequantizeLinear' + node.input[2] = node.input[2] + "_DequantizeLinear" else: # pad zero_point for original zero node.input.extend([parent.input[2]]) @@ -102,10 +102,11 @@ def convert(self, convert_format): node.output[0] = child.output[0] self.quantizer.remove_nodes.extend([parent, child]) + @qop_registry(op_types="Pad") class QPadOperator(QOperator): """QPad Operator.""" def __init__(self, onnx_node, children, initializers): """Initialization.""" - super().__init__(onnx_node, children, initializers) \ No newline at end of file + super().__init__(onnx_node, children, initializers) diff --git a/neural_compressor/adaptor/ox_utils/operators/pooling.py b/neural_compressor/adaptor/ox_utils/operators/pooling.py index 05c7ce22fc1..c09654fd9c9 100644 --- a/neural_compressor/adaptor/ox_utils/operators/pooling.py +++ b/neural_compressor/adaptor/ox_utils/operators/pooling.py @@ -17,13 +17,15 @@ """AveragePool Operator.""" import onnx -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry + +from neural_compressor.adaptor.ox_utils.operators.ops import Operator, QOperator, op_registry, qop_registry from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain + @op_registry(op_types="AveragePool") class PoolOperator(Operator): """AveragePool Operator.""" - + def __init__(self, onnx_quantizer, onnx_node): """Initialization.""" super(PoolOperator, self).__init__(onnx_quantizer, onnx_node) @@ -44,50 +46,45 @@ def quantize(self): def convert_check(self, convert_format): """Check if conversion can be done.""" node = self.node - assert convert_format in ['static'], \ - "convert format for {} should be in ['static']".format(node.op_type) - + assert convert_format in ["static"], "convert format for {} should be in ['static']".format(node.op_type) + parents = self.quantizer.model.get_parents(node) children = self.quantizer.model.get_children(node) - - if len(children) == 0 or len(parents) == 0 or not node.name.endswith('_quant'): + + if len(children) == 0 or len(parents) == 0 or not node.name.endswith("_quant"): return False return True def convert(self, convert_format): """Convert to QOperator format.""" node = self.node - + parents = self.quantizer.model.get_parents(node) children = self.quantizer.model.get_children(node) - if all([i.op_type == 'DequantizeLinear' for i in parents]) and \ - any([i.op_type == 'QuantizeLinear' for i in children]): - qlinear_output_name = node.output[0] + '_quantized' 
+ if all([i.op_type == "DequantizeLinear" for i in parents]) and any( + [i.op_type == "QuantizeLinear" for i in children] + ): + qlinear_output_name = node.output[0] + "_quantized" inputs = [] inputs.extend(parents[0].input) - inputs.extend([i for i in children if i.op_type == 'QuantizeLinear'][0].input[1:]) + inputs.extend([i for i in children if i.op_type == "QuantizeLinear"][0].input[1:]) kwargs = {} for attribute in node.attribute: kwargs.update(attribute_to_kwarg(attribute)) kwargs["domain"] = ms_domain - qnode = onnx.helper.make_node( - "QLinear" + node.op_type, - inputs, - [qlinear_output_name], - node.name, - **kwargs) - + qnode = onnx.helper.make_node("QLinear" + node.op_type, inputs, [qlinear_output_name], node.name, **kwargs) + self.quantizer.remove_nodes.extend(parents) for child in children: - if child.op_type == 'QuantizeLinear': + if child.op_type == "QuantizeLinear": self.quantizer.remove_nodes.append(child) - self.quantizer.model.replace_input_of_all_nodes( - child.output[0], qnode.output[0]) + self.quantizer.model.replace_input_of_all_nodes(child.output[0], qnode.output[0]) self.quantizer.new_nodes.append(qnode) self.quantizer.remove_nodes.append(node) + @qop_registry(op_types="QLinearAveragePool") class QPoolOperator(QOperator): """QLinearAveragePool Operator.""" @@ -103,27 +100,21 @@ def convert(self): inits = [] # input dq in_dq = onnx.helper.make_node( - 'DequantizeLinear', - node.input[:3], - [node.name + '_in_dequant'], - node.name + '_in_dequant') - inputs = [node.name + '_in_dequant'] + "DequantizeLinear", node.input[:3], [node.name + "_in_dequant"], node.name + "_in_dequant" + ) + inputs = [node.name + "_in_dequant"] add_nodes.append(in_dq) # output q out_q = onnx.helper.make_node( - 'QuantizeLinear', - [node.name + '_out', node.input[3], node.input[4]], - node.output, - node.name + '_out_quant') - outputs = [node.name + '_out'] + "QuantizeLinear", [node.name + "_out", node.input[3], node.input[4]], node.output, node.name + "_out_quant" + ) + outputs = [node.name + "_out"] add_nodes.append(out_q) kwargs = {} - for attribute in node.attribute: # pragma: no cover + for attribute in node.attribute: # pragma: no cover kwargs.update(attribute_to_kwarg(attribute)) - activation_node = onnx.helper.make_node( - 'AveragePool', inputs, - outputs, node.name + '_convert', **kwargs) + activation_node = onnx.helper.make_node("AveragePool", inputs, outputs, node.name + "_convert", **kwargs) add_nodes.append(activation_node) return True, add_nodes, inits diff --git a/neural_compressor/adaptor/ox_utils/operators/reduce.py b/neural_compressor/adaptor/ox_utils/operators/reduce.py index afd834b95fa..d8339c978b9 100644 --- a/neural_compressor/adaptor/ox_utils/operators/reduce.py +++ b/neural_compressor/adaptor/ox_utils/operators/reduce.py @@ -16,10 +16,12 @@ # limitations under the License. 
"""Reduce Operator.""" -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator +from neural_compressor.adaptor.ox_utils.operators.ops import Operator, op_registry -@op_registry(op_types="ReduceMean, ReduceLogSum, ReduceLogSumExp, " \ - "ReduceL1, ReduceL2, ReduceProd, ReduceSum, ReduceSumSquare") + +@op_registry( + op_types="ReduceMean, ReduceLogSum, ReduceLogSumExp, " "ReduceL1, ReduceL2, ReduceProd, ReduceSum, ReduceSumSquare" +) class ReduceOperator(Operator): """Reduce Operator.""" @@ -27,6 +29,7 @@ def __init__(self, onnx_quantizer, onnx_node): """Initialization.""" super(ReduceOperator, self).__init__(onnx_quantizer, onnx_node) + @op_registry(op_types="ReduceMax, ReduceMin") class ReduceMinMaxOperator(Operator): """ReduceMin and ReduceMax Operator.""" @@ -41,44 +44,42 @@ def quantize_check(self): if not self.quantizer.is_valid_quantize_weight(node.input[0]): return False return True - + def quantize(self): """Do quantizaion.""" node = self.node self.quantizer.quantize_inputs(self.node, [0], direct_int8=True) - if not self.disable_qdq_for_node_output or self.quantizer.mode != 'qdq': + if not self.disable_qdq_for_node_output or self.quantizer.mode != "qdq": self.quantizer.quantize_outputs(self.node, direct_int8=True) node.name = node.name + "_quant" def convert_check(self, convert_format): """Check if conversion can be done.""" node = self.node - assert convert_format in ['static'], \ - "convert format for {} should be in ['static']".format(node.op_type) - + assert convert_format in ["static"], "convert format for {} should be in ['static']".format(node.op_type) + parents = self.quantizer.model.get_parents(node) children = self.quantizer.model.get_children(node) - if (len(children) == 0 and len(parents) == 0) or \ - not node.name.endswith('_quant'): + if (len(children) == 0 and len(parents) == 0) or not node.name.endswith("_quant"): return False return True def convert(self, convert_format): """Convert to QOperator format.""" node = self.node - + parents = self.quantizer.model.get_parents(node) children = self.quantizer.model.get_children(node) - if any([i.op_type == 'DequantizeLinear' for i in parents]) and \ - any([i.op_type == 'QuantizeLinear' for i in children]): + if any([i.op_type == "DequantizeLinear" for i in parents]) and any( + [i.op_type == "QuantizeLinear" for i in children] + ): for parent in parents: - if parent.op_type == 'DequantizeLinear': + if parent.op_type == "DequantizeLinear": self.node.input[0] = parent.input[0] self.quantizer.remove_nodes.append(parents[0]) break for child in children: - if child.op_type == 'QuantizeLinear': + if child.op_type == "QuantizeLinear": self.quantizer.remove_nodes.append(child) - self.quantizer.model.replace_input_of_all_nodes( - child.output[0], node.output[0] + '_quantized') - node.output[0] = node.output[0] + '_quantized' \ No newline at end of file + self.quantizer.model.replace_input_of_all_nodes(child.output[0], node.output[0] + "_quantized") + node.output[0] = node.output[0] + "_quantized" diff --git a/neural_compressor/adaptor/ox_utils/operators/resize.py b/neural_compressor/adaptor/ox_utils/operators/resize.py index cf6ef4cfa81..b824cbcee73 100644 --- a/neural_compressor/adaptor/ox_utils/operators/resize.py +++ b/neural_compressor/adaptor/ox_utils/operators/resize.py @@ -16,7 +16,8 @@ # limitations under the License. 
"""Resize Operator.""" -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry +from neural_compressor.adaptor.ox_utils.operators.ops import Operator, QOperator, op_registry, qop_registry + @op_registry(op_types="Resize") class ResizeOperator(Operator): @@ -40,19 +41,18 @@ def quantize(self): """Do quantizaion.""" node = self.node self.quantizer.quantize_inputs(node, [0], direct_int8=True) - if not self.disable_qdq_for_node_output or self.quantizer.mode != 'qdq': + if not self.disable_qdq_for_node_output or self.quantizer.mode != "qdq": self.quantizer.quantize_outputs(self.node, direct_int8=True) node.name = node.name + "_quant" - + def convert_check(self, convert_format): """Check if conversion can be done.""" node = self.node - assert convert_format in ['static'], \ - "convert format for {} should be in ['static']".format(node.op_type) + assert convert_format in ["static"], "convert format for {} should be in ['static']".format(node.op_type) parents = self.quantizer.model.get_parents(node) children = self.quantizer.model.get_children(node) - if (len(children) == 0 and len(parents) == 0) or not node.name.endswith('_quant'): + if (len(children) == 0 and len(parents) == 0) or not node.name.endswith("_quant"): return False return True @@ -63,19 +63,20 @@ def convert(self, convert_format): parents = self.quantizer.model.get_parents(node) children = self.quantizer.model.get_children(node) - if any([i.op_type == 'DequantizeLinear' for i in parents]) and \ - any([i.op_type == 'QuantizeLinear' for i in children]): + if any([i.op_type == "DequantizeLinear" for i in parents]) and any( + [i.op_type == "QuantizeLinear" for i in children] + ): for parent in parents: - if parent.op_type == 'DequantizeLinear' and parent.output[0] == node.input[0]: + if parent.op_type == "DequantizeLinear" and parent.output[0] == node.input[0]: self.node.input[0] = parent.input[0] self.quantizer.remove_nodes.append(parent) break for child in children: - if child.op_type == 'QuantizeLinear': + if child.op_type == "QuantizeLinear": self.quantizer.remove_nodes.append(child) - self.quantizer.model.replace_input_of_all_nodes( - child.output[0], node.output[0] + '_quantized') - node.output[0] = node.output[0] + '_quantized' + self.quantizer.model.replace_input_of_all_nodes(child.output[0], node.output[0] + "_quantized") + node.output[0] = node.output[0] + "_quantized" + @qop_registry(op_types="Resize") class QResizeOperator(QOperator): @@ -83,4 +84,4 @@ class QResizeOperator(QOperator): def __init__(self, onnx_node, children, initializers): """Initialization.""" - super().__init__(onnx_node, children, initializers) \ No newline at end of file + super().__init__(onnx_node, children, initializers) diff --git a/neural_compressor/adaptor/ox_utils/operators/split.py b/neural_compressor/adaptor/ox_utils/operators/split.py index 2bc83266ba9..cb99f3625c3 100644 --- a/neural_compressor/adaptor/ox_utils/operators/split.py +++ b/neural_compressor/adaptor/ox_utils/operators/split.py @@ -17,7 +17,8 @@ """Split Operator.""" import onnx -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry + +from neural_compressor.adaptor.ox_utils.operators.ops import Operator, QOperator, op_registry, qop_registry from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg @@ -33,20 +34,20 @@ def quantize(self): """Do quantizaion.""" node = self.node self.quantizer.quantize_inputs(node, [0]) - if not self.disable_qdq_for_node_output or 
self.quantizer != 'qdq': + if not self.disable_qdq_for_node_output or self.quantizer != "qdq": self.quantizer.quantize_outputs(self.node, direct_int8=True) node.name = node.name + "_quant" def convert_check(self, convert_format): """Check if conversion can be done.""" node = self.node - assert convert_format in ['static'], \ - "convert format for {} should be in ['static']".format(node.op_type) + assert convert_format in ["static"], "convert format for {} should be in ['static']".format(node.op_type) parent = self.quantizer.model.get_parents(node)[0] children = self.quantizer.model.get_children(node) - if parent.op_type != 'DequantizeLinear' or len(children) == 0 or \ - not node.name.endswith('_quant'): # pragma: no cover + if ( + parent.op_type != "DequantizeLinear" or len(children) == 0 or not node.name.endswith("_quant") + ): # pragma: no cover return False return True @@ -56,33 +57,30 @@ def convert(self, convert_format): parent = self.quantizer.model.get_parents(node)[0] kwargs = {} - for attribute in node.attribute: # pragma: no cover + for attribute in node.attribute: # pragma: no cover kwargs.update(attribute_to_kwarg(attribute)) quantized_input_names = [] quantized_input_names.append(parent.input[0]) - if len(node.input) > 1: # pragma: no cover + if len(node.input) > 1: # pragma: no cover quantized_input_names.extend(node.input[1:]) outputs = [] for output in node.output: if output in self.quantizer.model.input_name_to_nodes: child = self.quantizer.model.input_name_to_nodes[output][0] - if child.op_type == 'QuantizeLinear': + if child.op_type == "QuantizeLinear": self.quantizer.remove_nodes.append(child) outputs.append(child.output[0]) - else: # pragma: no cover + else: # pragma: no cover outputs.append(output) - else: # pragma: no cover - outputs.append(output + '_quatized') + else: # pragma: no cover + outputs.append(output + "_quatized") - quantized_node = onnx.helper.make_node(node.op_type, - quantized_input_names, - outputs, - node.name, **kwargs) + quantized_node = onnx.helper.make_node(node.op_type, quantized_input_names, outputs, node.name, **kwargs) self.quantizer.new_nodes.append(quantized_node) self.quantizer.remove_nodes.extend([parent, node]) - def cast(self): # pragma: no cover + def cast(self): # pragma: no cover """Cast node.""" node = self.node if node.input[0] not in [i.tensor_name for i in self.quantizer.new_value_info.values()]: @@ -90,6 +88,7 @@ def cast(self): # pragma: no cover self.quantizer.cast_inputs(self.node, self.dtype) self.quantizer.cast_outputs(self.node, self.dtype) + @qop_registry(op_types="Split") class QSplitOperator(QOperator): """QSplit Operator.""" @@ -105,29 +104,32 @@ def convert(self): inputs = [] inits = [] - if all([child.op_type not in self.qop_list and \ - child.op_type != 'DequantizeLinear' for child in self.children]): + if all([child.op_type not in self.qop_list and child.op_type != "DequantizeLinear" for child in self.children]): return False, add_nodes, inits # input dq in_dq = None for child in self.children: idx = [list(child.input).index(i) for i in node.output if i in child.input][0] - if child.op_type in ['DequantizeLinear', 'QLinearLeakyRelu', 'QLinearSigmoid', 'QLinearConv', - 'QLinearGlobalAveragePool', 'QLinearAveragePool']: + if child.op_type in [ + "DequantizeLinear", + "QLinearLeakyRelu", + "QLinearSigmoid", + "QLinearConv", + "QLinearGlobalAveragePool", + "QLinearAveragePool", + ]: in_dq_inputs = [node.input[0], child.input[1], child.input[2]] - elif child.op_type in ['QEmbedLayerNormalization']: + elif child.op_type 
in ["QEmbedLayerNormalization"]: in_dq_inputs = [node.input[0], child.input[idx + 6], child.input[idx + 11]] - elif child.op_type in ['QAttention']: + elif child.op_type in ["QAttention"]: in_dq_inputs = [node.input[0], child.input[idx + 3], child.input[idx + 3]] else: in_dq_inputs = [node.input[0], child.input[idx + 1], child.input[idx + 2]] in_dq = onnx.helper.make_node( - 'DequantizeLinear', - in_dq_inputs, - [node.name + '_in_dequant'], - node.name + '_in_dequant') - inputs.append(node.name + '_in_dequant') + "DequantizeLinear", in_dq_inputs, [node.name + "_in_dequant"], node.name + "_in_dequant" + ) + inputs.append(node.name + "_in_dequant") add_nodes.append(in_dq) break @@ -137,19 +139,18 @@ def convert(self): outputs = [] for i, out in enumerate(node.output): out_q = onnx.helper.make_node( - 'QuantizeLinear', - [node.name + '_out_' + str(i), in_dq.input[1], in_dq.input[2]], + "QuantizeLinear", + [node.name + "_out_" + str(i), in_dq.input[1], in_dq.input[2]], [node.output[i]], - node.name + '_out_quant_' + str(i)) + node.name + "_out_quant_" + str(i), + ) outputs.append(out_q.input[0]) add_nodes.append(out_q) kwargs = {} - for attribute in node.attribute: # pragma: no cover + for attribute in node.attribute: # pragma: no cover kwargs.update(attribute_to_kwarg(attribute)) - new_node = onnx.helper.make_node( - node.op_type, inputs, - outputs, node.name + '_convert', **kwargs) + new_node = onnx.helper.make_node(node.op_type, inputs, outputs, node.name + "_convert", **kwargs) add_nodes.append(new_node) return True, add_nodes, inits diff --git a/neural_compressor/adaptor/ox_utils/operators/unary_op.py b/neural_compressor/adaptor/ox_utils/operators/unary_op.py index c0a9f93c0a2..bdbfc208214 100644 --- a/neural_compressor/adaptor/ox_utils/operators/unary_op.py +++ b/neural_compressor/adaptor/ox_utils/operators/unary_op.py @@ -16,12 +16,13 @@ # limitations under the License. 
"""Unary operator.""" -from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator +from neural_compressor.adaptor.ox_utils.operators.ops import Operator, op_registry + @op_registry(op_types="Exp, Log, Round, Sqrt") class UnaryOperator(Operator): """Unary operator.""" - + def __init__(self, onnx_quantizer, onnx_node): """Initialization.""" super(UnaryOperator, self).__init__(onnx_quantizer, onnx_node) @@ -30,7 +31,7 @@ def __init__(self, onnx_quantizer, onnx_node): @op_registry(op_types="Abs, Shrink, Sign") class UnaryDirect8BitOperator(Operator): """Unary operator.""" - + def __init__(self, onnx_quantizer, onnx_node): """Initialization.""" super(UnaryDirect8BitOperator, self).__init__(onnx_quantizer, onnx_node) @@ -41,44 +42,42 @@ def quantize_check(self): if not self.quantizer.is_valid_quantize_weight(node.input[0]): return False return True - + def quantize(self): """Do quantizaion.""" node = self.node self.quantizer.quantize_inputs(self.node, [0], direct_int8=True) - if not self.disable_qdq_for_node_output or self.quantizer.mode != 'qdq': + if not self.disable_qdq_for_node_output or self.quantizer.mode != "qdq": self.quantizer.quantize_outputs(self.node, direct_int8=True) node.name = node.name + "_quant" def convert_check(self, convert_format): """Check if conversion can be done.""" node = self.node - assert convert_format in ['static'], \ - "convert format for {} should be in ['static']".format(node.op_type) - + assert convert_format in ["static"], "convert format for {} should be in ['static']".format(node.op_type) + parents = self.quantizer.model.get_parents(node) children = self.quantizer.model.get_children(node) - if (len(children) == 0 and len(parents) == 0) or \ - not node.name.endswith('_quant'): + if (len(children) == 0 and len(parents) == 0) or not node.name.endswith("_quant"): return False return True def convert(self, convert_format): """Convert to QOperator format.""" node = self.node - + parents = self.quantizer.model.get_parents(node) children = self.quantizer.model.get_children(node) - if any([i.op_type == 'DequantizeLinear' for i in parents]) and \ - any([i.op_type == 'QuantizeLinear' for i in children]): + if any([i.op_type == "DequantizeLinear" for i in parents]) and any( + [i.op_type == "QuantizeLinear" for i in children] + ): for parent in parents: - if parent.op_type == 'DequantizeLinear': + if parent.op_type == "DequantizeLinear": self.node.input[0] = parent.input[0] self.quantizer.remove_nodes.append(parents[0]) break for child in children: - if child.op_type == 'QuantizeLinear': + if child.op_type == "QuantizeLinear": self.quantizer.remove_nodes.append(child) - self.quantizer.model.replace_input_of_all_nodes( - child.output[0], node.output[0] + '_quantized') - node.output[0] = node.output[0] + '_quantized' \ No newline at end of file + self.quantizer.model.replace_input_of_all_nodes(child.output[0], node.output[0] + "_quantized") + node.output[0] = node.output[0] + "_quantized" diff --git a/neural_compressor/adaptor/ox_utils/smooth_quant.py b/neural_compressor/adaptor/ox_utils/smooth_quant.py index 87d9fe8f9f6..7006f32ac14 100644 --- a/neural_compressor/adaptor/ox_utils/smooth_quant.py +++ b/neural_compressor/adaptor/ox_utils/smooth_quant.py @@ -16,31 +16,35 @@ # limitations under the License. 
"""SmoothQuant for onnxrt adaptor.""" -import os import copy -import onnx import logging +import os + import numpy as np +import onnx +from onnx import helper, numpy_helper from onnx import onnx_pb as onnx_proto + +from neural_compressor.adaptor.ox_utils.util import _get_qrange_for_qType, is_B_transposed, quantize_data, to_numpy from neural_compressor.model.model import BaseModel from neural_compressor.model.onnx_model import ONNXModel -from neural_compressor.adaptor.ox_utils.util import to_numpy, \ - quantize_data, _get_qrange_for_qType, is_B_transposed -from onnx import numpy_helper, helper logger = logging.getLogger("neural_compressor") -dtype_map = {np.dtype('float32'): 1, - np.dtype('uint8'): 2, - np.dtype('int8'): 3, - np.dtype('int32'): 6, - np.dtype('int64'): 7, - np.dtype('float16'): 10, - np.dtype('double'): 11} - +dtype_map = { + np.dtype("float32"): 1, + np.dtype("uint8"): 2, + np.dtype("int8"): 3, + np.dtype("int32"): 6, + np.dtype("int64"): 7, + np.dtype("float16"): 10, + np.dtype("double"): 11, +} + + def get_quant_dequant_output(model, input_data, output_data, reduce_range, backend): """Get loss between fp32 output and QDQ output. - + Args: model (object): model input_data (numpy.ndarray): fp32 input @@ -49,15 +53,17 @@ def get_quant_dequant_output(model, input_data, output_data, reduce_range, backe backend (str): execution provider """ import onnxruntime as ort - input_data = quant_dequant_data(input_data, reduce_range, 2, 'asym') + + input_data = quant_dequant_data(input_data, reduce_range, 2, "asym") sess = ort.InferenceSession(model.SerializeToString(), providers=[backend]) preds = sess.run(None, {model.graph.input[0].name: input_data}) loss = np.sum(np.abs(output_data - preds) ** 2) return loss + def make_sub_graph(node, inits, input_data, output_data, reduce_range, opset, ir_version): """Build a model with the specific node. - + Args: node (object): node inits (list): initializer inputs of this node @@ -67,15 +73,17 @@ def make_sub_graph(node, inits, input_data, output_data, reduce_range, opset, ir opset (object): opset of the model ir_version (object): ir_version of the model """ - from onnx import helper, TensorProto, numpy_helper + from onnx import TensorProto, helper, numpy_helper + input = helper.make_tensor_value_info(node.input[0], dtype_map[input_data.dtype], input_data.shape) output = helper.make_tensor_value_info(node.output[0], dtype_map[output_data.dtype], output_data.shape) - graph = helper.make_graph([node], 'sub_graph', [input], [output], inits) + graph = helper.make_graph([node], "sub_graph", [input], [output], inits) model = helper.make_model(graph, opset_imports=opset) model.ir_version = ir_version return model -def quant_dequant_data(data, reduce_range=False, qType=3, scheme='sym'): + +def quant_dequant_data(data, reduce_range=False, qType=3, scheme="sym"): """Quantize and then dequantize data. Args: @@ -85,12 +93,14 @@ def quant_dequant_data(data, reduce_range=False, qType=3, scheme='sym'): scheme (str): sym or asym quantization """ rmin, rmax, zero_point, scale, quantized_data = quantize_data( - data.flatten().tolist(), _get_qrange_for_qType(qType, reduce_range), qType, scheme) + data.flatten().tolist(), _get_qrange_for_qType(qType, reduce_range), qType, scheme + ) return ((quantized_data - zero_point) * scale).astype(data.dtype).reshape(data.shape) + class ORTSmoothQuant: """Fake input channel quantization. 
- + For more details please refer to: [1] SmoothQuant: Accurate and Efficient Post-Training Quantization for Large Language Models @@ -98,9 +108,10 @@ class ORTSmoothQuant: We only support inplace mode which means the model weights will be changed, you can call recover function to recover the weights if needed. """ - def __init__(self, model, dataloader, reduce_range=False, backend='CPUExecutionProvider'): + + def __init__(self, model, dataloader, reduce_range=False, backend="CPUExecutionProvider"): """Initialize the attributes of class.""" - self.model = model if isinstance(model, BaseModel) else ONNXModel(model) + self.model = model if isinstance(model, BaseModel) else ONNXModel(model) self.value_infos = {vi.name: vi for vi in self.model.model.graph.value_info} self.value_infos.update({ot.name: ot for ot in self.model.model.graph.output}) self.value_infos.update({it.name: it for it in self.model.model.graph.input}) @@ -123,10 +134,18 @@ def __init__(self, model, dataloader, reduce_range=False, backend='CPUExecutionP self.ops_to_absorb = [] self.record_max_info = False self._build_absorb_function() - - def transform(self, alpha=0.5, folding=True, percentile=99.999, op_types=['Gemm', 'Conv', 'MatMul', 'FusedConv'], - scales_per_op=True, calib_iter=100, quantize_config=None, - auto_alpha_args={'alpha_min': 0.3, 'alpha_max': 0.7, 'alpha_step': 0.05, 'attn_method': 'min'}): + + def transform( + self, + alpha=0.5, + folding=True, + percentile=99.999, + op_types=["Gemm", "Conv", "MatMul", "FusedConv"], + scales_per_op=True, + calib_iter=100, + quantize_config=None, + auto_alpha_args={"alpha_min": 0.3, "alpha_max": 0.7, "alpha_step": 0.05, "attn_method": "min"}, + ): """The main entry of smooth quant. Args: @@ -160,7 +179,7 @@ def transform(self, alpha=0.5, folding=True, percentile=99.999, op_types=['Gemm' if self.record_max_info: return self.model - if alpha == 'auto': + if alpha == "auto": alpha = self._auto_tune_alpha(calib_iter, **auto_alpha_args) scales = self._get_smooth_scales(alpha) @@ -189,9 +208,9 @@ def recover(self): continue input = node_info[1][1] weight = numpy_helper.to_array( - self.model.get_initializer(input), - base_dir=os.path.dirname(self.model.model_path) if \ - self.model.model_path is not None else "") + self.model.get_initializer(input), + base_dir=os.path.dirname(self.model.model_path) if self.model.model_path is not None else "", + ) scale = self.tensor_scales_info[key] new_weight = weight * scale self.model.set_initializer(input, new_weight) @@ -230,8 +249,12 @@ def _check_need_calibration(self, alpha, percentile, op_types, scales_per_op, ca """ need_calib = True - if self.percentile == percentile and self.op_types == op_types \ - and self.scales_per_op == scales_per_op and self.calib_iter == calib_iter: + if ( + self.percentile == percentile + and self.op_types == op_types + and self.scales_per_op == scales_per_op + and self.calib_iter == calib_iter + ): need_calib = False self.alpha = alpha @@ -244,59 +267,80 @@ def _check_need_calibration(self, alpha, percentile, op_types, scales_per_op, ca def _build_absorb_function(self): """Build function mapping for scale folding.""" from onnx import numpy_helper - def norm(node, scale): # pragma: no cover + + def norm(node, scale): # pragma: no cover for idx in [1, 2]: tensor = self.model.get_initializer(node.input[idx]) - new_tensor = numpy_helper.to_array(tensor, os.path.dirname(self.model.model_path)) * scale if \ - self.model.model_path is not None else numpy_helper.to_array(tensor) * scale + new_tensor = ( + 
numpy_helper.to_array(tensor, os.path.dirname(self.model.model_path)) * scale + if self.model.model_path is not None + else numpy_helper.to_array(tensor) * scale + ) self.model.set_initializer(node.input[idx], new_tensor) - self.tensor_scales_info[node.input[idx]] = 1. / scale if \ - node.input[idx] not in self.tensor_scales_info else \ - self.tensor_scales_info[node.input[idx]] * 1. / scale + self.tensor_scales_info[node.input[idx]] = ( + 1.0 / scale + if node.input[idx] not in self.tensor_scales_info + else self.tensor_scales_info[node.input[idx]] * 1.0 / scale + ) return True - - def mul(node, scale): # pragma: no cover + + def mul(node, scale): # pragma: no cover if all([self.model.get_initializer(inp) is None for inp in node.input]): return False for inp in node.input: if self.model.get_initializer(inp) is not None: - key = node.input[0].split('_smooth_output')[0] + key = node.input[0].split("_smooth_output")[0] tensor = self.model.get_initializer(inp) - new_tensor = numpy_helper.to_array(tensor, os.path.dirname(self.model.model_path)) * scale if \ - self.model.model_path is not None else numpy_helper.to_array(tensor) * scale + new_tensor = ( + numpy_helper.to_array(tensor, os.path.dirname(self.model.model_path)) * scale + if self.model.model_path is not None + else numpy_helper.to_array(tensor) * scale + ) self.model.set_initializer(inp, new_tensor) - self.tensor_scales_info[key] = 1. / scale if key not in self.tensor_scales_info \ - else 1. / scale * self.tensor_scales_info[key] + self.tensor_scales_info[key] = ( + 1.0 / scale + if key not in self.tensor_scales_info + else 1.0 / scale * self.tensor_scales_info[key] + ) return True - - def conv(node, scale): # pragma: no cover + + def conv(node, scale): # pragma: no cover if len(node.input) > 2: if self.model.get_initializer(node.input[2]) is not None: tensor = self.model.get_initializer(node.input[2]) - new_tensor = numpy_helper.to_array(tensor, os.path.dirname(self.model.model_path)) * scale if \ - self.model.model_path is not None else numpy_helper.to_array(tensor) * scale + new_tensor = ( + numpy_helper.to_array(tensor, os.path.dirname(self.model.model_path)) * scale + if self.model.model_path is not None + else numpy_helper.to_array(tensor) * scale + ) self.model.set_initializer(node.input[2], new_tensor) - self.tensor_scales_info[node.input[2]] = 1. / scale + self.tensor_scales_info[node.input[2]] = 1.0 / scale scale = scale.reshape(-1, 1, 1, 1) tensor = self.model.get_initializer(node.input[1]) - new_tensor = numpy_helper.to_array(tensor, os.path.dirname(self.model.model_path)) * scale if \ - self.model.model_path is not None else numpy_helper.to_array(tensor) * scale + new_tensor = ( + numpy_helper.to_array(tensor, os.path.dirname(self.model.model_path)) * scale + if self.model.model_path is not None + else numpy_helper.to_array(tensor) * scale + ) self.model.set_initializer(node.input[1], new_tensor) - self.tensor_scales_info[node.input[1]] = 1. / scale if \ - node.input[1] not in self.tensor_scales_info else \ - self.tensor_scales_info[node.input[1]] * 1. 
/ scale + self.tensor_scales_info[node.input[1]] = ( + 1.0 / scale + if node.input[1] not in self.tensor_scales_info + else self.tensor_scales_info[node.input[1]] * 1.0 / scale + ) return True - - self.could_absorb_optype = {"LayerNormalization": norm, - "BatchNormalization": norm, - "InstanceNormalization": norm, - "SimplifiedLayerNormalization": mul, - "MatMul": mul, - "Gemm": mul, - "Conv": conv, - "FusedConv": conv, - "Mul": mul - } + + self.could_absorb_optype = { + "LayerNormalization": norm, + "BatchNormalization": norm, + "InstanceNormalization": norm, + "SimplifiedLayerNormalization": mul, + "MatMul": mul, + "Gemm": mul, + "Conv": conv, + "FusedConv": conv, + "Mul": mul, + } def _fold_scale(self, scales): """Absorb the scale to the operator at output channel. @@ -306,14 +350,15 @@ def _fold_scale(self, scales): """ remove_nodes = [] for node in self.model.nodes(): - if node.op_type == "Mul" and node.name.endswith("_smooth_mul") and node not in remove_nodes: + if node.op_type == "Mul" and node.name.endswith("_smooth_mul") and node not in remove_nodes: parent = self.model.get_parent(node, 0) if parent is None: continue if parent.op_type in self.could_absorb_optype and len(self.model.get_children(parent)) == 1: if node.output[0].split("_smooth_output")[0] in scales: - if self.could_absorb_optype[parent.op_type](parent, - 1.0 / scales[node.output[0].split("_smooth_output")[0]]): + if self.could_absorb_optype[parent.op_type]( + parent, 1.0 / scales[node.output[0].split("_smooth_output")[0]] + ): remove_nodes.append(node) children = [i for i in self.model.nodes() if node.output[0] in i.input] for child in children: @@ -324,7 +369,7 @@ def _fold_scale(self, scales): def _dump_op_info(self, percentile, op_types, iterations, quantize_config=None): """Dump op info for smooth quant. - + Args: percentile (float): percentile of calibration to remove outliers op_types (list): the op type to be smooth quantized @@ -332,30 +377,38 @@ def _dump_op_info(self, percentile, op_types, iterations, quantize_config=None): quantize_config (dict): quantize config """ from neural_compressor.adaptor.ox_utils.calibration import ONNXRTAugment - augment = ONNXRTAugment(self.model, - self.dataloader, - [], - iterations=list(range(0, iterations)), - backend=self.backend, - reduce_range=self.reduce_range) - self.max_vals_per_channel, self.shape_info, self.tensors_to_node = \ - augment.calib_smooth(percentile, op_types, None) + + augment = ONNXRTAugment( + self.model, + self.dataloader, + [], + iterations=list(range(0, iterations)), + backend=self.backend, + reduce_range=self.reduce_range, + ) + self.max_vals_per_channel, self.shape_info, self.tensors_to_node = augment.calib_smooth( + percentile, op_types, None + ) for node in self.model.nodes(): for out in node.output: - if out in self.tensors_to_node and node.op_type in self.could_absorb_optype and \ - self.model.get_initializer(node.input[1]) is not None : - self.ops_to_absorb.append(node.name) + if ( + out in self.tensors_to_node + and node.op_type in self.could_absorb_optype + and self.model.get_initializer(node.input[1]) is not None + ): + self.ops_to_absorb.append(node.name) def _get_output_loss(self, node_name, scale, calib_iter): """Get output loss of specific node after inserting QDQ pair. 
- + Args: node_name (str): node name scale (float): scale of the specific node calib_iter (int): iterations """ - from onnx import helper import onnxruntime as ort + from onnx import helper + node = [i for i in self.model.nodes() if i.name == node_name] loss = 0 if len(node) > 0: @@ -364,18 +417,18 @@ def _get_output_loss(self, node_name, scale, calib_iter): added_tensors = [node.input[0], node.output[0]] self.model.add_tensors_to_outputs(added_tensors) - session = ort.InferenceSession(self.model.model_path + '_augment.onnx', - providers=[self.backend]) if \ - self.model.is_large_model else \ - ort.InferenceSession(self.model.model.SerializeToString(), - providers=[self.backend]) - base_dir = '' if not self.model.is_large_model else os.path.dirname(self.model.model_path) + session = ( + ort.InferenceSession(self.model.model_path + "_augment.onnx", providers=[self.backend]) + if self.model.is_large_model + else ort.InferenceSession(self.model.model.SerializeToString(), providers=[self.backend]) + ) + base_dir = "" if not self.model.is_large_model else os.path.dirname(self.model.model_path) weight = onnx.numpy_helper.to_array(self.model.get_initializer(node.input[1]), base_dir) weight_q = quant_dequant_data(weight) self.model.set_initializer(node.input[1], weight_q) inits = [self.model.get_initializer(i) for i in node.input if self.model.get_initializer(i) is not None] - + inputs_names = [i.name for i in session.get_inputs()] model = None ort_inputs = {} @@ -384,14 +437,13 @@ def _get_output_loss(self, node_name, scale, calib_iter): break if len(inputs_names) == 1: - if isinstance(inputs, dict): # pragma: no cover + if isinstance(inputs, dict): # pragma: no cover for name, input in inputs.items(): ort_inputs.update({name: to_numpy(input)}) else: ort_inputs.update({inputs_names[0]: to_numpy(inputs)}) - else: # pragma: no cover - assert len(inputs_names) == len(inputs), \ - 'number of input tensors must align with graph inputs' + else: # pragma: no cover + assert len(inputs_names) == len(inputs), "number of input tensors must align with graph inputs" if isinstance(inputs, dict): for name, input in inputs.items(): @@ -401,8 +453,15 @@ def _get_output_loss(self, node_name, scale, calib_iter): outputs = session.run(added_tensors, ort_inputs) if model is None: - model = make_sub_graph(node, inits, outputs[0], outputs[1], - self.reduce_range, self.model.model.opset_import, self.model.model.ir_version) + model = make_sub_graph( + node, + inits, + outputs[0], + outputs[1], + self.reduce_range, + self.model.model.opset_import, + self.model.model.ir_version, + ) loss += get_quant_dequant_output(model, outputs[0] * scale, outputs[1], self.reduce_range, self.backend) self.model.remove_tensors_from_outputs([i for i in added_tensors if i not in orig_outputs]) @@ -422,7 +481,7 @@ def _reshape_scale_for_input(self, tensor, key): scale = np.reshape(self.tensor_scales_info[key], (1, self.tensor_scales_info[key].shape[0])) return scale - def _auto_tune_alpha(self, calib_iter, alpha_min=0.3, alpha_max=0.7, alpha_step=0.05, attn_method='min'): + def _auto_tune_alpha(self, calib_iter, alpha_min=0.3, alpha_max=0.7, alpha_step=0.05, attn_method="min"): """Perform alpha-tuning to obtain layer-wise optimal alpha values and adjust parameters accordingly. 
Args: @@ -434,19 +493,27 @@ def _auto_tune_alpha(self, calib_iter, alpha_min=0.3, alpha_max=0.7, alpha_step= """ logger.info("auto tuning alpha") import copy + alpha_scale = 100 - alpha_space = list(range(round(alpha_min * alpha_scale), round((alpha_max + alpha_step) * alpha_scale), - round(alpha_step * alpha_scale))) + alpha_space = list( + range( + round(alpha_min * alpha_scale), + round((alpha_max + alpha_step) * alpha_scale), + round(alpha_step * alpha_scale), + ) + ) alpha_space = [alpha / alpha_scale for alpha in alpha_space] optimal_alphas = {} if self.model.is_large_model: - onnx.save_model(self.model.model, - self.model.model_path + '_augment.onnx', - save_as_external_data=True, - all_tensors_to_one_file=True, - location="weights.pb", - convert_attribute=False) + onnx.save_model( + self.model.model, + self.model.model_path + "_augment.onnx", + save_as_external_data=True, + all_tensors_to_one_file=True, + location="weights.pb", + convert_attribute=False, + ) ## Searching optimal alphas for tensor_name, node_infos in self.tensors_to_node.items(): @@ -457,35 +524,41 @@ def _auto_tune_alpha(self, calib_iter, alpha_min=0.3, alpha_max=0.7, alpha_step= for alpha in alpha_space: scale = self._get_smooth_scales(alpha, [key]) self._adjust_weights(scale) - input_scale = self._reshape_scale_for_input(tensor_name, key) if \ - not (node.op_type == 'Gemm' and is_B_transposed(node)) else \ - self.tensor_scales_info[key] + input_scale = ( + self._reshape_scale_for_input(tensor_name, key) + if not (node.op_type == "Gemm" and is_B_transposed(node)) + else self.tensor_scales_info[key] + ) loss = self._get_output_loss(node_info[0], input_scale, calib_iter) loss_alpha[alpha] = loss if key not in optimal_alphas: # Update alpha results optimal_alphas[key] = alpha else: - optimal_alphas[key] = alpha if optimal_alphas[key] in loss_alpha and \ - loss < loss_alpha[optimal_alphas[key]] else optimal_alphas[key] + optimal_alphas[key] = ( + alpha + if optimal_alphas[key] in loss_alpha and loss < loss_alpha[optimal_alphas[key]] + else optimal_alphas[key] + ) self.recover() logger.info("auto tuning alpha done") if self.model.is_large_model: from onnx.external_data_helper import load_external_data_for_model + load_external_data_for_model(self.model.model, os.path.split(self.model.model_path)[0]) - os.remove(self.model.model_path + '_augment.onnx') + os.remove(self.model.model_path + "_augment.onnx") os.remove(os.path.join(os.path.dirname(self.model.model_path), "weights.pb")) return optimal_alphas - + def _get_smooth_scales(self, alpha, target_list=[]): """Get the smooth scales for. - + The ops with the same input will share one mul layer. TODO support individual scales for each layer. 
- + Args: alpha: smooth alpha in paper target_list: target objects to get scale, [] means get all scales - + Returns: the smooth scales for weights, currently one input tensor only have one scale """ @@ -498,11 +571,12 @@ def _get_smooth_scales(self, alpha, target_list=[]): if len(target_list) > 0 and node_info[0] not in target_list: continue weight = numpy_helper.to_array( - self.model.get_initializer(node_info[1][1]), - base_dir=os.path.dirname(self.model.model_path) if \ - self.model.model_path is not None else "") - if (len(weight.shape) == 4 and weight.shape[1] != 1) or \ - (node.op_type == 'Gemm' and is_B_transposed(node)): + self.model.get_initializer(node_info[1][1]), + base_dir=os.path.dirname(self.model.model_path) if self.model.model_path is not None else "", + ) + if (len(weight.shape) == 4 and weight.shape[1] != 1) or ( + node.op_type == "Gemm" and is_B_transposed(node) + ): weight = np.moveaxis(weight, 0, 1) specific_alpha = alpha[node_info[0]] if isinstance(alpha, dict) else alpha scales[node_info[0]] = self._get_smooth_scale(weight, specific_alpha, tensor) @@ -513,11 +587,12 @@ def _get_smooth_scales(self, alpha, target_list=[]): for node_info in nodes: node = self.model.input_name_to_nodes[node_info[1][1]][0] weight = numpy_helper.to_array( - self.model.get_initializer(node_info[1][1]), - base_dir=os.path.dirname(self.model.model_path) if \ - self.model.model_path is not None else "") - if (len(weight.shape) == 4 and weight.shape[1] != 1) or \ - (node.op_type == 'Gemm' and is_B_transposed(node)): + self.model.get_initializer(node_info[1][1]), + base_dir=os.path.dirname(self.model.model_path) if self.model.model_path is not None else "", + ) + if (len(weight.shape) == 4 and weight.shape[1] != 1) or ( + node.op_type == "Gemm" and is_B_transposed(node) + ): weight = np.moveaxis(weight, 0, 1) weight = weight.reshape(weight.shape[0], -1) cur_max = np.amax(weight, axis=-1) @@ -525,9 +600,9 @@ def _get_smooth_scales(self, alpha, target_list=[]): weights_stack = np.stack(weights_in_channel_max, axis=-1) specific_alpha = alpha[tensor] if isinstance(alpha, dict) else alpha scales[tensor] = self._get_smooth_scale(weights_stack, specific_alpha, tensor) - + return scales - + def _get_smooth_scale(self, weights, specific_alpha, tensor): """Get smooth scale for specific weight. @@ -545,19 +620,21 @@ def _get_smooth_scale(self, weights, specific_alpha, tensor): def _insert_smooth_mul_op(self, scales): """Insert the Mul after inupt. - + The ops with the same input will share one mul layer. 
- + Args: scales (dict): The smooth scales """ for key in scales.keys(): input_name = key if not self.scales_per_op else self.model.get_node(key).input[0] - weight_name = self.tensors_to_node[key][0][1][1] if not self.scales_per_op \ - else self.model.get_node(key).input[1] + weight_name = ( + self.tensors_to_node[key][0][1][1] if not self.scales_per_op else self.model.get_node(key).input[1] + ) scale_factor = 1.0 / scales[key] - if len(self.shape_info[weight_name]) == 3 or \ - len(self.shape_info[weight_name]) == 2: # the last dim is input channel + if ( + len(self.shape_info[weight_name]) == 3 or len(self.shape_info[weight_name]) == 2 + ): # the last dim is input channel pass elif len(self.shape_info[weight_name]) == 4: scale_factor = np.reshape(scale_factor, (1, -1, 1, 1)) @@ -568,14 +645,15 @@ def _insert_smooth_mul_op(self, scales): name=key + "_" + "smooth_scale", data_type=onnx_proto.TensorProto.FLOAT, dims=scale_factor.shape, - vals=scale_factor.flatten().tolist()) + vals=scale_factor.flatten().tolist(), + ) self.new_init_tensors.append(scale_tensor) mul_output_name = key + "_smooth_output" mul_node = helper.make_node( "Mul", inputs=[input_name, key + "_" + "smooth_scale"], outputs=[mul_output_name], - name=key + "_smooth_mul" + name=key + "_smooth_mul", ) self.new_added_mul_nodes.append(mul_node) if input_name in self.value_infos: @@ -587,10 +665,10 @@ def _insert_smooth_mul_op(self, scales): else: for node_info in self.tensors_to_node[key]: self.replace_input.append([self.model.get_node(node_info[0]), key, mul_output_name]) - + def _adjust_weights(self, scales): """Adjust the weights with scale. - + Args: scales (dict): The input scales """ @@ -602,25 +680,30 @@ def _adjust_weights(self, scales): input = node_info[1][1] node = self.model.input_name_to_nodes[input][0] weight = numpy_helper.to_array( - self.model.get_initializer(input), - base_dir=os.path.dirname(self.model.model_path) if \ - self.model.model_path is not None else "") + self.model.get_initializer(input), + base_dir=os.path.dirname(self.model.model_path) if self.model.model_path is not None else "", + ) if len(weight.shape) == 2: - scale = np.expand_dims(scales[key], axis=0) if \ - node.op_type == 'Gemm' and is_B_transposed(node) else\ - np.expand_dims(scales[key], axis=-1) + scale = ( + np.expand_dims(scales[key], axis=0) + if node.op_type == "Gemm" and is_B_transposed(node) + else np.expand_dims(scales[key], axis=-1) + ) new_weight = weight * scale elif len(weight.shape) == 4: # TODO need to check conv node = self.model.input_name_to_nodes[input][0] - if weight.shape[1] == 1 and "group" in [i.name for i in node.attribute] and \ - [i for i in node.attribute if i.name == "group"][0].i > 1: + if ( + weight.shape[1] == 1 + and "group" in [i.name for i in node.attribute] + and [i for i in node.attribute if i.name == "group"][0].i > 1 + ): scale = np.reshape(scales[key], (-1, 1, 1, 1)) else: scale = np.reshape(scales[key], (1, -1, 1, 1)) new_weight = weight * scale else: assert False, "not support" - self.tensor_scales_info[key] = 1. 
/ scale + self.tensor_scales_info[key] = 1.0 / scale new_tensor = numpy_helper.from_array(new_weight, input) self.model.get_initializer(input).CopyFrom(new_tensor) diff --git a/neural_compressor/adaptor/ox_utils/util.py b/neural_compressor/adaptor/ox_utils/util.py index 8fe8d8443bb..253a3c6626e 100644 --- a/neural_compressor/adaptor/ox_utils/util.py +++ b/neural_compressor/adaptor/ox_utils/util.py @@ -17,86 +17,92 @@ """Helper classes or functions for onnxrt adaptor.""" import os +from enum import Enum + import numpy as np + from neural_compressor.utils.utility import LazyImport -from enum import Enum -helper = LazyImport('onnx.helper') -numpy_helper = LazyImport('onnx.numpy_helper') -onnx_proto = LazyImport('onnx.onnx_pb') -torch = LazyImport('torch') +helper = LazyImport("onnx.helper") +numpy_helper = LazyImport("onnx.numpy_helper") +onnx_proto = LazyImport("onnx.onnx_pb") +torch = LazyImport("torch") __producer__ = "onnx.quantize" __version__ = "0.1.0" onnx_domain = "ai.onnx" -ms_domain = "com.microsoft" +ms_domain = "com.microsoft" support_pair = { - 'float32 bfloat16': True, - '1 16': True, - 'bfloat16 float32': True, - '16 1': True, - 'uint8 uint8': True, - '2 2': True, - 'float16 float16': True, - '10 10': True, - 'bfloat16 bfloat16': True, - '16 16': True, - 'float32 float16': True, - '1 10': True, - 'float16 float32': True, - '10 1': True + "float32 bfloat16": True, + "1 16": True, + "bfloat16 float32": True, + "16 1": True, + "uint8 uint8": True, + "2 2": True, + "float16 float16": True, + "10 10": True, + "bfloat16 bfloat16": True, + "16 16": True, + "float32 float16": True, + "1 10": True, + "float16 float32": True, + "10 1": True, } dtype_mapping = { - 'fp32': 1, - 'uint8': 2, - 'int8': 3, - 'uint16': 4, - 'int16': 5, - 'int32': 6, - 'int64': 7, - 'string': 8, - 'bool': 9, - 'fp16': 10, - 'double': 11, - 'uint32': 12, - 'uint64': 13, - 'complex64': 14, - 'complex128': 15, - 'bf16': 16 + "fp32": 1, + "uint8": 2, + "int8": 3, + "uint16": 4, + "int16": 5, + "int32": 6, + "int64": 7, + "string": 8, + "bool": 9, + "fp16": 10, + "double": 11, + "uint32": 12, + "uint64": 13, + "complex64": 14, + "complex128": 15, + "bf16": 16, } PROVIDERS = { - 'default': 'CPUExecutionProvider', - 'onnxrt_trt_ep': 'TensorrtExecutionProvider', - 'onnxrt_dnnl_ep': 'DnnlExecutionProvider', - 'onnxrt_cuda_ep': 'CUDAExecutionProvider', - 'onnxrt_dml_ep': 'DmlExecutionProvider' + "default": "CPUExecutionProvider", + "onnxrt_trt_ep": "TensorrtExecutionProvider", + "onnxrt_dnnl_ep": "DnnlExecutionProvider", + "onnxrt_cuda_ep": "CUDAExecutionProvider", + "onnxrt_dml_ep": "DmlExecutionProvider", } ONNXRT_BACKENDS = { - 'CPUExecutionProvider': 'default', - 'TensorrtExecutionProvider': 'onnxrt_trt_ep', - 'CUDAExecutionProvider': 'onnxrt_cuda_ep', - 'DnnlExecutionProvider': 'onnxrt_dnnl_ep', - 'DmlExecutionProvider': 'onnxrt_dml_ep' + "CPUExecutionProvider": "default", + "TensorrtExecutionProvider": "onnxrt_trt_ep", + "CUDAExecutionProvider": "onnxrt_cuda_ep", + "DnnlExecutionProvider": "onnxrt_dnnl_ep", + "DmlExecutionProvider": "onnxrt_dml_ep", } + def dtype_to_name(dtype_mapping, dtype): """Map data type and its string representation.""" return list(dtype_mapping.keys())[list(dtype_mapping.values()).index(dtype)] -class QuantType(Enum): # pragma: no cover + +class QuantType(Enum): # pragma: no cover """Represent QuantType value.""" QInt8 = 0 QUInt8 = 1 + def make_quant_node(name, inputs, outputs): """Make a QuantizeLinear node.""" return helper.make_node("QuantizeLinear", inputs, outputs, name) + def 
make_dquant_node(name, inputs, outputs, axis=None): """Make a DequantizeLinear node.""" if axis is not None: @@ -104,6 +110,7 @@ def make_dquant_node(name, inputs, outputs, axis=None): else: return helper.make_node("DequantizeLinear", inputs, outputs, name) + def is_B_transposed(node): """Wheter inuput B is transposed.""" transB = [attr for attr in node.attribute if attr.name == "transB"] @@ -111,6 +118,7 @@ def is_B_transposed(node): return 0 < helper.get_attribute_value(transB[0]) return False + def _get_qrange_for_qType(qType, reduce_range=False): """Helper function to get the quantization range for a type. @@ -124,48 +132,53 @@ def _get_qrange_for_qType(qType, reduce_range=False): # [-64, 64] for reduce_range, and [-127, 127] full_range. return 128 if reduce_range else 254 else: - raise ValueError('unsupported quantization data type') + raise ValueError("unsupported quantization data type") + def split_shared_bias(model): """Split shared tensor.""" for input_name, node_list in model.input_name_to_nodes.items(): if len(node_list) > 1 and input_name in [i.name for i in model.model.graph.initializer]: for node in node_list[1:]: - if node.op_type not in ['Conv', 'FusedConv']: + if node.op_type not in ["Conv", "FusedConv"]: continue if len(node.input) > 2 and node.input[2] == input_name: - new_input_name = node.input[2] + '_nc_split_' + node.name + new_input_name = node.input[2] + "_nc_split_" + node.name new_input = helper.make_tensor( - new_input_name, - model.get_initializer(input_name).data_type, - model.get_initializer(input_name).dims, - model.get_initializer(input_name).raw_data, - True) + new_input_name, + model.get_initializer(input_name).data_type, + model.get_initializer(input_name).dims, + model.get_initializer(input_name).raw_data, + True, + ) model.add_initializer(new_input) node.input[2] = new_input_name - return model + return model + def float_to_float16(tensor): """Convert float to float16.""" min_val = 5.96e-08 max_val = 65504.0 - tensor[(tensor > max_val) & (tensor < float('inf'))] = max_val + tensor[(tensor > max_val) & (tensor < float("inf"))] = max_val tensor[(tensor < min_val) & (tensor > 0)] = min_val tensor[(tensor > -min_val) & (tensor < 0)] = -min_val - tensor[(tensor < -max_val) & (tensor > float('-inf'))] = -max_val + tensor[(tensor < -max_val) & (tensor > float("-inf"))] = -max_val return np.float16(tensor) + def float_to_bfloat16(tensor): """Convert float to bfloat16.""" min_val = 9.2e-41 max_val = 3.38953139e38 - tensor[(tensor > max_val) & (tensor < float('inf'))] = max_val + tensor[(tensor > max_val) & (tensor < float("inf"))] = max_val tensor[(tensor < min_val) & (tensor > 0)] = min_val tensor[(tensor > -min_val) & (tensor < 0)] = -min_val - tensor[(tensor < -max_val) & (tensor > float('-inf'))] = -max_val + tensor[(tensor < -max_val) & (tensor > float("-inf"))] = -max_val return tensor -def cast_tensor(tensor, dtype): # pragma: no cover + +def cast_tensor(tensor, dtype): # pragma: no cover """Convert tensor float to target dtype. 
Args: @@ -173,24 +186,23 @@ def cast_tensor(tensor, dtype): # pragma: no cover dtype (int): target data type """ if not isinstance(tensor, onnx_proto.TensorProto): - raise ValueError('Expected input type is an ONNX TensorProto but got %s' % type(tensor)) + raise ValueError("Expected input type is an ONNX TensorProto but got %s" % type(tensor)) if tensor.data_type == onnx_proto.TensorProto.FLOAT: val = numpy_helper.to_array(tensor).copy() - if dtype == 'fp16': + if dtype == "fp16": new_val = float_to_float16(val) - elif dtype == 'bf16': + elif dtype == "bf16": new_val = float_to_bfloat16(val) else: - raise ValueError('Expect fp16 or bf16 but get {}.'.format(dtype)) + raise ValueError("Expect fp16 or bf16 but get {}.".format(dtype)) try: new_tensor = helper.make_tensor( - name=tensor.name, - data_type=dtype_mapping[dtype], - dims=numpy_helper.to_array(tensor).shape if \ - len(numpy_helper.to_array(tensor).shape) != 0 else [], - vals=new_val if \ - len(numpy_helper.to_array(tensor)) != 0 else [numpy_helper.to_array(tensor)]) + name=tensor.name, + data_type=dtype_mapping[dtype], + dims=numpy_helper.to_array(tensor).shape if len(numpy_helper.to_array(tensor).shape) != 0 else [], + vals=new_val if len(numpy_helper.to_array(tensor)) != 0 else [numpy_helper.to_array(tensor)], + ) tensor.CopyFrom(new_tensor) except: tensor.float_data[:] = [] @@ -200,6 +212,7 @@ def cast_tensor(tensor, dtype): # pragma: no cover return True return False + def remove_init_from_model_input(model): """Remove initializer from model input.""" inputs = model.model.graph.input @@ -210,6 +223,7 @@ def remove_init_from_model_input(model): if initializer.name in name_to_input: inputs.remove(name_to_input[initializer.name]) + def collate_preds(results): """Collect model outputs.""" batch = results[0] @@ -217,14 +231,15 @@ def collate_preds(results): results = zip(*results) collate_results = [] for output in results: - collate_results.append(np.concatenate(output)) + collate_results.append(np.concatenate(output)) elif isinstance(batch, np.ndarray): collate_results = np.concatenate(results) return collate_results + def quantize_data_with_scale_zero(data, qType, scheme, scale, zero_point): """Quantize data with scale and zero point. 
- + To pack weights, we compute a linear transformation - when data type == uint8 mode, from [rmin, rmax] -> [0, 2^{b-1}] and - when data type == int8, from [-m , m] -> [-(2^{b-1}-1), 2^{b-1}-1] where @@ -238,57 +253,66 @@ def quantize_data_with_scale_zero(data, qType, scheme, scale, zero_point): zero_point (uint8 or int8): computed zero point of quantized data """ data = np.asarray(data) - if qType == onnx_proto.TensorProto.INT8 and scheme == 'sym': + if qType == onnx_proto.TensorProto.INT8 and scheme == "sym": # signed byte type - quantized_data = (data.astype(np.float32) / scale).round().astype('b') - elif qType == onnx_proto.TensorProto.UINT8 and scheme == 'asym': - quantized_data = ((data.astype(np.float32) / scale).round() + zero_point).astype('B') + quantized_data = (data.astype(np.float32) / scale).round().astype("b") + elif qType == onnx_proto.TensorProto.UINT8 and scheme == "asym": + quantized_data = ((data.astype(np.float32) / scale).round() + zero_point).astype("B") else: - raise ValueError("Unexpected combination of data type {} and scheme {}.".format( - qType, scheme)) + raise ValueError("Unexpected combination of data type {} and scheme {}.".format(qType, scheme)) return quantized_data + def calculate_scale_zp(rmin, rmax, quantize_range, qType, scheme): """Calculate scale and zero point.""" if isinstance(rmax, np.ndarray): - if scheme == 'sym': + if scheme == "sym": max_range = np.maximum(abs(rmin), abs(rmax)) - scale = np.ones(rmax.shape, dtype='float32') - scale[max_range > 0] = np.array([float(i) / quantize_range for i in \ - (max_range[max_range > 0] * 2.).flatten().tolist()], dtype='float32') + scale = np.ones(rmax.shape, dtype="float32") + scale[max_range > 0] = np.array( + [float(i) / quantize_range for i in (max_range[max_range > 0] * 2.0).flatten().tolist()], + dtype="float32", + ) else: - scale = np.ones(rmax.shape, dtype='float32') - scale[rmin != rmax] = np.array([float(i) / quantize_range for i in \ - (rmax - rmin)[rmin != rmax].flatten().tolist()], dtype='float32') + scale = np.ones(rmax.shape, dtype="float32") + scale[rmin != rmax] = np.array( + [float(i) / quantize_range for i in (rmax - rmin)[rmin != rmax].flatten().tolist()], dtype="float32" + ) - if scheme == 'sym' and qType == onnx_proto.TensorProto.INT8: - zero_point = np.zeros(scale.shape, dtype='int8') if isinstance(scale, np.ndarray) else 0 + if scheme == "sym" and qType == onnx_proto.TensorProto.INT8: + zero_point = np.zeros(scale.shape, dtype="int8") if isinstance(scale, np.ndarray) else 0 elif isinstance(scale, np.ndarray) and (scale == 1).all(): - zero_point = np.zeros(scale.shape, dtype='int8') if qType == onnx_proto.TensorProto.INT8 \ - else np.zeros(scale.shape, dtype='uint8') + zero_point = ( + np.zeros(scale.shape, dtype="int8") + if qType == onnx_proto.TensorProto.INT8 + else np.zeros(scale.shape, dtype="uint8") + ) elif qType == onnx_proto.TensorProto.UINT8: - zero_point = np.maximum(0, np.minimum(255, ((0 - float(rmin)) / scale).round()).round()).astype('uint8') + zero_point = np.maximum(0, np.minimum(255, ((0 - float(rmin)) / scale).round()).round()).astype("uint8") else: - zero_point = ((-64 - rmin) / float(scale) if quantize_range == 128 \ - else (-127 - rmin) / float(scale)).round() + zero_point = ( + (-64 - rmin) / float(scale) if quantize_range == 128 else (-127 - rmin) / float(scale) + ).round() else: - if scheme == 'sym': + if scheme == "sym": max_range = max(abs(rmin), abs(rmax)) scale = (float(max_range) * 2) / quantize_range if max_range > 0 else 1 else: scale = (float(rmax) - 
float(rmin)) / quantize_range if rmin != rmax else 1 - if scale == 1 or (scheme == 'sym' and qType == onnx_proto.TensorProto.INT8): + if scale == 1 or (scheme == "sym" and qType == onnx_proto.TensorProto.INT8): zero_point = 0 elif qType == onnx_proto.TensorProto.UINT8: zero_point = round((0 - float(rmin)) / scale) zero_point = np.uint8(round(max(0, min(255, zero_point)))) else: - zero_point = round((-64 - float(rmin)) / scale) if quantize_range == 128 \ - else round((-127 - float(rmin)) / scale) + zero_point = ( + round((-64 - float(rmin)) / scale) if quantize_range == 128 else round((-127 - float(rmin)) / scale) + ) return scale, zero_point + def quantize_data(data, quantize_range, qType, scheme): """Quantize data. @@ -316,6 +340,7 @@ def quantize_data(data, quantize_range, qType, scheme): quantized_data = quantize_data_with_scale_zero(data, qType, scheme, scale, zero_point) return rmin, rmax, zero_point, scale, quantized_data + def quantize_data_per_channel(data, axis, quantize_range, qType, scheme): """Quantize tensor per-channel.""" rmin = None @@ -323,50 +348,49 @@ def quantize_data_per_channel(data, axis, quantize_range, qType, scheme): for i in range(len(data.shape)): if i != axis: rmin = np.min(data, axis=i, keepdims=True) if rmin is None else np.min(rmin, axis=i, keepdims=True) - rmax = np.max(data, axis=i, keepdims=True) if rmax is None else np.max(rmax, axis=i, keepdims=True) + rmax = np.max(data, axis=i, keepdims=True) if rmax is None else np.max(rmax, axis=i, keepdims=True) rmin = np.minimum(rmin, 0) rmax = np.maximum(rmax, 0) scale, zero_point = calculate_scale_zp(rmin, rmax, quantize_range, qType, scheme) quantized_data = quantize_data_with_scale_zero(data, qType, scheme, scale, zero_point) return rmin.reshape(-1, 1), rmax.reshape(-1, 1), zero_point.reshape(-1, 1), scale.reshape(-1, 1), quantized_data -def dequantize_data_with_scale_zero(tensor_value, scale_value, zo_value): # pragma: no cover + +def dequantize_data_with_scale_zero(tensor_value, scale_value, zo_value): # pragma: no cover """Dequantize tensor with sacale and zero point.""" return (tensor_value.astype(np.float32) - zo_value.astype(np.float32)) * scale_value -def dequantize_data(tensor_value, scale_value, zo_value, axis=0): # pragma: no cover + +def dequantize_data(tensor_value, scale_value, zo_value, axis=0): # pragma: no cover """Dequantize tensor.""" if scale_value.size == 1: return dequantize_data_with_scale_zero(tensor_value, scale_value, zo_value) else: - channel_count = tensor_value.shape[axis] # TBD, default from axis 0 + channel_count = tensor_value.shape[axis] # TBD, default from axis 0 new_per_channel_tensor_values = [] for i in range(channel_count): per_channel_tensor_value = tensor_value.take(i, 0) per_channel_scale_value = scale_value.take(i) per_channel_zero_value = zo_value.take(i) - new_per_channel_tensor_values.append(dequantize_data_with_scale_zero(\ - per_channel_tensor_value, - per_channel_scale_value, - per_channel_zero_value)) + new_per_channel_tensor_values.append( + dequantize_data_with_scale_zero( + per_channel_tensor_value, per_channel_scale_value, per_channel_zero_value + ) + ) # combine per_channel_data into one reshape_dims = list(tensor_value.shape) # deep copy reshape_dims[0] = 1 # only one per channel for reshape new_tensor_value = new_per_channel_tensor_values[0].reshape(reshape_dims) for i in range(1, channel_count): - new_per_channel_tensor_value = new_per_channel_tensor_values[i].\ - reshape(reshape_dims) - new_tensor_value = np.concatenate((new_tensor_value, \ - 
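# A worked scalar example of the asymmetric uint8 branch above. The calibration
# statistics rmin/rmax are invented; quantize_range=255 corresponds to the full uint8 range.
import numpy as np

rmin, rmax, quantize_range = -1.5, 3.0, 255
scale = (float(rmax) - float(rmin)) / quantize_range if rmin != rmax else 1
zero_point = np.uint8(round(max(0, min(255, round((0 - float(rmin)) / scale)))))
print(scale, zero_point)                               # ~0.01765 85

data = np.array([-1.5, 0.0, 3.0], dtype=np.float32)
q = ((data / scale).round() + zero_point).astype("B")  # 'asym' uint8 quantization
print(q)                                               # [  0  85 255]
print((q.astype(np.float32) - zero_point) * scale)     # recovers ~[-1.5  0.  3.]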
new_per_channel_tensor_value), 0) + new_per_channel_tensor_value = new_per_channel_tensor_values[i].reshape(reshape_dims) + new_tensor_value = np.concatenate((new_tensor_value, new_per_channel_tensor_value), 0) return new_tensor_value -class ValueInfo: # pragma: no cover + +class ValueInfo: # pragma: no cover """Represents a casted tensor info.""" - def __init__(self, - tensor_name, - dtype, - new_dtype): + def __init__(self, tensor_name, dtype, new_dtype): """Initialization. Args: @@ -378,17 +402,20 @@ def __init__(self, self.dtype = dtype self.new_dtype = new_dtype + class QuantizedValue: """Represents a linearly quantized value (input/output/intializer).""" - def __init__(self, - name, - new_quantized_name, - scale_name, - zero_point_name, - quantized_value_type, - axis=None, - qType=QuantType.QUInt8): + def __init__( + self, + name, + new_quantized_name, + scale_name, + zero_point_name, + quantized_value_type, + axis=None, + qType=QuantType.QUInt8, + ): """Initialization. Args: @@ -408,20 +435,23 @@ def __init__(self, self.axis = axis self.qType = qType + class QuantizedInitializer: """Represents a linearly quantized weight input from ONNX operators.""" - def __init__(self, - name, - initializer, - rmins, - rmaxs, - zero_points, - scales, - data=[], - quantized_data=[], - axis=None, - qType=QuantType.QUInt8): + def __init__( + self, + name, + initializer, + rmins, + rmaxs, + zero_points, + scales, + data=[], + quantized_data=[], + axis=None, + qType=QuantType.QUInt8, + ): """Initialization. Args: @@ -451,21 +481,27 @@ def __init__(self, self.qType = qType -class QuantizationMode(Enum): # pragma: no cover +class QuantizationMode(Enum): # pragma: no cover """Represent QuantizationMode value.""" + IntegerOps = 0 QLinearOps = 1 -class QuantizedValueType(Enum): # pragma: no cover + +class QuantizedValueType(Enum): # pragma: no cover """Represent QuantizedValueType value.""" + Input = 0 Initializer = 1 -class QuantFormat(Enum): # pragma: no cover + +class QuantFormat(Enum): # pragma: no cover """Represent QuantFormat value.""" + QOperator = 0 QDQ = 1 + def quantize_nparray(qtype, arr, scale, zero_point, low=None, high=None): """Quantize numpy array.""" dtype = np.uint8 if qtype == "uint8" else np.int8 @@ -475,6 +511,7 @@ def quantize_nparray(qtype, arr, scale, zero_point, low=None, high=None): np.clip(arr_fp32, cliplow, cliphigh, out=arr_fp32) return arr_fp32.astype(dtype) + def attribute_to_kwarg(attribute): """Convert attribute to kwarg format for use with onnx.helper.make_node.""" attribute_mapping = { @@ -487,22 +524,22 @@ def attribute_to_kwarg(attribute): 7: attribute.ints, 8: attribute.strings, 9: attribute.tensors, - 10: attribute.graphs + 10: attribute.graphs, } if attribute.type in attribute_mapping: value = attribute_mapping[attribute.type] - else: # pragma: no cover + else: # pragma: no cover raise ValueError( - 'attribute {} has no type specified ' - 'or unsupported type {}.'.format(attribute.name, attribute.type)) + "attribute {} has no type specified " "or unsupported type {}.".format(attribute.name, attribute.type) + ) return {attribute.name: value} + def find_by_name(name, item_list): """Helper function to find item by name in a list.""" items = [] for item in item_list: - assert hasattr(item, "name"), \ - "{} should have a 'name' atrribute defined".format(item) # pragma: no cover + assert hasattr(item, "name"), "{} should have a 'name' atrribute defined".format(item) # pragma: no cover if item.name == name: items.append(item) if len(items) > 0: @@ -510,6 +547,7 @@ def 
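# The per-channel dequantization above processes one channel at a time; for reference,
# the same affine mapping can be written as a single broadcasted numpy expression
# (shapes and values below are illustrative only):
import numpy as np

q = np.array([[0, 128, 255], [10, 20, 30]], dtype=np.uint8)  # 2 channels along axis 0
scale = np.array([0.01, 0.1], dtype=np.float32)
zp = np.array([128, 0], dtype=np.uint8)
deq = (q.astype(np.float32) - zp[:, None].astype(np.float32)) * scale[:, None]
print(deq)  # [[-1.28  0.    1.27]  [ 1.    2.    3.  ]]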
find_by_name(name, item_list): else: return None + def trt_env_setup(model): """Set environment variable for Tensorrt Execution Provider.""" is_int8 = False @@ -521,21 +559,24 @@ def trt_env_setup(model): os.environ["ORT_TENSORRT_INT8_ENABLE"] = "1" else: os.environ["ORT_TENSORRT_INT8_ENABLE"] = "0" - + + def to_numpy(data): """Convert to numpy ndarrays.""" if not isinstance(data, np.ndarray): if isinstance(data, torch.Tensor): - if data.dtype is torch.bfloat16: # pragma: no cover + if data.dtype is torch.bfloat16: # pragma: no cover return data.detach().cpu().to(torch.float32).numpy() - if data.dtype is torch.chalf: # pragma: no cover + if data.dtype is torch.chalf: # pragma: no cover return data.detach().cpu().to(torch.cfloat).numpy() return data.detach().cpu().numpy() else: try: return np.array(data) except: - assert False, "The input data for onnx model is {}, which is not supported " \ - "to convert to numpy ndarrays.".format(type(data)) + assert False, ( + "The input data for onnx model is {}, which is not supported " + "to convert to numpy ndarrays.".format(type(data)) + ) else: return data diff --git a/neural_compressor/adaptor/ox_utils/weight_only.py b/neural_compressor/adaptor/ox_utils/weight_only.py index 13bdf237cbc..694087420c9 100644 --- a/neural_compressor/adaptor/ox_utils/weight_only.py +++ b/neural_compressor/adaptor/ox_utils/weight_only.py @@ -16,23 +16,26 @@ # limitations under the License. """WeightOnly for onnxrt adaptor.""" -import sys -import os -import math import copy -import onnx import logging +import math +import os +import sys + import numpy as np +import onnx +from onnx import helper, numpy_helper from onnx import onnx_pb as onnx_proto -from neural_compressor.utils.utility import LazyImport + from neural_compressor.model.model import BaseModel from neural_compressor.model.onnx_model import ONNXModel -from onnx import numpy_helper, helper +from neural_compressor.utils.utility import LazyImport ort = LazyImport("onnxruntime") logger = logging.getLogger("neural_compressor") -def qdq_tensor(data, config, ratio=1.): + +def qdq_tensor(data, config, ratio=1.0): """Quant and dequant tensor per group. 
Args: @@ -47,9 +50,9 @@ def qdq_tensor(data, config, ratio=1.): scheme = config.get("scheme", "asym") if scheme == "sym": maxq = 2 ** (bit - 1) - 1 if bit != 1 else 0 - minq = -2 ** (bit - 1) if bit != 1 else -1 + minq = -(2 ** (bit - 1)) if bit != 1 else -1 elif scheme == "asym": - maxq = 2 ** bit - 1 + maxq = 2**bit - 1 minq = 0 rmin = np.min(data, axis=0, keepdims=True) * ratio @@ -57,29 +60,32 @@ def qdq_tensor(data, config, ratio=1.): if scheme == "sym": max_range = np.maximum(np.abs(rmin), np.abs(rmax)) scale = np.ones(rmax.shape, dtype="float32") - scale[max_range > 0] = np.array([float(i) / (maxq - minq) for i in \ - (max_range[max_range > 0] * 2.).flatten().tolist()], dtype="float32") + scale[max_range > 0] = np.array( + [float(i) / (maxq - minq) for i in (max_range[max_range > 0] * 2.0).flatten().tolist()], dtype="float32" + ) zero_point = np.zeros(scale.shape) else: scale = np.ones(rmax.shape, dtype="float32") - scale[rmin != rmax] = np.array([float(i) / (maxq - minq) for i in \ - (rmax - rmin)[rmin != rmax].flatten().tolist()], dtype="float32") + scale[rmin != rmax] = np.array( + [float(i) / (maxq - minq) for i in (rmax - rmin)[rmin != rmax].flatten().tolist()], dtype="float32" + ) zero_point = ((np.zeros(scale.shape) - rmin) / scale).round() return scale * (np.clip((data / scale + zero_point).round(), minq, maxq) - zero_point) + def rtn_quantize(model, tune_cfg, ratios={}): """Quant the model with round to nearst method. Args: model (ModelProto or ONNXModel): onnx model tune_cfg (dict): quantization config - For example, + For example, tune_cfg={ 'fc2': { - 'bits': 4, - 'group_size': 32, + 'bits': 4, + 'group_size': 32, 'scheme': 'sym', 'algorithm': 'RTN' } @@ -89,16 +95,16 @@ def rtn_quantize(model, tune_cfg, ratios={}): Returns: model: fake quantized ONNXModel """ - model = model if isinstance(model, BaseModel) else ONNXModel(model) + model = model if isinstance(model, BaseModel) else ONNXModel(model) for node in model.nodes(): if node.op_type in ["MatMul", "Attention"] and model.get_initializer(node.input[1]) is not None: weight = numpy_helper.to_array( - model.get_initializer(node.input[1]), - base_dir=os.path.dirname(model.model_path)).copy() + model.get_initializer(node.input[1]), base_dir=os.path.dirname(model.model_path) + ).copy() dtype = weight.dtype config = tune_cfg[node.name].get("weight", {}) - org_w_shape = weight.shape # ic, oc + org_w_shape = weight.shape # ic, oc group_size = config.get("group_size", -1) if config.get("group_size", -1) != -1 else org_w_shape[0] if org_w_shape[0] % group_size == 0: @@ -115,15 +121,15 @@ def rtn_quantize(model, tune_cfg, ratios={}): model.set_initializer(node.input[1], weight.astype(dtype), raw=True) return model + def get_weight_scale(weight, group_size): """Get the scale of weight.""" org_shape = weight.shape weight = np.reshape(weight, (group_size, -1)) if group_size != -1 else weight - scale = np.mean( - np.reshape(np.abs(weight) / np.max(np.abs(weight), axis=0, keepdims=True), org_shape), - axis=1) + scale = np.mean(np.reshape(np.abs(weight) / np.max(np.abs(weight), axis=0, keepdims=True), org_shape), axis=1) return scale + def apply_awq_scale(model, tune_cfg, absorb_pairs, output_dicts): """Apply scale for salient weight.""" best_scales = {} @@ -131,18 +137,21 @@ def apply_awq_scale(model, tune_cfg, absorb_pairs, output_dicts): new_added_mul_nodes = [] replace_input = [] updated_nodes = [] - + for parent, nodes in absorb_pairs.items(): if any([node.input[0] not in output_dicts for node in nodes]): - logger.warning("Miss 
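# A compact numpy sketch of the group-wise round-to-nearest fake quantization that
# qdq_tensor/rtn_quantize perform above. The 4-bit asymmetric setting, the group size
# and the tiny weight matrix are all illustrative; this is not the exact reshape
# layout used by rtn_quantize.
import numpy as np

def fake_quant_group(w, bits=4, group_size=2):
    ic, oc = w.shape                                   # weight laid out as (ic, oc)
    g = w.reshape(group_size, -1)                      # each column of g is one quantization group
    maxq, minq = 2**bits - 1, 0                        # asymmetric integer range
    rmin, rmax = g.min(axis=0, keepdims=True), g.max(axis=0, keepdims=True)
    scale = np.where(rmin != rmax, (rmax - rmin) / (maxq - minq), 1.0)
    zp = ((np.zeros_like(scale) - rmin) / scale).round()
    q = np.clip((g / scale + zp).round(), minq, maxq)
    return (scale * (q - zp)).reshape(ic, oc)          # dequantized ("fake quantized") weight

w = np.random.randn(4, 3).astype(np.float32)
print(np.abs(w - fake_quant_group(w)).max())           # small round-to-nearest error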
input tensors of nodes {} during AWQ, skip it!".format( - ', '.join([node.name for node in nodes if node.input[0] not in output_dicts]))) + logger.warning( + "Miss input tensors of nodes {} during AWQ, skip it!".format( + ", ".join([node.name for node in nodes if node.input[0] not in output_dicts]) + ) + ) continue inp = np.concatenate(output_dicts[nodes[0].input[0]], axis=0) inp_scale = np.mean(np.reshape(np.abs(inp), (-1, inp[0].shape[-1])), axis=0) weight = [] org_out = [] config = tune_cfg.get(nodes[0].name, {}) - + # search scale best_error = float("inf") best_ratio = -1 @@ -153,8 +162,7 @@ def apply_awq_scale(model, tune_cfg, absorb_pairs, output_dicts): ratio = ratio * 1 / n_grid loss = 0 for node in nodes: - weight = numpy_helper.to_array(model.get_initializer(node.input[1]), - os.path.dirname(model.model_path)) + weight = numpy_helper.to_array(model.get_initializer(node.input[1]), os.path.dirname(model.model_path)) w_scale = get_weight_scale(weight, config.get("weight", {}).get("group_size", -1)) org_out = np.matmul(inp, weight) scales = np.clip(np.power(inp_scale, ratio) / np.power(w_scale, (1 - ratio)), 1e-4, None) @@ -171,8 +179,7 @@ def apply_awq_scale(model, tune_cfg, absorb_pairs, output_dicts): best_scale = scales for node in nodes: - tensor = numpy_helper.to_array(model.get_initializer(node.input[1]), - os.path.dirname(model.model_path)) + tensor = numpy_helper.to_array(model.get_initializer(node.input[1]), os.path.dirname(model.model_path)) new_tensor = tensor * best_scale model.set_initializer(node.input[1], new_tensor.astype(tensor.dtype), raw=True) output_dicts[node.input[0]] = output_dicts[node.input[0]] / np.reshape(best_scale, (1, -1)) @@ -183,27 +190,27 @@ def apply_awq_scale(model, tune_cfg, absorb_pairs, output_dicts): if parent.op_type in ["LayerNormalization", "BatchNormalization", "InstanceNormalization"]: # pragma: no cover for idx in [1, 2]: - tensor = numpy_helper.to_array(model.get_initializer(parent.input[idx]), - os.path.dirname(model.model_path)) + tensor = numpy_helper.to_array( + model.get_initializer(parent.input[idx]), os.path.dirname(model.model_path) + ) new_tensor = tensor / np.reshape(best_scale, (1, -1)) model.set_initializer(parent.input[idx], new_tensor.astype(tensor.dtype), raw=True) updated_nodes.append(parent.name) output_dicts[parent.output[0]] = output_dicts[parent.output[0]] / np.reshape(best_scale, (1, -1)) - elif parent.op_type in ["SimplifiedLayerNormalization", "MatMul", "Gemm", "Mul"] and \ - not all([model.get_initializer(inp) is None for inp in parent.input]): + elif parent.op_type in ["SimplifiedLayerNormalization", "MatMul", "Gemm", "Mul"] and not all( + [model.get_initializer(inp) is None for inp in parent.input] + ): for inp in parent.input: if model.get_initializer(inp) is not None: - tensor = numpy_helper.to_array(model.get_initializer(inp), - os.path.dirname(model.model_path)) + tensor = numpy_helper.to_array(model.get_initializer(inp), os.path.dirname(model.model_path)) new_tensor = tensor / np.reshape(best_scale, (1, -1)) model.set_initializer(inp, new_tensor.astype(tensor.dtype), raw=True) updated_nodes.append(parent.name) output_dicts[parent.output[0]] = output_dicts[parent.output[0]] / np.reshape(best_scale, (1, -1)) elif parent.op_type in ["Conv", "FusedConv"]: # pragma: no cover - tensor = numpy_helper.to_array(model.get_initializer(parent.input[2]), - os.path.dirname(model.model_path)) + tensor = numpy_helper.to_array(model.get_initializer(parent.input[2]), os.path.dirname(model.model_path)) new_tensor = tensor / 
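# The loop above searches a small grid for the exponent that trades off activation
# magnitude against weight magnitude, keeping the ratio with the lowest output MSE.
# A simplified, self-contained numpy version of that search (fake_quant is a stand-in
# for qdq_tensor, and all tensors are randomly generated for illustration):
import numpy as np

def fake_quant(w, bits=4):
    s = np.where(w.max(axis=0) != w.min(axis=0), (w.max(axis=0) - w.min(axis=0)) / (2**bits - 1), 1.0)
    zp = (-w.min(axis=0) / s).round()
    return s * (np.clip((w / s + zp).round(), 0, 2**bits - 1) - zp)

inp = np.random.randn(32, 8).astype(np.float32)        # calibration activations
weight = np.random.randn(8, 4).astype(np.float32)
inp_scale = np.abs(inp).mean(axis=0)                   # per input-channel activation magnitude
w_scale = np.abs(weight).mean(axis=1)                  # rough per input-channel weight magnitude
org_out = inp @ weight

best_loss, best_ratio = float("inf"), -1
for i in range(20):
    ratio = i / 20
    scales = np.clip(inp_scale**ratio / w_scale**(1 - ratio), 1e-4, None)
    out = (inp / scales) @ fake_quant(weight * scales[:, None])   # scaling cancels out before quantization
    loss = np.mean((org_out - out) ** 2)
    if loss < best_loss:
        best_loss, best_ratio = loss, ratio
print(best_ratio)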
np.reshape(best_scale, (1, -1)) model.set_initializer(parent.input[2], new_tensor.astype(tensor.dtype), raw=True) updated_nodes.append(parent.name) @@ -212,24 +219,25 @@ def apply_awq_scale(model, tune_cfg, absorb_pairs, output_dicts): else: # pragma: no cover # insert mul scale_tensor = helper.make_tensor( - name=parent.output[0] + "_weight_only_scale", - data_type=onnx_proto.TensorProto.FLOAT, - dims=best_scale.shape, - vals=(1. / best_scale).flatten().tolist()) + name=parent.output[0] + "_weight_only_scale", + data_type=onnx_proto.TensorProto.FLOAT, + dims=best_scale.shape, + vals=(1.0 / best_scale).flatten().tolist(), + ) new_init_tensors.append(scale_tensor) mul_output_name = parent.output[0] + "_weight_only_out" mul_node = helper.make_node( - "Mul", - inputs=[nodes[0].input[0], scale_tensor.name], - outputs=[mul_output_name], - name=nodes[0].input[0] + "_weight_only_mul" - ) + "Mul", + inputs=[nodes[0].input[0], scale_tensor.name], + outputs=[mul_output_name], + name=nodes[0].input[0] + "_weight_only_mul", + ) new_added_mul_nodes.append(mul_node) for node in nodes: replace_input.append([node, node.input[0], mul_node.output[0]]) updated_nodes.append(parent.name) output_dicts[mul_node.output[0]] = output_dicts[mul_node.input[0]] / np.reshape(best_scale, (1, -1)) - + model.add_nodes(new_added_mul_nodes) model.add_initializers(new_init_tensors) for node, old_input_name, new_input_name in replace_input: @@ -237,13 +245,17 @@ def apply_awq_scale(model, tune_cfg, absorb_pairs, output_dicts): return model, output_dicts + def apply_awq_clip(model, tune_cfg, absorb_pairs, output_dicts): """Apply clip for weight by checking mse.""" ratios = {} for parent, nodes in absorb_pairs.items(): if any([node.input[0] not in output_dicts for node in nodes]): - logger.warning("Miss input tensors of nodes {} during AWQ, skip it!".format( - ', '.join([node.name for node in nodes if node.input[0] not in output_dicts]))) + logger.warning( + "Miss input tensors of nodes {} during AWQ, skip it!".format( + ", ".join([node.name for node in nodes if node.input[0] not in output_dicts]) + ) + ) continue inp = np.concatenate(output_dicts[nodes[0].input[0]], axis=0) @@ -251,11 +263,11 @@ def apply_awq_clip(model, tune_cfg, absorb_pairs, output_dicts): for node in nodes: config = tune_cfg.get(node.name, {}) org_weight = numpy_helper.to_array( - model.get_initializer(node.input[1]), - base_dir=os.path.dirname(model.model_path)) - org_w_shape = org_weight.shape # ic, oc + model.get_initializer(node.input[1]), base_dir=os.path.dirname(model.model_path) + ) + org_w_shape = org_weight.shape # ic, oc group_size = config.get("group_size", -1) if config.get("group_size", -1) != -1 else org_w_shape[0] - org_out = np.matmul(inp, org_weight) # n_token, oc + org_out = np.matmul(inp, org_weight) # n_token, oc best_error = float("inf") best_ratio = 1 @@ -273,7 +285,7 @@ def apply_awq_clip(model, tune_cfg, absorb_pairs, output_dicts): part_weight = qdq_tensor(part_weight, config, ratios.get(node.input[1], 1)) weight[:index, :] = part_weight.reshape(index, -1) weight[index:, :] = qdq_tensor(weight[index:, :], config, ratios.get(node.input[1], 1)) - + cur_out = np.matmul(inp, weight) loss = np.mean(np.power((org_out - cur_out), 2)) is_best = loss < best_error @@ -284,6 +296,7 @@ def apply_awq_clip(model, tune_cfg, absorb_pairs, output_dicts): model = rtn_quantize(model, tune_cfg, ratios) return model + def prepare_inputs(model, n_samples, dataloader): """Prepare inputs for weight only quantization. 
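# The clipping search above leaves the weights untouched and only shrinks the min/max
# range used to build the quantization grid, again picking the ratio with the lowest
# output MSE. A minimal standalone version of that idea (4-bit settings and tensors
# are illustrative):
import numpy as np

def fake_quant_clip(w, ratio, bits=4):
    rmin = w.min(axis=0, keepdims=True) * ratio        # shrink the range by `ratio`
    rmax = w.max(axis=0, keepdims=True) * ratio
    s = np.where(rmax != rmin, (rmax - rmin) / (2**bits - 1), 1.0)
    zp = ((0 - rmin) / s).round()
    return s * (np.clip((w / s + zp).round(), 0, 2**bits - 1) - zp)

inp = np.random.randn(64, 16).astype(np.float32)
weight = np.random.randn(16, 8).astype(np.float32)
org_out = inp @ weight

best_loss, best_ratio = float("inf"), 1.0
for i_s in range(10):
    ratio = 1 - i_s / 100                              # 1.00, 0.99, ..., 0.91
    loss = np.mean((org_out - inp @ fake_quant_clip(weight, ratio)) ** 2)
    if loss < best_loss:
        best_loss, best_ratio = loss, ratio
print(best_ratio)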
@@ -297,27 +310,28 @@ def prepare_inputs(model, n_samples, dataloader): so: session options """ from importlib.util import find_spec + from neural_compressor.adaptor.ox_utils.util import to_numpy - + so = ort.SessionOptions() - if sys.version_info < (3, 11) and find_spec('onnxruntime_extensions'): # pragma: no cover + if sys.version_info < (3, 11) and find_spec("onnxruntime_extensions"): # pragma: no cover from onnxruntime_extensions import get_library_path + so.register_custom_ops_library(get_library_path()) if model.is_large_model: - onnx.save_model(model.model, - model.model_path + '_augment.onnx', - save_as_external_data=True, - all_tensors_to_one_file=True, - convert_attribute=False) - - session = ort.InferenceSession( - model.model.SerializeToString(), - so, - providers=ort.get_available_providers()) if not model.is_large_model else \ - ort.InferenceSession( - model.model_path + '_augment.onnx', - so, - providers=ort.get_available_providers()) + onnx.save_model( + model.model, + model.model_path + "_augment.onnx", + save_as_external_data=True, + all_tensors_to_one_file=True, + convert_attribute=False, + ) + + session = ( + ort.InferenceSession(model.model.SerializeToString(), so, providers=ort.get_available_providers()) + if not model.is_large_model + else ort.InferenceSession(model.model_path + "_augment.onnx", so, providers=ort.get_available_providers()) + ) inputs_names = [i.name for i in session.get_inputs()] del session @@ -326,34 +340,29 @@ def prepare_inputs(model, n_samples, dataloader): if ((i + 1) * dataloader.batch_size) > n_samples: break if len(inputs_names) != 1 or isinstance(data[0], dict): - assert len(data[0]) == len(inputs_names), "Input number mismatch, " \ - "require {} but get {}".format(len(inputs_names), len(data[0])) - + assert len(data[0]) == len(inputs_names), "Input number mismatch, " "require {} but get {}".format( + len(inputs_names), len(data[0]) + ) + if isinstance(data[0], dict): inputs.append(dict([(name, to_numpy(inp_data)) for name, inp_data in data[0].items()])) else: inputs.append(dict([(name, to_numpy(inp)) for name, inp in zip(inputs_names, data[0])])) return inputs, so -def awq_quantize(model, - tune_cfg, - dataloader, - n_samples=128, - auto_scale=True, - mse_range=True, - n_blocks=5 - ): + +def awq_quantize(model, tune_cfg, dataloader, n_samples=128, auto_scale=True, mse_range=True, n_blocks=5): """Quant the model with Activation-aware Weight quantization(AWQ) method. 
Args: model (ModelProto or ONNXModel): onnx model tune_cfg (dict): quantization config - For example, + For example, tune_cfg={ 'fc2': { - 'bits': 4, - 'group_size': 32, + 'bits': 4, + 'group_size': 32, 'scheme': 'sym', 'algorithm': 'AWQ' } @@ -388,20 +397,21 @@ def awq_quantize(model, model.add_tensors_to_outputs(dump_tensor) if model.is_large_model: - onnx.save_model(model.model, - model.model_path + '_augment.onnx', - save_as_external_data=True, - all_tensors_to_one_file=True, - convert_attribute=False) - - session = ort.InferenceSession( - model.model.SerializeToString(), - so, - providers=ort.get_available_providers()) if not model.is_large_model else \ - ort.InferenceSession( - model.model_path + '_augment.onnx', - so, - providers=ort.get_available_providers()) + onnx.save_model( + model.model, + model.model_path + "_augment.onnx", + save_as_external_data=True, + all_tensors_to_one_file=True, + convert_attribute=False, + ) + + session = ( + ort.InferenceSession(model.model.SerializeToString(), so, providers=ort.get_available_providers()) + if not model.is_large_model + else ort.InferenceSession( + model.model_path + "_augment.onnx", so, providers=ort.get_available_providers() + ) + ) for inp in inputs: for output_idx, output in enumerate(session.run(None, inp)): @@ -420,7 +430,8 @@ def awq_quantize(model, model.model.graph.output.MergeFrom(org_output) return model -def gptq(Ws, Hs, config, blocksize=128, percdamp=.01, actorder=False, mse=False, perchannel=True): + +def gptq(Ws, Hs, config, blocksize=128, percdamp=0.01, actorder=False, mse=False, perchannel=True): """Quant the model with Activation-aware Weight quantization(AWQ) method. Args: @@ -440,10 +451,10 @@ def gptq(Ws, Hs, config, blocksize=128, percdamp=.01, actorder=False, mse=False, group_size = config.get("weight", {}).get("group_size", -1) bits = config.get("weight", {}).get("bits", 8) scheme = config.get("weight", {}).get("scheme", "asym") - maxq = 2 ** bits - 1 - grid=100 - maxshrink=.8 - norm=2.4 + maxq = 2**bits - 1 + grid = 100 + maxshrink = 0.8 + norm = 2.4 def find_params(weight): org_shape = weight.shape @@ -499,7 +510,7 @@ def find_params(weight): scale, zp = find_params(W) dead = np.diag(H) == 0 H[dead, dead] = 1 - W[dead, :] = 0 # such channel makes no contribution to quantization computation + W[dead, :] = 0 # such channel makes no contribution to quantization computation # rearrange considering the diag's value if actorder: @@ -510,7 +521,7 @@ def find_params(weight): Q = np.zeros(W.shape) damp = percdamp * np.mean(np.diag(H)) diag = np.arange(shape[0]) - H[diag, diag] += damp # add a average value of + H[diag, diag] += damp # add a average value of H = np.linalg.cholesky(np.linalg.inv(H)).T Hinv = H for i1 in range(0, shape[0], blocksize): @@ -523,17 +534,17 @@ def find_params(weight): Losses1 = np.zeros(W1.shape) Hinv1 = Hinv[i1:i2, i1:i2] - for i in range(count): # within a block, channel wise + for i in range(count): # within a block, channel wise w = W1[i, :] d = Hinv1[i, i] if group_size != -1: if (i1 + i) % group_size == 0: - scale, zp = find_params(W[(i1 + i):(i1 + i + group_size), :]) + scale, zp = find_params(W[(i1 + i) : (i1 + i + group_size), :]) q = (scale * (np.clip(np.round(np.expand_dims(w, axis=1) / scale) + zp, 0, maxq) - zp)).flatten() Q1[i, :] = q - Losses1[i, :] = (w - q) ** 2 / d ** 2 + Losses1[i, :] = (w - q) ** 2 / d**2 err1 = (w - q) / d W1[i:, :] -= np.matmul(np.expand_dims(Hinv1[i:, i], axis=1), np.expand_dims(err1, axis=0)) @@ -552,27 +563,21 @@ def find_params(weight): del Ws 
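# At its core, the loop above quantizes one input channel at a time and immediately
# folds the resulting error back into the not-yet-quantized channels, weighted by the
# (Cholesky-factored) inverse Hessian. A simplified numpy sketch of that error-feedback
# idea -- not a line-by-line replica of the code above; the symmetric rounding helper
# stands in for find_params:
import numpy as np

rng = np.random.default_rng(0)
W0 = rng.standard_normal((8, 4))                            # (input channels, output channels)
X = rng.standard_normal((256, 8))                           # calibration activations
H = X.T @ X
H[np.diag_indices_from(H)] += 0.01 * np.mean(np.diag(H))    # percdamp-style damping
Hinv = np.linalg.cholesky(np.linalg.inv(H)).T               # upper-triangular factor of H^-1

def quant(w, bits=4):
    s = max(np.abs(w).max(), 1e-8) / (2 ** (bits - 1) - 1)
    return s * np.round(w / s)

W, Q = W0.copy(), np.zeros_like(W0)
for i in range(W.shape[0]):                                 # channel-wise, in order
    w, d = W[i, :], Hinv[i, i]
    q = quant(w)
    Q[i, :] = q
    err = (w - q) / d
    W[i:, :] -= np.outer(Hinv[i, i:], err)                  # compensate later channels

naive = np.vstack([quant(W0[i, :]) for i in range(W0.shape[0])])
print(np.mean((X @ W0 - X @ Q) ** 2), np.mean((X @ W0 - X @ naive) ** 2))  # GPTQ error is typically smaller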
return Qs -def gptq_quantize(model, - tune_cfg, - dataloader, - n_samples=128, - percdamp=.01, - blocksize=128, - actorder=False, - mse=False, - perchannel=True - ): + +def gptq_quantize( + model, tune_cfg, dataloader, n_samples=128, percdamp=0.01, blocksize=128, actorder=False, mse=False, perchannel=True +): """Quant the model with Activation-aware Weight quantization(AWQ) method. Args: model (ModelProto or ONNXModel): onnx model tune_cfg (dict): quantization config - For example, + For example, tune_cfg={ 'fc2': { - 'bits': 4, - 'group_size': 32, + 'bits': 4, + 'group_size': 32, 'scheme': 'sym', 'algorithm': 'GPTQ' } @@ -601,23 +606,26 @@ def gptq_quantize(model, model.add_tensors_to_outputs(dump_tensor) if model.is_large_model: - onnx.save_model(model.model, - model.model_path + '_augment.onnx', - save_as_external_data=True, - all_tensors_to_one_file=True, - convert_attribute=False) - - session = ort.InferenceSession( - model.model.SerializeToString(), - so, - providers=ort.get_available_providers()) if not model.is_large_model else \ - ort.InferenceSession( - model.model_path + '_augment.onnx', - so, - providers=ort.get_available_providers()) - - weights = [copy.deepcopy(numpy_helper.to_array(model.get_initializer(node.input[1]), - os.path.dirname(model.model_path))) for node in nodes] + onnx.save_model( + model.model, + model.model_path + "_augment.onnx", + save_as_external_data=True, + all_tensors_to_one_file=True, + convert_attribute=False, + ) + + session = ( + ort.InferenceSession(model.model.SerializeToString(), so, providers=ort.get_available_providers()) + if not model.is_large_model + else ort.InferenceSession(model.model_path + "_augment.onnx", so, providers=ort.get_available_providers()) + ) + + weights = [ + copy.deepcopy( + numpy_helper.to_array(model.get_initializer(node.input[1]), os.path.dirname(model.model_path)) + ) + for node in nodes + ] Hs = [np.zeros((i.shape[0], i.shape[0])) for i in weights] nsamples = 0 for inp in inputs: @@ -635,15 +643,16 @@ def gptq_quantize(model, Hs = [i + np.matmul(inp.T, inp) for i in Hs] model.remove_tensors_from_outputs(dump_tensor) - weights = gptq(weights, - Hs, - tune_cfg.get(nodes[0].name, {}), - blocksize=blocksize, - percdamp=percdamp, - actorder=actorder, - mse=mse, - perchannel=perchannel - ) + weights = gptq( + weights, + Hs, + tune_cfg.get(nodes[0].name, {}), + blocksize=blocksize, + percdamp=percdamp, + actorder=actorder, + mse=mse, + perchannel=perchannel, + ) for name, weight in zip([i.input[1] for i in nodes], weights): model.set_initializer(name, weight, raw=True) model.model.graph.output.MergeFrom(org_output) diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py index 8770d2e7dc5..55445f7ee32 100644 --- a/neural_compressor/adaptor/pytorch.py +++ b/neural_compressor/adaptor/pytorch.py @@ -21,16 +21,19 @@ import os import re from collections import OrderedDict, UserDict, namedtuple -from packaging.version import Version -import yaml from functools import partial + +import yaml +from packaging.version import Version + from neural_compressor.utils.utility import dump_elapsed_time -from .adaptor import adaptor_registry, Adaptor -from ..utils.utility import LazyImport, CpuInfo, GLOBAL_STATE, MODE -from ..utils.utility import Statistics + +from ..data.dataloaders.base_dataloader import BaseDataLoader from ..utils import logger +from ..utils.utility import GLOBAL_STATE, MODE, CpuInfo, LazyImport, Statistics +from .adaptor import Adaptor, adaptor_registry from .query import 
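# The per-node Hessian proxies accumulated in gptq_quantize above are running sums of
# X^T X over the calibration batches (one H per MatMul weight). A minimal sketch of
# that accumulation; batch shapes are illustrative:
import numpy as np

hidden = 8
H = np.zeros((hidden, hidden))
for _ in range(4):                                   # pretend calibration batches
    inp = np.random.randn(32, hidden).astype(np.float32)
    inp = np.reshape(inp, (-1, inp.shape[-1]))       # flatten leading dims to (tokens, hidden)
    H += inp.T @ inp
print(H.shape)                                       # (8, 8); gptq() later damps and inverts it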
QueryBackendCapability -from ..data.dataloaders.base_dataloader import BaseDataLoader + torch = LazyImport("torch") json = LazyImport("json") hvd = LazyImport("horovod.torch") @@ -43,9 +46,9 @@ def get_torch_version(): try: - torch_version = torch.__version__.split('+')[0] + torch_version = torch.__version__.split("+")[0] except ValueError as e: # pragma: no cover - assert False, 'Got an unknown version of torch: {}'.format(e) + assert False, "Got an unknown version of torch: {}".format(e) version = Version(torch_version) return version @@ -53,35 +56,39 @@ def get_torch_version(): def get_torch_white_list(approach): version = get_torch_version() import torch.quantization as tq + if version.release < Version("1.7.0").release: # pragma: no cover - white_list = \ - set(tq.default_mappings.DEFAULT_DYNAMIC_MODULE_MAPPING.keys()) \ - if approach == 'post_training_dynamic_quant' else \ - tq.default_mappings.DEFAULT_QCONFIG_PROPAGATE_WHITE_LIST + white_list = ( + set(tq.default_mappings.DEFAULT_DYNAMIC_MODULE_MAPPING.keys()) + if approach == "post_training_dynamic_quant" + else tq.default_mappings.DEFAULT_QCONFIG_PROPAGATE_WHITE_LIST + ) elif version.release < Version("1.8.0").release: # pragma: no cover - white_list = \ - set(tq.quantization_mappings.get_dynamic_quant_module_mappings().keys()) \ - if approach == 'post_training_dynamic_quant' else \ - tq.quantization_mappings.get_qconfig_propagation_list() + white_list = ( + set(tq.quantization_mappings.get_dynamic_quant_module_mappings().keys()) + if approach == "post_training_dynamic_quant" + else tq.quantization_mappings.get_qconfig_propagation_list() + ) else: - white_list = \ - set(tq.quantization_mappings.get_default_dynamic_quant_module_mappings().keys()) \ - if approach == 'post_training_dynamic_quant' else \ - tq.quantization_mappings.get_default_qconfig_propagation_list() + white_list = ( + set(tq.quantization_mappings.get_default_dynamic_quant_module_mappings().keys()) + if approach == "post_training_dynamic_quant" + else tq.quantization_mappings.get_default_qconfig_propagation_list() + ) return white_list -def pytorch_forward_wrapper(model, input, device='cpu', conf=None, running_mode='inference'): +def pytorch_forward_wrapper(model, input, device="cpu", conf=None, running_mode="inference"): version = get_torch_version() if isinstance(input, dict) or isinstance(input, UserDict): - if device == 'cpu': + if device == "cpu": output = model(**input) - elif device == 'ipex': + elif device == "ipex": # have to split the case to avoid exposing ipex.DEVICE outside # which require intel extension installed if version.release < Version("1.12.0").release: # pragma: no cover if running_mode == "calibration": - with ipex.quantization.calibrate(conf, default_recipe=True): # pylint: disable=E1101 + with ipex.quantization.calibrate(conf, default_recipe=True): # pylint: disable=E1101 output = model(**input) else: output = model(**input) @@ -89,16 +96,19 @@ def pytorch_forward_wrapper(model, input, device='cpu', conf=None, running_mode= output = model(**input) else: # pragma: no cover for inp in input.keys(): - input[inp] = input[inp].to("dpcpp" if device=="gpu" else device) \ - if isinstance(input[inp], torch.Tensor) else input[inp] + input[inp] = ( + input[inp].to("dpcpp" if device == "gpu" else device) + if isinstance(input[inp], torch.Tensor) + else input[inp] + ) output = model(**input) elif isinstance(input, list) or isinstance(input, tuple): - if device == 'cpu': + if device == "cpu": output = model(*input) - elif device == 'ipex': + elif device 
== "ipex": if version.release < Version("1.12.0").release: # pragma: no cover if running_mode == "calibration": - with ipex.quantization.calibrate(conf, default_recipe=True): # pylint: disable=E1101 + with ipex.quantization.calibrate(conf, default_recipe=True): # pylint: disable=E1101 output = model(*input) else: output = model(*input) @@ -106,17 +116,17 @@ def pytorch_forward_wrapper(model, input, device='cpu', conf=None, running_mode= output = model(*input) else: # pragma: no cover tmp_device = "dpcpp" if device == "gpu" else device - input = [inp.to(tmp_device) \ - if isinstance(inp, torch.Tensor) else inp - for inp in input] # pylint: disable=E1133 + input = [ + inp.to(tmp_device) if isinstance(inp, torch.Tensor) else inp for inp in input + ] # pylint: disable=E1133 output = model(*input) else: - if device == 'cpu' or not isinstance(input, torch.Tensor): + if device == "cpu" or not isinstance(input, torch.Tensor): output = model(input) - elif device == 'ipex': + elif device == "ipex": if version.release < Version("1.12.0").release: # pragma: no cover if running_mode == "calibration": - with ipex.quantization.calibrate(conf, default_recipe=True): # pylint: disable=E1101 + with ipex.quantization.calibrate(conf, default_recipe=True): # pylint: disable=E1101 output = model(input) else: output = model(input) @@ -135,11 +145,9 @@ def get_example_inputs(model, dataloader): return None try: for idx, (input, label) in enumerate(dataloader): - output = pytorch_forward_wrapper(model, - input) - if isinstance(input, (dict, UserDict)): # pragma: no cover - assert version.release >= Version("1.12.0").release, \ - "INC support IPEX version >= 1.12.0" + output = pytorch_forward_wrapper(model, input) + if isinstance(input, (dict, UserDict)): # pragma: no cover + assert version.release >= Version("1.12.0").release, "INC support IPEX version >= 1.12.0" if "label" in input.keys(): input.pop("label") if version.release <= Version("2.0.1").release: @@ -152,13 +160,11 @@ def get_example_inputs(model, dataloader): if isinstance(input, torch.Tensor): return input break - except Exception as e: # pragma: no cover + except Exception as e: # pragma: no cover for idx, input in enumerate(dataloader): - output = pytorch_forward_wrapper(model, - input) - if isinstance(input, (dict, UserDict)): # pragma: no cover - assert version.release >= Version("1.12.0").release, \ - "INC support IPEX version >= 1.12.0" + output = pytorch_forward_wrapper(model, input) + if isinstance(input, (dict, UserDict)): # pragma: no cover + assert version.release >= Version("1.12.0").release, "INC support IPEX version >= 1.12.0" if "label" in input.keys(): input.pop("label") if version.release <= Version("2.0.1").release: @@ -173,9 +179,11 @@ def get_example_inputs(model, dataloader): if idx == 0: assert False, "Please checkout the example_inputs format." + def get_ops_recursively(model, prefix, ops={}): """This is a helper function for `graph_info`, and it will get all ops from model. 
+ Args: model (object): input model prefix (string): prefix of op name @@ -185,167 +193,182 @@ def get_ops_recursively(model, prefix, ops={}): """ version = get_torch_version() if version.release < Version("1.7.0").release: # pragma: no cover - white_list = \ - (set(torch.quantization.default_mappings.DEFAULT_MODULE_MAPPING.values()) | - set(torch.quantization.default_mappings.DEFAULT_QAT_MODULE_MAPPING.values()) | - set(torch.quantization.default_mappings.DEFAULT_DYNAMIC_MODULE_MAPPING.values()) | - set(torch.quantization.default_mappings.DEFAULT_MODULE_MAPPING.keys()) | - set(torch.quantization.default_mappings.DEFAULT_QAT_MODULE_MAPPING.keys()) | - set(torch.quantization.default_mappings.DEFAULT_DYNAMIC_MODULE_MAPPING.keys()) | - torch.quantization.default_mappings._INCLUDE_QCONFIG_PROPAGATE_LIST) + white_list = ( + set(torch.quantization.default_mappings.DEFAULT_MODULE_MAPPING.values()) + | set(torch.quantization.default_mappings.DEFAULT_QAT_MODULE_MAPPING.values()) + | set(torch.quantization.default_mappings.DEFAULT_DYNAMIC_MODULE_MAPPING.values()) + | set(torch.quantization.default_mappings.DEFAULT_MODULE_MAPPING.keys()) + | set(torch.quantization.default_mappings.DEFAULT_QAT_MODULE_MAPPING.keys()) + | set(torch.quantization.default_mappings.DEFAULT_DYNAMIC_MODULE_MAPPING.keys()) + | torch.quantization.default_mappings._INCLUDE_QCONFIG_PROPAGATE_LIST + ) elif version.release < Version("1.8.0").release: # pragma: no cover white_list = torch.quantization.get_compare_output_module_list() else: white_list = torch.quantization.get_default_compare_output_module_list() for name, child in model.named_children(): - op_name = prefix + '.' + name if prefix != '' else name - if type(child) in white_list and not isinstance(child, torch.nn.Sequential) and \ - type(child) != torch.quantization.stubs.DeQuantStub: - ops[op_name] = unify_op_type_mapping[str(child.__class__.__name__)] \ - if str(child.__class__.__name__) in unify_op_type_mapping else \ - str(child.__class__.__name__) + op_name = prefix + "." + name if prefix != "" else name + if ( + type(child) in white_list + and not isinstance(child, torch.nn.Sequential) + and type(child) != torch.quantization.stubs.DeQuantStub + ): + ops[op_name] = ( + unify_op_type_mapping[str(child.__class__.__name__)] + if str(child.__class__.__name__) in unify_op_type_mapping + else str(child.__class__.__name__) + ) get_ops_recursively(child, op_name, ops) -def _cfg_to_qconfig(tune_cfg, observer_type='post_training_static_quant'): +def _cfg_to_qconfig(tune_cfg, observer_type="post_training_static_quant"): """Convert tune configure to quantization config for each op. - Args: - tune_cfg (dict): dictionary of tune configure for each op - observer_type (str, optional): specify observer type, Default is 'ptq_static', - options: 'ptq_dynamic', 'qat'. - - Returns: - op_qcfgs (dict): dictionary of quantization configure for each op + Args: + tune_cfg (dict): dictionary of tune configure for each op + observer_type (str, optional): specify observer type, Default is 'ptq_static', + options: 'ptq_dynamic', 'qat'. 
- tune_cfg should be a format like below: - { - 'fuse': {'int8': [['CONV2D', 'RELU', 'BN'], ['CONV2D', 'RELU']], - 'fp32': [['CONV2D', 'RELU', 'BN']]}, - 'calib_iteration': 10, - 'op': { - ('op1', 'CONV2D'): { - 'activation': {'dtype': 'uint8', - 'algorithm': 'minmax', - 'scheme':'sym', - 'granularity': 'per_tensor'}, - 'weight': {'dtype': 'int8', - 'algorithm': 'kl', - 'scheme':'asym', - 'granularity': 'per_channel'} - }, - ('op2', 'RELU): { - 'activation': {'dtype': 'int8', - 'scheme': 'asym', - 'granularity': 'per_tensor', - 'algorithm': 'minmax'} - }, - ('op3', 'CONV2D'): { - 'activation': {'dtype': 'fp32'}, - 'weight': {'dtype': 'fp32'} - }, - ... - } - } + Returns: + op_qcfgs (dict): dictionary of quantization configure for each op + + tune_cfg should be a format like below: + { + 'fuse': {'int8': [['CONV2D', 'RELU', 'BN'], ['CONV2D', 'RELU']], + 'fp32': [['CONV2D', 'RELU', 'BN']]}, + 'calib_iteration': 10, + 'op': { + ('op1', 'CONV2D'): { + 'activation': {'dtype': 'uint8', + 'algorithm': 'minmax', + 'scheme':'sym', + 'granularity': 'per_tensor'}, + 'weight': {'dtype': 'int8', + 'algorithm': 'kl', + 'scheme':'asym', + 'granularity': 'per_channel'} + }, + ('op2', 'RELU): { + 'activation': {'dtype': 'int8', + 'scheme': 'asym', + 'granularity': 'per_tensor', + 'algorithm': 'minmax'} + }, + ('op3', 'CONV2D'): { + 'activation': {'dtype': 'fp32'}, + 'weight': {'dtype': 'fp32'} + }, + ... + } + } """ op_qcfgs = OrderedDict() - op_qcfgs['bf16_ops_list'] = [] - for key in tune_cfg['op']: - value = tune_cfg['op'][key] + op_qcfgs["bf16_ops_list"] = [] + for key in tune_cfg["op"]: + value = tune_cfg["op"][key] assert isinstance(value, dict) - assert 'activation' in value - if ('weight' in value and value['weight']['dtype'] == 'fp32') or \ - ('weight' not in value and value['activation']['dtype'] == 'fp32'): + assert "activation" in value + if ("weight" in value and value["weight"]["dtype"] == "fp32") or ( + "weight" not in value and value["activation"]["dtype"] == "fp32" + ): op_qcfgs[key[0]] = None - elif ('weight' in value and value['weight']['dtype'] == 'bf16') or \ - ('weight' not in value and value['activation']['dtype'] == 'bf16'): - op_qcfgs['bf16_ops_list'].append(key) + elif ("weight" in value and value["weight"]["dtype"] == "bf16") or ( + "weight" not in value and value["activation"]["dtype"] == "bf16" + ): + op_qcfgs["bf16_ops_list"].append(key) op_qcfgs[key[0]] = None else: - if 'weight' in value: - weight = value['weight'] - scheme = weight['scheme'] - granularity = weight['granularity'] - algorithm = weight['algorithm'] - dtype = weight['dtype'] - if observer_type == 'quant_aware_training' and \ - key[1] not in ['Embedding', 'EmbeddingBag', 'LSTM', 'GRU', - 'LSTMCell', 'GRUCell', 'RNNCell']: + if "weight" in value: + weight = value["weight"] + scheme = weight["scheme"] + granularity = weight["granularity"] + algorithm = weight["algorithm"] + dtype = weight["dtype"] + if observer_type == "quant_aware_training" and key[1] not in [ + "Embedding", + "EmbeddingBag", + "LSTM", + "GRU", + "LSTMCell", + "GRUCell", + "RNNCell", + ]: weights_fake_quantize = _fake_quantize(algorithm, scheme, granularity, dtype) else: weights_observer = _observer(algorithm, scheme, granularity, dtype) else: - if observer_type == 'quant_aware_training': + if observer_type == "quant_aware_training": weights_fake_quantize = torch.quantization.default_weight_fake_quant else: weights_observer = torch.quantization.default_per_channel_weight_observer - activation = value['activation'] - scheme = activation['scheme'] 
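# As a concrete (hypothetical) instance of the tune_cfg layout documented above:
# one Conv2d quantized statically plus one Linear falling back to fp32. The op names
# and the calibration iteration count are placeholders.
tune_cfg = {
    "calib_iteration": 10,
    "op": {
        ("conv1", "Conv2d"): {
            "activation": {
                "dtype": "uint8",
                "algorithm": "minmax",
                "scheme": "sym",
                "granularity": "per_tensor",
                "quant_mode": "static",
            },
            "weight": {
                "dtype": "int8",
                "algorithm": "minmax",
                "scheme": "sym",
                "granularity": "per_channel",
            },
        },
        ("fc", "Linear"): {"activation": {"dtype": "fp32"}, "weight": {"dtype": "fp32"}},
    },
}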
- granularity = activation['granularity'] - algorithm = activation['algorithm'] - dtype = activation['dtype'] - compute_dtype = activation['compute_dtype'] \ - if 'compute_dtype' in activation \ - and activation['compute_dtype'] is not None \ - else 'uint8' - - if observer_type == 'quant_aware_training': - if key[1] in ['LSTM', 'GRU', 'LSTMCell', 'GRUCell', 'RNNCell']: - activation_observer = _observer(algorithm, scheme, granularity, - dtype, 'post_training_dynamic_quant', compute_dtype) - - elif key[1] not in ['Embedding', 'EmbeddingBag']: - activation_fake_quantize = _fake_quantize(algorithm, scheme, granularity, dtype, - compute_dtype) + activation = value["activation"] + scheme = activation["scheme"] + granularity = activation["granularity"] + algorithm = activation["algorithm"] + dtype = activation["dtype"] + compute_dtype = ( + activation["compute_dtype"] + if "compute_dtype" in activation and activation["compute_dtype"] is not None + else "uint8" + ) + + if observer_type == "quant_aware_training": + if key[1] in ["LSTM", "GRU", "LSTMCell", "GRUCell", "RNNCell"]: + activation_observer = _observer( + algorithm, scheme, granularity, dtype, "post_training_dynamic_quant", compute_dtype + ) + + elif key[1] not in ["Embedding", "EmbeddingBag"]: + activation_fake_quantize = _fake_quantize(algorithm, scheme, granularity, dtype, compute_dtype) else: - activation_observer = \ - _observer(algorithm, scheme, granularity, dtype, observer_type, compute_dtype) - elif value['activation']['quant_mode'] == 'static': - activation_observer = _observer(algorithm, scheme, granularity, - dtype, 'post_training_static_quant', compute_dtype) - elif value['activation']['quant_mode'] == 'dynamic': - activation_observer = _observer(algorithm, scheme, granularity, - dtype, 'post_training_dynamic_quant', compute_dtype) + activation_observer = _observer(algorithm, scheme, granularity, dtype, observer_type, compute_dtype) + elif value["activation"]["quant_mode"] == "static": + activation_observer = _observer( + algorithm, scheme, granularity, dtype, "post_training_static_quant", compute_dtype + ) + elif value["activation"]["quant_mode"] == "dynamic": + activation_observer = _observer( + algorithm, scheme, granularity, dtype, "post_training_dynamic_quant", compute_dtype + ) version = get_torch_version() - if observer_type == 'quant_aware_training': - if key[1] in ['LSTM', 'GRU', 'LSTMCell', 'GRUCell', 'RNNCell', - 'Embedding', 'EmbeddingBag']: + if observer_type == "quant_aware_training": + if key[1] in ["LSTM", "GRU", "LSTMCell", "GRUCell", "RNNCell", "Embedding", "EmbeddingBag"]: if version.release >= Version("1.11.0").release: - if key[1] in ['Embedding', 'EmbeddingBag']: + if key[1] in ["Embedding", "EmbeddingBag"]: qconfig = torch.quantization.float_qparams_weight_only_qconfig else: qconfig = torch.quantization.per_channel_dynamic_qconfig else: qconfig = torch.quantization.QConfigDynamic( - activation=activation_observer, weight=weights_observer) + activation=activation_observer, weight=weights_observer + ) else: - qconfig = torch.quantization.QConfig(activation=activation_fake_quantize, - weight=weights_fake_quantize) - elif value['activation']['quant_mode'] == 'static': - qconfig = torch.quantization.QConfig(activation=activation_observer, - weight=weights_observer) + qconfig = torch.quantization.QConfig( + activation=activation_fake_quantize, weight=weights_fake_quantize + ) + elif value["activation"]["quant_mode"] == "static": + qconfig = torch.quantization.QConfig(activation=activation_observer, 
weight=weights_observer) else: if version.release < Version("1.6.0").release: # pragma: no cover qconfig = torch.quantization.QConfigDynamic(weight=weights_observer) elif version.release >= Version("1.11.0").release: - if key[1] in ['Embedding', 'EmbeddingBag']: + if key[1] in ["Embedding", "EmbeddingBag"]: qconfig = torch.quantization.float_qparams_weight_only_qconfig else: qconfig = torch.quantization.per_channel_dynamic_qconfig else: - qconfig = torch.quantization.QConfigDynamic(activation=activation_observer, - weight=weights_observer) + qconfig = torch.quantization.QConfigDynamic(activation=activation_observer, weight=weights_observer) op_qcfgs[key[0]] = qconfig return op_qcfgs -def _cfgs_to_fx_cfgs(op_cfgs, observer_type='post_training_static_quant'): +def _cfgs_to_fx_cfgs(op_cfgs, observer_type="post_training_static_quant"): """Convert quantization config to a format that meets the requirements of torch.fx. Args: @@ -361,35 +384,39 @@ def _cfgs_to_fx_cfgs(op_cfgs, observer_type='post_training_static_quant'): "module_name": [("layer4.1.conv2", per_channel_weight_qconfig)]} """ version = get_torch_version() - if observer_type == 'post_training_dynamic_quant': + if observer_type == "post_training_dynamic_quant": model_qconfig = torch.quantization.default_dynamic_qconfig - elif observer_type == 'quant_aware_training': - model_qconfig = torch.quantization.QConfig( - activation=torch.quantization.FakeQuantize.with_args( - dtype=torch.quint8, - qscheme=torch.per_tensor_affine, - reduce_range=REDUCE_RANGE), - weight=torch.quantization.default_weight_fake_quant) \ - if version.release < Version("1.10.0").release else \ - torch.quantization.QConfig( - activation=torch.quantization.FusedMovingAvgObsFakeQuantize.with_args( - dtype=torch.quint8, - qscheme=torch.per_tensor_affine, - reduce_range=REDUCE_RANGE), - weight=torch.quantization.default_fused_per_channel_wt_fake_quant) + elif observer_type == "quant_aware_training": + model_qconfig = ( + torch.quantization.QConfig( + activation=torch.quantization.FakeQuantize.with_args( + dtype=torch.quint8, qscheme=torch.per_tensor_affine, reduce_range=REDUCE_RANGE + ), + weight=torch.quantization.default_weight_fake_quant, + ) + if version.release < Version("1.10.0").release + else torch.quantization.QConfig( + activation=torch.quantization.FusedMovingAvgObsFakeQuantize.with_args( + dtype=torch.quint8, qscheme=torch.per_tensor_affine, reduce_range=REDUCE_RANGE + ), + weight=torch.quantization.default_fused_per_channel_wt_fake_quant, + ) + ) else: model_qconfig = torch.quantization.QConfig( activation=torch.quantization.HistogramObserver.with_args(reduce_range=REDUCE_RANGE), - weight=torch.quantization.default_per_channel_weight_observer) + weight=torch.quantization.default_per_channel_weight_observer, + ) if version.release >= Version("1.13.0").release: # pragma: no cover from torch.ao.quantization import QConfigMapping + fx_op_cfgs = QConfigMapping() - if observer_type != 'post_training_dynamic_quant': + if observer_type != "post_training_dynamic_quant": fx_op_cfgs.set_global(model_qconfig) else: fx_op_cfgs = dict() - if observer_type != 'post_training_dynamic_quant': + if observer_type != "post_training_dynamic_quant": fx_op_cfgs[""] = model_qconfig op_tuple_cfg_list = [] @@ -408,20 +435,16 @@ def _cfgs_to_fx_cfgs(op_cfgs, observer_type='post_training_static_quant'): if version.release < Version("1.13.0").release: # pragma: no cover fx_op_cfgs["module_name"] = op_tuple_cfg_list - elif observer_type != 'post_training_dynamic_quant': + elif 
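# For PyTorch >= 1.13 the FX path above is driven by torch.ao.quantization.QConfigMapping.
# A minimal standalone example of the same pattern; the module name "layer4.1.conv2"
# is hypothetical:
import torch
from torch.ao.quantization import QConfigMapping, get_default_qconfig

qconfig_mapping = (
    QConfigMapping()
    .set_global(get_default_qconfig("fbgemm"))     # default qconfig for every module
    .set_module_name("layer4.1.conv2", None)       # e.g. keep a single op in fp32
)
print(qconfig_mapping.to_dict()["module_name"])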
observer_type != "post_training_dynamic_quant": from torch.ao.quantization import get_default_qconfig_mapping - for name, q_config in get_default_qconfig_mapping().to_dict()['object_type']: + + for name, q_config in get_default_qconfig_mapping().to_dict()["object_type"]: fx_op_cfgs.set_object_type(name, q_config) return fx_op_cfgs -def _observer(algorithm, - scheme, - granularity, - dtype, - observer_type='post_training_static_quant', - compute_dtype='uint8'): +def _observer(algorithm, scheme, granularity, dtype, observer_type="post_training_static_quant", compute_dtype="uint8"): """Construct an observer module, In forward, observer will update the statistics of the observed Tensor. And they should provide a `calculate_qparams` function that computes the quantization parameters given the collected statistics. @@ -437,74 +460,82 @@ def _observer(algorithm, Returns: oberser (object) """ - from .torch_utils.util import match_datatype_pattern, calculate_quant_min_max, _get_signed_and_bits - if observer_type == 'post_training_dynamic_quant' and \ - get_torch_version().release >= Version("1.6.0").release: + from .torch_utils.util import _get_signed_and_bits, calculate_quant_min_max, match_datatype_pattern + + if observer_type == "post_training_dynamic_quant" and get_torch_version().release >= Version("1.6.0").release: return torch.quantization.default_dynamic_quant_observer - compute_dtype_dict = {'int8': torch.qint8, 'uint8': torch.quint8, 'None': None} + compute_dtype_dict = {"int8": torch.qint8, "uint8": torch.quint8, "None": None} if compute_dtype in compute_dtype_dict: compute_dtype = compute_dtype_dict[compute_dtype] else: # pragma: no cover assert False, "Unsupport compute_dtype with {}".format(compute_dtype) - quant_min, quant_max = None, None - dtype_dict = {'int8': torch.qint8, 'uint8': torch.quint8, 'fp32': torch.float} + quant_min, quant_max = None, None + dtype_dict = {"int8": torch.qint8, "uint8": torch.quint8, "fp32": torch.float} if dtype in dtype_dict: torch_dtype = dtype_dict[dtype] else: # pragma: no cover - #TODO to handle int4 + # TODO to handle int4 if match_datatype_pattern(dtype): - logger.info((f"Currently, PyTorch does not natively support {dtype},"+ \ - f"it will simulate its numerics instead.")) + logger.info( + (f"Currently, PyTorch does not natively support {dtype}," + "it will simulate its numerics instead.") + ) unsigned, num_bits = _get_signed_and_bits(dtype) torch_dtype = torch.quint8 if unsigned else torch.qint8 quant_min, quant_max = calculate_quant_min_max(unsigned, num_bits) - logger.info((f"For {dtype}, replace it with {torch_dtype} and " + \ - f"set quant_min: {quant_min}, quant_max: {quant_max}")) - else: # pragma: no cover + logger.info( + ( + f"For {dtype}, replace it with {torch_dtype} and " + + f"set quant_min: {quant_min}, quant_max: {quant_max}" + ) + ) + else: # pragma: no cover assert False, "Unsupport dtype with {}".format(dtype) - if algorithm == 'placeholder' or torch_dtype == torch.float: # pragma: no cover - return torch.quantization.PlaceholderObserver \ - if get_torch_version().release < Version("1.8.0").release \ - else torch.quantization.PlaceholderObserver.with_args(dtype=torch_dtype, - compute_dtype=compute_dtype) - if algorithm == 'minmax': - if granularity == 'per_channel': + if algorithm == "placeholder" or torch_dtype == torch.float: # pragma: no cover + return ( + torch.quantization.PlaceholderObserver + if get_torch_version().release < Version("1.8.0").release + else 
torch.quantization.PlaceholderObserver.with_args(dtype=torch_dtype, compute_dtype=compute_dtype) + ) + if algorithm == "minmax": + if granularity == "per_channel": observer = torch.quantization.PerChannelMinMaxObserver - if scheme == 'sym': + if scheme == "sym": qscheme = torch.per_channel_symmetric - elif scheme == 'asym_float': + elif scheme == "asym_float": qscheme = torch.per_channel_affine_float_qparams else: qscheme = torch.per_channel_affine else: - assert granularity == 'per_tensor' + assert granularity == "per_tensor" observer = torch.quantization.MinMaxObserver - if scheme == 'sym': + if scheme == "sym": qscheme = torch.per_tensor_symmetric else: - assert scheme == 'asym' + assert scheme == "asym" qscheme = torch.per_tensor_affine else: - assert algorithm == 'kl' + assert algorithm == "kl" observer = torch.quantization.HistogramObserver - assert granularity == 'per_tensor' - if scheme == 'sym': + assert granularity == "per_tensor" + if scheme == "sym": qscheme = torch.per_tensor_symmetric else: - assert scheme == 'asym' + assert scheme == "asym" qscheme = torch.per_tensor_affine - return observer.with_args(qscheme=qscheme, - dtype=torch_dtype, - reduce_range=(REDUCE_RANGE and scheme == 'asym'), - quant_min=quant_min, - quant_max=quant_max) + return observer.with_args( + qscheme=qscheme, + dtype=torch_dtype, + reduce_range=(REDUCE_RANGE and scheme == "asym"), + quant_min=quant_min, + quant_max=quant_max, + ) -def _fake_quantize(algorithm, scheme, granularity, dtype, compute_dtype='uint8'): +def _fake_quantize(algorithm, scheme, granularity, dtype, compute_dtype="uint8"): """Construct a fake quantize module, In forward, fake quantize module will update the statistics of the observed Tensor and fake quantize the input. They should also provide a `calculate_qparams` function @@ -521,65 +552,65 @@ def _fake_quantize(algorithm, scheme, granularity, dtype, compute_dtype='uint8') fake quantization (object) """ version = get_torch_version() - if scheme == 'asym_float' \ - and version.release >= Version("1.7.0").release: # pragma: no cover + if scheme == "asym_float" and version.release >= Version("1.7.0").release: # pragma: no cover return torch.quantization.default_float_qparams_observer - if algorithm == 'placeholder' or dtype == 'fp32': # pragma: no cover + if algorithm == "placeholder" or dtype == "fp32": # pragma: no cover return _observer(algorithm, scheme, granularity, dtype, compute_dtype=compute_dtype) - fake_quant = torch.quantization.FakeQuantize \ - if version.release < Version("1.10.0").release else \ - torch.quantization.FusedMovingAvgObsFakeQuantize - if algorithm == 'minmax': - if granularity == 'per_channel': + fake_quant = ( + torch.quantization.FakeQuantize + if version.release < Version("1.10.0").release + else torch.quantization.FusedMovingAvgObsFakeQuantize + ) + if algorithm == "minmax": + if granularity == "per_channel": observer = torch.quantization.MovingAveragePerChannelMinMaxObserver - if scheme == 'sym': + if scheme == "sym": qscheme = torch.per_channel_symmetric else: - assert scheme == 'asym' + assert scheme == "asym" qscheme = torch.per_channel_affine else: - assert granularity == 'per_tensor' + assert granularity == "per_tensor" observer = torch.quantization.MovingAverageMinMaxObserver - if scheme == 'sym': + if scheme == "sym": qscheme = torch.per_tensor_symmetric else: - assert scheme == 'asym' + assert scheme == "asym" qscheme = torch.per_tensor_affine else: # pragma: no cover # Histogram observer is too slow for quantization aware training - assert 
algorithm == 'kl' + assert algorithm == "kl" observer = torch.quantization.HistogramObserver - assert granularity == 'per_tensor' - if scheme == 'sym': + assert granularity == "per_tensor" + if scheme == "sym": qscheme = torch.per_tensor_symmetric else: - assert scheme == 'asym' + assert scheme == "asym" qscheme = torch.per_tensor_affine - if dtype == 'int8': + if dtype == "int8": qmin = -128 qmax = 127 dtype = torch.qint8 else: - assert dtype == 'uint8' + assert dtype == "uint8" qmin = 0 qmax = 255 dtype = torch.quint8 - return fake_quant.with_args(observer=observer, - quant_min=qmin, - quant_max=qmax, - dtype=dtype, - qscheme=qscheme, - reduce_range=(REDUCE_RANGE and scheme == 'asym')) + return fake_quant.with_args( + observer=observer, + quant_min=qmin, + quant_max=qmax, + dtype=dtype, + qscheme=qscheme, + reduce_range=(REDUCE_RANGE and scheme == "asym"), + ) -def _propagate_qconfig(model, - op_qcfgs, - is_qat_convert=False, - approach='post_training_static_quant'): +def _propagate_qconfig(model, op_qcfgs, is_qat_convert=False, approach="post_training_static_quant"): """Propagate qconfig through the module hierarchy and assign `qconfig` - attribute on each leaf module + attribute on each leaf module. Args: model (object): input model @@ -594,15 +625,15 @@ def _propagate_qconfig(model, None, module is modified inplace with qconfig attached """ fallback_ops = [] - _propagate_qconfig_recursively(model, '', op_qcfgs) + _propagate_qconfig_recursively(model, "", op_qcfgs) - if approach != 'post_training_dynamic_quant': + if approach != "post_training_dynamic_quant": for k, v in op_qcfgs.items(): if v is None and not is_qat_convert: fallback_ops.append(k) if fallback_ops and not is_qat_convert: - _fallback_quantizable_ops_recursively(model, '', fallback_ops, op_qcfgs) + _fallback_quantizable_ops_recursively(model, "", fallback_ops, op_qcfgs) def _propagate_qconfig_recursively(model, prefix, op_qcfgs, qconfig_parent=None): @@ -630,13 +661,13 @@ def _propagate_qconfig_recursively(model, prefix, op_qcfgs, qconfig_parent=None) version = get_torch_version() if version.release >= Version("1.8.0").release: child.qconfig = torch.quantization.QConfig( - activation=torch.quantization.MinMaxObserver.with_args( - reduce_range=REDUCE_RANGE), - weight=torch.quantization.default_per_channel_weight_observer) - _propagate_qconfig_recursively(child, op_name + '.', op_qcfgs, qconfig_son) + activation=torch.quantization.MinMaxObserver.with_args(reduce_range=REDUCE_RANGE), + weight=torch.quantization.default_per_channel_weight_observer, + ) + _propagate_qconfig_recursively(child, op_name + ".", op_qcfgs, qconfig_son) -def _find_quantized_op_num(module, op_qcfgs, prefix='', op_count=0): +def _find_quantized_op_num(module, op_qcfgs, prefix="", op_count=0): """This is a helper function for `_fallback_quantizable_ops_recursively` Args: @@ -649,9 +680,8 @@ def _find_quantized_op_num(module, op_qcfgs, prefix='', op_count=0): the quantizable op quantity in this module """ for name_tmp, child_tmp in module.named_children(): - op_name = prefix + '.' + name_tmp if prefix != '' else name_tmp - if op_name in op_qcfgs.keys() and \ - type(child_tmp) != torch.quantization.QuantStub: + op_name = prefix + "." 
+ name_tmp if prefix != "" else name_tmp + if op_name in op_qcfgs.keys() and type(child_tmp) != torch.quantization.QuantStub: op_count += 1 else: op_count = _find_quantized_op_num(child_tmp, op_qcfgs, op_name, op_count) @@ -670,28 +700,29 @@ def _fallback_quantizable_ops_recursively(model, prefix, fallback_ops, op_qcfgs) Returns: None """ + class DequantQuantWrapper(torch.nn.Module): """A wrapper class that wraps the input module, adds DeQuantStub and surround the call to module with call to dequant. this is used by fallback layer when the data type of quantized op is input:int8/output:int8. - This is used by the fallback utility functions to add the dequant and - quant modules, before `convert` function `QuantStub` will just be observer, - it observes the input tensor, after `convert`, `QuantStub` - will be swapped to `nnq.Quantize` which does actual quantization. Similarly - for `DeQuantStub`. + This is used by the fallback utility functions to add the dequant and + quant modules, before `convert` function `QuantStub` will just be observer, + it observes the input tensor, after `convert`, `QuantStub` + will be swapped to `nnq.Quantize` which does actual quantization. Similarly + for `DeQuantStub`. """ + def __init__(self, module, observer=None): super(DequantQuantWrapper, self).__init__() if not module.qconfig and observer: - weights_observer = observer('minmax', 'asym', 'per_channel', 'int8') - activation_observer = observer('minmax', 'sym', 'per_tensor', 'uint8') - module.qconfig = torch.quantization.QConfig(activation=activation_observer, - weight=weights_observer) - self.add_module('quant', torch.quantization.QuantStub(module.qconfig)) - self.add_module('dequant', torch.quantization.DeQuantStub()) - self.add_module('module', module) + weights_observer = observer("minmax", "asym", "per_channel", "int8") + activation_observer = observer("minmax", "sym", "per_tensor", "uint8") + module.qconfig = torch.quantization.QConfig(activation=activation_observer, weight=weights_observer) + self.add_module("quant", torch.quantization.QuantStub(module.qconfig)) + self.add_module("dequant", torch.quantization.DeQuantStub()) + self.add_module("module", module) version = get_torch_version() if version.release >= Version("1.8.0").release: self.dequant.qconfig = module.qconfig @@ -743,7 +774,7 @@ def add_relu(self, x, y): return self.quant(r) for name, child in model.named_children(): - op_name = prefix + '.' + name if prefix != '' else name + op_name = prefix + "." + name if prefix != "" else name if op_name in fallback_ops: child.qconfig = None quantize_op_num = _find_quantized_op_num(model, op_qcfgs, prefix=prefix) @@ -751,7 +782,8 @@ def add_relu(self, x, y): found = False for name_tmp, child_tmp in model.named_children(): if isinstance(child_tmp, torch.quantization.QuantStub) or isinstance( - child_tmp, torch.quantization.DeQuantStub): + child_tmp, torch.quantization.DeQuantStub + ): model._modules[name_tmp] = torch.nn.Identity() found = True if not found: @@ -769,115 +801,99 @@ class TemplateAdaptor(Adaptor): Args: framework_specific_info (dict): dictionary of tuning configure from yaml file. 
""" + def __init__(self, framework_specific_info): super(TemplateAdaptor, self).__init__(framework_specific_info) import torch.quantization as tq + self.version = get_torch_version() # set torch random seed - random_seed = framework_specific_info['random_seed'] + random_seed = framework_specific_info["random_seed"] torch.manual_seed(random_seed) self.bf16_ops = [] - self.use_bf16 = framework_specific_info.get('use_bf16', True) - self.device = framework_specific_info['device'] - self.q_dataloader = framework_specific_info['q_dataloader'] - self.q_func = framework_specific_info.get('q_func', None) - self.benchmark = (GLOBAL_STATE.STATE == MODE.BENCHMARK) - self.workspace_path = framework_specific_info['workspace_path'] + self.use_bf16 = framework_specific_info.get("use_bf16", True) + self.device = framework_specific_info["device"] + self.q_dataloader = framework_specific_info["q_dataloader"] + self.q_func = framework_specific_info.get("q_func", None) + self.benchmark = GLOBAL_STATE.STATE == MODE.BENCHMARK + self.workspace_path = framework_specific_info["workspace_path"] self.is_baseline = False if GLOBAL_STATE.STATE == MODE.BENCHMARK else True self.query_handler = None - self.approach = '' + self.approach = "" self.pre_optimized_model = None self.sub_module_list = None - self.default_qconfig = framework_specific_info.get('default_qconfig', None) + self.default_qconfig = framework_specific_info.get("default_qconfig", None) self.performance_only = framework_specific_info.get("performance_only", False) self.example_inputs = framework_specific_info.get("example_inputs", None) if isinstance(self.example_inputs, (list, tuple)): self.example_inputs = tuple(self.example_inputs) elif isinstance(self.example_inputs, (dict, UserDict)): self.example_inputs = dict(self.example_inputs) - if 'recipes' in framework_specific_info: - self.recipes = framework_specific_info['recipes'] + if "recipes" in framework_specific_info: + self.recipes = framework_specific_info["recipes"] else: self.recipes = None - if 'approach' in framework_specific_info: # pragma: no cover - self.approach = framework_specific_info['approach'] - if framework_specific_info['approach'] in ["post_training_static_quant", - "post_training_auto_quant"]: - if self.version.release < Version("1.7.0").release: # pragma: no cover + if "approach" in framework_specific_info: # pragma: no cover + self.approach = framework_specific_info["approach"] + if framework_specific_info["approach"] in ["post_training_static_quant", "post_training_auto_quant"]: + if self.version.release < Version("1.7.0").release: # pragma: no cover self.q_mapping = tq.default_mappings.DEFAULT_MODULE_MAPPING - elif self.version.release < Version("1.8.0").release: # pragma: no cover - self.q_mapping = \ - tq.quantization_mappings.get_static_quant_module_mappings() + elif self.version.release < Version("1.8.0").release: # pragma: no cover + self.q_mapping = tq.quantization_mappings.get_static_quant_module_mappings() else: - self.q_mapping = \ - tq.quantization_mappings.get_default_static_quant_module_mappings() - elif framework_specific_info['approach'] == "quant_aware_training": - if self.version.release < Version("1.7.0").release: # pragma: no cover + self.q_mapping = tq.quantization_mappings.get_default_static_quant_module_mappings() + elif framework_specific_info["approach"] == "quant_aware_training": + if self.version.release < Version("1.7.0").release: # pragma: no cover self.q_mapping = tq.default_mappings.DEFAULT_QAT_MODULE_MAPPING - elif self.version.release < 
Version("1.8.0").release: # pragma: no cover - self.q_mapping = \ - tq.quantization_mappings.get_qat_module_mappings() + elif self.version.release < Version("1.8.0").release: # pragma: no cover + self.q_mapping = tq.quantization_mappings.get_qat_module_mappings() else: - self.q_mapping = \ - tq.quantization_mappings.get_default_qat_module_mappings() - elif framework_specific_info['approach'] == "post_training_dynamic_quant": + self.q_mapping = tq.quantization_mappings.get_default_qat_module_mappings() + elif framework_specific_info["approach"] == "post_training_dynamic_quant": if self.version.release < Version("1.7.0").release: - self.q_mapping = \ - tq.default_mappings.DEFAULT_DYNAMIC_MODULE_MAPPING + self.q_mapping = tq.default_mappings.DEFAULT_DYNAMIC_MODULE_MAPPING elif self.version.release < Version("1.8.0").release: - self.q_mapping = \ - tq.quantization_mappings.get_dynamic_quant_module_mappings() + self.q_mapping = tq.quantization_mappings.get_dynamic_quant_module_mappings() else: - self.q_mapping = \ - tq.quantization_mappings.get_default_dynamic_quant_module_mappings() - elif framework_specific_info['approach'] == "post_training_weight_only": + self.q_mapping = tq.quantization_mappings.get_default_dynamic_quant_module_mappings() + elif framework_specific_info["approach"] == "post_training_weight_only": pass else: if not self.benchmark: assert False, "Unsupport approach: {}".format(self.approach) - # TODO: will be removed once 'op_type_dict' and 'op_name_dicts' + # TODO: will be removed once 'op_type_dict' and 'op_name_dicts' # for quant_aware_training can be handled in strategy - self.qat_optype_wise = framework_specific_info.get('qat_optype_wise', None) - self.qat_op_wise = framework_specific_info.get('qat_op_wise', None) - + self.qat_optype_wise = framework_specific_info.get("qat_optype_wise", None) + self.qat_op_wise = framework_specific_info.get("qat_op_wise", None) + self.fp32_results = [] self.fp32_preds_as_label = False if self.version.release >= Version("1.8").release: static_quant_mapping = tq.quantization_mappings.get_default_static_quant_module_mappings() - self.fused_op_list = \ - [static_quant_mapping[key] for key in static_quant_mapping if "intrinsic." in str(key)] + self.fused_op_list = [static_quant_mapping[key] for key in static_quant_mapping if "intrinsic." 
in str(key)] self.fused_dict = {} def calib_func(self, model, dataloader, tmp_iterations, conf=None): try: for idx, (input, label) in enumerate(dataloader): - output = pytorch_forward_wrapper(model, - input, - device=self.device, - conf=conf, - running_mode='calibration') + output = pytorch_forward_wrapper( + model, input, device=self.device, conf=conf, running_mode="calibration" + ) if idx >= tmp_iterations - 1: break except Exception as e: for idx, input in enumerate(dataloader): - output = pytorch_forward_wrapper(model, - input, - device=self.device, - conf=conf, - running_mode='calibration') + output = pytorch_forward_wrapper( + model, input, device=self.device, conf=conf, running_mode="calibration" + ) if idx >= tmp_iterations - 1: break - def model_calibration(self, - q_model, - dataloader, - iterations=1, - conf=None, - calib_sampling_size=1): + def model_calibration(self, q_model, dataloader, iterations=1, conf=None, calib_sampling_size=1): assert iterations > 0 with torch.no_grad(): if isinstance(dataloader, BaseDataLoader): @@ -887,28 +903,28 @@ def model_calibration(self, if calib_sampling_size % (batch_size - i) == 0: calib_batch_size = batch_size - i if i != 0: - logger.warning("Reset `calibration.dataloader.batch_size` field " - "to {}".format(calib_batch_size) + - " to make sure the sampling_size is " - "divisible exactly by batch size") + logger.warning( + "Reset `calibration.dataloader.batch_size` field " + "to {}".format(calib_batch_size) + " to make sure the sampling_size is " + "divisible exactly by batch size" + ) break tmp_iterations = int(math.ceil(calib_sampling_size / calib_batch_size)) dataloader.batch(calib_batch_size) self.calib_func(q_model, dataloader, tmp_iterations, conf) except Exception: # pragma: no cover - logger.warning("Fail to forward with batch size={}, set to {} now.".format( - batch_size, 1)) + logger.warning("Fail to forward with batch size={}, set to {} now.".format(batch_size, 1)) dataloader.batch(1) self.calib_func(q_model, dataloader, calib_sampling_size, conf) else: # pragma: no cover - if hasattr(dataloader, 'batch_size') and \ - calib_sampling_size % dataloader.batch_size != 0: + if hasattr(dataloader, "batch_size") and calib_sampling_size % dataloader.batch_size != 0: logger.warning( - "Please note that calibration sampling size {} " \ - "isn't divisible exactly by batch size {}. " \ - "So the real sampling size is {}.". - format(calib_sampling_size, dataloader.batch_size, - dataloader.batch_size * iterations)) + "Please note that calibration sampling size {} " + "isn't divisible exactly by batch size {}. 
" + "So the real sampling size is {}.".format( + calib_sampling_size, dataloader.batch_size, dataloader.batch_size * iterations + ) + ) self.calib_func(q_model, dataloader, iterations, conf) @@ -929,30 +945,29 @@ def eval_func(self, model, dataloader, postprocess, metrics, measurer, iteration output, label = postprocess((output, label)) if metrics: for metric in metrics: - if not hasattr(metric, "compare_label") or \ - (hasattr(metric, "compare_label") and metric.compare_label): + if not hasattr(metric, "compare_label") or ( + hasattr(metric, "compare_label") and metric.compare_label + ): metric.update(output, label) # If distributed dataloader, gather all outputs to update metric - if getattr(dataloader, 'distributed', False) or \ - isinstance(dataloader.sampler, \ - torch.utils.data.distributed.DistributedSampler): + if getattr(dataloader, "distributed", False) or isinstance( + dataloader.sampler, torch.utils.data.distributed.DistributedSampler + ): hvd.init() for metric in metrics: metric.hvd = hvd if self.fp32_preds_as_label: - self.fp32_results.append(output) if self.is_baseline else \ - results.append(output) + self.fp32_results.append(output) if self.is_baseline else results.append(output) if idx + 1 == iteration: break - except Exception as e: # pragma: no cover + except Exception as e: # pragma: no cover logger.warning("The dataloader didn't include label, will try input without label!") for idx, input in enumerate(dataloader): - if (isinstance(input, dict) or isinstance(input, UserDict)): + if isinstance(input, dict) or isinstance(input, UserDict): if not self.benchmark: - assert "label" in input, \ - "The dataloader must include label to measure the metric!" + assert "label" in input, "The dataloader must include label to measure the metric!" label = input["label"].to("cpu") elif not self.benchmark: assert False, "The dataloader must include label to measure the metric!" 
@@ -980,50 +995,41 @@ def eval_func(self, model, dataloader, postprocess, metrics, measurer, iteration if metrics and not self.benchmark: for metric in metrics: - if not hasattr(metric, "compare_label") or \ - (hasattr(metric, "compare_label") and metric.compare_label): + if not hasattr(metric, "compare_label") or ( + hasattr(metric, "compare_label") and metric.compare_label + ): metric.update(output, label) # If distributed dataloader, gather all outputs to update metric - if getattr(dataloader, 'distributed', False) or \ - isinstance(dataloader.sampler, \ - torch.utils.data.distributed.DistributedSampler): + if getattr(dataloader, "distributed", False) or isinstance( + dataloader.sampler, torch.utils.data.distributed.DistributedSampler + ): hvd.init() for metric in metrics: metric.hvd = hvd if self.fp32_preds_as_label: - self.fp32_results.append(output) if self.is_baseline else \ - results.append(output) + self.fp32_results.append(output) if self.is_baseline else results.append(output) if idx + 1 == iteration: break return results - def model_eval(self, - model, - dataloader, - postprocess=None, - metrics=None, - measurer=None, - iteration=-1, - conf=None): + def model_eval(self, model, dataloader, postprocess=None, metrics=None, measurer=None, iteration=-1, conf=None): with torch.no_grad(): if metrics: for metric in metrics: metric.reset() if isinstance(dataloader, BaseDataLoader) and not self.benchmark: try: - results = self.eval_func(model, dataloader, postprocess, metrics, measurer, - iteration, conf) + results = self.eval_func(model, dataloader, postprocess, metrics, measurer, iteration, conf) except Exception: # pragma: no cover - logger.warning("Fail to forward with batch size={}, set to {} now.".format( - dataloader.batch_size, 1)) + logger.warning( + "Fail to forward with batch size={}, set to {} now.".format(dataloader.batch_size, 1) + ) dataloader.batch(1) - results = self.eval_func(model, dataloader, postprocess, metrics, measurer, - iteration, conf) + results = self.eval_func(model, dataloader, postprocess, metrics, measurer, iteration, conf) else: # pragma: no cover - results = self.eval_func(model, dataloader, postprocess, metrics, measurer, - iteration, conf) + results = self.eval_func(model, dataloader, postprocess, metrics, measurer, iteration, conf) if self.fp32_preds_as_label: if self.is_baseline: @@ -1064,133 +1070,150 @@ def _get_quantizable_ops(self, model): q_capability (dictionary): tuning capability for each op from model. 
""" quantizable_ops = [] - self.block_wise =[] - self._get_quantizable_ops_recursively(model, '', quantizable_ops) + self.block_wise = [] + self._get_quantizable_ops_recursively(model, "", quantizable_ops) q_capability = {} - q_capability['block_wise'] = None - q_capability['optypewise'] = OrderedDict() - q_capability['opwise'] = OrderedDict() + q_capability["block_wise"] = None + q_capability["optypewise"] = OrderedDict() + q_capability["opwise"] = OrderedDict() # add block ops if self.block_wise: logger.debug(f"*** Found {len(self.block_wise)} blocks: {self.block_wise}") - q_capability['block_wise'] = self.block_wise[::-1] if self.block_wise else None - + q_capability["block_wise"] = self.block_wise[::-1] if self.block_wise else None + quant_datatypes = self.query_handler.get_quant_datatypes() if self.approach == "quant_aware_training": - capability_pair = [(self.query_handler.get_quantization_capability()['quant_aware'], 'static')] - fp32_config = {'activation': {'dtype': 'fp32'}, 'weight': {'dtype': 'fp32'}} + capability_pair = [(self.query_handler.get_quantization_capability()["quant_aware"], "static")] + fp32_config = {"activation": {"dtype": "fp32"}, "weight": {"dtype": "fp32"}} # Ignore LayerNorm, InstanceNorm3d and Embedding quantizable ops, # due to huge accuracy regression in PyTorch. if isinstance(self, PyTorch_IPEXAdaptor): additional_skipped_module_classes = {} else: - additional_skipped_module_classes = {'LayerNorm', 'InstanceNorm3d', 'Dropout'} - no_fp32_ops = {'QuantStub'} + additional_skipped_module_classes = {"LayerNorm", "InstanceNorm3d", "Dropout"} + no_fp32_ops = {"QuantStub"} for pair in capability_pair: capability, mode = pair for q_op in quantizable_ops: - if q_op not in q_capability['opwise']: - q_capability['opwise'][q_op] = [] - if q_op[1] not in q_capability['optypewise']: - q_capability['optypewise'][q_op[1]] = [] - - op_cfg = copy.deepcopy(capability[q_op[1]]) if q_op[1] in capability \ - else copy.deepcopy(capability['default']) + if q_op not in q_capability["opwise"]: + q_capability["opwise"][q_op] = [] + if q_op[1] not in q_capability["optypewise"]: + q_capability["optypewise"][q_op[1]] = [] + + op_cfg = ( + copy.deepcopy(capability[q_op[1]]) + if q_op[1] in capability + else copy.deepcopy(capability["default"]) + ) - op_cfg['activation']['quant_mode'] = mode if q_op[1] not in \ - ['LSTM', 'GRU', 'LSTMCell', 'GRUCell', 'RNNCell'] else 'dynamic' + op_cfg["activation"]["quant_mode"] = ( + mode if q_op[1] not in ["LSTM", "GRU", "LSTMCell", "GRUCell", "RNNCell"] else "dynamic" + ) # skip the op that only include fp32 if q_op[1] not in additional_skipped_module_classes: - if op_cfg not in q_capability['opwise'][q_op]: - q_capability['opwise'][q_op].append(op_cfg) - if op_cfg not in q_capability['optypewise'][q_op[1]]: - q_capability['optypewise'][q_op[1]].append(op_cfg) + if op_cfg not in q_capability["opwise"][q_op]: + q_capability["opwise"][q_op].append(op_cfg) + if op_cfg not in q_capability["optypewise"][q_op[1]]: + q_capability["optypewise"][q_op[1]].append(op_cfg) if q_op[1] not in no_fp32_ops: - if fp32_config not in q_capability['opwise'][q_op]: - q_capability['opwise'][q_op].append(fp32_config) - if fp32_config not in q_capability['optypewise'][q_op[1]]: - q_capability['optypewise'][q_op[1]].append(fp32_config) + if fp32_config not in q_capability["opwise"][q_op]: + q_capability["opwise"][q_op].append(fp32_config) + if fp32_config not in q_capability["optypewise"][q_op[1]]: + q_capability["optypewise"][q_op[1]].append(fp32_config) elif self.approach == 
"post_training_weight_only": - capability_pair = [(self.query_handler.get_quantization_capability('weight_only_integer'), 'weight_only')] - fp32_config = {'activation': {'dtype': 'fp32'}, 'weight': {'dtype': 'fp32'}} + capability_pair = [(self.query_handler.get_quantization_capability("weight_only_integer"), "weight_only")] + fp32_config = {"activation": {"dtype": "fp32"}, "weight": {"dtype": "fp32"}} for pair in capability_pair: capability, mode = pair for q_op in quantizable_ops: - if q_op not in q_capability['opwise']: - q_capability['opwise'][q_op] = [] - if q_op[1] not in q_capability['optypewise']: - q_capability['optypewise'][q_op[1]] = [] - op_cfg = copy.deepcopy(capability[q_op[1]]) if q_op[1] in capability \ - else copy.deepcopy(capability['default']) - op_cfg['activation']['quant_mode'] = mode - if op_cfg not in q_capability['opwise'][q_op]: - q_capability['opwise'][q_op].append(op_cfg) - q_capability['opwise'][q_op].append(fp32_config) - if op_cfg not in q_capability['optypewise'][q_op[1]]: - q_capability['optypewise'][q_op[1]].append(op_cfg) - q_capability['optypewise'][q_op[1]].append(fp32_config) + if q_op not in q_capability["opwise"]: + q_capability["opwise"][q_op] = [] + if q_op[1] not in q_capability["optypewise"]: + q_capability["optypewise"][q_op[1]] = [] + op_cfg = ( + copy.deepcopy(capability[q_op[1]]) + if q_op[1] in capability + else copy.deepcopy(capability["default"]) + ) + op_cfg["activation"]["quant_mode"] = mode + if op_cfg not in q_capability["opwise"][q_op]: + q_capability["opwise"][q_op].append(op_cfg) + q_capability["opwise"][q_op].append(fp32_config) + if op_cfg not in q_capability["optypewise"][q_op[1]]: + q_capability["optypewise"][q_op[1]].append(op_cfg) + q_capability["optypewise"][q_op[1]].append(fp32_config) else: - if 'weight_only_integer' in quant_datatypes: # TODO: need to enhance - quant_datatypes.remove('weight_only_integer') + if "weight_only_integer" in quant_datatypes: # TODO: need to enhance + quant_datatypes.remove("weight_only_integer") for datatype in quant_datatypes: if self.approach == "post_training_dynamic_quant": capability_pair = [ - (self.query_handler.get_quantization_capability(datatype).get('dynamic', {}), 'dynamic')] + (self.query_handler.get_quantization_capability(datatype).get("dynamic", {}), "dynamic") + ] elif self.approach == "post_training_static_quant": capability_pair = [ - (self.query_handler.get_quantization_capability(datatype).get('static', {}), 'static')] + (self.query_handler.get_quantization_capability(datatype).get("static", {}), "static") + ] else: capability_pair = [ - (self.query_handler.get_quantization_capability(datatype).get('static', {}), 'static'), - (self.query_handler.get_quantization_capability(datatype).get('dynamic', {}), 'dynamic')] + (self.query_handler.get_quantization_capability(datatype).get("static", {}), "static"), + (self.query_handler.get_quantization_capability(datatype).get("dynamic", {}), "dynamic"), + ] - fp32_config = {'activation': {'dtype': 'fp32'}, 'weight': {'dtype': 'fp32'}} + fp32_config = {"activation": {"dtype": "fp32"}, "weight": {"dtype": "fp32"}} # Ignore LayerNorm, InstanceNorm3d and Embedding quantizable ops, # due to huge accuracy regression in PyTorch. 
if isinstance(self, PyTorch_IPEXAdaptor): additional_skipped_module_classes = {} else: - additional_skipped_module_classes = {'LayerNorm', 'InstanceNorm3d', 'Dropout'} - no_fp32_ops = {'QuantStub'} + additional_skipped_module_classes = {"LayerNorm", "InstanceNorm3d", "Dropout"} + no_fp32_ops = {"QuantStub"} for pair in capability_pair: capability, mode = pair for q_op in quantizable_ops: op_cfg = None - if q_op not in q_capability['opwise']: - q_capability['opwise'][q_op] = [] - if q_op[1] not in q_capability['optypewise']: - q_capability['optypewise'][q_op[1]] = [] + if q_op not in q_capability["opwise"]: + q_capability["opwise"][q_op] = [] + if q_op[1] not in q_capability["optypewise"]: + q_capability["optypewise"][q_op[1]] = [] - if mode == 'static' and q_op[1] in ['LSTM', 'GRU', 'LSTMCell', 'GRUCell', 'RNNCell']: + if mode == "static" and q_op[1] in ["LSTM", "GRU", "LSTMCell", "GRUCell", "RNNCell"]: continue - op_cfg = copy.deepcopy(capability[q_op[1]]) if q_op[1] in capability \ - else copy.deepcopy(capability.get('default', fp32_config)) + op_cfg = ( + copy.deepcopy(capability[q_op[1]]) + if q_op[1] in capability + else copy.deepcopy(capability.get("default", fp32_config)) + ) - op_cfg['activation']['quant_mode'] = mode if q_op[1] not in \ - ['LSTM', 'GRU', 'LSTMCell', 'GRUCell', 'RNNCell'] else 'dynamic' + op_cfg["activation"]["quant_mode"] = ( + mode if q_op[1] not in ["LSTM", "GRU", "LSTMCell", "GRUCell", "RNNCell"] else "dynamic" + ) # skip the op that only include fp32 if q_op[1] not in additional_skipped_module_classes: - if op_cfg not in q_capability['opwise'][q_op]: - q_capability['opwise'][q_op].append(op_cfg) - if op_cfg not in q_capability['optypewise'][q_op[1]]: - q_capability['optypewise'][q_op[1]].append(op_cfg) + if op_cfg not in q_capability["opwise"][q_op]: + q_capability["opwise"][q_op].append(op_cfg) + if op_cfg not in q_capability["optypewise"][q_op[1]]: + q_capability["optypewise"][q_op[1]].append(op_cfg) if q_op[1] not in no_fp32_ops: - if fp32_config not in q_capability['opwise'][q_op]: - q_capability['opwise'][q_op].append(fp32_config) - if fp32_config not in q_capability['optypewise'][q_op[1]]: - q_capability['optypewise'][q_op[1]].append(fp32_config) + if fp32_config not in q_capability["opwise"][q_op]: + q_capability["opwise"][q_op].append(fp32_config) + if fp32_config not in q_capability["optypewise"][q_op[1]]: + q_capability["optypewise"][q_op[1]].append(fp32_config) # get bf16 capability - if self.use_bf16 and (CpuInfo().bf16 or os.getenv('FORCE_BF16') == '1') and \ - (self.version.release >= Version("1.11.0").release): + if ( + self.use_bf16 + and (CpuInfo().bf16 or os.getenv("FORCE_BF16") == "1") + and (self.version.release >= Version("1.11.0").release) + ): self.bf16_ops = self.query_handler.get_op_types_by_precision("bf16") bf16_ops = [] - self._get_bf16_ops_recursively(model, '', bf16_ops) + self._get_bf16_ops_recursively(model, "", bf16_ops) mixed_capability = self._combine_capability(bf16_ops, q_capability) return mixed_capability return q_capability @@ -1209,29 +1232,35 @@ def _get_bf16_ops_recursively(self, model, prefix, bf16_ops): """ for name, child in model.named_children(): - op_name = prefix + '.' 
+ name if prefix != '' else name - if str(child.__class__.__name__) in self.bf16_ops \ - and type(child) != torch.nn.Sequential \ - and type(child) != torch.quantization.stubs.DeQuantStub: - bf16_ops.append((op_name, unify_op_type_mapping[str(child.__class__.__name__)] - if str(child.__class__.__name__) in unify_op_type_mapping else - str(child.__class__.__name__))) + op_name = prefix + "." + name if prefix != "" else name + if ( + str(child.__class__.__name__) in self.bf16_ops + and type(child) != torch.nn.Sequential + and type(child) != torch.quantization.stubs.DeQuantStub + ): + bf16_ops.append( + ( + op_name, + unify_op_type_mapping[str(child.__class__.__name__)] + if str(child.__class__.__name__) in unify_op_type_mapping + else str(child.__class__.__name__), + ) + ) elif self.is_fused_module(child): continue else: self._get_bf16_ops_recursively(child, op_name, bf16_ops) def _combine_capability(self, bf16_ops, q_capability): - bf16_config = {'activation': {'dtype': 'bf16'}, 'weight': {'dtype': 'bf16'}} - fp32_config = {'activation': {'dtype': 'fp32'}, 'weight': {'dtype': 'fp32'}} + bf16_config = {"activation": {"dtype": "bf16"}, "weight": {"dtype": "bf16"}} + fp32_config = {"activation": {"dtype": "fp32"}, "weight": {"dtype": "fp32"}} for bf16_op in bf16_ops: - if bf16_op in q_capability['opwise'] and \ - bf16_config not in q_capability['opwise'][bf16_op]: - q_capability['opwise'][bf16_op].append(bf16_config) + if bf16_op in q_capability["opwise"] and bf16_config not in q_capability["opwise"][bf16_op]: + q_capability["opwise"][bf16_op].append(bf16_config) else: - q_capability['opwise'][bf16_op] = [bf16_config, fp32_config] - if bf16_op[1] not in q_capability['optypewise']: - q_capability['optypewise'][bf16_op[1]] = [bf16_config, fp32_config] + q_capability["opwise"][bf16_op] = [bf16_config, fp32_config] + if bf16_op[1] not in q_capability["optypewise"]: + q_capability["optypewise"][bf16_op[1]] = [bf16_config, fp32_config] return q_capability def get_fused_list(self, model): @@ -1264,18 +1293,20 @@ def get_fused_list(self, model): fp32_int8_ops.append(fp32_op_name) continue else: - is_fused_module =False + is_fused_module = False in_fused_loop = False elif op_name == fp32_op_name and not in_fused_loop: in_fused_loop = True fp32_int8_ops.append(fp32_op_name) - elif in_fused_loop and \ - op_name[: prefix_index if prefix_index > -1 else 0] == \ - fp32_op_name[: prefix_fp32_index if prefix_fp32_index > -1 else 0]: + elif ( + in_fused_loop + and op_name[: prefix_index if prefix_index > -1 else 0] + == fp32_op_name[: prefix_fp32_index if prefix_fp32_index > -1 else 0] + ): if "BatchNorm" in str(type(module)): fp32_int8_ops.append(fp32_op_name) continue - elif fp32_type_name in type_name.split(".")[-1][-len(fp32_type_name) - 2:]: + elif fp32_type_name in type_name.split(".")[-1][-len(fp32_type_name) - 2 :]: fp32_int8_ops.append(fp32_op_name) in_fused_loop = False break @@ -1305,7 +1336,7 @@ def diagnosis_helper(self, fp32_model, int8_model, tune_cfg=None, save_path=None optype_list = torch.quantization.get_default_qconfig_propagation_list() supported_optype = [] for optype in optype_list: - op_type = str(optype).rstrip('\'>').split('.')[-1] + op_type = str(optype).rstrip("'>").split(".")[-1] if "intrinsic." 
not in str(optype) and op_type not in exclude_list: supported_optype.append(optype) inspect_node_list = [] @@ -1315,18 +1346,22 @@ def diagnosis_helper(self, fp32_model, int8_model, tune_cfg=None, save_path=None inspect_node_list.append(name) return inspect_node_list, tune_cfg - def inspect_tensor(self, - model, - dataloader, - op_list=None, - iteration_list=None, - inspect_type='activation', - save_to_disk=False, - save_path=None, - quantization_cfg=None): + def inspect_tensor( + self, + model, + dataloader, + op_list=None, + iteration_list=None, + inspect_type="activation", + save_to_disk=False, + save_path=None, + quantization_cfg=None, + ): assert self.version.release >= Version("1.8").release, "Inspect_tensor only support torch 1.8 or above!" - from neural_compressor.utils.utility import dump_data_to_local from torch import dequantize + + from neural_compressor.utils.utility import dump_data_to_local + is_quantized = model.is_quantized op_list_ = [] fp32_int8_map = {} @@ -1335,27 +1370,25 @@ def inspect_tensor(self, for key in self.fused_dict: if op_name in self.fused_dict[key]: op_list_.pop() - fp32_int8_map[op_name] = \ - {'activation': self.fused_dict[key][-1], 'weight': self.fused_dict[key][0]} + fp32_int8_map[op_name] = {"activation": self.fused_dict[key][-1], "weight": self.fused_dict[key][0]} if not is_quantized: op_list_.append(self.fused_dict[key][-1]) elif key not in op_list_: op_list_.append(key) break - assert min(iteration_list) > 0, \ - "Iteration number should great zero, 1 means first iteration." + assert min(iteration_list) > 0, "Iteration number should great zero, 1 means first iteration." iterations = max(iteration_list) if iteration_list is not None else -1 new_model = self._pre_eval_hook(model, op_list=op_list_, iteration_list=iteration_list) self.evaluate(new_model, dataloader, iteration=iterations) observer_dict = {} ret = {} - if inspect_type == 'activation' or inspect_type == 'all': + if inspect_type == "activation" or inspect_type == "all": if self.version.release >= Version("2.0.0").release: from torch.quantization.quantize import _get_observer_dict as get_observer_dict else: from torch.quantization import get_observer_dict - ret['activation'] = [] + ret["activation"] = [] get_observer_dict(new_model.model, observer_dict) if iteration_list is None: iteration_list = [1] @@ -1370,111 +1403,122 @@ def inspect_tensor(self, if type(value) is list: summary[op_name] = {} for index in range(len(value)): - summary[op_name].update({ - op_name + ".output" + str(index): - dequantize(value[index]).numpy() - if value[index].is_quantized else value[index].numpy() - }) + summary[op_name].update( + { + op_name + ".output" + str(index): dequantize(value[index]).numpy() + if value[index].is_quantized + else value[index].numpy() + } + ) else: summary[op_name] = { - op_name + ".output0": - dequantize(value).numpy() if value.is_quantized else value.numpy() + op_name + ".output0": dequantize(value).numpy() if value.is_quantized else value.numpy() } else: if bool(self.fused_dict): if is_quantized: for a in fp32_int8_map: if op_name == a: - tensor_name = fp32_int8_map[a]['weight'] + tensor_name = fp32_int8_map[a]["weight"] if type(value) is list: summary[tensor_name] = {} for index in range(len(value)): - summary[tensor_name].update({ - tensor_name + ".output" + str(index): - dequantize(value[index]).numpy() - if value[index].is_quantized else - value[index].numpy() - }) + summary[tensor_name].update( + { + tensor_name + + ".output" + + str(index): dequantize(value[index]).numpy() + 
if value[index].is_quantized + else value[index].numpy() + } + ) else: summary[tensor_name] = { - tensor_name + ".output0": - dequantize(value).numpy() - if value.is_quantized else value.numpy() + tensor_name + ".output0": dequantize(value).numpy() + if value.is_quantized + else value.numpy() } else: for a in fp32_int8_map: # pragma: no cover - if op_name == fp32_int8_map[a]['activation']: - tensor_name = fp32_int8_map[a]['weight'] + if op_name == fp32_int8_map[a]["activation"]: + tensor_name = fp32_int8_map[a]["weight"] if type(value) is list: summary[tensor_name] = {} for index in range(len(value)): - summary[tensor_name].update({ - tensor_name + ".output" + str(index): - dequantize(value[index]).numpy() - if value[index].is_quantized else - value[index].numpy() - }) + summary[tensor_name].update( + { + tensor_name + + ".output" + + str(index): dequantize(value[index]).numpy() + if value[index].is_quantized + else value[index].numpy() + } + ) else: summary[tensor_name] = { - tensor_name + ".output0": - dequantize(value).numpy() - if value.is_quantized else value.numpy() + tensor_name + ".output0": dequantize(value).numpy() + if value.is_quantized + else value.numpy() } - ret['activation'].append(summary) + ret["activation"].append(summary) - if inspect_type == 'weight' or inspect_type == 'all': - ret['weight'] = {} + if inspect_type == "weight" or inspect_type == "all": + ret["weight"] = {} state_dict = new_model._model.state_dict() for key in state_dict: if not isinstance(state_dict[key], torch.Tensor): continue - if 'weight' not in key and 'bias' not in key: + if "weight" not in key and "bias" not in key: continue - op = key[:key.rfind('.')] - op = op.replace('._packed_params', '') + op = key[: key.rfind(".")] + op = op.replace("._packed_params", "") if op in op_list: - if op in ret['weight']: - ret['weight'][op].update({ - key: - dequantize(state_dict[key]).numpy() - if state_dict[key].is_quantized else state_dict[key].detach().numpy() - }) + if op in ret["weight"]: + ret["weight"][op].update( + { + key: dequantize(state_dict[key]).numpy() + if state_dict[key].is_quantized + else state_dict[key].detach().numpy() + } + ) else: - ret['weight'][op] = { - key: - dequantize(state_dict[key]).numpy() - if state_dict[key].is_quantized else state_dict[key].detach().numpy() + ret["weight"][op] = { + key: dequantize(state_dict[key]).numpy() + if state_dict[key].is_quantized + else state_dict[key].detach().numpy() } else: if bool(self.fused_dict): if is_quantized: for a in fp32_int8_map: if op == a: - tensor_name = fp32_int8_map[a]['weight'] - if tensor_name in ret['weight']: - ret['weight'][tensor_name].update({ - key: - dequantize(state_dict[key]).numpy() - if state_dict[key].is_quantized else - state_dict[key].detach().numpy() - }) + tensor_name = fp32_int8_map[a]["weight"] + if tensor_name in ret["weight"]: + ret["weight"][tensor_name].update( + { + key: dequantize(state_dict[key]).numpy() + if state_dict[key].is_quantized + else state_dict[key].detach().numpy() + } + ) else: - ret['weight'][tensor_name] = \ - {key: dequantize(state_dict[key]).numpy() - if state_dict[key].is_quantized else - state_dict[key].detach().numpy()} + ret["weight"][tensor_name] = { + key: dequantize(state_dict[key]).numpy() + if state_dict[key].is_quantized + else state_dict[key].detach().numpy() + } break else: - ret['weight'] = None + ret["weight"] = None if save_to_disk: if not save_path: save_path = self.workspace_path - dump_data_to_local(ret, save_path, 'inspect_result.pkl') + dump_data_to_local(ret, save_path, 
"inspect_result.pkl") return ret @@ -1505,6 +1549,7 @@ def _with_args(cls_or_self, **kwargs): >>> id(foo_instance1) == id(foo_instance2) False """ + class _PartialWrapper(object): def __init__(self, p): self.p = p @@ -1520,7 +1565,7 @@ def __repr__(self): r = _PartialWrapper(partial(cls_or_self, **kwargs)) return r - ABC = ABCMeta(str("ABC"), (object, ), {}) # compatible with Python 2 *and* 3: + ABC = ABCMeta(str("ABC"), (object,), {}) # compatible with Python 2 *and* 3: class _RecordingObserver(ABC, torch.nn.Module): """The module is mainly for debug and records the tensor values during runtime. @@ -1528,6 +1573,7 @@ class _RecordingObserver(ABC, torch.nn.Module): Args: iteration_list (list, optional): indexs of iteration which to dump tensor. """ + def __init__(self, iteration_list=None, **kwargs): super(_RecordingObserver, self).__init__(**kwargs) self.output_tensors_dict = OrderedDict() @@ -1535,15 +1581,15 @@ def __init__(self, iteration_list=None, **kwargs): self.iteration_list = iteration_list def forward(self, x): - if (self.iteration_list is None and self.current_iter == 1) or \ - (self.iteration_list is not None and - self.current_iter in self.iteration_list): + if (self.iteration_list is None and self.current_iter == 1) or ( + self.iteration_list is not None and self.current_iter in self.iteration_list + ): if type(x) is tuple or type(x) is list: - self.output_tensors_dict[self.current_iter] = \ - [i.to("cpu") if i.device != 'cpu' else i.clone() for i in x] + self.output_tensors_dict[self.current_iter] = [ + i.to("cpu") if i.device != "cpu" else i.clone() for i in x + ] else: - self.output_tensors_dict[self.current_iter] = \ - x.to("cpu") if x.device != "cpu" else x.clone() + self.output_tensors_dict[self.current_iter] = x.to("cpu") if x.device != "cpu" else x.clone() self.current_iter += 1 return x @@ -1554,7 +1600,7 @@ def get_tensor_value(self): with_args = classmethod(_with_args) def _observer_forward_hook(module, input, output): - """Forward hook that calls observer on the output + """Forward hook that calls observer on the output. Args: module (object): input module @@ -1583,26 +1629,25 @@ def _add_observer_(module, op_list=None, prefix=""): """ for name, child in module.named_children(): op_name = name if prefix == "" else prefix + "." 
+ name - if isinstance(child, torch.nn.quantized.FloatFunctional) and \ - (op_list is None or op_name in op_list): - if hasattr(child, 'qconfig') and child.qconfig is not None and ( - op_list is None or op_name in op_list): - child.activation_post_process = \ - child.qconfig.activation() - elif hasattr(child, 'qconfig') and child.qconfig is not None and \ - (op_list is None or op_name in op_list): + if isinstance(child, torch.nn.quantized.FloatFunctional) and (op_list is None or op_name in op_list): + if ( + hasattr(child, "qconfig") + and child.qconfig is not None + and (op_list is None or op_name in op_list) + ): + child.activation_post_process = child.qconfig.activation() + elif ( + hasattr(child, "qconfig") and child.qconfig is not None and (op_list is None or op_name in op_list) + ): # observer and hook will be gone after we swap the module - child.add_module('activation_post_process', child.qconfig.activation()) + child.add_module("activation_post_process", child.qconfig.activation()) child.register_forward_hook(_observer_forward_hook) else: _add_observer_(child, op_list, op_name) - def _propagate_qconfig_helper(module, - qconfig_dict, - white_list=None, - qconfig_parent=None, - prefix='', - fused=False): + def _propagate_qconfig_helper( + module, qconfig_dict, white_list=None, qconfig_parent=None, prefix="", fused=False + ): """This is a helper function for `propagate_qconfig_` Args: @@ -1621,11 +1666,10 @@ def _propagate_qconfig_helper(module, None, module is modified inplace with qconfig attached """ module.qconfig = qconfig_parent - if hasattr(module, '_modules'): + if hasattr(module, "_modules"): for name, child in module.named_children(): - module_prefix = prefix + '.' + name if prefix else name - _propagate_qconfig_helper(child, qconfig_dict, white_list, qconfig_parent, - module_prefix) + module_prefix = prefix + "." + name if prefix else name + _propagate_qconfig_helper(child, qconfig_dict, white_list, qconfig_parent, module_prefix) def _prepare(model, inplace=True, op_list=[], white_list=None): """The model will be attached with observer or fake quant modules, and qconfig @@ -1643,22 +1687,22 @@ def _prepare(model, inplace=True, op_list=[], white_list=None): """ if not inplace: model = copy.deepcopy(model) - _propagate_qconfig_helper(model, - qconfig_dict={}, - white_list=white_list, - qconfig_parent=model.qconfig) + _propagate_qconfig_helper(model, qconfig_dict={}, white_list=white_list, qconfig_parent=model.qconfig) # sanity check common API misusage - if not any(hasattr(m, 'qconfig') and m.qconfig for m in model.modules()): # pragma: no cover - logger.warn("None of the submodule got qconfig applied. Make sure you " - "passed correct configuration through `qconfig_dict` or " - "by assigning the `.qconfig` attribute directly on submodules") + if not any(hasattr(m, "qconfig") and m.qconfig for m in model.modules()): # pragma: no cover + logger.warn( + "None of the submodule got qconfig applied. 
Make sure you " + "passed correct configuration through `qconfig_dict` or " + "by assigning the `.qconfig` attribute directly on submodules" + ) _add_observer_(model, op_list=op_list) return model model = model if model.is_quantized else copy.deepcopy(model) model._model.qconfig = torch.quantization.QConfig( weight=torch.quantization.default_debug_observer, - activation=_RecordingObserver.with_args(iteration_list=iteration_list)) + activation=_RecordingObserver.with_args(iteration_list=iteration_list), + ) _prepare(model._model, op_list=op_list) return model @@ -1674,18 +1718,12 @@ def is_fused_module(self, module): (bool): is fused or not """ op_type = str(type(module)) - if 'fused' in op_type: + if "fused" in op_type: return True else: return False - def calculate_hessian_trace(self, - fp32_model, - dataloader, - q_model, - criterion, - enable_act=False - ): + def calculate_hessian_trace(self, fp32_model, dataloader, q_model, criterion, enable_act=False): """Calculate hessian trace. Args: @@ -1699,17 +1737,26 @@ def calculate_hessian_trace(self, hessian_trace(Dict[Tuple, float]), key: (op_name, op_type); value: hessian trace. """ from .torch_utils.hawq_metric import hawq_top - op_to_traces = hawq_top(fp32_model=fp32_model, - dataloader=dataloader, - q_model=q_model, - criterion=criterion, - enable_act=enable_act) + + op_to_traces = hawq_top( + fp32_model=fp32_model, dataloader=dataloader, q_model=q_model, criterion=criterion, enable_act=enable_act + ) return op_to_traces - def smooth_quant(self, model, dataloader, calib_iter, alpha=0.5, folding=False, - percentile=None, op_types=None, scales_per_op=None, force_re_smooth=False, - record_max_info=False): - """ convert the model by smooth quant. + def smooth_quant( + self, + model, + dataloader, + calib_iter, + alpha=0.5, + folding=False, + percentile=None, + op_types=None, + scales_per_op=None, + force_re_smooth=False, + record_max_info=False, + ): + """Convert the model by smooth quant. Args: model: origin FP32 model @@ -1727,42 +1774,37 @@ def smooth_quant(self, model, dataloader, calib_iter, alpha=0.5, folding=False, model: A modified fp32 model, inplace=True. """ # Note: we should make sure smoothquant is only executed once with inplacing fp32 model. - if hasattr(model._model, '_smoothquant_optimized') and model._model._smoothquant_optimized: + if hasattr(model._model, "_smoothquant_optimized") and model._model._smoothquant_optimized: logger.info("The model is already optimized by SmoothQuant algorithm, skip it.") return model - if self.__class__.__name__ == 'PyTorch_IPEXAdaptor' and self.version.release < \ - Version("2.1").release: + if self.__class__.__name__ == "PyTorch_IPEXAdaptor" and self.version.release < Version("2.1").release: if folding is None: folding = True - logger.info( - "IPEX version >= 2.1 is required for SmoothQuant folding=False, reset folding=True.") + logger.info("IPEX version >= 2.1 is required for SmoothQuant folding=False, reset folding=True.") else: assert folding, "IPEX version >= 2.1 is required for SmoothQuant folding=False." 
- if not hasattr(self, 'sq') or force_re_smooth: + if not hasattr(self, "sq") or force_re_smooth: from .torch_utils.smooth_quant import TorchSmoothQuant - self.sq = TorchSmoothQuant(model._model, dataloader=dataloader, - example_inputs=self.example_inputs, q_func=self.q_func) + + self.sq = TorchSmoothQuant( + model._model, dataloader=dataloader, example_inputs=self.example_inputs, q_func=self.q_func + ) kwargs = {} ## different backends may have different default values - self.sq.record_max_info = record_max_info # whether record the max info of input and weight. - if op_types != None: + self.sq.record_max_info = record_max_info # whether record the max info of input and weight. + if op_types is not None: kwargs["op_types"] = op_types - if percentile != None: - kwargs['percentile'] = percentile - if scales_per_op != None: - kwargs['scales_per_op'] = scales_per_op - model._model = self.sq.transform( - alpha=alpha, - folding=folding, - calib_iter=calib_iter, - **kwargs - ) + if percentile is not None: + kwargs["percentile"] = percentile + if scales_per_op is not None: + kwargs["scales_per_op"] = scales_per_op + model._model = self.sq.transform(alpha=alpha, folding=folding, calib_iter=calib_iter, **kwargs) if self.sq.record_max_info: model.sq_max_info = self.sq.max_value_info return model def _apply_pre_optimization(self, model, tune_cfg, recover=False): - """update model parameters based on tune_cfg. + """Update model parameters based on tune_cfg. Args: model (torch.nn.Module): smoothquant optimized model. @@ -1776,15 +1818,16 @@ def _apply_pre_optimization(self, model, tune_cfg, recover=False): sq_max_info = model.sq_max_info if sq_max_info: from .torch_utils.smooth_quant import TorchSmoothQuant + tsq = TorchSmoothQuant(q_model, None) - alpha = tune_cfg['recipe_cfgs']['smooth_quant_args']['alpha'] + alpha = tune_cfg["recipe_cfgs"]["smooth_quant_args"]["alpha"] for op_name, info in sq_max_info.items(): - if alpha == 'auto': - alpha = info['alpha'] + if alpha == "auto": + alpha = info["alpha"] absorb_layer = op_name - absorbed_layer = info['absorbed_layer'] - input_minmax = info['input_minmax'] - weight_max = info['weight_max'] + absorbed_layer = info["absorbed_layer"] + input_minmax = info["input_minmax"] + weight_max = info["weight_max"] abs_input_max = torch.max(torch.abs(input_minmax[0]), torch.abs(input_minmax[1])) input_power = torch.pow(abs_input_max, alpha) weight_power = torch.pow(weight_max, 1 - alpha) @@ -1794,11 +1837,11 @@ def _apply_pre_optimization(self, model, tune_cfg, recover=False): scale = 1.0 / scale for layer in absorbed_layer: tsq._scale_layer_weight(layer, scale) - tsq._absorb_scales(absorb_layer, 1.0/scale) + tsq._absorb_scales(absorb_layer, 1.0 / scale) logger.debug(f"Current smoothquant scale of {op_name} is {scale}, alpha is {alpha}") def qdq_quantize(self, model, tune_cfg): - """insert quant, dequant pairs before linear to simulate quantization. + """Insert quant, dequant pairs before linear to simulate quantization. Args: model (torch.nn.Module): smoothquant optimized model. @@ -1808,64 +1851,70 @@ def qdq_quantize(self, model, tune_cfg): model: qdq quantized model. 
""" q_model = model._model - from .torch_utils.util import fetch_module, set_module from .torch_utils.model_wrapper import QDQLinear, SQLinearWrapper + from .torch_utils.util import fetch_module, set_module + smoothquant_scale_info = {} fallback_op_name_list = [] stats_result = {} - for (op_name, op_type), qconfig in tune_cfg['op'].items(): - if op_type == 'Linear' and qconfig['weight']['dtype'] != 'int8': + for (op_name, op_type), qconfig in tune_cfg["op"].items(): + if op_type == "Linear" and qconfig["weight"]["dtype"] != "int8": fallback_op_name_list.append(op_name) sq_max_info = model.sq_max_info if sq_max_info: - assert not q_model._smoothquant_optimized, \ - "The model is already optimized by smoothquant, cannot apply new alpha." + assert ( + not q_model._smoothquant_optimized + ), "The model is already optimized by smoothquant, cannot apply new alpha." for _, info in sq_max_info.items(): - alpha = info['alpha'] - absorbed_layer = info['absorbed_layer'] - input_minmax = info['input_minmax'] - weight_max = info['weight_max'] + alpha = info["alpha"] + absorbed_layer = info["absorbed_layer"] + input_minmax = info["input_minmax"] + weight_max = info["weight_max"] abs_input_max = torch.max(torch.abs(input_minmax[0]), torch.abs(input_minmax[1])) input_power = torch.pow(abs_input_max, alpha) weight_power = torch.pow(weight_max, 1 - alpha) scale = torch.clip(input_power / weight_power, min=1e-5) for op_name in absorbed_layer: module = fetch_module(q_model, op_name) - new_module = SQLinearWrapper(module, 1.0/scale, input_minmax, alpha) + new_module = SQLinearWrapper(module, 1.0 / scale, input_minmax, alpha) set_module(q_model, op_name, new_module) logger.debug(f"Current SmoothQuant alpha of {op_name} is {alpha}") - smoothquant_op_info = {'sq_linear': {}, 'qdq_linear': []} - stats_result['SQLinearWrapper'] = {'INT8(QDQ)': 0, 'BF16': 0, 'FP32': 0} + smoothquant_op_info = {"sq_linear": {}, "qdq_linear": []} + stats_result["SQLinearWrapper"] = {"INT8(QDQ)": 0, "BF16": 0, "FP32": 0} for name, module in q_model.named_modules(): if isinstance(module, SQLinearWrapper): - smoothquant_op_info['sq_linear'][name] = module.input_scale + smoothquant_op_info["sq_linear"][name] = module.input_scale if name not in fallback_op_name_list: smoothquant_scale_info[name] = { - 'input_scale_for_mul': module.input_scale, - 'quant_scale': module.scale, - 'quant_zero_point': module.zero_point, - 'quant_dtype': module.dtype, - } - smoothquant_op_info['qdq_linear'].append(name+'.sq_linear') + "input_scale_for_mul": module.input_scale, + "quant_scale": module.scale, + "quant_zero_point": module.zero_point, + "quant_dtype": module.dtype, + } + smoothquant_op_info["qdq_linear"].append(name + ".sq_linear") new_module = QDQLinear(module.sq_linear, module.scale, module.zero_point, module.dtype) - set_module(q_model, name+'.sq_linear', new_module) - stats_result['SQLinearWrapper']['INT8(QDQ)'] += 1 + set_module(q_model, name + ".sq_linear", new_module) + stats_result["SQLinearWrapper"]["INT8(QDQ)"] += 1 else: - stats_result['SQLinearWrapper']['FP32'] += 1 + stats_result["SQLinearWrapper"]["FP32"] += 1 - tune_cfg['recipe_cfgs']['smoothquant_op_info'] = smoothquant_op_info + tune_cfg["recipe_cfgs"]["smoothquant_op_info"] = smoothquant_op_info model._model = q_model model.q_config = copy.deepcopy(tune_cfg) - field_names=["Op Type", "Total", "INT8", "BF16", "FP32"] - output_data = [[ - op_type, sum(stats_result[op_type].values()), stats_result[op_type]['INT8(QDQ)'], - stats_result[op_type]['BF16'], stats_result[op_type]['FP32']] - 
for op_type in stats_result.keys()] - Statistics(output_data, - header='Mixed Precision Statistics', - field_names=field_names).print_stat() + field_names = ["Op Type", "Total", "INT8", "BF16", "FP32"] + output_data = [ + [ + op_type, + sum(stats_result[op_type].values()), + stats_result[op_type]["INT8(QDQ)"], + stats_result[op_type]["BF16"], + stats_result[op_type]["FP32"], + ] + for op_type in stats_result.keys() + ] + Statistics(output_data, header="Mixed Precision Statistics", field_names=field_names).print_stat() return model @@ -1875,7 +1924,7 @@ def qdq_quantize(self, model, tune_cfg): "ConvReLU3d": "Conv3d", "LinearReLU": "Linear", "ConvBn2d": "Conv2d", - "ConvBnReLU2d": "Conv2d" + "ConvBnReLU2d": "Conv2d", } @@ -1886,6 +1935,7 @@ class PyTorchAdaptor(TemplateAdaptor): Args: framework_specific_info (dict): dictionary of tuning configure from yaml file. """ + def __init__(self, framework_specific_info): super(PyTorchAdaptor, self).__init__(framework_specific_info) """ @@ -1922,8 +1972,7 @@ def __init__(self, framework_specific_info): query_config_file = "pytorch_gpu.yaml" else: # pragma: no cover assert False, "Unsupport this device {}".format(self.device) - self.query_handler = PyTorchQuery( - local_config_file=os.path.join(os.path.dirname(__file__), query_config_file)) + self.query_handler = PyTorchQuery(local_config_file=os.path.join(os.path.dirname(__file__), query_config_file)) self.white_list = get_torch_white_list(self.approach) @@ -1945,27 +1994,27 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None): Returns: (object): quantized model """ - assert isinstance(model._model, torch.nn.Module), \ - "The model passed in is not the instance of torch.nn.Module" + assert isinstance(model._model, torch.nn.Module), "The model passed in is not the instance of torch.nn.Module" if self.performance_only: q_model = model else: try: q_model = copy.deepcopy(model) except Exception as e: # pragma: no cover - logger.warning("Fail to deep copy the model due to {}, inplace is used now.".format( - repr(e))) + logger.warning("Fail to deep copy the model due to {}, inplace is used now.".format(repr(e))) q_model = model # For smoothquant optimized model - recipe_cfgs = tune_cfg.get('recipe_cfgs', None) - if recipe_cfgs and recipe_cfgs.get('smooth_quant', False) \ - and not recipe_cfgs['smooth_quant_args']['folding'] \ - and self.approach != 'post_training_dynamic_quant': + recipe_cfgs = tune_cfg.get("recipe_cfgs", None) + if ( + recipe_cfgs + and recipe_cfgs.get("smooth_quant", False) + and not recipe_cfgs["smooth_quant_args"]["folding"] + and self.approach != "post_training_dynamic_quant" + ): return self.qdq_quantize(q_model, tune_cfg) - if recipe_cfgs and recipe_cfgs.get('smooth_quant', False) \ - and recipe_cfgs['smooth_quant_args']['folding']: + if recipe_cfgs and recipe_cfgs.get("smooth_quant", False) and recipe_cfgs["smooth_quant_args"]["folding"]: self._apply_pre_optimization(q_model, tune_cfg) # For tensorboard display @@ -1974,8 +2023,8 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None): self.tune_cfg["reduce_range"] = REDUCE_RANGE self.tune_cfg["framework"] = "pytorch" op_cfgs = _cfg_to_qconfig(tune_cfg, self.approach) - self.tune_cfg['bf16_ops_list'] = op_cfgs['bf16_ops_list'] - del op_cfgs['bf16_ops_list'] + self.tune_cfg["bf16_ops_list"] = op_cfgs["bf16_ops_list"] + del op_cfgs["bf16_ops_list"] gc.collect() if self.version.release < Version("2.0.0").release: @@ -1983,63 +2032,60 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None): else: from 
torch.quantization.quantize import _add_observer_ as add_observer_ - if self.approach == 'quant_aware_training': + if self.approach == "quant_aware_training": q_model._model.train() else: q_model._model.eval() - if self.version.release < Version("1.7.0").release or \ - self.approach != 'quant_aware_training': + if self.version.release < Version("1.7.0").release or self.approach != "quant_aware_training": _propagate_qconfig(q_model._model, op_cfgs, approach=self.approach) # sanity check common API misusage - if not any(hasattr(m, 'qconfig') and m.qconfig for m in q_model._model.modules()): - logger.warn("None of the submodule got qconfig applied. Make sure you " - "passed correct configuration through `qconfig_dict` or " - "by assigning the `.qconfig` attribute directly on submodules.") + if not any(hasattr(m, "qconfig") and m.qconfig for m in q_model._model.modules()): + logger.warn( + "None of the submodule got qconfig applied. Make sure you " + "passed correct configuration through `qconfig_dict` or " + "by assigning the `.qconfig` attribute directly on submodules." + ) - if self.approach in ['post_training_static_quant', 'post_training_auto_quant']: + if self.approach in ["post_training_static_quant", "post_training_auto_quant"]: add_observer_(q_model._model) if q_func is None: - iterations = tune_cfg.get('calib_iteration', 1) - self.model_calibration(q_model._model, - dataloader, - iterations, - calib_sampling_size=tune_cfg.get('calib_sampling_size', 1)) + iterations = tune_cfg.get("calib_iteration", 1) + self.model_calibration( + q_model._model, dataloader, iterations, calib_sampling_size=tune_cfg.get("calib_sampling_size", 1) + ) else: q_func(q_model._model) - elif self.approach == 'quant_aware_training': + elif self.approach == "quant_aware_training": if self.version.release >= Version("1.7.0").release: _propagate_qconfig(q_model._model, op_cfgs, is_qat_convert=True) - torch.quantization.convert(q_model._model, - mapping=self.q_mapping, - inplace=True, - remove_qconfig=False) + torch.quantization.convert(q_model._model, mapping=self.q_mapping, inplace=True, remove_qconfig=False) _propagate_qconfig(q_model._model, op_cfgs) - add_observer_(q_model._model, self.white_list, - set(self.q_mapping.values())) + add_observer_(q_model._model, self.white_list, set(self.q_mapping.values())) else: # pragma: no cover add_observer_(q_model._model) torch.quantization.convert(q_model._model, self.q_mapping, inplace=True) # q_func can be created by neural_compressor internal or passed by user. It's critical to # distinguish how q_func is passed since neural_compressor built-in functions accept neural_compressor # model and user defined func should accept framework model. - q_model._model = q_func( - q_model if getattr(q_func, 'builtin', None) else q_model._model) + q_model._model = q_func(q_model if getattr(q_func, "builtin", None) else q_model._model) assert q_model._model is not None, "Please return a trained model in train function!" 
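The hunk above reformats the eager-mode post-training flow: propagate qconfigs, attach observers, run calibration, then convert. As a minimal standalone sketch of that same flow with the stock torch.quantization API (the toy model and calibration data are illustrative, not taken from this patch):

import torch

class TinyModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.quant = torch.quantization.QuantStub()
        self.fc = torch.nn.Linear(16, 8)
        self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):
        return self.dequant(self.fc(self.quant(x)))

m = TinyModel().eval()
m.qconfig = torch.quantization.get_default_qconfig("fbgemm")
prepared = torch.quantization.prepare(m)            # inserts observers, the role of add_observer_ above
for _ in range(4):                                   # calibration loop, the role of model_calibration
    prepared(torch.randn(2, 16))
quantized = torch.quantization.convert(prepared)     # swaps modules for quantized kernels
print(type(quantized.fc))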
q_model._model.eval() - if self.approach == 'quant_aware_training': + if self.approach == "quant_aware_training": torch.quantization.convert(q_model._model, inplace=True) else: torch.quantization.convert(q_model._model, mapping=self.q_mapping, inplace=True) - if len(self.tune_cfg['bf16_ops_list']) > 0 and \ - (self.version.release >= Version("1.11.0").release) and \ - (CpuInfo().bf16 or os.getenv('FORCE_BF16') == '1'): # pragma: no cover + if ( + len(self.tune_cfg["bf16_ops_list"]) > 0 + and (self.version.release >= Version("1.11.0").release) + and (CpuInfo().bf16 or os.getenv("FORCE_BF16") == "1") + ): # pragma: no cover q_model._model = torch_utils.bf16_convert.Convert(q_model._model, self.tune_cfg) self.fused_dict = self.get_fused_list(q_model.model) q_model.q_config = copy.deepcopy(self.tune_cfg) - if self.approach != 'post_training_dynamic_quant': + if self.approach != "post_training_dynamic_quant": self._get_scale_zeropoint(q_model._model, q_model.q_config) q_model.is_quantized = True @@ -2047,15 +2093,17 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None): torch_utils.util.get_embedding_contiguous(q_model._model) return q_model - def evaluate(self, - model, - dataloader, - postprocess=None, - metrics=None, - measurer=None, - iteration=-1, - tensorboard=False, - fp32_baseline=False): + def evaluate( + self, + model, + dataloader, + postprocess=None, + metrics=None, + measurer=None, + iteration=-1, + tensorboard=False, + fp32_baseline=False, + ): """Execute the evaluate process on the specified model. Args: @@ -2076,8 +2124,7 @@ def evaluate(self, model = self._pre_eval_hook(model) model_ = model._model - assert isinstance( - model_, torch.nn.Module), "The model passed in is not the instance of torch.nn.Module" + assert isinstance(model_, torch.nn.Module), "The model passed in is not the instance of torch.nn.Module" model_.eval() if self.device == "cpu": model_.to("cpu") @@ -2086,8 +2133,9 @@ def evaluate(self, model_.to("dpcpp") if metrics: - self.fp32_preds_as_label = any([hasattr(metric, "compare_label") and \ - not metric.compare_label for metric in metrics]) + self.fp32_preds_as_label = any( + [hasattr(metric, "compare_label") and not metric.compare_label for metric in metrics] + ) acc = self.model_eval(model_, dataloader, postprocess, metrics, measurer, iteration) if tensorboard: @@ -2097,36 +2145,44 @@ def evaluate(self, def _pre_hook_for_qat(self, dataloader=None): # self.model._model is needed here. 
self.model._model.qconfig = torch.quantization.QConfig( - activation=torch.quantization.FakeQuantize.with_args(dtype=torch.quint8, - qscheme=torch.per_tensor_affine, - reduce_range=REDUCE_RANGE), - weight=torch.quantization.default_weight_fake_quant) + activation=torch.quantization.FakeQuantize.with_args( + dtype=torch.quint8, qscheme=torch.per_tensor_affine, reduce_range=REDUCE_RANGE + ), + weight=torch.quantization.default_weight_fake_quant, + ) self.non_quant_dict = self.get_non_quant_modules(self.model.kwargs) quantizable_ops = [] - self._get_quantizable_ops_recursively(self.model._model, '', quantizable_ops) + self._get_quantizable_ops_recursively(self.model._model, "", quantizable_ops) bf16_ops = [] - if self.version.release >= Version("1.11.0").release and self.use_bf16 and \ - (CpuInfo().bf16 or os.getenv('FORCE_BF16') == '1'): # pragma: no cover + if ( + self.version.release >= Version("1.11.0").release + and self.use_bf16 + and (CpuInfo().bf16 or os.getenv("FORCE_BF16") == "1") + ): # pragma: no cover self.bf16_ops = self.query_handler.get_op_types_by_precision("bf16") - self._get_bf16_ops_recursively(self.model._model, '', bf16_ops) + self._get_bf16_ops_recursively(self.model._model, "", bf16_ops) bf16_ops_list = [(op) for op in bf16_ops if op not in quantizable_ops] self.model.model.training = True torch.quantization.prepare_qat(self.model._model, inplace=True) # This is a flag for reloading self.model.q_config = { - 'is_oneshot': True, - 'framework': 'pytorch', - 'reduce_range': REDUCE_RANGE, - 'approach': 'quant_aware_training', - 'bf16_ops_list': bf16_ops_list, + "is_oneshot": True, + "framework": "pytorch", + "reduce_range": REDUCE_RANGE, + "approach": "quant_aware_training", + "bf16_ops_list": bf16_ops_list, } def _post_hook_for_qat(self): torch.quantization.convert(self.model._model, inplace=True) - if self.model.q_config is not None and len(self.model.q_config['bf16_ops_list']) > 0 and \ - self.version.release >= Version("1.11.0").release and self.use_bf16 and \ - (CpuInfo().bf16 or os.getenv('FORCE_BF16') == '1'): # pragma: no cover + if ( + self.model.q_config is not None + and len(self.model.q_config["bf16_ops_list"]) > 0 + and self.version.release >= Version("1.11.0").release + and self.use_bf16 + and (CpuInfo().bf16 or os.getenv("FORCE_BF16") == "1") + ): # pragma: no cover self.model._model = torch_utils.bf16_convert.Convert(self.model._model, self.model.q_config) def _pre_hook_for_hvd(self, dataloader=None): @@ -2134,8 +2190,7 @@ def _pre_hook_for_hvd(self, dataloader=None): hvd.init() hvd.broadcast_parameters(self.model._model.state_dict(), root_rank=0) hvd.broadcast_optimizer_state(self.optimizer, root_rank=0) - self.optimizer = hvd.DistributedOptimizer( - self.optimizer, named_parameters=self.model._model.named_parameters()) + self.optimizer = hvd.DistributedOptimizer(self.optimizer, named_parameters=self.model._model.named_parameters()) def train(self, model, dataloader, optimizer_tuple, criterion_tuple, hooks, **kwargs): """Execute the train process on the specified model. 
@@ -2157,18 +2212,18 @@ def train(self, model, dataloader, optimizer_tuple, criterion_tuple, hooks, **kw optimizer = optimizer_tuple[0](model_.parameters(), **optimizer_tuple[1]) self.optimizer = optimizer criterion = criterion_tuple[0](**criterion_tuple[1]) - start_epochs = kwargs['kwargs']['start_epoch'] - end_epochs = kwargs['kwargs']['end_epoch'] - iters = kwargs['kwargs']['iteration'] + start_epochs = kwargs["kwargs"]["start_epoch"] + end_epochs = kwargs["kwargs"]["end_epoch"] + iters = kwargs["kwargs"]["iteration"] if hooks is not None: - on_train_begin = hooks['on_train_begin'] - on_train_end = hooks['on_train_end'] - on_epoch_begin = hooks['on_epoch_begin'] - on_epoch_end = hooks['on_epoch_end'] - on_step_begin = hooks['on_step_begin'] - on_step_end = hooks['on_step_end'] - on_after_compute_loss = hooks['on_after_compute_loss'] - on_before_optimizer_step = hooks['on_before_optimizer_step'] + on_train_begin = hooks["on_train_begin"] + on_train_end = hooks["on_train_end"] + on_epoch_begin = hooks["on_epoch_begin"] + on_epoch_end = hooks["on_epoch_end"] + on_step_begin = hooks["on_step_begin"] + on_step_end = hooks["on_step_end"] + on_after_compute_loss = hooks["on_after_compute_loss"] + on_before_optimizer_step = hooks["on_before_optimizer_step"] if hooks is not None: on_train_begin() for nepoch in range(start_epochs, end_epochs): @@ -2177,16 +2232,16 @@ def train(self, model, dataloader, optimizer_tuple, criterion_tuple, hooks, **kw cnt = 0 if hooks is not None: on_epoch_begin(nepoch) - if getattr(dataloader, 'distributed', False) \ - or isinstance(dataloader.sampler, \ - torch.utils.data.distributed.DistributedSampler): + if getattr(dataloader, "distributed", False) or isinstance( + dataloader.sampler, torch.utils.data.distributed.DistributedSampler + ): dataloader.sampler.set_epoch(nepoch) for image, target in dataloader: # TODO: to support adjust lr with epoch target = target.to(device) if hooks is not None: on_step_begin(cnt) - print('.', end='', flush=True) + print(".", end="", flush=True) cnt += 1 output = pytorch_forward_wrapper(model_, image, device=device) loss = criterion(output, target) @@ -2214,6 +2269,7 @@ def train(self, model, dataloader, optimizer_tuple, criterion_tuple, hooks, **kw def _dump_model_op_stats(self, model, tune_cfg): """This is a function to dump quantizable ops of model to user. 
+ Args: model (object): input model tune_cfg (dict): quantization config @@ -2224,55 +2280,54 @@ def _dump_model_op_stats(self, model, tune_cfg): ignore_log = False modules = dict(model.named_modules()) # fetch quantizable ops supported in Neural Compressor from tune_cfg - for key in tune_cfg['op']: + for key in tune_cfg["op"]: op_name = key[0] - op_type = str(type(modules[op_name])).rstrip('\'>').split('.')[-1] - if op_type == 'BF16ModuleWrapper': # pragma: no cover - op_type = str(type(modules[op_name].module)).rstrip('\'>').split('.')[-1] - if op_type == 'DequantQuantWrapper': - op_type = str(type(modules[op_name].module)).rstrip('\'>').split('.')[-1] - if 'Functional' in op_type: - op_type = op_name.split('.')[-1] + op_type = str(type(modules[op_name])).rstrip("'>").split(".")[-1] + if op_type == "BF16ModuleWrapper": # pragma: no cover + op_type = str(type(modules[op_name].module)).rstrip("'>").split(".")[-1] + if op_type == "DequantQuantWrapper": + op_type = str(type(modules[op_name].module)).rstrip("'>").split(".")[-1] + if "Functional" in op_type: + op_type = op_name.split(".")[-1] if op_type not in res.keys(): - res[op_type] = {'INT8': 0, 'BF16': 0, 'FP32': 0} - value = tune_cfg['op'][key] + res[op_type] = {"INT8": 0, "BF16": 0, "FP32": 0} + value = tune_cfg["op"][key] # Special cases: QuantStub, Embedding - if ('weight' in value and value['weight']['dtype'] == 'fp32') or \ - ('weight' not in value and value['activation']['dtype'] == 'fp32'): - res[op_type]['FP32'] += 1 - elif value['activation']['dtype'] == 'bf16': # pragma: no cover - res[op_type]['BF16'] += 1 + if ("weight" in value and value["weight"]["dtype"] == "fp32") or ( + "weight" not in value and value["activation"]["dtype"] == "fp32" + ): + res[op_type]["FP32"] += 1 + elif value["activation"]["dtype"] == "bf16": # pragma: no cover + res[op_type]["BF16"] += 1 else: - res[op_type]['INT8'] += 1 + res[op_type]["INT8"] += 1 # fetch other quantizable ops supported in PyTorch from model for name, child in modules.items(): - op_type = str(type(child)).rstrip('\'>').split('.')[-1] - if tune_cfg['approach'] != 'post_training_dynamic_quant': - if op_type == 'DeQuantize': + op_type = str(type(child)).rstrip("'>").split(".")[-1] + if tune_cfg["approach"] != "post_training_dynamic_quant": + if op_type == "DeQuantize": if op_type not in res.keys(): - res[op_type] = {'INT8': 0, 'BF16': 0, 'FP32': 0} - res[op_type]['INT8'] += 1 - if op_type in self.non_quant_dict['skipped_module_classes']: + res[op_type] = {"INT8": 0, "BF16": 0, "FP32": 0} + res[op_type]["INT8"] += 1 + if op_type in self.non_quant_dict["skipped_module_classes"]: ignore_log = True if op_type not in res.keys(): - res[op_type] = {'INT8': 0, 'BF16': 0, 'FP32': 0} - res[op_type]['FP32'] += 1 + res[op_type] = {"INT8": 0, "BF16": 0, "FP32": 0} + res[op_type]["FP32"] += 1 # show results to users if ignore_log: - logger.info("Ignore LayerNorm, InstanceNorm3d and Embedding quantizable ops" \ - " due to accuracy issue in PyTorch.") - - field_names=["Op Type", "Total", "INT8", "BF16", "FP32"] - output_data = [[ - op_type, sum(res[op_type].values()), - res[op_type]['INT8'], res[op_type]['BF16'], res[op_type]['FP32']] - for op_type in res.keys()] - - Statistics(output_data, - header='Mixed Precision Statistics', - field_names=field_names).print_stat() - self.optype_statistics = field_names, output_data + logger.info( + "Ignore LayerNorm, InstanceNorm3d and Embedding quantizable ops" " due to accuracy issue in PyTorch." 
+ ) + + field_names = ["Op Type", "Total", "INT8", "BF16", "FP32"] + output_data = [ + [op_type, sum(res[op_type].values()), res[op_type]["INT8"], res[op_type]["BF16"], res[op_type]["FP32"]] + for op_type in res.keys() + ] + Statistics(output_data, header="Mixed Precision Statistics", field_names=field_names).print_stat() + self.optype_statistics = field_names, output_data def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops): """This is a helper function for `query_fw_capability`, @@ -2290,25 +2345,33 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops): for op_name, child in model.named_modules(): if self.is_fused_module(child): for name, _ in child.named_children(): - module_prefix = op_name + '.' + name + module_prefix = op_name + "." + name if module_prefix in module_dict: module_dict.pop(module_prefix) # remove sub-modules of fused modules for op_name, child in module_dict.items(): # there is accuracy issue in quantized LayerNorm op in pytorch <1.8.1, # so remove it here - if op_name in self.non_quant_dict['skipped_module_names'] or \ - str(child.__class__.__name__) in \ - self.non_quant_dict['skipped_module_classes']: + if ( + op_name in self.non_quant_dict["skipped_module_names"] + or str(child.__class__.__name__) in self.non_quant_dict["skipped_module_classes"] + ): continue - if type(child) in self.white_list and type(child) != torch.nn.Sequential and \ - type(child) != torch.quantization.stubs.DeQuantStub: + if ( + type(child) in self.white_list + and type(child) != torch.nn.Sequential + and type(child) != torch.quantization.stubs.DeQuantStub + ): quantizable_ops.append( - (op_name, unify_op_type_mapping[str(child.__class__.__name__)] - if str(child.__class__.__name__) in unify_op_type_mapping else str( - child.__class__.__name__))) + ( + op_name, + unify_op_type_mapping[str(child.__class__.__name__)] + if str(child.__class__.__name__) in unify_op_type_mapping + else str(child.__class__.__name__), + ) + ) def _get_scale_zeropoint(self, model, tune_cfg): - """get activation scale and zero_point for converted model. + """Get activation scale and zero_point for converted model. Args: model (dir): Int8 model converted from fp32 model. @@ -2320,11 +2383,11 @@ def _get_scale_zeropoint(self, model, tune_cfg): None """ modules = dict(model.named_modules()) - for key, value in tune_cfg['op'].items(): - if hasattr(modules[key[0]], 'scale'): - value['activation']['scale'] = float(modules[key[0]].scale) - if hasattr(modules[key[0]], 'zero_point'): - value['activation']['zero_point'] = int(modules[key[0]].zero_point) + for key, value in tune_cfg["op"].items(): + if hasattr(modules[key[0]], "scale"): + value["activation"]["scale"] = float(modules[key[0]].scale) + if hasattr(modules[key[0]], "zero_point"): + value["activation"]["zero_point"] = int(modules[key[0]].zero_point) def is_fused_child(self, op_name): """This is a helper function for `_post_eval_hook` @@ -2334,14 +2397,12 @@ def is_fused_child(self, op_name): Returns: (bool): if this op is fused - """ for key in self.fused_dict: if op_name in self.fused_dict[key]: return True return False - def _post_eval_hook(self, model, **args): """The function is used to do some post process after complete evaluation. Here, it used to dump quantizable op's output tensor. 
@@ -2353,6 +2414,7 @@ def _post_eval_hook(self, model, **args): None """ from torch.utils.tensorboard import SummaryWriter + if self.version.release >= Version("2.0.0").release: from torch.quantization.quantize import _get_observer_dict as get_observer_dict else: @@ -2360,19 +2422,18 @@ def _post_eval_hook(self, model, **args): model = model._model - if args is not None and 'accuracy' in args: - accuracy = args['accuracy'] + if args is not None and "accuracy" in args: + accuracy = args["accuracy"] else: - accuracy = '' + accuracy = "" if self.dump_times == 0: - writer = SummaryWriter('runs/eval/baseline' + '_acc' + str(accuracy), model) + writer = SummaryWriter("runs/eval/baseline" + "_acc" + str(accuracy), model) else: - writer = SummaryWriter( - 'runs/eval/tune_' + str(self.dump_times) + '_acc' + str(accuracy), model) + writer = SummaryWriter("runs/eval/tune_" + str(self.dump_times) + "_acc" + str(accuracy), model) if self.dump_times == 0: - for (input, _) in self.q_dataloader: + for input, _ in self.q_dataloader: if isinstance(input, dict) or isinstance(input, UserDict): if self.device == "gpu": for inp in input.keys(): @@ -2405,8 +2466,7 @@ def _post_eval_hook(self, model, **args): op = op_name if summary[op_name + ".output"][iter].is_quantized: - writer.add_histogram(op + "/Output/int8", - torch.dequantize(summary[op_name + ".output"][iter])) + writer.add_histogram(op + "/Output/int8", torch.dequantize(summary[op_name + ".output"][iter])) else: writer.add_histogram(op + "/Output/fp32", summary[op_name + ".output"][iter]) @@ -2414,17 +2474,17 @@ def _post_eval_hook(self, model, **args): for key in state_dict: if not isinstance(state_dict[key], torch.Tensor): continue - op = key[:key.rfind('.')] + op = key[: key.rfind(".")] if self.is_fused_child(op) is True: # fused child tensorboard tag will be merge - weight = key[key.rfind('.') + 1:] - op = op[:op.rfind('.')] + '/' + weight + weight = key[key.rfind(".") + 1 :] + op = op[: op.rfind(".")] + "/" + weight else: - weight = key[key.rfind('.') + 1:] - op = key[:key.rfind('.')] + '/' + weight + weight = key[key.rfind(".") + 1 :] + op = key[: key.rfind(".")] + "/" + weight # To merge ._packed_params - op = op.replace('._packed_params', '') + op = op.replace("._packed_params", "") if state_dict[key].is_quantized: writer.add_histogram(op + "/int8", torch.dequantize(state_dict[key])) @@ -2444,10 +2504,10 @@ def set_tensor(self, model, tensor_dict): state_dict = model._model.state_dict() tensor_name = None for key in tensor_dict.keys(): - end = key.rfind('.') + end = key.rfind(".") op_name = key[:end] state_op_name = None - weight_bias = key[end + 1:] + weight_bias = key[end + 1 :] for op in self.fused_dict: if op_name in self.fused_dict[op]: if model.is_quantized: @@ -2455,32 +2515,27 @@ def set_tensor(self, model, tensor_dict): else: state_op_name = self.fused_dict[op][0] # elif op_name in self.fused_dict[op]: - # state_op_name = op + # state_op_name = op if state_op_name is None: state_op_name = op_name for state_dict_key in state_dict.keys(): - state_key_end = state_dict_key.rfind('.') - state_key = state_dict_key[:state_key_end].replace('._packed_params', '') + state_key_end = state_dict_key.rfind(".") + state_key = state_dict_key[:state_key_end].replace("._packed_params", "") if weight_bias in state_dict_key and state_op_name == state_key: tensor_name = state_dict_key assert tensor_name is not None, key + " is not in the state dict" tensor = torch.from_numpy(tensor_dict[key]) dtype = state_dict[tensor_name].dtype if 
state_dict[tensor_name].is_quantized: - if 'channel' in str(state_dict[tensor_name].qscheme()): + if "channel" in str(state_dict[tensor_name].qscheme()): scales = state_dict[tensor_name].q_per_channel_scales() zero_points = state_dict[tensor_name].q_per_channel_zero_points() axis = state_dict[tensor_name].q_per_channel_axis() - state_dict[tensor_name] = torch.quantize_per_channel(tensor, - scales, - zero_points, - axis, - dtype=dtype) - elif 'tensor' in str(state_dict[tensor_name].qscheme()): + state_dict[tensor_name] = torch.quantize_per_channel(tensor, scales, zero_points, axis, dtype=dtype) + elif "tensor" in str(state_dict[tensor_name].qscheme()): scales = state_dict[tensor_name].q_scale() zero_points = state_dict[tensor_name].q_zero_point() - state_dict[tensor_name] = torch.quantize_per_tensor( - tensor, scales, zero_points, dtype) + state_dict[tensor_name] = torch.quantize_per_tensor(tensor, scales, zero_points, dtype) else: state_dict[tensor_name] = tensor model._model.load_state_dict(state_dict) @@ -2513,15 +2568,15 @@ def get_non_quant_modules(self, model_kwargs): skipped_module_names = model_kwargs.get("non_quant_module_name", []) skipped_module_classes = model_kwargs.get("non_quant_module_class", []) custom_non_quant_dict = { - 'skipped_module_names': skipped_module_names, - 'skipped_module_classes': skipped_module_classes + "skipped_module_names": skipped_module_names, + "skipped_module_classes": skipped_module_classes, } # Ignore LayerNorm, InstanceNorm3d and Embedding quantizable ops, # due to huge accuracy regression in PyTorch. - additional_skipped_module_classes = ['LayerNorm', 'InstanceNorm3d', 'Embedding', 'Dropout'] - if self.approach == 'post_training_dynamic_quant': - additional_skipped_module_classes.remove('Embedding') - custom_non_quant_dict['skipped_module_classes'] += additional_skipped_module_classes + additional_skipped_module_classes = ["LayerNorm", "InstanceNorm3d", "Embedding", "Dropout"] + if self.approach == "post_training_dynamic_quant": + additional_skipped_module_classes.remove("Embedding") + custom_non_quant_dict["skipped_module_classes"] += additional_skipped_module_classes return custom_non_quant_dict @@ -2538,9 +2593,7 @@ def get_non_quant_modules(self, model_kwargs): "Linear_Relu": "Linear", "": "Linear", "": "MaxPool2d", - 're': { - "= 1.10.0" + assert self.approach in [ + "post_training_static_quant", + "post_training_auto_quant", + ], "IPEX in INC only supports approach is static or auto" + assert not self.version.release < Version("1.10.0").release, "INC support IPEX version >= 1.10.0" # check smoothquant folding value - recipe_cfgs = tune_cfg.get('recipe_cfgs', None) - if 'smooth_quant_args' in recipe_cfgs and 'folding' in recipe_cfgs['smooth_quant_args']: - if recipe_cfgs['smooth_quant_args']['folding'] is None: + recipe_cfgs = tune_cfg.get("recipe_cfgs", None) + if "smooth_quant_args" in recipe_cfgs and "folding" in recipe_cfgs["smooth_quant_args"]: + if recipe_cfgs["smooth_quant_args"]["folding"] is None: if self.version.release < Version("2.1").release: folding = True else: - folding=False + folding = False else: - folding = recipe_cfgs['smooth_quant_args']['folding'] + folding = recipe_cfgs["smooth_quant_args"]["folding"] # Update model parameter when smoothquant folding = False - if recipe_cfgs and recipe_cfgs.get('smooth_quant', False) \ - and not folding and self.approach != 'post_training_dynamic_quant': + if ( + recipe_cfgs + and recipe_cfgs.get("smooth_quant", False) + and not folding + and self.approach != 
"post_training_dynamic_quant" + ): return self.qdq_quantize(model, q_model, tune_cfg, dataloader, q_func) # Update model parameter when smoothquant folding = True - if recipe_cfgs and recipe_cfgs.get('smooth_quant', False) and folding: + if recipe_cfgs and recipe_cfgs.get("smooth_quant", False) and folding: self._apply_pre_optimization(model, tune_cfg) - assert self.approach != 'quant_aware_training', \ - "Intel PyTorch Extension didn't support quantization aware training mode" - assert not self.version.release < Version("1.10.0").release, \ - "INC support IPEX version >= 1.10.0" + assert ( + self.approach != "quant_aware_training" + ), "Intel PyTorch Extension didn't support quantization aware training mode" + assert not self.version.release < Version("1.10.0").release, "INC support IPEX version >= 1.10.0" qscheme = self._cfg_to_qconfig(tune_cfg) # Update json file in self.ipex_config_path - iterations = tune_cfg.get('calib_iteration', 1) + iterations = tune_cfg.get("calib_iteration", 1) model._model.eval() inplace = True if self.performance_only else False if self.version.release >= Version("1.12.0").release: # Check save_qconf_summary part is a workaroud for IPEX bug. # Sometimes the prepared model from get_op_capablitiy loss this attribute - if not hasattr(model._model, "save_qconf_summary") or \ - not hasattr(model._model, "load_qconf_summary"): + if not hasattr(model._model, "save_qconf_summary") or not hasattr(model._model, "load_qconf_summary"): from torch.ao.quantization import MinMaxObserver, PerChannelMinMaxObserver, QConfig + if self.version.release >= Version("2.1").release: static_qconfig = ipex.quantization.default_static_qconfig_mapping else: - static_qconfig = QConfig(activation=MinMaxObserver.with_args( - qscheme=torch.per_tensor_affine, dtype=torch.quint8), - weight=PerChannelMinMaxObserver.with_args(dtype=torch.qint8, \ - qscheme=torch.per_channel_symmetric)) + static_qconfig = QConfig( + activation=MinMaxObserver.with_args(qscheme=torch.per_tensor_affine, dtype=torch.quint8), + weight=PerChannelMinMaxObserver.with_args( + dtype=torch.qint8, qscheme=torch.per_channel_symmetric + ), + ) if isinstance(self.example_inputs, dict): - model._model = ipex.quantization.prepare(model._model, static_qconfig, - example_kwarg_inputs=self.example_inputs, inplace=inplace) + model._model = ipex.quantization.prepare( + model._model, static_qconfig, example_kwarg_inputs=self.example_inputs, inplace=inplace + ) else: - model._model = ipex.quantization.prepare(model._model, static_qconfig, - example_inputs=self.example_inputs, inplace=inplace) + model._model = ipex.quantization.prepare( + model._model, static_qconfig, example_inputs=self.example_inputs, inplace=inplace + ) model._model.load_qconf_summary(qconf_summary=self.ipex_config_path) if q_func is not None: q_func(model._model) else: - self.model_calibration(model._model, dataloader, iterations, None, - tune_cfg.get('calib_sampling_size', 1)) + self.model_calibration( + model._model, dataloader, iterations, None, tune_cfg.get("calib_sampling_size", 1) + ) model._model.save_qconf_summary(qconf_summary=self.ipex_config_path) self._ipex_post_quant_process(model, q_model, dataloader, inplace=inplace) else: # for IPEX version < 1.12 - ipex_conf = ipex.quantization.QuantConf(configure_file=self.ipex_config_path, - qscheme=qscheme) # pylint: disable=E1101 - self.model_calibration(q_model._model, dataloader, iterations, ipex_conf, - tune_cfg.get('calib_sampling_size', 1)) + ipex_conf = ipex.quantization.QuantConf( + 
configure_file=self.ipex_config_path, qscheme=qscheme + ) # pylint: disable=E1101 + self.model_calibration( + q_model._model, dataloader, iterations, ipex_conf, tune_cfg.get("calib_sampling_size", 1) + ) ipex_conf.save(self.ipex_config_path) - ipex_conf = ipex.quantization.QuantConf(self.ipex_config_path) # pylint: disable=E1101 - q_model._model = ipex.quantization.convert(q_model._model, - ipex_conf, - self.example_inputs, - inplace=True) # pylint: disable=E1121 + ipex_conf = ipex.quantization.QuantConf(self.ipex_config_path) # pylint: disable=E1101 + q_model._model = ipex.quantization.convert( + q_model._model, ipex_conf, self.example_inputs, inplace=True + ) # pylint: disable=E1121 # Recover model parameter when smoothquant folding = True, due to IPEX bug #1 - if recipe_cfgs and recipe_cfgs.get('smooth_quant', False) \ - and recipe_cfgs['smooth_quant_args']['folding'] and not inplace: + if ( + recipe_cfgs + and recipe_cfgs.get("smooth_quant", False) + and recipe_cfgs["smooth_quant_args"]["folding"] + and not inplace + ): self._apply_pre_optimization(model, tune_cfg, recover=True) - with open(self.ipex_config_path, 'r') as f: + with open(self.ipex_config_path, "r") as f: q_model.tune_cfg = json.load(f) q_model.ipex_config_path = self.ipex_config_path if self.version.release >= Version("1.12.0").release: @@ -2683,8 +2749,11 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None): return q_model def _ipex_post_quant_process(self, model, q_model, dataloader, inplace=False): - if self.use_bf16 and (CpuInfo().bf16 or os.getenv('FORCE_BF16') == '1') and \ - (self.version.release >= Version("1.11.0").release): + if ( + self.use_bf16 + and (CpuInfo().bf16 or os.getenv("FORCE_BF16") == "1") + and (self.version.release >= Version("1.11.0").release) + ): with torch.no_grad(): with torch.cpu.amp.autocast(): q_model._model = ipex.quantization.convert(model._model, inplace=inplace) @@ -2696,8 +2765,9 @@ def _ipex_post_quant_process(self, model, q_model, dataloader, inplace=False): q_model._model = torch.jit.freeze(q_model._model.eval()) except: if isinstance(self.example_inputs, dict): - q_model._model = torch.jit.trace(q_model._model, example_kwarg_inputs=self.example_inputs, - strict=False) + q_model._model = torch.jit.trace( + q_model._model, example_kwarg_inputs=self.example_inputs, strict=False + ) else: q_model._model = torch.jit.trace(q_model._model, self.example_inputs, strict=False) q_model._model = torch.jit.freeze(q_model._model.eval()) @@ -2712,8 +2782,9 @@ def _ipex_post_quant_process(self, model, q_model, dataloader, inplace=False): q_model._model = torch.jit.freeze(q_model._model.eval()) except: if isinstance(self.example_inputs, dict): - q_model._model = torch.jit.trace(q_model._model, example_kwarg_inputs=self.example_inputs, - strict=False) + q_model._model = torch.jit.trace( + q_model._model, example_kwarg_inputs=self.example_inputs, strict=False + ) else: q_model._model = torch.jit.trace(q_model._model, self.example_inputs, strict=False) q_model._model = torch.jit.freeze(q_model._model.eval()) @@ -2724,6 +2795,7 @@ def _ipex_post_quant_process(self, model, q_model, dataloader, inplace=False): def _dump_model_op_stats(self, tune_cfg): """This is a function to dump quantizable ops of model to user. 
+ Args: tune_cfg (dict): quantization config Returns: @@ -2735,17 +2807,26 @@ def _dump_model_op_stats(self, tune_cfg): op_type = "" for op in op_type_list: if "class" in op: - op_type = op[op.rfind(".") + 1: op.rfind("'")] \ - if op_type == "" else op_type + "&" + op[op.rfind(".") + 1: op.rfind("'")] + op_type = ( + op[op.rfind(".") + 1 : op.rfind("'")] + if op_type == "" + else op_type + "&" + op[op.rfind(".") + 1 : op.rfind("'")] + ) elif "method" in op: start = op.find("'") + 1 if start > 1: - op_type = op[start: op.find("'", start)] \ - if op_type == "" else op_type + "&" + op[start: op.find("'", start)] + op_type = ( + op[start : op.find("'", start)] + if op_type == "" + else op_type + "&" + op[start : op.find("'", start)] + ) else: start = op.find("method") + 7 - op_type = op[start: op.find(" ", start)] \ - if op_type == "" else op_type + "&" + op[start: op.find(" ", start)] + op_type = ( + op[start : op.find(" ", start)] + if op_type == "" + else op_type + "&" + op[start : op.find(" ", start)] + ) else: op_type = op if op_type == "" else op_type + "&" + op if op_type not in res.keys(): @@ -2755,72 +2836,71 @@ def _dump_model_op_stats(self, tune_cfg): elif v["weight"]["dtype"] == "fp32": res[op_type]["FP32"] += 1 - output_data = [[ - op_type, - sum(res[op_type].values()), res[op_type]['INT8'], res[op_type]['BF16'], - res[op_type]['FP32'] - ] for op_type in res.keys()] + output_data = [ + [op_type, sum(res[op_type].values()), res[op_type]["INT8"], res[op_type]["BF16"], res[op_type]["FP32"]] + for op_type in res.keys() + ] - Statistics(output_data, - header='Mixed Precision Statistics', - field_names=["Op Type", "Total", "INT8", "BF16", "FP32"]).print_stat() + Statistics( + output_data, header="Mixed Precision Statistics", field_names=["Op Type", "Total", "INT8", "BF16", "FP32"] + ).print_stat() def _cfg_to_qconfig(self, tune_cfg): """Convert tune configure to quantization config for each op. - Args: - tune_cfg (dict): dictionary of tune configure for each op - ipex_config_path: configure file of Intel PyTorch Extension - - tune_cfg should be a format like below: - { - 'calib_iteration': 10, - 'op': { - ('op1', 'CONV2D'): { - 'activation': {'dtype': 'uint8', - 'algorithm': 'minmax', - 'scheme':'sym', - 'granularity': 'per_tensor'}, - 'weight': {'dtype': 'int8', - 'algorithm': 'kl', - 'scheme':'asym', - 'granularity': 'per_channel'} - }, - ('op2', 'RELU): { - 'activation': {'dtype': 'int8', - 'scheme': 'asym', - 'granularity': 'per_tensor', - 'algorithm': 'minmax'} - }, - ('op3', 'CONV2D'): { - 'activation': {'dtype': 'fp32'}, - 'weight': {'dtype': 'fp32'} - }, - ... - } - } + Args: + tune_cfg (dict): dictionary of tune configure for each op + ipex_config_path: configure file of Intel PyTorch Extension + + tune_cfg should be a format like below: + { + 'calib_iteration': 10, + 'op': { + ('op1', 'CONV2D'): { + 'activation': {'dtype': 'uint8', + 'algorithm': 'minmax', + 'scheme':'sym', + 'granularity': 'per_tensor'}, + 'weight': {'dtype': 'int8', + 'algorithm': 'kl', + 'scheme':'asym', + 'granularity': 'per_channel'} + }, + ('op2', 'RELU): { + 'activation': {'dtype': 'int8', + 'scheme': 'asym', + 'granularity': 'per_tensor', + 'algorithm': 'minmax'} + }, + ('op3', 'CONV2D'): { + 'activation': {'dtype': 'fp32'}, + 'weight': {'dtype': 'fp32'} + }, + ... + } + } """ assert self.cfgs is not None, "No configure for IPEX int8 model..." 
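A concrete, made-up instance of the tune_cfg layout documented above can make the scheme handling that follows easier to read; only the key names come from the docstring, the values are illustrative:

import torch

tune_cfg = {
    "calib_iteration": 10,
    "op": {
        ("conv1", "CONV2D"): {
            "activation": {"dtype": "uint8", "algorithm": "minmax", "scheme": "asym", "granularity": "per_tensor"},
            "weight": {"dtype": "int8", "algorithm": "kl", "scheme": "sym", "granularity": "per_channel"},
        },
        ("fc1", "LINEAR"): {
            "activation": {"dtype": "fp32"},
            "weight": {"dtype": "fp32"},
        },
    },
}

# The pre-1.12 IPEX branch below collapses the per-op activation scheme into one torch qscheme.
scheme = tune_cfg["op"][("conv1", "CONV2D")]["activation"].get("scheme", "asym")
qscheme = torch.per_tensor_affine if scheme == "asym" else torch.per_tensor_symmetric
print(qscheme)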
- if self.version.release < Version("1.12.0").release: # pragma: no cover - for key in tune_cfg['op']: + if self.version.release < Version("1.12.0").release: # pragma: no cover + for key in tune_cfg["op"]: try: - scheme = tune_cfg['op'][key]['activation']['scheme'] + scheme = tune_cfg["op"][key]["activation"]["scheme"] except: - scheme = 'asym' - if scheme not in ['asym', 'sym']: - scheme = 'asym' + scheme = "asym" + if scheme not in ["asym", "sym"]: + scheme = "asym" break - for key in tune_cfg['op']: - value = tune_cfg['op'][key] + for key in tune_cfg["op"]: + value = tune_cfg["op"][key] pattern = self.get_pattern(key, self.fuse_ops) assert isinstance(value, dict) - assert 'activation' in value - if value['activation']['dtype'] == 'fp32': - if 'weight' in value: - assert value['weight']['dtype'] == 'fp32' + assert "activation" in value + if value["activation"]["dtype"] == "fp32": + if "weight" in value: + assert value["weight"]["dtype"] == "fp32" for op_cfg in self.cfgs: if op_cfg["id"] == key[0]: - if key[1] in ['relu_', 'add_']: + if key[1] in ["relu_", "add_"]: continue num_inputs = len(op_cfg["inputs_quantized"]) num_outputs = len(op_cfg["outputs_quantized"]) @@ -2829,24 +2909,24 @@ def _cfg_to_qconfig(self, tune_cfg): for o_num in range(num_outputs): op_cfg["outputs_quantized"][o_num] = False if pattern: - if pattern[1] in ['relu_', 'add_']: + if pattern[1] in ["relu_", "add_"]: continue - tune_cfg['op'][pattern]['activation']['dtype'] = 'fp32' - if 'weight' in tune_cfg['op'][pattern]: - tune_cfg['op'][pattern]['weight']['dtype'] = 'fp32' + tune_cfg["op"][pattern]["activation"]["dtype"] = "fp32" + if "weight" in tune_cfg["op"][pattern]: + tune_cfg["op"][pattern]["weight"]["dtype"] = "fp32" else: for op_cfg in self.cfgs: if op_cfg["id"] == key[0]: - if key[1] in ['relu_', 'add_']: + if key[1] in ["relu_", "add_"]: continue num_inputs = len(op_cfg["inputs_quantized"]) num_outputs = len(op_cfg["outputs_quantized"]) for i_num in range(num_inputs): - op_cfg["inputs_quantized"][i_num] = \ - self.default_cfgs[key[0]]["inputs_quantized"][i_num] + op_cfg["inputs_quantized"][i_num] = self.default_cfgs[key[0]]["inputs_quantized"][i_num] for o_num in range(num_outputs): - op_cfg["outputs_quantized"][o_num] = \ - self.default_cfgs[key[0]]["outputs_quantized"][o_num] + op_cfg["outputs_quantized"][o_num] = self.default_cfgs[key[0]]["outputs_quantized"][ + o_num + ] with open(self.ipex_config_path, "w") as write_f: json.dump(self.cfgs, write_f) if scheme == "asym": @@ -2855,33 +2935,34 @@ def _cfg_to_qconfig(self, tune_cfg): return torch.per_tensor_symmetric else: op_infos = copy.deepcopy(self.op_infos_from_cfgs) - self.cfgs = torch_utils.util.check_cfg_and_qconfig(tune_cfg['op'], - self.cfgs, - op_infos, - self.output_tensor_id_op_name) + self.cfgs = torch_utils.util.check_cfg_and_qconfig( + tune_cfg["op"], self.cfgs, op_infos, self.output_tensor_id_op_name + ) with open(self.ipex_config_path, "w") as write_f: json.dump(self.cfgs, write_f, indent=4) return None - def get_pattern(self, fallback_op, fuse_ops): # pragma: no cover + def get_pattern(self, fallback_op, fuse_ops): # pragma: no cover for fuse_pattern in fuse_ops: if fuse_pattern[0] == fallback_op: - if fuse_pattern[1] in ['relu_', 'add_']: + if fuse_pattern[1] in ["relu_", "add_"]: return None else: return fuse_pattern[1] return None - def evaluate(self, - model, - dataloader, - postprocess=None, - metrics=None, - measurer=None, - iteration=-1, - tensorboard=False, - fp32_baseline=False): + def evaluate( + self, + model, + dataloader, + 
postprocess=None, + metrics=None, + measurer=None, + iteration=-1, + tensorboard=False, + fp32_baseline=False, + ): """Execute the evaluate process on the specified model. Args: @@ -2906,13 +2987,17 @@ def evaluate(self, model_.eval() if metrics: - self.fp32_preds_as_label = any([hasattr(metric, "compare_label") and \ - not metric.compare_label for metric in metrics]) + self.fp32_preds_as_label = any( + [hasattr(metric, "compare_label") and not metric.compare_label for metric in metrics] + ) - ipex_config = (self.ipex_config_path if not self.benchmark else None) + ipex_config = self.ipex_config_path if not self.benchmark else None if self.version.release < Version("1.12.0").release: - conf = (ipex.quantization.QuantConf(configure_file=ipex_config) # pylint: disable=E1101 - if not self.is_baseline else None) + conf = ( + ipex.quantization.QuantConf(configure_file=ipex_config) # pylint: disable=E1101 + if not self.is_baseline + else None + ) else: conf = None @@ -2934,6 +3019,7 @@ def query_fw_capability(self, model): def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops): """This is a helper function for `query_fw_capability`, and it will get all quantizable ops from model. + Args: model (object): input model prefix (string): prefix of op name @@ -2941,18 +3027,18 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops): Returns: None """ - + # group ops by postion for transform-based model from .torch_utils.pattern_detector import TransformerBasedModelBlockPatternDetector + detector = TransformerBasedModelBlockPatternDetector(model) detect_result = detector.detect_block() attention_block = detect_result.get("attention_blocks", None) - ffn_blocks = detect_result.get("ffn_blocks", None) + ffn_blocks = detect_result.get("ffn_blocks", None) logger.info(f"Attention Blocks: {len(attention_block)}") logger.info(f"FFN Blocks: {len(ffn_blocks)}") if not os.path.exists(self.ipex_config_path): - assert isinstance(model, torch.nn.Module), \ - "The model passed in is not the instance of torch.nn.Module" + assert isinstance(model, torch.nn.Module), "The model passed in is not the instance of torch.nn.Module" if hasattr(model, "save_qconf_summary"): os.makedirs(os.path.dirname(self.ipex_config_path), exist_ok=True) @@ -2968,9 +3054,10 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops): # create a quantization config file for intel pytorch extension model os.makedirs(os.path.dirname(self.ipex_config_path), exist_ok=True) if self.version.release < Version("1.12.0").release: - assert self.q_func is None, ("IPEX < 1.12.0 didn't support calibration function, " - "Please use IPEX >= 1.12.0!") - ipex_conf = ipex.quantization.QuantConf(qscheme=torch.per_tensor_symmetric) # pylint: disable=E1101 + assert self.q_func is None, ( + "IPEX < 1.12.0 didn't support calibration function, " "Please use IPEX >= 1.12.0!" 
+ ) + ipex_conf = ipex.quantization.QuantConf(qscheme=torch.per_tensor_symmetric) # pylint: disable=E1101 self.model_calibration( model, self.q_dataloader, @@ -2978,39 +3065,51 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops): ) ipex_conf.save(self.ipex_config_path) else: - if self.approach in ['post_training_static_quant', 'post_training_auto_quant']: - assert self.q_dataloader is not None or self.example_inputs is not None, \ - "IPEX need q_dataloader or example_inputs to prepare the model" + if self.approach in ["post_training_static_quant", "post_training_auto_quant"]: + assert ( + self.q_dataloader is not None or self.example_inputs is not None + ), "IPEX need q_dataloader or example_inputs to prepare the model" from torch.ao.quantization import MinMaxObserver, PerChannelMinMaxObserver, QConfig + if self.version.release >= Version("2.1").release: # HistogramObserver will cause a performance issue. # static_qconfig = ipex.quantization.default_static_qconfig_mapping - qconfig = QConfig(activation=MinMaxObserver.with_args( - qscheme=torch.per_tensor_affine, dtype=torch.quint8), - weight=PerChannelMinMaxObserver.with_args(dtype=torch.qint8, \ - qscheme=torch.per_channel_symmetric)) + qconfig = QConfig( + activation=MinMaxObserver.with_args(qscheme=torch.per_tensor_affine, dtype=torch.quint8), + weight=PerChannelMinMaxObserver.with_args( + dtype=torch.qint8, qscheme=torch.per_channel_symmetric + ), + ) from torch.ao.quantization import QConfigMapping + static_qconfig = QConfigMapping().set_global(qconfig) else: - static_qconfig = QConfig(activation=MinMaxObserver.with_args( - qscheme=torch.per_tensor_affine, dtype=torch.quint8), - weight=PerChannelMinMaxObserver.with_args(dtype=torch.qint8, \ - qscheme=torch.per_channel_symmetric)) + static_qconfig = QConfig( + activation=MinMaxObserver.with_args(qscheme=torch.per_tensor_affine, dtype=torch.quint8), + weight=PerChannelMinMaxObserver.with_args( + dtype=torch.qint8, qscheme=torch.per_channel_symmetric + ), + ) # For smoothquant optimized model, need ipex version >= 2.1 - if self.recipes and self.recipes.get('smooth_quant', False) \ - and self.version.release >= Version("2.1").release: # pragma: no cover - smooth_quant_args = self.recipes.get('smooth_quant_args', {}) - folding = smooth_quant_args.get('folding', False) + if ( + self.recipes + and self.recipes.get("smooth_quant", False) + and self.version.release >= Version("2.1").release + ): # pragma: no cover + smooth_quant_args = self.recipes.get("smooth_quant_args", {}) + folding = smooth_quant_args.get("folding", False) if not folding: static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5) if self.example_inputs is None: self.example_inputs = get_example_inputs(model, self.q_dataloader) if isinstance(self.example_inputs, dict): - model = ipex.quantization.prepare(model, static_qconfig, - example_kwarg_inputs=self.example_inputs, inplace=True) + model = ipex.quantization.prepare( + model, static_qconfig, example_kwarg_inputs=self.example_inputs, inplace=True + ) else: - model = ipex.quantization.prepare(model, static_qconfig, - example_inputs=self.example_inputs, inplace=True) + model = ipex.quantization.prepare( + model, static_qconfig, example_inputs=self.example_inputs, inplace=True + ) if self.q_dataloader is not None or self.example_inputs is not None: self._simple_inference(model, self.q_dataloader, iterations=1) @@ -3024,22 +3123,25 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops): 
model.save_qconf_summary(qconf_summary=self.ipex_config_path) if isinstance(self.q_dataloader, BaseDataLoader): self.q_dataloader.batch(batch_size) - logger.info('Recovery `calibration.dataloader.batchsize` {} according \ - to config.yaml'.format(batch_size)) + logger.info( + "Recovery `calibration.dataloader.batchsize` {} according \ + to config.yaml".format( + batch_size + ) + ) map_op_name_to_fqn = {} - with open(self.ipex_config_path, 'r') as f: + with open(self.ipex_config_path, "r") as f: self.cfgs = json.load(f) - if self.version.release < Version("1.12.0").release: # pragma: no cover + if self.version.release < Version("1.12.0").release: # pragma: no cover self.default_cfgs = copy.deepcopy(self.cfgs) self.fuse_ops = self.get_fuse_ops(self.cfgs) for op_cfg in self.cfgs: if op_cfg["name"] in unify_op_type_mapping_ipex: - quantizable_ops.append((op_cfg["id"], - unify_op_type_mapping_ipex[op_cfg["name"]])) + quantizable_ops.append((op_cfg["id"], unify_op_type_mapping_ipex[op_cfg["name"]])) else: re_flag = False - for pattern, unify_op_type in unify_op_type_mapping_ipex['re'].items(): + for pattern, unify_op_type in unify_op_type_mapping_ipex["re"].items(): if re.match(pattern, op_cfg["name"]): re_flag = True quantizable_ops.append((op_cfg["id"], unify_op_type)) @@ -3047,26 +3149,29 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops): if not re_flag: quantizable_ops.append((op_cfg["id"], op_cfg["name"])) else: - ops_name, op_infos_from_cfgs, input_tensor_id_op_name, \ - output_tensor_id_op_name = torch_utils.util.paser_cfgs(self.cfgs) - quantizable_op_names = torch_utils.util.get_quantizable_ops_from_cfgs(ops_name, - op_infos_from_cfgs, - input_tensor_id_op_name) + ( + ops_name, + op_infos_from_cfgs, + input_tensor_id_op_name, + output_tensor_id_op_name, + ) = torch_utils.util.paser_cfgs(self.cfgs) + quantizable_op_names = torch_utils.util.get_quantizable_ops_from_cfgs( + ops_name, op_infos_from_cfgs, input_tensor_id_op_name + ) for name in quantizable_op_names: # name : list if len(name) == 1: module_key = name[0][0] op_cfg_id = name[0][2] - ipex_op_type = self.cfgs[module_key]['q_op_infos'][op_cfg_id]['op_type'] - module_fqn = self.cfgs[module_key]['q_op_infos'][op_cfg_id].get('fqn', None) - + ipex_op_type = self.cfgs[module_key]["q_op_infos"][op_cfg_id]["op_type"] + module_fqn = self.cfgs[module_key]["q_op_infos"][op_cfg_id].get("fqn", None) + if ipex_op_type in unify_op_type_mapping_ipex: - quantizable_ops.append((tuple(name), - unify_op_type_mapping_ipex[ipex_op_type])) + quantizable_ops.append((tuple(name), unify_op_type_mapping_ipex[ipex_op_type])) map_op_name_to_fqn[(tuple(name), ipex_op_type)] = module_fqn else: re_flag = False - for pattern, unify_op_type in unify_op_type_mapping_ipex['re'].items(): + for pattern, unify_op_type in unify_op_type_mapping_ipex["re"].items(): if re.match(pattern, ipex_op_type): re_flag = True quantizable_ops.append((tuple(name), unify_op_type)) @@ -3080,14 +3185,14 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops): for op_name in name: module_key = op_name[0] op_cfg_id = op_name[2] - single_op_type = self.cfgs[module_key]['q_op_infos'][op_cfg_id]['op_type'] + single_op_type = self.cfgs[module_key]["q_op_infos"][op_cfg_id]["op_type"] if single_op_type in unify_op_type_mapping_ipex: single_op_type = unify_op_type_mapping_ipex[single_op_type] op_type += "&" + single_op_type if op_type else single_op_type quantizable_ops.append((tuple(name), op_type)) _module_key = name[0][0] _op_cfg_id = 
name[0][2] - module_fqn = self.cfgs[_module_key]['q_op_infos'][_op_cfg_id]['fqn'] + module_fqn = self.cfgs[_module_key]["q_op_infos"][_op_cfg_id]["fqn"] map_op_name_to_fqn[(tuple(name), op_type)] = module_fqn self.op_infos_from_cfgs = op_infos_from_cfgs self.output_tensor_id_op_name = output_tensor_id_op_name @@ -3099,44 +3204,43 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops): logger.info(ffn_blocks) self.block_wise = ffn_blocks - def get_fuse_ops(self, default_cfgs): # pragma: no cover - elt_wise = ['relu', 'sigmoid', 'gelu'] - inplace_ops = ['relu_', 'add_'] + def get_fuse_ops(self, default_cfgs): # pragma: no cover + elt_wise = ["relu", "sigmoid", "gelu"] + inplace_ops = ["relu_", "add_"] op_patterns = [] num_ops = len(default_cfgs) for cur_id in range(num_ops): - cur_op = default_cfgs[cur_id]['name'] - if cur_op == 'dropout': + cur_op = default_cfgs[cur_id]["name"] + if cur_op == "dropout": continue - inputs = default_cfgs[cur_id]['inputs_flow'] + inputs = default_cfgs[cur_id]["inputs_flow"] num_input = len(inputs) pre_ops = {} for i_num in range(num_input): inp = inputs[i_num] for pre_id in range(cur_id): - pre_op = default_cfgs[pre_id]['name'] - pre_out = default_cfgs[pre_id]['outputs_flow'] + pre_op = default_cfgs[pre_id]["name"] + pre_out = default_cfgs[pre_id]["outputs_flow"] num_out = len(pre_out) for o_num in range(num_out): if pre_out[o_num] == inp: - if cur_op in inplace_ops and (pre_op in ['conv2d', 'conv3d', 'linear' - ]): + if cur_op in inplace_ops and (pre_op in ["conv2d", "conv3d", "linear"]): op_patterns.append([(pre_id, pre_op), (cur_id, cur_op)]) - if cur_op in elt_wise and (pre_op - in ['conv2d', 'conv3d', 'linear', 'add']): + if cur_op in elt_wise and (pre_op in ["conv2d", "conv3d", "linear", "add"]): op_patterns.append([(pre_id, pre_op), (cur_id, cur_op)]) - if cur_op == 'add': + if cur_op == "add": pre_ops[i_num] = [pre_id, pre_op] if len(pre_ops) > 0: for key, value in pre_ops.items(): - if value[1] in ['conv2d', 'conv3d', 'linear'] and \ - default_cfgs[cur_id]['inputs_quantized'][key] == False: + if ( + value[1] in ["conv2d", "conv3d", "linear"] + and default_cfgs[cur_id]["inputs_quantized"][key] is False + ): op_patterns.append([(value[0], value[1]), (cur_id, cur_op)]) return op_patterns def qdq_quantize(self, model, q_model, tune_cfg, dataloader, q_func): - assert not self.version.release < Version("2.1").release, \ - "IPEX version >= 2.1 is required for SmoothQuant." + assert not self.version.release < Version("2.1").release, "IPEX version >= 2.1 is required for SmoothQuant." 
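The loop that follows recomputes the SmoothQuant scales from the calibration statistics carried in sq_max_info. The arithmetic is compact enough to show in isolation; the tensors below are hypothetical per-channel statistics, while alpha, the clip floor, and the 1.0 / scale input multiplier mirror the code in this hunk:

import torch

input_minmax = (torch.tensor([-3.0, -0.5, -8.0]), torch.tensor([2.0, 4.0, 6.0]))  # per-channel activation min/max
weight_max = torch.tensor([0.7, 1.2, 0.05])                                        # per-channel |weight| max
alpha = 0.5

abs_input_max = torch.max(torch.abs(input_minmax[0]), torch.abs(input_minmax[1]))
scale = torch.clip(torch.pow(abs_input_max, alpha) / torch.pow(weight_max, 1 - alpha), min=1e-5)

# Scaling the activation by 1.0 / scale and the weight columns by scale leaves
# the fp32 matmul unchanged, which is what makes the migration loss-free.
x = torch.randn(4, 3)
w = torch.randn(5, 3)
print(torch.allclose(x @ w.t(), (x * (1.0 / scale)) @ (w * scale).t(), atol=1e-5))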
inplace = True if self.performance_only else False # fetch SmoothQuant scale info from pre-optimized model @@ -3145,43 +3249,46 @@ def qdq_quantize(self, model, q_model, tune_cfg, dataloader, q_func): smoothquant_scale_info = {} from .torch_utils.model_wrapper import SQLinearWrapper from .torch_utils.util import fetch_module + for _, info in sq_max_info.items(): - alpha = info['alpha'] - absorbed_layer = info['absorbed_layer'] - input_minmax = info['input_minmax'] - weight_max = info['weight_max'] + alpha = info["alpha"] + absorbed_layer = info["absorbed_layer"] + input_minmax = info["input_minmax"] + weight_max = info["weight_max"] abs_input_max = torch.max(torch.abs(input_minmax[0]), torch.abs(input_minmax[1])) input_power = torch.pow(abs_input_max, alpha) weight_power = torch.pow(weight_max, 1 - alpha) scale = torch.clip(input_power / weight_power, min=1e-5) for op_name in absorbed_layer: module = copy.deepcopy(fetch_module(q_model._model, op_name)) - new_module = SQLinearWrapper(module, 1.0/scale, input_minmax, alpha) + new_module = SQLinearWrapper(module, 1.0 / scale, input_minmax, alpha) weight_scale = new_module._get_weight_scale() smoothquant_scale_info[op_name] = { - 'alpha': new_module.alpha, - 'input_scale_for_mul': new_module.input_scale, - 'input_scale_after_mul': new_module.scale, - 'input_zero_point_after_mul': new_module.zero_point, - 'input_dtype': new_module.dtype, - 'weight_scale_after_mul': weight_scale, + "alpha": new_module.alpha, + "input_scale_for_mul": new_module.input_scale, + "input_scale_after_mul": new_module.scale, + "input_zero_point_after_mul": new_module.zero_point, + "input_dtype": new_module.dtype, + "weight_scale_after_mul": weight_scale, } logger.debug(f"Current SmoothQuant alpha of {op_name} is {alpha}") # Check save_qconf_summary part is a workaroud for IPEX bug. 
# Sometimes the prepared model from get_op_capablitiy loss this attribute - if not hasattr(model._model, "save_qconf_summary") or \ - not hasattr(model._model, "load_qconf_summary"): + if not hasattr(model._model, "save_qconf_summary") or not hasattr(model._model, "load_qconf_summary"): static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5) if isinstance(self.example_inputs, dict): - model._model = ipex.quantization.prepare(model._model, static_qconfig, - example_kwarg_inputs=self.example_inputs, inplace=inplace) + model._model = ipex.quantization.prepare( + model._model, static_qconfig, example_kwarg_inputs=self.example_inputs, inplace=inplace + ) else: - model._model = ipex.quantization.prepare(model._model, static_qconfig, - example_inputs=self.example_inputs, inplace=inplace) + model._model = ipex.quantization.prepare( + model._model, static_qconfig, example_inputs=self.example_inputs, inplace=inplace + ) # TODO: update_sq_scale is used to update observer, should fuse in _cfg_to_qconfig from .torch_utils.util import update_sq_scale + self._cfg_to_qconfig(tune_cfg) update_sq_scale(self.ipex_config_path, smoothquant_scale_info) model._model.load_qconf_summary(qconf_summary=self.ipex_config_path) @@ -3193,13 +3300,16 @@ def qdq_quantize(self, model, q_model, tune_cfg, dataloader, q_func): if q_func is not None: q_func(model._model) else: - iterations = tune_cfg.get('calib_iteration', 1) - self.model_calibration(model._model, dataloader, iterations, None, - tune_cfg.get('calib_sampling_size', 1)) + iterations = tune_cfg.get("calib_iteration", 1) + self.model_calibration( + model._model, dataloader, iterations, None, tune_cfg.get("calib_sampling_size", 1) + ) except: - logger.warning("The calibration failed when calibrating with ipex, "+\ - "using scale info from SmoothQuant for Linear and " +\ - "one iter calibration for other ops.") + logger.warning( + "The calibration failed when calibrating with ipex, " + + "using scale info from SmoothQuant for Linear and " + + "one iter calibration for other ops." + ) # update ipex_config.json with smoothquant_scale_info model._model.save_qconf_summary(qconf_summary=self.ipex_config_path) update_sq_scale(self.ipex_config_path, smoothquant_scale_info) @@ -3207,7 +3317,7 @@ def qdq_quantize(self, model, q_model, tune_cfg, dataloader, q_func): self._ipex_post_quant_process(model, q_model, dataloader, inplace=inplace) - with open(self.ipex_config_path, 'r') as f: + with open(self.ipex_config_path, "r") as f: q_model.tune_cfg = json.load(f) q_model.ipex_config_path = self.ipex_config_path self._dump_model_op_stats(tune_cfg) @@ -3227,13 +3337,9 @@ def save(self, model, path=None): pass - def inspect_tensor(self, - model, - dataloader, - op_list=None, - iteration_list=None, - inspect_type='activation', - save_to_disk=False): + def inspect_tensor( + self, model, dataloader, op_list=None, iteration_list=None, inspect_type="activation", save_to_disk=False + ): assert False, "Inspect_tensor didn't support IPEX backend now!" def _simple_inference(self, q_model, dataloader, iterations=1): @@ -3257,15 +3363,18 @@ class PyTorch_FXAdaptor(TemplateAdaptor): Args: framework_specific_info (dict): dictionary of tuning configure from yaml file. """ + def __init__(self, framework_specific_info): super(PyTorch_FXAdaptor, self).__init__(framework_specific_info) - assert self.version.release >= Version("1.8.0").release, \ - "Please use PyTroch 1.8 or higher version with pytorch_fx backend!" 
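PyTorch_FXAdaptor drives FX graph mode quantization; for orientation, here is a minimal end-to-end sketch of that flow with the stock torch.ao.quantization API as it looks in recent PyTorch (2.x) releases. The signatures differ slightly from the 1.8/1.9-era ones the asserts above guard for, and the toy model and inputs are illustrative:

import torch
from torch.ao.quantization import get_default_qconfig_mapping
from torch.ao.quantization.quantize_fx import convert_fx, prepare_fx

float_model = torch.nn.Sequential(torch.nn.Linear(16, 8), torch.nn.ReLU()).eval()
example_inputs = (torch.randn(1, 16),)

qconfig_mapping = get_default_qconfig_mapping("fbgemm")
prepared = prepare_fx(float_model, qconfig_mapping, example_inputs)  # trace with FX and insert observers

for _ in range(4):                                                    # calibration
    prepared(torch.randn(1, 16))

quantized = convert_fx(prepared)                                      # lower to quantized kernels
print(quantized(torch.randn(1, 16)).shape)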
- if self.approach == 'post_training_dynamic_quant': - assert self.version.release >= Version("1.9.0").release, \ - "Please use PyTroch 1.9 or higher version for dynamic " \ - "quantization with pytorch_fx backend!" + assert ( + self.version.release >= Version("1.8.0").release + ), "Please use PyTroch 1.8 or higher version with pytorch_fx backend!" + if self.approach == "post_training_dynamic_quant": + assert self.version.release >= Version("1.9.0").release, ( + "Please use PyTroch 1.9 or higher version for dynamic " "quantization with pytorch_fx backend!" + ) import torch.quantization as tq + """ # Map for swapping float module to quantized ones, # and this dictionary will change with different PoTorch versions @@ -3298,13 +3407,11 @@ def __init__(self, framework_specific_info): query_config_file = "pytorch_cpu.yaml" else: # pragma: no cover assert False, "Unsupport this device {}".format(self.device) - self.query_handler = PyTorchQuery( - local_config_file=os.path.join(os.path.dirname(__file__), query_config_file)) + self.query_handler = PyTorchQuery(local_config_file=os.path.join(os.path.dirname(__file__), query_config_file)) - if self.approach == 'post_training_dynamic_quant': - self.white_list = \ - tq.quantization_mappings.get_default_dynamic_quant_module_mappings() - elif self.approach == 'post_training_static_quant': + if self.approach == "post_training_dynamic_quant": + self.white_list = tq.quantization_mappings.get_default_dynamic_quant_module_mappings() + elif self.approach == "post_training_static_quant": self.white_list = tq.quantization_mappings.get_default_static_quant_module_mappings() else: self.white_list = tq.quantization_mappings.get_default_qconfig_propagation_list() @@ -3323,8 +3430,7 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None): (object): quantized model """ - assert isinstance(model._model, torch.nn.Module), \ - "The model passed in is not the instance of torch.nn.Module" + assert isinstance(model._model, torch.nn.Module), "The model passed in is not the instance of torch.nn.Module" if self.performance_only: q_model = model else: @@ -3332,22 +3438,22 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None): q_model = copy.deepcopy(model) q_model.fp32_model = model.fp32_model except Exception as e: # pragma: no cover - logger.warning("Fail to deep copy the model due to {}, inplace is used now.".format( - repr(e))) + logger.warning("Fail to deep copy the model due to {}, inplace is used now.".format(repr(e))) q_model = model q_model._model.eval() # For smoothquant optimized model - recipe_cfgs = tune_cfg.get('recipe_cfgs', None) - if recipe_cfgs and recipe_cfgs.get('smooth_quant', False) \ - and not recipe_cfgs['smooth_quant_args']['folding'] \ - and self.approach != 'post_training_dynamic_quant': + recipe_cfgs = tune_cfg.get("recipe_cfgs", None) + if ( + recipe_cfgs + and recipe_cfgs.get("smooth_quant", False) + and not recipe_cfgs["smooth_quant_args"]["folding"] + and self.approach != "post_training_dynamic_quant" + ): return self.qdq_quantize(q_model, tune_cfg) - if recipe_cfgs and recipe_cfgs.get('smooth_quant', False) \ - and recipe_cfgs['smooth_quant_args']['folding']: + if recipe_cfgs and recipe_cfgs.get("smooth_quant", False) and recipe_cfgs["smooth_quant_args"]["folding"]: self._apply_pre_optimization(q_model, tune_cfg) - self.tune_cfg = tune_cfg self.tune_cfg["approach"] = self.approach self.tune_cfg["reduce_range"] = REDUCE_RANGE @@ -3358,46 +3464,55 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None): 
self.example_inputs = None if self.default_qconfig is not None: default_qconfig = copy.deepcopy(self.default_qconfig) - default_qconfig['activation']['dtype'] = \ - self.default_qconfig['activation']['dtype'][0] - default_qconfig['weight']['dtype'] = self.default_qconfig['weight']['dtype'][0] + default_qconfig["activation"]["dtype"] = self.default_qconfig["activation"]["dtype"][0] + default_qconfig["weight"]["dtype"] = self.default_qconfig["weight"]["dtype"][0] self.tune_cfg["op"][("default_qconfig", "")] = default_qconfig op_cfgs = _cfg_to_qconfig(self.tune_cfg, self.approach) - self.tune_cfg['bf16_ops_list'] = op_cfgs['bf16_ops_list'] - del op_cfgs['bf16_ops_list'] + self.tune_cfg["bf16_ops_list"] = op_cfgs["bf16_ops_list"] + del op_cfgs["bf16_ops_list"] gc.collect() - from torch.quantization.quantize_fx import prepare_fx, convert_fx, prepare_qat_fx + from torch.quantization.quantize_fx import convert_fx, prepare_fx, prepare_qat_fx + if q_model.kwargs is not None: - self.prepare_custom_config_dict = q_model.kwargs.get('prepare_custom_config_dict', - None) - self.convert_custom_config_dict = q_model.kwargs.get('convert_custom_config_dict', - None) + self.prepare_custom_config_dict = q_model.kwargs.get("prepare_custom_config_dict", None) + self.convert_custom_config_dict = q_model.kwargs.get("convert_custom_config_dict", None) else: self.prepare_custom_config_dict, self.convert_custom_config_dict = None, None self.fx_op_cfgs = _cfgs_to_fx_cfgs(op_cfgs, self.approach) # for layer-wise quant # recipe_cfgs = tune_cfg.get('recipe_cfgs', None) - if recipe_cfgs and recipe_cfgs.get('layer_wise_quant', False) \ - and self.approach != 'post_training_dynamic_quant': + if ( + recipe_cfgs + and recipe_cfgs.get("layer_wise_quant", False) + and self.approach != "post_training_dynamic_quant" + ): from .torch_utils.layer_wise_quant import LayerWiseQuant - model_path = recipe_cfgs['layer_wise_quant_args'].get('model_path', None) - smooth_quant = recipe_cfgs['layer_wise_quant_args'].get('smooth_quant', False) - alpha = recipe_cfgs['layer_wise_quant_args'].get('smooth_quant_alpha', 0.5) - assert model_path is not None,\ - "the layer_wise_quant_args should have args model_path to load the weight of model." - device = recipe_cfgs['layer_wise_quant_args'].get('decvice', 'cpu') - lw_quant = LayerWiseQuant(q_model._model, model_path, self.fx_op_cfgs, calib_data=dataloader, - device=device, smooth_quant=smooth_quant, alpha=alpha) + model_path = recipe_cfgs["layer_wise_quant_args"].get("model_path", None) + smooth_quant = recipe_cfgs["layer_wise_quant_args"].get("smooth_quant", False) + alpha = recipe_cfgs["layer_wise_quant_args"].get("smooth_quant_alpha", 0.5) + assert ( + model_path is not None + ), "the layer_wise_quant_args should have args model_path to load the weight of model." 
+ device = recipe_cfgs["layer_wise_quant_args"].get("decvice", "cpu") + lw_quant = LayerWiseQuant( + q_model._model, + model_path, + self.fx_op_cfgs, + calib_data=dataloader, + device=device, + smooth_quant=smooth_quant, + alpha=alpha, + ) q_model._model = lw_quant.quantize(clean_weight=False) - tune_cfg['recipe_cfgs']['lwq_layers'] = lw_quant.quantized_layers + tune_cfg["recipe_cfgs"]["lwq_layers"] = lw_quant.quantized_layers q_model.q_config = copy.deepcopy(tune_cfg) return q_model - - self.tune_cfg['fx_sub_module_list'] = self.sub_module_list - if self.approach == 'quant_aware_training': + + self.tune_cfg["fx_sub_module_list"] = self.sub_module_list + if self.approach == "quant_aware_training": q_model._model.train() if self.sub_module_list is None: tmp_model = q_model._model @@ -3407,31 +3522,27 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None): q_model._model, self.fx_op_cfgs, example_inputs=self.example_inputs, - prepare_custom_config=self.prepare_custom_config_dict + prepare_custom_config=self.prepare_custom_config_dict, ) else: q_model._model = prepare_qat_fx( - q_model._model, - self.fx_op_cfgs, - prepare_custom_config_dict=self.prepare_custom_config_dict + q_model._model, self.fx_op_cfgs, prepare_custom_config_dict=self.prepare_custom_config_dict ) else: - logger.info('Fx trace of the entire model failed. ' + \ - 'We will conduct auto quantization') + logger.info("Fx trace of the entire model failed. " + "We will conduct auto quantization") PyTorch_FXAdaptor.prepare_sub_graph( self.sub_module_list, self.fx_op_cfgs, q_model._model, - prefix='', + prefix="", is_qat=True, example_inputs=self.example_inputs, - custom_config=self.prepare_custom_config_dict + custom_config=self.prepare_custom_config_dict, ) # q_func can be created by neural_compressor internal or passed by user. It's critical to # distinguish how q_func is passed since neural_compressor built-in functions accept # neural_compressor model and user defined func should accept framework model. - q_model._model = q_func( - q_model if getattr(q_func, 'builtin', None) else q_model._model) + q_model._model = q_func(q_model if getattr(q_func, "builtin", None) else q_model._model) assert q_model._model is not None, "Please return a trained model in train function!" 
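# --- Illustrative sketch, not part of this patch: the prepare_qat_fx -> train -> convert_fx flow that the
# quant_aware_training branch above drives through the user-supplied q_func. This uses stock PyTorch 1.13+
# APIs and a made-up toy model/training loop, not the adaptor's own configuration.
import torch
from torch.ao.quantization import get_default_qat_qconfig_mapping
from torch.ao.quantization.quantize_fx import convert_fx, prepare_qat_fx

model = torch.nn.Sequential(torch.nn.Linear(16, 16), torch.nn.ReLU(), torch.nn.Linear(16, 4))
example_inputs = (torch.randn(8, 16),)

model.train()
prepared = prepare_qat_fx(model, get_default_qat_qconfig_mapping("fbgemm"), example_inputs=example_inputs)

# Stand-in for q_func: fake-quant observers collect ranges while the model trains.
optimizer = torch.optim.SGD(prepared.parameters(), lr=0.01)
for _ in range(10):
    x, y = torch.randn(8, 16), torch.randint(0, 4, (8,))
    loss = torch.nn.functional.cross_entropy(prepared(x), y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

prepared.eval()
quantized = convert_fx(prepared)  # swaps fake-quant modules for int8 kernels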
q_model._model.eval() else: @@ -3443,27 +3554,24 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None): q_model._model, self.fx_op_cfgs, example_inputs=self.example_inputs, - prepare_custom_config=self.prepare_custom_config_dict + prepare_custom_config=self.prepare_custom_config_dict, ) else: q_model._model = prepare_fx( - q_model._model, - self.fx_op_cfgs, - prepare_custom_config_dict=self.prepare_custom_config_dict + q_model._model, self.fx_op_cfgs, prepare_custom_config_dict=self.prepare_custom_config_dict ) else: - logger.info('Fx trace of the entire model failed, ' + \ - 'We will conduct auto quantization') + logger.info("Fx trace of the entire model failed, " + "We will conduct auto quantization") PyTorch_FXAdaptor.prepare_sub_graph( self.sub_module_list, self.fx_op_cfgs, q_model._model, - prefix='', + prefix="", example_inputs=self.example_inputs, - custom_config=self.prepare_custom_config_dict + custom_config=self.prepare_custom_config_dict, ) - if self.approach in ['post_training_static_quant', 'post_training_auto_quant']: - iterations = tune_cfg.get('calib_iteration', 1) + if self.approach in ["post_training_static_quant", "post_training_auto_quant"]: + iterations = tune_cfg.get("calib_iteration", 1) if q_func is not None: q_func(q_model._model) else: @@ -3471,56 +3579,52 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None): q_model._model, dataloader, iterations, - calib_sampling_size=tune_cfg.get('calib_sampling_size', 1) + calib_sampling_size=tune_cfg.get("calib_sampling_size", 1), ) if self.sub_module_list is None: if self.version.release >= Version("1.13.0").release: # pragma: no cover # pylint: disable=E1123 - q_model._model = convert_fx( - q_model._model, - convert_custom_config=self.convert_custom_config_dict - ) + q_model._model = convert_fx(q_model._model, convert_custom_config=self.convert_custom_config_dict) else: - q_model._model = convert_fx( - q_model._model, - convert_custom_config_dict=self.convert_custom_config_dict - ) + q_model._model = convert_fx(q_model._model, convert_custom_config_dict=self.convert_custom_config_dict) torch_utils.util.append_attr(q_model._model, tmp_model) del tmp_model gc.collect() else: PyTorch_FXAdaptor.convert_sub_graph( - self.sub_module_list, - q_model._model, - prefix='', - custom_config=self.prepare_custom_config_dict + self.sub_module_list, q_model._model, prefix="", custom_config=self.prepare_custom_config_dict ) - if len(self.tune_cfg['bf16_ops_list']) > 0 and \ - self.version.release >= Version("1.11.0").release and self.use_bf16 and \ - (CpuInfo().bf16 or os.getenv('FORCE_BF16') == '1'): # pragma: no cover + if ( + len(self.tune_cfg["bf16_ops_list"]) > 0 + and self.version.release >= Version("1.11.0").release + and self.use_bf16 + and (CpuInfo().bf16 or os.getenv("FORCE_BF16") == "1") + ): # pragma: no cover q_model._model = torch_utils.bf16_convert.Convert(q_model._model, self.tune_cfg) self.fused_dict = self.get_fused_list(q_model.model) q_model.is_quantized = True q_model.q_config = copy.deepcopy(self.tune_cfg) - if self.approach != 'post_training_dynamic_quant': + if self.approach != "post_training_dynamic_quant": self._get_scale_zeropoint(q_model._model, q_model.q_config) self._dump_model_op_stats(q_model._model, q_model.q_config, self.approach) torch_utils.util.get_embedding_contiguous(q_model._model) return q_model - def evaluate(self, - model, - dataloader, - postprocess=None, - metrics=None, - measurer=None, - iteration=-1, - tensorboard=False, - fp32_baseline=False): + def evaluate( + self, + 
model, + dataloader, + postprocess=None, + metrics=None, + measurer=None, + iteration=-1, + tensorboard=False, + fp32_baseline=False, + ): """Execute the evaluate process on the specified model. Args: @@ -3541,64 +3645,71 @@ def evaluate(self, self.is_baseline = fp32_baseline model_ = model._model - assert isinstance( - model_, torch.nn.Module), "The model passed in is not the instance of torch.nn.Module" + assert isinstance(model_, torch.nn.Module), "The model passed in is not the instance of torch.nn.Module" model_.eval() model_.to(self.device) if metrics: - self.fp32_preds_as_label = any([hasattr(metric, "compare_label") and \ - not metric.compare_label for metric in metrics]) + self.fp32_preds_as_label = any( + [hasattr(metric, "compare_label") and not metric.compare_label for metric in metrics] + ) return self.model_eval(model_, dataloader, postprocess, metrics, measurer, iteration) def _pre_hook_for_qat(self, dataloader=None): - q_cfgs = torch.quantization.QConfig( - activation=torch.quantization.FakeQuantize.with_args( - dtype=torch.quint8, - qscheme=torch.per_tensor_affine, - reduce_range=REDUCE_RANGE, - observer=torch.quantization.MovingAverageMinMaxObserver), - weight=torch.quantization.default_weight_fake_quant) \ - if self.version.release < Version("1.10.0").release else \ - torch.quantization.QConfig( - activation=torch.quantization.FusedMovingAvgObsFakeQuantize.with_args( - dtype=torch.quint8, - qscheme=torch.per_tensor_affine, - reduce_range=REDUCE_RANGE), - weight=torch.quantization.default_fused_per_channel_wt_fake_quant) + q_cfgs = ( + torch.quantization.QConfig( + activation=torch.quantization.FakeQuantize.with_args( + dtype=torch.quint8, + qscheme=torch.per_tensor_affine, + reduce_range=REDUCE_RANGE, + observer=torch.quantization.MovingAverageMinMaxObserver, + ), + weight=torch.quantization.default_weight_fake_quant, + ) + if self.version.release < Version("1.10.0").release + else torch.quantization.QConfig( + activation=torch.quantization.FusedMovingAvgObsFakeQuantize.with_args( + dtype=torch.quint8, qscheme=torch.per_tensor_affine, reduce_range=REDUCE_RANGE + ), + weight=torch.quantization.default_fused_per_channel_wt_fake_quant, + ) + ) quantizable_ops = [] tmp_model = self.fuse_fx_model(self.model, is_qat=True) - self._get_quantizable_ops_recursively(tmp_model, '', quantizable_ops) + self._get_quantizable_ops_recursively(tmp_model, "", quantizable_ops) self._remove_fallback_ops_for_qat(quantizable_ops) bf16_ops = [] - if self.version.release >= Version("1.11.0").release and self.use_bf16 and \ - (CpuInfo().bf16 or os.getenv('FORCE_BF16') == '1'): # pragma: no cover + if ( + self.version.release >= Version("1.11.0").release + and self.use_bf16 + and (CpuInfo().bf16 or os.getenv("FORCE_BF16") == "1") + ): # pragma: no cover self.bf16_ops = self.query_handler.get_op_types_by_precision("bf16") - self._get_bf16_ops_recursively(tmp_model, '', bf16_ops) + self._get_bf16_ops_recursively(tmp_model, "", bf16_ops) bf16_ops_list = [(op) for op in bf16_ops if op not in quantizable_ops] quantized_ops = OrderedDict() for op in quantizable_ops: - if op[1] in [ - 'Embedding', 'EmbeddingBag', 'LSTM', 'GRU', 'LSTMCell', 'GRUCell', 'RNNCell' - ]: + if op[1] in ["Embedding", "EmbeddingBag", "LSTM", "GRU", "LSTMCell", "GRUCell", "RNNCell"]: quantized_ops[op[0]] = torch.quantization.default_dynamic_qconfig else: quantized_ops[op[0]] = q_cfgs # build op_config_dict to save module scale and zeropoint op_config_dict = {} for op in quantizable_ops: - op_config_dict[op] = {'weight': {'dtype': 
'int8'}, 'activation': {'dtype': 'uint8'}} + op_config_dict[op] = {"weight": {"dtype": "int8"}, "activation": {"dtype": "uint8"}} - if self.version.release < Version("1.11.0").release: # pragma: no cover + if self.version.release < Version("1.11.0").release: # pragma: no cover quantized_ops["default_qconfig"] = None else: from torch.ao.quantization import default_embedding_qat_qconfig + for op in quantizable_ops: - if op[1] in ['Embedding', 'EmbeddingBag']: + if op[1] in ["Embedding", "EmbeddingBag"]: quantized_ops[op[0]] = default_embedding_qat_qconfig from torch.quantization.quantize_fx import prepare_qat_fx - fx_op_cfgs = _cfgs_to_fx_cfgs(quantized_ops, 'quant_aware_training') + + fx_op_cfgs = _cfgs_to_fx_cfgs(quantized_ops, "quant_aware_training") self.model._model.train() # PyTorch 1.13 and above version, need example_inputs for fx trace, but it not realy used, @@ -3609,8 +3720,7 @@ def _pre_hook_for_qat(self, dataloader=None): try: self.model.fp32_model = copy.deepcopy(self.model.fp32_model) except Exception as e: # pragma: no cover - logger.warning("Fail to deep copy the model due to {}, inplace is used now.".format( - repr(e))) + logger.warning("Fail to deep copy the model due to {}, inplace is used now.".format(repr(e))) if self.sub_module_list is None: if self.version.release >= Version("1.13.0").release: # pragma: no cover @@ -3619,84 +3729,96 @@ def _pre_hook_for_qat(self, dataloader=None): self.model._model, fx_op_cfgs, example_inputs=self.example_inputs, - prepare_custom_config=self.model.kwargs.get( - 'prepare_custom_config_dict', None) if self.model.kwargs is not None else None) + prepare_custom_config=self.model.kwargs.get("prepare_custom_config_dict", None) + if self.model.kwargs is not None + else None, + ) else: self.model._model = prepare_qat_fx( self.model._model, fx_op_cfgs, - prepare_custom_config_dict=self.model.kwargs.get( - 'prepare_custom_config_dict', None) if self.model.kwargs is not None else None) + prepare_custom_config_dict=self.model.kwargs.get("prepare_custom_config_dict", None) + if self.model.kwargs is not None + else None, + ) else: - logger.info('Fx trace of the entire model failed. ' + \ - 'We will conduct auto quantization') - PyTorch_FXAdaptor.prepare_sub_graph(self.sub_module_list, - fx_op_cfgs, - self.model._model, - prefix='', - is_qat=True, - example_inputs=self.example_inputs) + logger.info("Fx trace of the entire model failed. 
" + "We will conduct auto quantization") + PyTorch_FXAdaptor.prepare_sub_graph( + self.sub_module_list, + fx_op_cfgs, + self.model._model, + prefix="", + is_qat=True, + example_inputs=self.example_inputs, + ) # This is a flag for reloading self.model.q_config = { - 'calib_sampling_size': 100, # tmp arg for export API - 'is_oneshot': True, - 'framework': 'pytorch_fx', - 'reduce_range': REDUCE_RANGE, - 'quantizable_ops': quantizable_ops, - 'bf16_ops_list': bf16_ops_list, - 'op': op_config_dict, - 'sub_module_list': self.sub_module_list, - 'approach': 'quant_aware_training' + "calib_sampling_size": 100, # tmp arg for export API + "is_oneshot": True, + "framework": "pytorch_fx", + "reduce_range": REDUCE_RANGE, + "quantizable_ops": quantizable_ops, + "bf16_ops_list": bf16_ops_list, + "op": op_config_dict, + "sub_module_list": self.sub_module_list, + "approach": "quant_aware_training", } def _post_hook_for_qat(self): from torch.quantization.quantize_fx import convert_fx + if self.sub_module_list is None: if self.version > Version("1.12.1"): # pragma: no cover # pylint: disable=E1123 self.model._model = convert_fx( self.model._model, - convert_custom_config=self.model.kwargs.get( - 'convert_custom_config_dict', None) if self.model.kwargs is not None else None) + convert_custom_config=self.model.kwargs.get("convert_custom_config_dict", None) + if self.model.kwargs is not None + else None, + ) else: self.model._model = convert_fx( self.model._model, - convert_custom_config_dict=self.model.kwargs.get( - 'convert_custom_config_dict', None) if self.model.kwargs is not None else None) + convert_custom_config_dict=self.model.kwargs.get("convert_custom_config_dict", None) + if self.model.kwargs is not None + else None, + ) else: - PyTorch_FXAdaptor.convert_sub_graph(self.sub_module_list, \ - self.model._model, prefix='') + PyTorch_FXAdaptor.convert_sub_graph(self.sub_module_list, self.model._model, prefix="") - if self.approach != 'post_training_dynamic_quant': + if self.approach != "post_training_dynamic_quant": self._get_scale_zeropoint(self.model._model, self.model.q_config) - if len(self.model.q_config['bf16_ops_list']) > 0 and \ - self.version.release >= Version("1.11.0").release and self.use_bf16 and \ - (CpuInfo().bf16 or os.getenv('FORCE_BF16') == '1'): # pragma: no cover + if ( + len(self.model.q_config["bf16_ops_list"]) > 0 + and self.version.release >= Version("1.11.0").release + and self.use_bf16 + and (CpuInfo().bf16 or os.getenv("FORCE_BF16") == "1") + ): # pragma: no cover self.model._model = torch_utils.bf16_convert.Convert(self.model._model, self.model.q_config) self._dump_model_op_stats(self.model._model, self.model.q_config, self.approach) torch_utils.util.get_embedding_contiguous(self.model._model) def _get_fallback_ops_for_qat(self): # get fallback ops for quant aware training approach - fallback_ops = {'op_wise': [], 'optype_wise': []} - if self.qat_optype_wise is not None: # pragma: no cover + fallback_ops = {"op_wise": [], "optype_wise": []} + if self.qat_optype_wise is not None: # pragma: no cover for optype, optype_config in self.qat_optype_wise.items(): - if 'weight' in optype_config and optype_config['weight']['dtype'] == ['fp32']: - fallback_ops['optype_wise'].append(optype) - if self.qat_op_wise is not None: # pragma: no cover + if "weight" in optype_config and optype_config["weight"]["dtype"] == ["fp32"]: + fallback_ops["optype_wise"].append(optype) + if self.qat_op_wise is not None: # pragma: no cover for op, op_config in self.qat_op_wise.items(): - if 'weight' in 
op_config and op_config['weight']['dtype'] == ['fp32']: - fallback_ops['op_wise'].append(op) + if "weight" in op_config and op_config["weight"]["dtype"] == ["fp32"]: + fallback_ops["op_wise"].append(op) return fallback_ops - + def _remove_fallback_ops_for_qat(self, quantizable_ops): # remove fallback ops from quantizable_ops for quant aware training approach fallback_ops = self._get_fallback_ops_for_qat() remove_ops = [] - for (op_name, op_type) in quantizable_ops: - if op_name in fallback_ops['op_wise'] or op_type in fallback_ops['optype_wise']: + for op_name, op_type in quantizable_ops: + if op_name in fallback_ops["op_wise"] or op_type in fallback_ops["optype_wise"]: remove_ops.append((op_name, op_type)) - for (op_name, op_type) in remove_ops: + for op_name, op_type in remove_ops: quantizable_ops.remove((op_name, op_type)) def train(self, model, dataloader, optimizer_tuple, criterion_tuple, hooks, **kwargs): @@ -3718,20 +3840,20 @@ def train(self, model, dataloader, optimizer_tuple, criterion_tuple, hooks, **kw criterion = criterion_tuple[0](**criterion_tuple[1]) # prepare hooks first to ensure model will be converted correctly if hooks is not None: # pragma: no cover - on_train_begin = hooks['on_train_begin'] - on_train_end = hooks['on_train_end'] - on_epoch_begin = hooks['on_epoch_begin'] - on_epoch_end = hooks['on_epoch_end'] - on_step_begin = hooks['on_step_begin'] - on_step_end = hooks['on_step_end'] - on_after_compute_loss = hooks['on_after_compute_loss'] - on_before_optimizer_step = hooks['on_before_optimizer_step'] + on_train_begin = hooks["on_train_begin"] + on_train_end = hooks["on_train_end"] + on_epoch_begin = hooks["on_epoch_begin"] + on_epoch_end = hooks["on_epoch_end"] + on_step_begin = hooks["on_step_begin"] + on_step_end = hooks["on_step_end"] + on_after_compute_loss = hooks["on_after_compute_loss"] + on_before_optimizer_step = hooks["on_before_optimizer_step"] model._model.train() if hooks is not None: on_train_begin(dataloader) - start_epochs = kwargs['kwargs']['start_epoch'] - end_epochs = kwargs['kwargs']['end_epoch'] - iters = kwargs['kwargs']['iteration'] + start_epochs = kwargs["kwargs"]["start_epoch"] + end_epochs = kwargs["kwargs"]["end_epoch"] + iters = kwargs["kwargs"]["iteration"] model._model.to(device) for nepoch in range(start_epochs, end_epochs): cnt = 0 @@ -3741,7 +3863,7 @@ def train(self, model, dataloader, optimizer_tuple, criterion_tuple, hooks, **kw target = target.to(device) if hooks is not None: on_step_begin(cnt) - print('.', end='', flush=True) + print(".", end="", flush=True) cnt += 1 output = pytorch_forward_wrapper(model._model, input, device=device) loss = criterion(output, target) @@ -3769,6 +3891,7 @@ def train(self, model, dataloader, optimizer_tuple, criterion_tuple, hooks, **kw def _get_module_op_stats(self, model, tune_cfg, approach): """This is a function to get quantizable ops of model to user. 
+ Args: model (object): input model tune_cfg (dict): quantization config @@ -3779,63 +3902,64 @@ def _get_module_op_stats(self, model, tune_cfg, approach): modules = dict(model.named_modules()) res = dict() - if approach == 'post_training_dynamic_quant': + if approach == "post_training_dynamic_quant": # fetch int8 and fp32 ops set by Neural Compressor from tune_cfg - for key in tune_cfg['op']: + for key in tune_cfg["op"]: op_type = key[1] - #build initial dict - if op_type not in res.keys(): # pragma: no cover - res[op_type] = {'INT8': 0, 'BF16': 0, 'FP32': 0} - value = tune_cfg['op'][key] + # build initial dict + if op_type not in res.keys(): # pragma: no cover + res[op_type] = {"INT8": 0, "BF16": 0, "FP32": 0} + value = tune_cfg["op"][key] # Special cases: QuantStub, Embedding - if ('weight' in value and value['weight']['dtype'] == 'fp32') or \ - ('weight' not in value and value['activation']['dtype'] == 'fp32'): - res[op_type]['FP32'] += 1 - elif value['activation']['dtype'] == 'bf16': # pragma: no cover - res[op_type]['BF16'] += 1 + if ("weight" in value and value["weight"]["dtype"] == "fp32") or ( + "weight" not in value and value["activation"]["dtype"] == "fp32" + ): + res[op_type]["FP32"] += 1 + elif value["activation"]["dtype"] == "bf16": # pragma: no cover + res[op_type]["BF16"] += 1 else: - res[op_type]['INT8'] += 1 + res[op_type]["INT8"] += 1 else: quantized_mode = False for node in model.graph.nodes: - if node.op == 'call_module': + if node.op == "call_module": if node.target not in modules: # pragma: no cover continue op_class = type(modules[node.target]) op_type = str(op_class.__name__) - if 'quantized' in str(op_class) \ - or (quantized_mode and 'pooling' in str(op_class)): + if "quantized" in str(op_class) or (quantized_mode and "pooling" in str(op_class)): if op_type not in res.keys(): - res[op_type] = {'INT8': 0, 'BF16': 0, 'FP32': 0} - res[op_type]['INT8'] += 1 + res[op_type] = {"INT8": 0, "BF16": 0, "FP32": 0} + res[op_type]["INT8"] += 1 elif op_class in self.white_list: if op_type not in res.keys(): - res[op_type] = {'INT8': 0, 'BF16': 0, 'FP32': 0} - res[op_type]['FP32'] += 1 + res[op_type] = {"INT8": 0, "BF16": 0, "FP32": 0} + res[op_type]["FP32"] += 1 continue - elif node.op == 'call_function': + elif node.op == "call_function": op_type = str(node.target.__name__) else: op_type = node.target # skip input and output - if not "quantize_per" in op_type and not quantized_mode: + if "quantize_per" not in op_type and not quantized_mode: continue # skip zero_pioint and scale if "zero_point" in op_type or "scale" in op_type: continue - #build initial dict + # build initial dict if op_type not in res.keys(): - res[op_type] = {'INT8': 0, 'BF16': 0, 'FP32': 0} + res[op_type] = {"INT8": 0, "BF16": 0, "FP32": 0} if "quantize_per" in op_type and not quantized_mode: quantized_mode = True elif "dequantize" in op_type and quantized_mode: quantized_mode = False - res[op_type]['INT8'] += 1 + res[op_type]["INT8"] += 1 return res - def _get_sub_module_op_stats(self, model, tune_cfg, approach, res, prefix=''): + def _get_sub_module_op_stats(self, model, tune_cfg, approach, res, prefix=""): """This is a function to get quantizable ops of sub modules to user recursively. + Args: model (object): input model tune_cfg (dict): quantization config @@ -3846,7 +3970,7 @@ def _get_sub_module_op_stats(self, model, tune_cfg, approach, res, prefix=''): None """ for name, module in model.named_children(): - op_name = prefix + '.' + name if prefix != '' else name + op_name = prefix + "." 
+ name if prefix != "" else name if op_name in self.sub_module_list: module_res = self._get_module_op_stats(module, tune_cfg, approach) for key, value in module_res.items(): @@ -3859,6 +3983,7 @@ def _get_sub_module_op_stats(self, model, tune_cfg, approach, res, prefix=''): def _dump_model_op_stats(self, model, tune_cfg, approach): """This is a function to dump quantizable ops of model to user. + Args: model (object): input model tune_cfg (dict): quantization config @@ -3866,36 +3991,36 @@ def _dump_model_op_stats(self, model, tune_cfg, approach): Returns: None """ - if self.sub_module_list is None or \ - self.approach == 'post_training_dynamic_quant': + if self.sub_module_list is None or self.approach == "post_training_dynamic_quant": res = self._get_module_op_stats(model, tune_cfg, approach) else: res = dict() self._get_sub_module_op_stats(model, tune_cfg, approach, res) - if self.use_bf16 and (self.version.release >= Version("1.11.0").release) and \ - (CpuInfo().bf16 or os.getenv('FORCE_BF16') == '1'): # pragma: no cover - bf16_ops_list = tune_cfg['bf16_ops_list'] + if ( + self.use_bf16 + and (self.version.release >= Version("1.11.0").release) + and (CpuInfo().bf16 or os.getenv("FORCE_BF16") == "1") + ): # pragma: no cover + bf16_ops_list = tune_cfg["bf16_ops_list"] if len(bf16_ops_list) > 0: for bf16_op in bf16_ops_list: op_type = bf16_op[1] if op_type in res.keys(): - res[op_type]['BF16'] += 1 - if res[op_type]['FP32'] > 0: - res[op_type]['FP32'] -= 1 + res[op_type]["BF16"] += 1 + if res[op_type]["FP32"] > 0: + res[op_type]["FP32"] -= 1 else: - res[op_type] = {'INT8': 0, 'BF16': 1, 'FP32': 0} + res[op_type] = {"INT8": 0, "BF16": 1, "FP32": 0} + output_data = [ + [op_type, sum(res[op_type].values()), res[op_type]["INT8"], res[op_type]["BF16"], res[op_type]["FP32"]] + for op_type in res.keys() + ] - output_data = [[ - op_type, - sum(res[op_type].values()), res[op_type]['INT8'], res[op_type]['BF16'], - res[op_type]['FP32'] - ] for op_type in res.keys()] - - Statistics(output_data, - header='Mixed Precision Statistics', - field_names=["Op Type", "Total", "INT8", "BF16", "FP32"]).print_stat() + Statistics( + output_data, header="Mixed Precision Statistics", field_names=["Op Type", "Total", "INT8", "BF16", "FP32"] + ).print_stat() def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops): """This is a helper function for `query_fw_capability`, @@ -3911,41 +4036,54 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops): """ from .torch_utils.pattern_detector import TransformerBasedModelBlockPatternDetector from .torch_utils.util import get_op_type_by_name + detector = TransformerBasedModelBlockPatternDetector(model) detect_result = detector.detect_block() attention_block = detect_result.get("attention_blocks", None) - ffn_blocks = detect_result.get("ffn_blocks", None) + ffn_blocks = detect_result.get("ffn_blocks", None) logger.info(f"Attention Blocks: {len(attention_block)}") logger.info(f"FFN Blocks: {len(ffn_blocks)}") module_dict = dict(model.named_modules()) for op_name, child in model.named_modules(): if self.is_fused_module(child): for name, _ in child.named_children(): - module_prefix = op_name + '.' + name + module_prefix = op_name + "." 
+ name if module_prefix in module_dict: module_dict.pop(module_prefix) # remove sub-modules of fused modules q_ops_set = set() for op_name, child in module_dict.items(): - if type(child) in self.white_list \ - and type(child) != torch.nn.Sequential \ - and type(child) != torch.quantization.stubs.DeQuantStub: + if ( + type(child) in self.white_list + and type(child) != torch.nn.Sequential + and type(child) != torch.quantization.stubs.DeQuantStub + ): quantizable_ops.append( - (op_name, unify_op_type_mapping[str(child.__class__.__name__)] - if str(child.__class__.__name__) in unify_op_type_mapping else str( - child.__class__.__name__))) + ( + op_name, + unify_op_type_mapping[str(child.__class__.__name__)] + if str(child.__class__.__name__) in unify_op_type_mapping + else str(child.__class__.__name__), + ) + ) q_ops_set.add(op_name) # discard the op does not belong to quantizable_ops - block_wise = [[(name, get_op_type_by_name(name, quantizable_ops)) for name in block if\ - get_op_type_by_name(name, quantizable_ops) != None] for block in ffn_blocks] + block_wise = [ + [ + (name, get_op_type_by_name(name, quantizable_ops)) + for name in block + if get_op_type_by_name(name, quantizable_ops) is not None + ] + for block in ffn_blocks + ] self.block_wise = block_wise - def _get_module_scale_zeropoint(self, model, tune_cfg, prefix=''): - """get activation scale and zero_point for converted module. + def _get_module_scale_zeropoint(self, model, tune_cfg, prefix=""): + """Get activation scale and zero_point for converted module. Args: model (dir): Int8 model converted from fp32 model. scale and zero_point is set with calibration for each module - tune_cfg (object): This file saves scale and zero_point of + tune_cfg (object): This file saves scale and zero_point of output activation of each quantized module. prefix (string): prefix of op name @@ -3954,36 +4092,36 @@ def _get_module_scale_zeropoint(self, model, tune_cfg, prefix=''): """ # get scale and zero_point of modules. modules = dict(model.named_modules()) - for key in tune_cfg['op']: + for key in tune_cfg["op"]: if prefix: - sub_name = key[0].replace(prefix + '.', '', 1) + sub_name = key[0].replace(prefix + ".", "", 1) else: sub_name = key[0] if sub_name in modules: - value = tune_cfg['op'][key] + value = tune_cfg["op"][key] assert isinstance(value, dict) - if hasattr(modules[sub_name], 'scale'): - value['activation']['scale'] = float(modules[sub_name].scale) - if hasattr(modules[sub_name], 'zero_point'): - value['activation']['zero_point'] = int(modules[sub_name].zero_point) + if hasattr(modules[sub_name], "scale"): + value["activation"]["scale"] = float(modules[sub_name].scale) + if hasattr(modules[sub_name], "zero_point"): + value["activation"]["zero_point"] = int(modules[sub_name].zero_point) # get scale and zero_point of getattr ops (like quantize ops). 
for node in model.graph.nodes: - if node.op == 'get_attr': + if node.op == "get_attr": if prefix: - sub_name = prefix + '--' + node.target + sub_name = prefix + "--" + node.target else: sub_name = node.target if not hasattr(model, node.target): continue - if 'scale' in node.target: - tune_cfg['get_attr'][sub_name] = float(getattr(model, node.target)) - elif 'zero_point' in node.target: - tune_cfg['get_attr'][sub_name] = int(getattr(model, node.target)) + if "scale" in node.target: + tune_cfg["get_attr"][sub_name] = float(getattr(model, node.target)) + elif "zero_point" in node.target: + tune_cfg["get_attr"][sub_name] = int(getattr(model, node.target)) else: pass - def _get_sub_module_scale_zeropoint(self, model, tune_cfg, prefix=''): - """get activation scale and zero_point for converted sub modules recursively. + def _get_sub_module_scale_zeropoint(self, model, tune_cfg, prefix=""): + """Get activation scale and zero_point for converted sub modules recursively. Args: model (dir): Int8 model converted from fp32 model. @@ -3996,14 +4134,14 @@ def _get_sub_module_scale_zeropoint(self, model, tune_cfg, prefix=''): None """ for name, module in model.named_children(): - op_name = prefix + '.' + name if prefix != '' else name + op_name = prefix + "." + name if prefix != "" else name if op_name in self.sub_module_list: self._get_module_scale_zeropoint(module, tune_cfg, op_name) else: self._get_sub_module_scale_zeropoint(module, tune_cfg, op_name) def _get_scale_zeropoint(self, model, tune_cfg): - """get activation scale and zero_point for converted model. + """Get activation scale and zero_point for converted model. Args: model (dir): Int8 model converted from fp32 model. @@ -4014,20 +4152,16 @@ def _get_scale_zeropoint(self, model, tune_cfg): Returns: None """ - tune_cfg['get_attr'] = {} + tune_cfg["get_attr"] = {} if self.sub_module_list is None: self._get_module_scale_zeropoint(model, tune_cfg) else: self._get_sub_module_scale_zeropoint(model, tune_cfg) @staticmethod - def prepare_sub_graph(sub_module_list, - fx_op_cfgs, - model, - prefix, - is_qat=False, - example_inputs=None, - custom_config=None): + def prepare_sub_graph( + sub_module_list, fx_op_cfgs, model, prefix, is_qat=False, example_inputs=None, custom_config=None + ): """Static method to prepare sub modules recursively. Args: @@ -4042,41 +4176,46 @@ def prepare_sub_graph(sub_module_list, Returns: model (dir): output model which is a prepared PyTorch model. """ - from torch.quantization.quantize_fx import prepare_fx, prepare_qat_fx import torch.quantization.quantization_mappings as tqqm + from torch.quantization.quantize_fx import prepare_fx, prepare_qat_fx + version = get_torch_version() fx_white_list = tqqm.get_default_qconfig_propagation_list() for name, module in model.named_children(): - op_name = prefix + '.' + name if prefix != '' else name + op_name = prefix + "." 
+ name if prefix != "" else name # skip custom non traceable module in fine-grained FX if custom_config: - if ('non_traceable_module_name' in custom_config \ - and op_name in custom_config['non_traceable_module_name']) \ - or ('non_traceable_module_class' in custom_config \ - and isinstance(module, tuple(custom_config['non_traceable_module_class']))): + if ( + "non_traceable_module_name" in custom_config + and op_name in custom_config["non_traceable_module_name"] + ) or ( + "non_traceable_module_class" in custom_config + and isinstance(module, tuple(custom_config["non_traceable_module_class"])) + ): continue if op_name in sub_module_list: # remove prefix in fx_op_cfgs version = get_torch_version() if version > Version("1.12.1"): # pragma: no cover from torch.ao.quantization import QConfigMapping + fx_sub_op_cfgs = QConfigMapping() fx_sub_op_cfgs.set_global(None) fx_op_cfgs_dict = fx_op_cfgs.to_dict() else: fx_sub_op_cfgs = dict() - fx_sub_op_cfgs[''] = None - fx_sub_op_cfgs['module_name'] = [] + fx_sub_op_cfgs[""] = None + fx_sub_op_cfgs["module_name"] = [] fx_op_cfgs_dict = fx_op_cfgs - for k, v in fx_op_cfgs_dict['module_name']: + for k, v in fx_op_cfgs_dict["module_name"]: if op_name in k: - sub_name = k.replace(op_name + '.', '', 1) + sub_name = k.replace(op_name + ".", "", 1) if version > Version("1.12.1"): # pragma: no cover # pylint: disable=no-member fx_sub_op_cfgs.set_module_name(sub_name, v) else: - fx_sub_op_cfgs['module_name'].append((sub_name, v)) + fx_sub_op_cfgs["module_name"].append((sub_name, v)) if type(module) in fx_white_list and type(module) != torch.nn.Sequential: # Don't really need a quant/dequant, just move nn.Embedding \ @@ -4087,26 +4226,25 @@ def prepare_sub_graph(sub_module_list, # pylint: disable=E1123 # pragma: no cover if is_qat: - module_pre = prepare_qat_fx( - tmp_module, - fx_sub_op_cfgs) if version <= Version("1.12.1") else prepare_qat_fx( - tmp_module, fx_sub_op_cfgs, example_inputs=example_inputs) + module_pre = ( + prepare_qat_fx(tmp_module, fx_sub_op_cfgs) + if version <= Version("1.12.1") + else prepare_qat_fx(tmp_module, fx_sub_op_cfgs, example_inputs=example_inputs) + ) # pylint: disable=E1123 # pragma: no cover else: - module_pre = prepare_fx( - tmp_module, - fx_sub_op_cfgs) if version <= Version("1.12.1") else prepare_fx( - tmp_module, fx_sub_op_cfgs, example_inputs=example_inputs) + module_pre = ( + prepare_fx(tmp_module, fx_sub_op_cfgs) + if version <= Version("1.12.1") + else prepare_fx(tmp_module, fx_sub_op_cfgs, example_inputs=example_inputs) + ) torch_utils.util.append_attr(module_pre, module, fx_white_list) setattr(model, name, module_pre) else: - PyTorch_FXAdaptor.prepare_sub_graph(sub_module_list, - fx_op_cfgs, - module, - op_name, - is_qat, - example_inputs=example_inputs) + PyTorch_FXAdaptor.prepare_sub_graph( + sub_module_list, fx_op_cfgs, module, op_name, is_qat, example_inputs=example_inputs + ) @staticmethod def convert_sub_graph(sub_module_list, model, prefix, custom_config=None): @@ -4122,22 +4260,25 @@ def convert_sub_graph(sub_module_list, model, prefix, custom_config=None): model (dir): output model which is a converted PyTorch int8 model. """ from torch.quantization.quantize_fx import convert_fx + for name, module in model.named_children(): - op_name = prefix + '.' + name if prefix != '' else name + op_name = prefix + "." 
+ name if prefix != "" else name # skip custom non traceable module in fine-grained FX if custom_config: - if ('non_traceable_module_name' in custom_config \ - and op_name in custom_config['non_traceable_module_name']) \ - or ('non_traceable_module_class' in custom_config \ - and isinstance(module, tuple(custom_config['non_traceable_module_class']))): + if ( + "non_traceable_module_name" in custom_config + and op_name in custom_config["non_traceable_module_name"] + ) or ( + "non_traceable_module_class" in custom_config + and isinstance(module, tuple(custom_config["non_traceable_module_class"])) + ): continue if op_name in sub_module_list: module_con = convert_fx(module) torch_utils.util.append_attr(module_con, module) setattr(model, name, module_con) else: - PyTorch_FXAdaptor.convert_sub_graph(sub_module_list, \ - module, op_name) + PyTorch_FXAdaptor.convert_sub_graph(sub_module_list, module, op_name) @dump_elapsed_time("Pass query framework capability") def query_fw_capability(self, model): @@ -4166,40 +4307,35 @@ def fuse_fx_model(self, model, is_qat): """ try: tmp_model = copy.deepcopy(model._model) - except Exception as e: # pragma: no cover + except Exception as e: # pragma: no cover tmp_model = model._model logger.warning("Deepcopy failed: {}, inplace=True now!".format(repr(e))) tmp_model.train() if is_qat else tmp_model.eval() from torch.fx import GraphModule - from torch.quantization.quantize_fx import _fuse_fx, QuantizationTracer + from torch.quantization.quantize_fx import QuantizationTracer, _fuse_fx + if model.kwargs is not None: - prepare_custom_config_dict = model.kwargs.get('prepare_custom_config_dict', {}) + prepare_custom_config_dict = model.kwargs.get("prepare_custom_config_dict", {}) else: prepare_custom_config_dict = {} - skipped_module_names = prepare_custom_config_dict.get(\ - 'non_traceable_module_name', []) - skipped_module_classes = prepare_custom_config_dict.get(\ - 'non_traceable_module_class', []) + skipped_module_names = prepare_custom_config_dict.get("non_traceable_module_name", []) + skipped_module_classes = prepare_custom_config_dict.get("non_traceable_module_class", []) try: tracer = QuantizationTracer(skipped_module_names, skipped_module_classes) graph_module = GraphModule(tmp_model, tracer.trace(tmp_model)) if self.version.release >= Version("1.13.0").release: # pragma: no cover # pylint: disable=E1124, E1123 - fused_model = _fuse_fx(graph_module, - is_qat, - fuse_custom_config=prepare_custom_config_dict) + fused_model = _fuse_fx(graph_module, is_qat, fuse_custom_config=prepare_custom_config_dict) elif self.version.release >= Version("1.11.0").release: # pragma: no cover # pylint: disable=E1124 - fused_model = _fuse_fx(graph_module, - is_qat, - fuse_custom_config_dict=prepare_custom_config_dict) + fused_model = _fuse_fx(graph_module, is_qat, fuse_custom_config_dict=prepare_custom_config_dict) else: fused_model = _fuse_fx(graph_module, prepare_custom_config_dict) except: self.sub_module_list = [] module_dict = dict(tmp_model.named_modules()) - self._fuse_sub_graph(tmp_model, module_dict, prefix='', is_qat=is_qat) + self._fuse_sub_graph(tmp_model, module_dict, prefix="", is_qat=is_qat) fused_model = tmp_model return fused_model @@ -4215,18 +4351,18 @@ def _fuse_sub_graph(self, model, module_dict, prefix, is_qat): Returns: fused_model (GraphModule): fused GraphModule model from torch.fx. 
""" - from torch.quantization.quantize_fx import _fuse_fx import torch.quantization.quantization_mappings as tqqm + from torch.quantization.quantize_fx import _fuse_fx + fx_white_list = tqqm.get_default_qconfig_propagation_list() for name, module in model.named_children(): # FX QAT cannot fallback nn.Dropout from train mode to eval if type(module) == torch.nn.Dropout: # pragma: no cover continue - op_name = prefix + '.' + name if prefix != '' else name + op_name = prefix + "." + name if prefix != "" else name if op_name not in module_dict: continue - if type(module) in fx_white_list \ - and type(module) != torch.nn.Sequential: + if type(module) in fx_white_list and type(module) != torch.nn.Sequential: module = torch.quantization.QuantWrapper(module) if self._check_dynamic_control(module): self._fuse_sub_graph(module, module_dict, op_name, is_qat=is_qat) @@ -4253,6 +4389,7 @@ def _check_dynamic_control(module): fused_model (GraphModule): fused GraphModule model from torch.fx. """ import inspect + try: lines = inspect.getsource(module.forward) # Proxy obj. will always be detectd as `not None`. @@ -4262,14 +4399,15 @@ def _check_dynamic_control(module): if anws: return True except: # pragma: no cover - logger.info('Module has no forward function') + logger.info("Module has no forward function") return False def get_output_op_names(self, *args, **kwargs): return None - def calculate_op_sensitivity(self, model, dataloader, tune_cfg, output_op_names, - confidence_batches, fallback=True, requantize_cfgs=None): + def calculate_op_sensitivity( + self, model, dataloader, tune_cfg, output_op_names, confidence_batches, fallback=True, requantize_cfgs=None + ): """This is a helper function for `query_fw_capability`, and it will get all quantizable ops from model. @@ -4283,8 +4421,10 @@ def calculate_op_sensitivity(self, model, dataloader, tune_cfg, output_op_names, ops_lst (list): sorted op list by sensitivity """ from .torch_utils.util import get_fallback_order - ordered_ops = get_fallback_order(self, model.model, dataloader, tune_cfg, - confidence_batches, fallback, requantize_cfgs) + + ordered_ops = get_fallback_order( + self, model.model, dataloader, tune_cfg, confidence_batches, fallback, requantize_cfgs + ) return ordered_ops @@ -4295,6 +4435,7 @@ class PyTorchWeightOnlyAdaptor(TemplateAdaptor): Args: framework_specific_info (dict): dictionary of tuning configure from yaml file. """ + def __init__(self, framework_specific_info): super(PyTorchWeightOnlyAdaptor, self).__init__(framework_specific_info) self.tune_cfg = None @@ -4302,12 +4443,11 @@ def __init__(self, framework_specific_info): query_config_file = "pytorch_cpu.yaml" else: # pragma: no cover assert False, "Unsupport this device {}".format(self.device) - self.query_handler = PyTorchQuery( - local_config_file=os.path.join(os.path.dirname(__file__), query_config_file)) + self.query_handler = PyTorchQuery(local_config_file=os.path.join(os.path.dirname(__file__), query_config_file)) self.white_list = [torch.nn.Linear, torch.nn.Conv2d] # Contains parameters for algorithms such as AWQ, GPTQ, etc. 
- self.recipes = framework_specific_info['recipes'] + self.recipes = framework_specific_info["recipes"] self.optype_statistics = None @dump_elapsed_time("Pass quantize model") @@ -4323,53 +4463,49 @@ def quantize(self, tune_cfg, model, dataloader, calib_func=None): Returns: (object): quantized model """ - - assert isinstance(model._model, torch.nn.Module), \ - "The model passed in is not the instance of torch.nn.Module" + + assert isinstance(model._model, torch.nn.Module), "The model passed in is not the instance of torch.nn.Module" if self.performance_only: q_model = model else: try: q_model = copy.deepcopy(model) except Exception as e: # pragma: no cover - logger.warning("Fail to deep copy the model due to {}, inplace is used now.".format( - repr(e))) + logger.warning("Fail to deep copy the model due to {}, inplace is used now.".format(repr(e))) q_model = model # For tensorboard display self.tune_cfg = tune_cfg self.tune_cfg["approach"] = self.approach self.tune_cfg["framework"] = "pytorch" - assert self.approach=='post_training_weight_only', "Please make sure the approach is weight_only" + assert self.approach == "post_training_weight_only", "Please make sure the approach is weight_only" all_algo = set() - for key, config in tune_cfg['op'].items(): + for key, config in tune_cfg["op"].items(): op_name, op_type = key - if config['weight']['dtype'] == 'fp32': + if config["weight"]["dtype"] == "fp32": continue else: - dtype = config['weight']['dtype'] - if dtype in ['nf4', 'fp4', 'fp4_e2m1_bnb', 'fp4_e2m1']: - config['weight']['bits'] = 4 - config['weight']['scheme'] = 'sym' - elif dtype in ['int4']: - config['weight']['bits'] = 4 - elif dtype in ['int8']: - config['weight']['bits'] = 8 - algorithm = config['weight']['algorithm'] + dtype = config["weight"]["dtype"] + if dtype in ["nf4", "fp4", "fp4_e2m1_bnb", "fp4_e2m1"]: + config["weight"]["bits"] = 4 + config["weight"]["scheme"] = "sym" + elif dtype in ["int4"]: + config["weight"]["bits"] = 4 + elif dtype in ["int8"]: + config["weight"]["bits"] = 8 + algorithm = config["weight"]["algorithm"] all_algo.add(algorithm) if len(all_algo): logger.info(f"All algorithms to do: {all_algo}") - if 'GPTQ' in all_algo: - q_model._model, gptq_config = self.gptq_quantize( - q_model._model, tune_cfg, dataloader - ) + if "GPTQ" in all_algo: + q_model._model, gptq_config = self.gptq_quantize(q_model._model, tune_cfg, dataloader) q_model.gptq_config = gptq_config - if 'TEQ' in all_algo: + if "TEQ" in all_algo: q_model._model = self.teq_quantize(q_model._model, tune_cfg, dataloader, calib_func) - if 'AWQ' in all_algo: # includes RTN in AWQ + if "AWQ" in all_algo: # includes RTN in AWQ q_model._model = self.awq_quantize(q_model._model, tune_cfg, dataloader, calib_func) - if 'RTN' in all_algo: + if "RTN" in all_algo: q_model._model = self.rtn_quantize(q_model._model, tune_cfg) q_model.q_config = copy.deepcopy(self.tune_cfg) @@ -4379,38 +4515,45 @@ def quantize(self, tune_cfg, model, dataloader, calib_func=None): def rtn_quantize(self, model, tune_cfg): logger.info("quantizing with the round-to-nearest algorithm") - if 'rtn_args' in self.recipes: - sym_full_range = self.recipes['rtn_args'].get('sym_full_range', False) - mse_range = self.recipes['rtn_args'].get('mse_range', False) - else: # pragma: no cover - sym_full_range=False - mse_range=False - from .torch_utils.weight_only import rtn_quantize + if "rtn_args" in self.recipes: + sym_full_range = self.recipes["rtn_args"].get("sym_full_range", False) + mse_range = self.recipes["rtn_args"].get("mse_range", 
False) + else: # pragma: no cover + sym_full_range = False + mse_range = False from .torch_utils.util import fetch_module, set_module - for key, config in tune_cfg['op'].items(): + from .torch_utils.weight_only import rtn_quantize + + for key, config in tune_cfg["op"].items(): op_name, op_type = key - if config['weight']['dtype'] == 'fp32': + if config["weight"]["dtype"] == "fp32": continue else: - dtype = config['weight']['dtype'] - num_bits = config['weight']['bits'] - scheme = config['weight']['scheme'] - group_size = config['weight']['group_size'] - algorithm = config['weight']['algorithm'] - if algorithm != 'RTN': + dtype = config["weight"]["dtype"] + num_bits = config["weight"]["bits"] + scheme = config["weight"]["scheme"] + group_size = config["weight"]["group_size"] + algorithm = config["weight"]["algorithm"] + if algorithm != "RTN": continue m = fetch_module(model, op_name) - m = rtn_quantize(m, num_bits, group_size, scheme, - return_int=False, - data_type=dtype, - sym_full_range=sym_full_range, - mse_range=mse_range) + m = rtn_quantize( + m, + num_bits, + group_size, + scheme, + return_int=False, + data_type=dtype, + sym_full_range=sym_full_range, + mse_range=mse_range, + ) set_module(model, op_name, m) return model def gptq_quantize(self, model, tune_cfg, dataloader): logger.info("quantizing with the GPTQ algorithm") from .torch_utils.weight_only import gptq_quantize + # convert tune_cfg to gptq_quantize's weight config """please refer to weight_config which can be analyzed by user-define API function weight_only.gptq_quantize keys of weight_config can not only be specific name, but can also be a re formula @@ -4433,37 +4576,33 @@ def gptq_quantize(self, model, tune_cfg, dataloader): } """ weight_config = {} - for key, config in tune_cfg['op'].items(): + for key, config in tune_cfg["op"].items(): op_name, op_type = key - if config['weight']['dtype'] == 'fp32': - continue # no need to be quantized + if config["weight"]["dtype"] == "fp32": + continue # no need to be quantized else: weight_config[op_name] = { - 'wbits': config['weight']['bits'], - 'group_size': config['weight']['group_size'], - 'sym': config['weight']['scheme'] == 'sym', - 'percdamp': self.recipes['gptq_args'].get("percdamp", 0.01), - 'act_order': self.recipes['gptq_args'].get("act_order", False), - 'block_size': self.recipes['gptq_args'].get("block_size", True) - } - nsamples = self.recipes['gptq_args'].get("nsamples", 128) - use_max_length = self.recipes['gptq_args'].get("use_max_length", False) - # tune_cfg => weight_config + "wbits": config["weight"]["bits"], + "group_size": config["weight"]["group_size"], + "sym": config["weight"]["scheme"] == "sym", + "percdamp": self.recipes["gptq_args"].get("percdamp", 0.01), + "act_order": self.recipes["gptq_args"].get("act_order", False), + "block_size": self.recipes["gptq_args"].get("block_size", True), + } + nsamples = self.recipes["gptq_args"].get("nsamples", 128) + use_max_length = self.recipes["gptq_args"].get("use_max_length", False) + # tune_cfg => weight_config model, quantization_perm = gptq_quantize( - model, - weight_config, - dataloader, - nsamples, - use_max_length, - self.device + model, weight_config, dataloader, nsamples, use_max_length, self.device ) return model, quantization_perm def teq_quantize(self, model, tune_cfg, dataloader, calib_func): logger.info("quantizing with the TEQ algorithm") from .torch_utils.weight_only import teq_quantize + # get example inputs if not provided. 
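# --- Illustrative sketch, not part of this patch: the per-group symmetric round-to-nearest math behind
# the rtn_quantize branch above. This toy helper only mirrors the num_bits/group_size/scheme parameters
# taken from tune_cfg; it is not the library's torch_utils.weight_only.rtn_quantize implementation and
# assumes the input-channel count is divisible by group_size.
import torch


def rtn_fake_quantize(weight: torch.Tensor, num_bits: int = 4, group_size: int = 32) -> torch.Tensor:
    out_ch, in_ch = weight.shape
    w = weight.reshape(out_ch, in_ch // group_size, group_size)
    max_q = 2 ** (num_bits - 1) - 1                       # e.g. 7 for 4-bit symmetric
    scale = w.abs().amax(dim=-1, keepdim=True).clamp(min=1e-8) / max_q
    q = torch.clamp(torch.round(w / scale), -max_q - 1, max_q)
    return (q * scale).reshape(out_ch, in_ch)             # dequantized fp32 weight


w = torch.randn(8, 64)
w_q = rtn_fake_quantize(w, num_bits=4, group_size=32)
print((w - w_q).abs().max())  # per-element error stays around half a quantization step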
- if self.example_inputs is None: # pragma: no cover + if self.example_inputs is None: # pragma: no cover if dataloader is None: assert False, "Please provide dataloader or example_inputs for TEQ algorithm." try: @@ -4476,18 +4615,19 @@ def teq_quantize(self, model, tune_cfg, dataloader, calib_func): break folding = True - if 'teq_args' in self.recipes: # pragma: no cover - folding = self.recipes['teq_args'].get('folding', True) - - supported_layers = ['Linear'] - if folding: # pragma: no cover + if "teq_args" in self.recipes: # pragma: no cover + folding = self.recipes["teq_args"].get("folding", True) + + supported_layers = ["Linear"] + if folding: # pragma: no cover from .torch_utils.smooth_quant import GraphTrace + tg = GraphTrace() absorb_to_layer, _ = tg.get_absorb_to_layer(model, self.example_inputs, supported_layers) if absorb_to_layer is None or absorb_to_layer == {}: - logger.warning('No absorb layer is detected, skip TEQ algorithm') + logger.warning("No absorb layer is detected, skip TEQ algorithm") return model - else: # pragma: no cover + else: # pragma: no cover absorb_to_layer = {} for name, module in model.named_modules(): for op_type in supported_layers: @@ -4498,35 +4638,35 @@ def teq_quantize(self, model, tune_cfg, dataloader, calib_func): flipped_dict = {} for k, v in absorb_to_layer.items(): for m in v: - flipped_dict[m] = {'absorb_layer': k} + flipped_dict[m] = {"absorb_layer": k} # check tune_cfg to skip layers without TEQ config weight_config = {} skipped_op_name_set = set() - for key, config in tune_cfg['op'].items(): + for key, config in tune_cfg["op"].items(): op_name, op_type = key - if config['weight']['dtype'] == 'fp32': # pragma: no cover + if config["weight"]["dtype"] == "fp32": # pragma: no cover if op_name in flipped_dict: - absorb_to_layer.pop(flipped_dict[op_name]['absorb_layer']) + absorb_to_layer.pop(flipped_dict[op_name]["absorb_layer"]) continue else: weight_config[op_name] = {} - weight_config[op_name]['bits'] = config['weight']['bits'] - weight_config[op_name]['group_size'] = config['weight']['group_size'] - weight_config[op_name]['scheme'] = config['weight']['scheme'] + weight_config[op_name]["bits"] = config["weight"]["bits"] + weight_config[op_name]["group_size"] = config["weight"]["group_size"] + weight_config[op_name]["scheme"] = config["weight"]["scheme"] if op_name in flipped_dict: - algorithm = config['weight']['algorithm'] - if algorithm != 'TEQ': - absorb_to_layer.pop(weight_config[op_name]['absorb_layer']) + algorithm = config["weight"]["algorithm"] + if algorithm != "TEQ": + absorb_to_layer.pop(weight_config[op_name]["absorb_layer"]) else: skipped_op_name_set.add(op_name) - if skipped_op_name_set: # pragma: no cover + if skipped_op_name_set: # pragma: no cover logger.info("{} is skipped by TEQ algorithm".format(skipped_op_name_set)) # collect TEQ config from tune_cfg for quantization. 
- if len(absorb_to_layer) == 0: # pragma: no cover - logger.warning('No absorb layer needs TEQ algorithim, skip it') - else: # pragma: no cover + if len(absorb_to_layer) == 0: # pragma: no cover + logger.warning("No absorb layer needs TEQ algorithim, skip it") + else: # pragma: no cover logger.debug("**absorb layer**: **absorbed layers**") for k, v in absorb_to_layer.items(): logger.debug(f"{k}: {v}") @@ -4542,53 +4682,55 @@ def teq_quantize(self, model, tune_cfg, dataloader, calib_func): extra_config, dataloader, example_inputs=self.example_inputs, - calib_func=calib_func + calib_func=calib_func, ) return model - + def awq_quantize(self, model, tune_cfg, dataloader, calib_func): logger.info("quantizing with the AWQ algorithm") from .torch_utils.weight_only import awq_quantize + # get example inputs if not provided. if self.example_inputs is None: from neural_compressor.adaptor.torch_utils.util import get_example_input + assert dataloader is not None, "datalaoder or example_inputs is required." self.example_inputs = get_example_input(dataloader) # build weight_config weight_config = {} - for key, config in tune_cfg['op'].items(): + for key, config in tune_cfg["op"].items(): op_name, op_type = key - if config['weight']['dtype'] == 'fp32': + if config["weight"]["dtype"] == "fp32": weight_config[op_name] = { - 'bits': -1, # skip quantization - 'group_size': 128, - 'scheme': 'asym', - 'algorithm': 'RTN', + "bits": -1, # skip quantization + "group_size": 128, + "scheme": "asym", + "algorithm": "RTN", } else: - weight_config[op_name] = config['weight'] + weight_config[op_name] = config["weight"] - if 'awq_args' in self.recipes: - auto_scale = self.recipes['awq_args'].get('auto_scale', True) - mse_range = self.recipes['awq_args'].get('mse_range', True) - folding = self.recipes['awq_args'].get('folding', False) + if "awq_args" in self.recipes: + auto_scale = self.recipes["awq_args"].get("auto_scale", True) + mse_range = self.recipes["awq_args"].get("mse_range", True) + folding = self.recipes["awq_args"].get("folding", False) else: auto_scale, mse_range, folding = True, True, False - if 'rtn_args' in self.recipes: - sym_full_range = self.recipes['rtn_args'].get('sym_full_range', False) - return_int = self.recipes['rtn_args'].get('return_int', False) + if "rtn_args" in self.recipes: + sym_full_range = self.recipes["rtn_args"].get("sym_full_range", False) + return_int = self.recipes["rtn_args"].get("return_int", False) else: sym_full_range, return_int = False, False - calib_sampling_size = tune_cfg.get('calib_sampling_size', 1) + calib_sampling_size = tune_cfg.get("calib_sampling_size", 1) model = awq_quantize( - model, - bits=-1, # no quantize for op not in weight_config + model, + bits=-1, # no quantize for op not in weight_config example_inputs=self.example_inputs, - weight_config=weight_config, + weight_config=weight_config, dataloader=dataloader, n_samples=calib_sampling_size, - auto_scale=auto_scale, + auto_scale=auto_scale, mse_range=mse_range, calib_func=calib_func, folding=folding, @@ -4599,6 +4741,7 @@ def awq_quantize(self, model, tune_cfg, dataloader, calib_func): def _dump_model_op_stats(self, model, tune_cfg): """This is a function to dump quantizable ops of model to user. 
+ Args: model (object): input model tune_cfg (dict): quantization config @@ -4608,28 +4751,28 @@ def _dump_model_op_stats(self, model, tune_cfg): res = {} # collect all dtype info and build empty results with existing op_type dtype_set = set() - for op, config in tune_cfg['op'].items(): + for op, config in tune_cfg["op"].items(): op_type = op[1] - if not config['weight']['dtype'] == 'fp32': - num_bits = config['weight']['bits'] - group_size = config['weight']['group_size'] + if not config["weight"]["dtype"] == "fp32": + num_bits = config["weight"]["bits"] + group_size = config["weight"]["group_size"] dtype_str = "A32W{}G{}".format(num_bits, group_size) dtype_set.add(dtype_str) - dtype_set.add('FP32') + dtype_set.add("FP32") dtype_list = list(dtype_set) dtype_list.sort() - for op, config in tune_cfg['op'].items(): + for op, config in tune_cfg["op"].items(): op_type = op[1] if op_type not in res.keys(): res[op_type] = {dtype: 0 for dtype in dtype_list} # fill in results with op_type and dtype - for op, config in tune_cfg['op'].items(): - if config['weight']['dtype'] == 'fp32': - res[op_type]['FP32'] += 1 + for op, config in tune_cfg["op"].items(): + if config["weight"]["dtype"] == "fp32": + res[op_type]["FP32"] += 1 else: - num_bits = config['weight']['bits'] - group_size = config['weight']['group_size'] + num_bits = config["weight"]["bits"] + group_size = config["weight"]["group_size"] dtype_str = "A32W{}G{}".format(num_bits, group_size) res[op_type][dtype_str] += 1 @@ -4642,9 +4785,7 @@ def _dump_model_op_stats(self, model, tune_cfg): field_results.extend([res[op_type][dtype] for dtype in dtype_list]) output_data.append(field_results) - Statistics(output_data, - header='Mixed Precision Statistics', - field_names=field_names).print_stat() + Statistics(output_data, header="Mixed Precision Statistics", field_names=field_names).print_stat() self.optype_statistics = field_names, output_data def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops): @@ -4678,6 +4819,7 @@ def query_fw_capability(self, model): self.pre_optimized_model = model return self._get_quantizable_ops(model.model) + class PyTorchQuery(QueryBackendCapability): def __init__(self, local_config_file=None): super().__init__() @@ -4699,9 +4841,9 @@ def _get_specified_version_cfg(self, data): """ # default_config = None for sub_data in data: - if sub_data['version']['name'] == 'default': + if sub_data["version"]["name"] == "default": return sub_data - sub_data_version = Version(sub_data['version']['name']) + sub_data_version = Version(sub_data["version"]["name"]) if self.version >= sub_data_version: return sub_data @@ -4713,16 +4855,18 @@ def _one_shot_query(self): except Exception as e: # pragma: no cover logger.info("Fail to parse {} due to {}".format(self.cfg, str(e))) self.cur_config = None - raise ValueError("Please check if the format of {} follows " - "Neural Compressor yaml scheme.".format(self.cfg)) + raise ValueError( + "Please check if the format of {} follows " "Neural Compressor yaml scheme.".format(self.cfg) + ) self._update_cfg_with_usr_definition() def _update_cfg_with_usr_definition(self): from neural_compressor.conf.pythonic_config import pytorch_config + if pytorch_config.precisions is not None: - self.cur_config['precisions']['names'] = ','.join(pytorch_config.precisions) + self.cur_config["precisions"]["names"] = ",".join(pytorch_config.precisions) - def get_quantization_capability(self, datatype='int8'): + def get_quantization_capability(self, datatype="int8"): """Get the supported op types' 
quantization capability. Args: @@ -4732,8 +4876,9 @@ def get_quantization_capability(self, datatype='int8'): [dictionary list]: A list composed of dictionary which key is precision and value is a dict that describes all op types' quantization capability. """ - assert datatype in self.get_quant_datatypes(), \ - f"The target data type should be one of {self.get_quant_datatypes()}" + assert ( + datatype in self.get_quant_datatypes() + ), f"The target data type should be one of {self.get_quant_datatypes()}" return self.cur_config[datatype] def get_quant_datatypes(self): @@ -4744,12 +4889,13 @@ def get_quant_datatypes(self): # TODO to handle other data types such FP8, FP8E4M3 datatype_lst = [] for key in self.cur_config: - if key.startswith('int') or key == 'weight_only_integer': + if key.startswith("int") or key == "weight_only_integer": datatype_lst.append(key) return datatype_lst def get_op_types(self): """Get the supported op types by all precisions. + Returns: [dictionary list]: A list composed of dictionary which key is precision and value is the op types. diff --git a/neural_compressor/adaptor/pytorch_cpu.yaml b/neural_compressor/adaptor/pytorch_cpu.yaml index 9ecd3e0d4ff..4ff09a1dcce 100644 --- a/neural_compressor/adaptor/pytorch_cpu.yaml +++ b/neural_compressor/adaptor/pytorch_cpu.yaml @@ -19,7 +19,7 @@ name: '1.11' bf16: ['Linear', 'bmm', 'mm', 'baddbmm', 'addmm', 'addbmm', - '_convolution', 'LSTM', 'LSTMCell', 'GRU', 'GRUCell'] + '_convolution', 'LSTM', 'LSTMCell', 'GRU', 'GRUCell'] fp32: ['*'] # `*` means all op types. int8: &1_11_capabilities { 'static': &cap_s8_1_11 { @@ -280,7 +280,7 @@ version: name: '1.10' - bf16: [] + bf16: [] fp32: ['*'] # `*` means all op types. int8: &1_10_capabilities { 'static': &cap_s8_1_10 { @@ -411,7 +411,7 @@ - version: name: '1.3' - + bf16: [] fp32: ['*'] # '*' means all op types int8: &1_3_capabilities { diff --git a/neural_compressor/adaptor/pytorch_gpu.yaml b/neural_compressor/adaptor/pytorch_gpu.yaml index 07d37073f42..951b42fc059 100644 --- a/neural_compressor/adaptor/pytorch_gpu.yaml +++ b/neural_compressor/adaptor/pytorch_gpu.yaml @@ -27,7 +27,7 @@ 'LinearReLU', 'ConvBn2d', 'ConvBnReLU2d'] uint8: *ops_s8_default fp32: ['*'] # '*' means all op types - + capabilities: &default_capabilities int8: &cap_s8_default { 'Conv1d': &cap_s8_default_Conv1d { diff --git a/neural_compressor/adaptor/pytorch_ipex.yaml b/neural_compressor/adaptor/pytorch_ipex.yaml index 640609ac296..c6777bfe7b5 100644 --- a/neural_compressor/adaptor/pytorch_ipex.yaml +++ b/neural_compressor/adaptor/pytorch_ipex.yaml @@ -149,6 +149,3 @@ 'quant_aware':{} } uint8: *cap_default_s8 - - - diff --git a/neural_compressor/adaptor/query.py b/neural_compressor/adaptor/query.py index 714ce536ffa..4617d0270b7 100644 --- a/neural_compressor/adaptor/query.py +++ b/neural_compressor/adaptor/query.py @@ -14,46 +14,46 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import logging from abc import abstractmethod -import logging -class QueryBackendCapability(): +class QueryBackendCapability: """Base class that defines Query Interface. - Each adaption layer should implement the inherited class for specific backend on their own. + + Each adaption layer should implement the inherited class for specific backend on their own. """ + def __init__(self): pass @abstractmethod def get_version(self): - """Get the current backend's version string. 
- """ + """Get the current backend's version string.""" raise NotImplementedError @abstractmethod def get_precisions(self): - """Get the supported low precisions, e.g ['int8', 'bf16'] - """ + """Get the supported low precisions, e.g ['int8', 'bf16']""" raise NotImplementedError @abstractmethod def get_op_types(self): """Get the op types for specific backend per low precision. - e.g {'2.3.0': {'int8': ['Conv2D', 'MatMuL']}} + + e.g {'2.3.0': {'int8': ['Conv2D', 'MatMuL']}} """ raise NotImplementedError @abstractmethod def get_fuse_patterns(self): - """Get the fusion patterns for specified op type for every specific precision - - """ + """Get the fusion patterns for specified op type for every specific precision.""" raise NotImplementedError @abstractmethod def set_quantization_config(self, q_config): """Set the quantization config to backend. + Args: q_config (yaml content?): set the organized quantization configuration to backend. """ @@ -62,14 +62,15 @@ def set_quantization_config(self, q_config): @abstractmethod def get_quantization_capability(self): """Get the quantization capability of low precision op types. - e.g, granularity, scheme and etc. + e.g, granularity, scheme and etc. """ raise NotImplementedError @abstractmethod def get_mixed_precision_combination(self, unsupported_precisions): """Get the valid precision combination base on hardware and user' config. - e.g['fp32', 'bf16', 'int8'] + + e.g['fp32', 'bf16', 'int8'] """ raise NotImplementedError diff --git a/neural_compressor/adaptor/tensorflow.py b/neural_compressor/adaptor/tensorflow.py index fb412390e1f..77042c73171 100644 --- a/neural_compressor/adaptor/tensorflow.py +++ b/neural_compressor/adaptor/tensorflow.py @@ -16,23 +16,36 @@ # limitations under the License. """Tensorflow Adaptor Classes.""" -import os import copy -import yaml import math -import numpy as np +import os from collections import OrderedDict, UserDict -from .query import QueryBackendCapability -from .adaptor import adaptor_registry, Adaptor -from ..utils.utility import LazyImport, CpuInfo, singleton, Dequantize, dump_elapsed_time -from ..utils.utility import Statistics, GLOBAL_STATE, MODE -from ..utils.utility import version1_lt_version2, version1_gte_version2, version1_eq_version2 -from ..utils import logger + +import numpy as np +import yaml + from ..conf.dotdict import deep_get from ..data.dataloaders.base_dataloader import BaseDataLoader +from ..utils import logger +from ..utils.utility import ( + GLOBAL_STATE, + MODE, + CpuInfo, + Dequantize, + LazyImport, + Statistics, + dump_elapsed_time, + singleton, + version1_eq_version2, + version1_gte_version2, + version1_lt_version2, +) +from .adaptor import Adaptor, adaptor_registry +from .query import QueryBackendCapability + +tensorflow = LazyImport("tensorflow") +spr_base_verions = ("2.11.0202242", "2.11.0202250", "2.11.0202317", "2.11.0202323") -tensorflow = LazyImport('tensorflow') -spr_base_verions = ('2.11.0202242', '2.11.0202250', '2.11.0202317', '2.11.0202323') @adaptor_registry class TensorFlowAdaptor(Adaptor): @@ -53,8 +66,9 @@ class TensorFlowAdaptor(Adaptor): "BatchMatMulV2": "matmul", "Pad": "pad", "Conv2DBackpropInput": "deconv2d", - "Conv3DBackpropInputV2": "deconv3d" + "Conv3DBackpropInputV2": "deconv3d", } + def __init__(self, framework_specific_info): """Initialization. 
@@ -65,16 +79,16 @@ def __init__(self, framework_specific_info): os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" os.environ["CUDA_VISIBLE_DEVICES"] = "-1" - self.quantize_config = {'op_wise_config': {}} + self.quantize_config = {"op_wise_config": {}} self.framework_specific_info = framework_specific_info - self.approach = deep_get(self.framework_specific_info, 'approach', False) - self.device = self.framework_specific_info['device'] - self.work_dir = os.path.abspath(self.framework_specific_info['workspace_path']) - self.recipes = deep_get(self.framework_specific_info, 'recipes', {}) - self.performance_only = deep_get(self.framework_specific_info, 'performance_only', False) - self.use_bf16 = deep_get(self.framework_specific_info, 'use_bf16', False) - self.backend = self.framework_specific_info['backend'] - self.format = self.framework_specific_info['format'] + self.approach = deep_get(self.framework_specific_info, "approach", False) + self.device = self.framework_specific_info["device"] + self.work_dir = os.path.abspath(self.framework_specific_info["workspace_path"]) + self.recipes = deep_get(self.framework_specific_info, "recipes", {}) + self.performance_only = deep_get(self.framework_specific_info, "performance_only", False) + self.use_bf16 = deep_get(self.framework_specific_info, "use_bf16", False) + self.backend = self.framework_specific_info["backend"] + self.format = self.framework_specific_info["format"] os.makedirs(self.work_dir, exist_ok=True) self.model = None @@ -84,28 +98,30 @@ def __init__(self, framework_specific_info): self.bf16_ops = [] self.fp32_ops = [] self.smooth_quant_mul_ops = [] - self.dump_times = 0 # for tensorboard + self.dump_times = 0 # for tensorboard + + cfg_yaml_name = "{}.yaml".format(self.__class__.__name__[: -len("Adaptor")].lower()) + self.itex_mode = self.backend == "itex" or cfg_yaml_name == "tensorflow_itex.yaml" - cfg_yaml_name = "{}.yaml".format(self.__class__.__name__[:-len('Adaptor')].lower()) - self.itex_mode = self.backend == 'itex' or cfg_yaml_name == 'tensorflow_itex.yaml' - if self.itex_mode: self._check_itex() - - self.query_handler = TensorflowQuery(local_config_file=os.path.join( - os.path.dirname(__file__), cfg_yaml_name), + + self.query_handler = TensorflowQuery( + local_config_file=os.path.join(os.path.dirname(__file__), cfg_yaml_name), performance_only=self.performance_only, - itex_mode=self.itex_mode) + itex_mode=self.itex_mode, + ) - from pkg_resources import parse_version import tensorflow as tf + from pkg_resources import parse_version + self.new_api = tf.version.VERSION in spr_base_verions - self.qdq_enabled = self.itex_mode or self.format == 'QDQ' or self.new_api + self.qdq_enabled = self.itex_mode or self.format == "QDQ" or self.new_api self.op_wise_sequences = self.query_handler.get_eightbit_patterns(self.qdq_enabled) self.fp32_results = [] self.fp32_preds_as_label = False - self.benchmark = (GLOBAL_STATE.STATE == MODE.BENCHMARK) + self.benchmark = GLOBAL_STATE.STATE == MODE.BENCHMARK self.callbacks = [] self.optype_statistics = None @@ -117,12 +133,15 @@ def _check_itex(self): try: import intel_extension_for_tensorflow except: - raise ImportError("The Intel® Extension for TensorFlow is not installed. "\ - "Please install it to run models on ITEX backend") + raise ImportError( + "The Intel® Extension for TensorFlow is not installed. 
" + "Please install it to run models on ITEX backend" + ) def _log_histogram(self, writer, tag, values, step=0, bins=1000): """Writes a histogram for later analysis.""" import tensorflow as tf + # Convert to a numpy array values = np.array(values) @@ -135,12 +154,12 @@ def _log_histogram(self, writer, tag, values, step=0, bins=1000): def _pre_hook_for_hvd(self, dataloader=None): """Pre hook for Horovod.""" import horovod.tensorflow as hvd + self.hvd = hvd self.hvd.init() @dump_elapsed_time(customized_msg="Model training") - def train(self, model, dataloader, optimizer_tuple, - criterion_tuple, hooks, postprocess, **kwargs): + def train(self, model, dataloader, optimizer_tuple, criterion_tuple, hooks, postprocess, **kwargs): """Model training API. Args: @@ -157,85 +176,95 @@ def train(self, model, dataloader, optimizer_tuple, """ # check model is savedmodel or not import tensorflow as tf + from neural_compressor.model.tensorflow_model import get_model_type + tf.random.set_seed(1) self.model_type = get_model_type(model._model) optimizer = optimizer_tuple[0](**optimizer_tuple[1]) criterion = criterion_tuple[0](**criterion_tuple[1]) - start_epochs = kwargs['kwargs'].get('start_epoch', None) - end_epochs = kwargs['kwargs'].get('end_epoch', None) - epochs = kwargs['kwargs'].get('epoch', None) - iters = kwargs['kwargs'].get('iteration', None) - callbacks = kwargs['kwargs'].get('callbacks', None) - execution_mode = kwargs['kwargs'].get('execution_mode', None) - distributed = getattr(dataloader, 'distributed', False) + start_epochs = kwargs["kwargs"].get("start_epoch", None) + end_epochs = kwargs["kwargs"].get("end_epoch", None) + epochs = kwargs["kwargs"].get("epoch", None) + iters = kwargs["kwargs"].get("iteration", None) + callbacks = kwargs["kwargs"].get("callbacks", None) + execution_mode = kwargs["kwargs"].get("execution_mode", None) + distributed = getattr(dataloader, "distributed", False) from neural_compressor.experimental.common.criterion import TensorflowKnowledgeDistillationLoss + if isinstance(criterion, TensorflowKnowledgeDistillationLoss): input_model = model._model else: input_model = tf.keras.models.load_model(model._model) - hooks = callbacks['tf_pruning'](model, input_model, hooks) - hooks['on_train_begin']() # on_train_begin hook + hooks = callbacks["tf_pruning"](model, input_model, hooks) + hooks["on_train_begin"]() # on_train_begin hook train_loss_results = [] if distributed: try: len_dataloader = len(dataloader) except: - logger.info("The length of the distributed training dataloader is unknown." - "When the iteration of training dataloader in each process is " - "inconsistent, an error may occur.") + logger.info( + "The length of the distributed training dataloader is unknown." + "When the iteration of training dataloader in each process is " + "inconsistent, an error may occur." + ) else: list_len_dataloader = self.hvd.allgather_object(len_dataloader) if self.hvd.rank() == 0: - for i in range(len(list_len_dataloader)-1): - if list_len_dataloader[i] != list_len_dataloader[i+1]: - raise AttributeError("The traning dataloader's iteration is" - "different between processes, please reset dataloader's batch_size.") + for i in range(len(list_len_dataloader) - 1): + if list_len_dataloader[i] != list_len_dataloader[i + 1]: + raise AttributeError( + "The traning dataloader's iteration is" + "different between processes, please reset dataloader's batch_size." 
+ ) def training_step(x, y, first_batch): with tf.GradientTape() as tape: tape.watch(input_model.trainable_variables) y_ = input_model(x, training=True) loss_value = criterion(y, y_) - loss_value = hooks['on_after_compute_loss'](x, y_, loss_value) + loss_value = hooks["on_after_compute_loss"](x, y_, loss_value) tape = self.hvd.DistributedGradientTape(tape) if distributed else tape # Get gradient - grads = tape.gradient(loss_value, input_model.trainable_variables) # pylint: disable=no-member + grads = tape.gradient(loss_value, input_model.trainable_variables) # pylint: disable=no-member # Optimize the model - optimizer.apply_gradients(zip(grads, input_model.trainable_variables)) # pylint: disable=no-member + optimizer.apply_gradients(zip(grads, input_model.trainable_variables)) # pylint: disable=no-member if distributed and first_batch: self.hvd.broadcast_variables(input_model.variables, root_rank=0) self.hvd.broadcast_variables(optimizer.variables(), root_rank=0) return loss_value - training_step = training_step if execution_mode=='eager' else tf.function(training_step) + training_step = training_step if execution_mode == "eager" else tf.function(training_step) if start_epochs is not None and end_epochs is not None: epochs = end_epochs - start_epochs for epoch in range(epochs): cnt = 0 epoch_loss_avg = tf.keras.metrics.Mean() - hooks['on_epoch_begin'](epoch) # on_epoch_begin hook + hooks["on_epoch_begin"](epoch) # on_epoch_begin hook # Training loop for iter, data in enumerate(dataloader): x, y = postprocess(data) if postprocess is not None else data - hooks['on_step_begin'](iter) # on_step_begin hook + hooks["on_step_begin"](iter) # on_step_begin hook cnt += 1 - loss_value = training_step(x, y, iter==0) + loss_value = training_step(x, y, iter == 0) # Track progress epoch_loss_avg.update_state(loss_value) # Add current batch loss - hooks['on_step_end']() # on_step_end hook + hooks["on_step_end"]() # on_step_end hook if iters is not None and cnt >= iters: break model._sess = None - hooks['on_epoch_end']() # on_epoch_end hook + hooks["on_epoch_end"]() # on_epoch_end hook # End epoch train_loss_results.append(epoch_loss_avg.result()) if distributed: - logger.info("Epoch-{:03d} training on rank {!s} have been done." \ - .format(epoch+1, self.hvd.allgather_object(self.hvd.rank()))) - logger.info("Epoch {:03d}: Loss: {:.3f}".format(epoch+1, epoch_loss_avg.result())) - - hooks['on_train_end']() # on_train_end hook + logger.info( + "Epoch-{:03d} training on rank {!s} have been done.".format( + epoch + 1, self.hvd.allgather_object(self.hvd.rank()) + ) + ) + logger.info("Epoch {:03d}: Loss: {:.3f}".format(epoch + 1, epoch_loss_avg.result())) + + hooks["on_train_end"]() # on_train_end hook model._sess = None if not isinstance(criterion, TensorflowKnowledgeDistillationLoss): if distributed: @@ -243,17 +272,24 @@ def training_step(x, y, first_batch): # Update the input model with pruned weights manually due to keras API limitation. 
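# A standalone sketch of the Horovod-distributed training step used above:
# the gradient tape is wrapped with DistributedGradientTape so gradients are
# averaged across ranks, and variables are broadcast from rank 0 once after
# the first batch so every worker starts from identical state. The model,
# optimizer, and loss here are placeholders, not Neural Compressor objects.
import tensorflow as tf
import horovod.tensorflow as hvd

hvd.init()
model = tf.keras.Sequential([tf.keras.layers.Dense(10)])
optimizer = tf.keras.optimizers.SGD(0.01)
criterion = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

@tf.function
def training_step(x, y, first_batch):
    with tf.GradientTape() as tape:
        loss_value = criterion(y, model(x, training=True))
    tape = hvd.DistributedGradientTape(tape)   # average gradients across ranks
    grads = tape.gradient(loss_value, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    if first_batch:                            # one-time sync of initial state
        hvd.broadcast_variables(model.variables, root_rank=0)
        hvd.broadcast_variables(optimizer.variables(), root_rank=0)
    return loss_value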
input_model.save(model._model) rank_list = self.hvd.allgather_object(self.hvd.rank()) - logger.info(f"rank 0 has saved the pruned model to '{model._model}'," - f"all ranks {rank_list} ready.") + logger.info(f"rank 0 has saved the pruned model to '{model._model}'," f"all ranks {rank_list} ready.") else: input_model.save(model._model) else: - input_model.save('distillation_model') + input_model.save("distillation_model") @dump_elapsed_time(customized_msg="Model inference") - def evaluate(self, model, dataloader, postprocess=None, - metrics=None, measurer=None, iteration=-1, - tensorboard=False, fp32_baseline=False): + def evaluate( + self, + model, + dataloader, + postprocess=None, + metrics=None, + measurer=None, + iteration=-1, + tensorboard=False, + fp32_baseline=False, + ): """Evaluate the model for specified metric on validation dataset. Args: @@ -271,11 +307,14 @@ def evaluate(self, model, dataloader, postprocess=None, [float]: evaluation result, the larger is better. """ import tensorflow as tf + from .tf_utils.util import iterator_sess_run + outputs = model.output_tensor_names - if getattr(dataloader, 'distributed', False): + if getattr(dataloader, "distributed", False): import horovod.tensorflow as hvd + hvd.init() # If metric.hvd is not None then run distributed inference for metric in metrics: @@ -283,26 +322,46 @@ def evaluate(self, model, dataloader, postprocess=None, try: len_dataloader = len(dataloader) except: - logger.info("The length of the distributed evaluation dataloader is unknown." - "When the iteration of evaluation dataloader in each process is " - "inconsistent, an error may occur.") + logger.info( + "The length of the distributed evaluation dataloader is unknown." + "When the iteration of evaluation dataloader in each process is " + "inconsistent, an error may occur." + ) else: list_len_dataloader = hvd.allgather_object(len_dataloader) if hvd.rank() == 0: - for i in range(len(list_len_dataloader)-1): - if list_len_dataloader[i] != list_len_dataloader[i+1]: - raise AttributeError("The evaluation dataloader's iteration is" - "different between processes, please reset dataloader's batch_size.") - logger.info("Rank {!s} dataloaders' data distribution balance check for evaluation have been finnished." \ - .format(hvd.allgather_object(hvd.rank()))) + for i in range(len(list_len_dataloader) - 1): + if list_len_dataloader[i] != list_len_dataloader[i + 1]: + raise AttributeError( + "The evaluation dataloader's iteration is" + "different between processes, please reset dataloader's batch_size." 
+ ) + logger.info( + "Rank {!s} dataloaders' data distribution balance check for evaluation have been finnished.".format( + hvd.allgather_object(hvd.rank()) + ) + ) if tensorboard: - from .tf_utils.graph_util import GraphAnalyzer from tensorflow.python.framework import tensor_util + from .tf_utils.graph_util import GraphAnalyzer + output_postfix = "_fp32.output" - inspect_node_types = ["Conv2D", "DepthwiseConv2dNative", "MaxPool", "AvgPool", - "ConcatV2", "MatMul", "FusedBatchNormV3", "FusedBatchNorm", "BiasAdd", - "_MklFusedInstanceNorm", "Relu", "Relu6", "Dequantize"] + inspect_node_types = [ + "Conv2D", + "DepthwiseConv2dNative", + "MaxPool", + "AvgPool", + "ConcatV2", + "MatMul", + "FusedBatchNormV3", + "FusedBatchNorm", + "BiasAdd", + "_MklFusedInstanceNorm", + "Relu", + "Relu6", + "Dequantize", + ] fp32_inspect_node_name = [] int8_inspect_node_name = [] q_node_scale = {} @@ -312,11 +371,12 @@ def evaluate(self, model, dataloader, postprocess=None, temp_dir = "./runs/eval/tune_" + str(self.dump_times) if os.path.isdir(temp_dir): import shutil + shutil.rmtree(temp_dir, ignore_errors=True) # Create the writer using TF2.x APIs to handle eager excutions - writer = tf.summary.create_file_writer(temp_dir) # pylint: disable=no-member + writer = tf.summary.create_file_writer(temp_dir) # pylint: disable=no-member with writer.as_default(): - tf.summary.graph(model.graph) # pylint: disable=no-member + tf.summary.graph(model.graph) # pylint: disable=no-member cur_graph = GraphAnalyzer() cur_graph.graph = model.graph_def @@ -341,18 +401,20 @@ def evaluate(self, model, dataloader, postprocess=None, if node.op.find("Sum") != -1: out_min = -5 out_max = -4 - q_out_min = graph_info[node.input[out_min] - ].node.attr["value"].tensor.float_val[0] - q_out_max = graph_info[node.input[out_max] - ].node.attr["value"].tensor.float_val[0] + q_out_min = graph_info[node.input[out_min]].node.attr["value"].tensor.float_val[0] + q_out_max = graph_info[node.input[out_max]].node.attr["value"].tensor.float_val[0] q_node_scale[node.name] = (node.op, q_out_min, q_out_max) int8_inspect_node_name.append(node.name) # Inspect weights, bias. 
Need further optimize - if node.op == "Const" and graph_info[graph_info[node.name].outputs[0]].node.op \ - in ["Conv2D", "DepthwiseConv2dNative", "MatMul", - "FusedBatchNormV3", "_MklFusedInstanceNorm", "BiasAdd"]: - const_value = tensor_util.MakeNdarray(node.attr.get( - 'value').tensor).astype(np.float32) + if node.op == "Const" and graph_info[graph_info[node.name].outputs[0]].node.op in [ + "Conv2D", + "DepthwiseConv2dNative", + "MatMul", + "FusedBatchNormV3", + "_MklFusedInstanceNorm", + "BiasAdd", + ]: + const_value = tensor_util.MakeNdarray(node.attr.get("value").tensor).astype(np.float32) self._log_histogram(writer, node.name, const_value) outputs.extend(fp32_inspect_node_name) @@ -363,14 +425,14 @@ def evaluate(self, model, dataloader, postprocess=None, if metrics: for metric in metrics: metric.reset() - self.fp32_preds_as_label = any([hasattr(metric, "compare_label") and \ - not metric.compare_label for metric in metrics]) + self.fp32_preds_as_label = any( + [hasattr(metric, "compare_label") and not metric.compare_label for metric in metrics] + ) origin_output_tensor_names = model.output_tensor_names model.output_tensor_names = outputs input_tensor = model.input_tensor - output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ - model.output_tensor[0] + output_tensor = model.output_tensor if len(model.output_tensor) > 1 else model.output_tensor[0] logger.info("Start to evaluate the TensorFlow model.") def eval_func(dataloader): @@ -379,8 +441,7 @@ def eval_func(dataloader): # dataloader should keep the order and len of inputs same with input_tensor if len(input_tensor) == 1: feed_dict = {} - if isinstance(inputs, dict) or isinstance(inputs, OrderedDict) \ - or isinstance(inputs, UserDict): + if isinstance(inputs, dict) or isinstance(inputs, OrderedDict) or isinstance(inputs, UserDict): for name in inputs: for tensor in input_tensor: pos = tensor.name.rfind(":") @@ -391,11 +452,9 @@ def eval_func(dataloader): else: feed_dict = {input_tensor[0]: inputs} # get raw tensor using index [0] else: - assert len(input_tensor) == len(inputs), \ - 'inputs len must equal with input_tensor' + assert len(input_tensor) == len(inputs), "inputs len must equal with input_tensor" feed_dict = {} - if isinstance(inputs, dict) or isinstance(inputs, OrderedDict) \ - or isinstance(inputs, UserDict): + if isinstance(inputs, dict) or isinstance(inputs, OrderedDict) or isinstance(inputs, UserDict): for name in inputs: for tensor in input_tensor: pos = tensor.name.rfind(":") @@ -407,8 +466,9 @@ def eval_func(dataloader): feed_dict = dict(zip(input_tensor, inputs)) if model.iter_op: - predictions = iterator_sess_run(model.sess, model.iter_op, \ - feed_dict, output_tensor, iteration, measurer) + predictions = iterator_sess_run( + model.sess, model.iter_op, feed_dict, output_tensor, iteration, measurer + ) elif measurer is not None: measurer.start() predictions = model.sess.run(output_tensor, feed_dict) @@ -417,8 +477,7 @@ def eval_func(dataloader): predictions = model.sess.run(output_tensor, feed_dict) if self.fp32_preds_as_label: - self.fp32_results.append(predictions) if fp32_baseline else \ - results.append(predictions) + self.fp32_results.append(predictions) if fp32_baseline else results.append(predictions) # Inspect node output, just get 1st iteration output tensors for now if idx == 0 and tensorboard: @@ -426,20 +485,20 @@ def eval_func(dataloader): tensor = predictions[index] if node_name in int8_inspect_node_name: tensor = Dequantize(predictions[index], q_node_scale[node_name]) - 
self._log_histogram(writer, node_name + output_postfix, tensor.astype( - np.float32), idx) + self._log_histogram(writer, node_name + output_postfix, tensor.astype(np.float32), idx) writer.close() if isinstance(predictions, list): if len(origin_output_tensor_names) == 1: predictions = predictions[0] elif len(origin_output_tensor_names) > 1: - predictions = predictions[:len(origin_output_tensor_names)] + predictions = predictions[: len(origin_output_tensor_names)] if postprocess is not None: predictions, labels = postprocess((predictions, labels)) if metrics: for metric in metrics: - if not hasattr(metric, "compare_label") or \ - (hasattr(metric, "compare_label") and metric.compare_label): + if not hasattr(metric, "compare_label") or ( + hasattr(metric, "compare_label") and metric.compare_label + ): metric.update(predictions, labels) if idx + 1 == iteration: break @@ -449,9 +508,7 @@ def eval_func(dataloader): try: results = eval_func(dataloader) except Exception: # pragma: no cover - logger.warning( - "Fail to forward with batch size={}, set to {} now.". - format(dataloader.batch_size, 1)) + logger.warning("Fail to forward with batch size={}, set to {} now.".format(dataloader.batch_size, 1)) dataloader.batch(1) results = eval_func(dataloader) else: # pragma: no cover @@ -459,6 +516,7 @@ def eval_func(dataloader): if self.fp32_preds_as_label: from .tf_utils.util import collate_tf_preds + if fp32_baseline: results = collate_tf_preds(self.fp32_results) reference = results @@ -475,6 +533,7 @@ def eval_func(dataloader): writer.close() if os.path.isdir(new_dir): import shutil + shutil.rmtree(new_dir, ignore_errors=True) os.rename(temp_dir, new_dir) self.dump_times += 1 @@ -487,48 +546,43 @@ def _tuning_cfg_to_fw(self, tuning_cfg): Args: tuning_cfg (dict): configuration for quantization. 
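# A hedged sketch of the tuning_cfg structure consumed by _tuning_cfg_to_fw
# below and the op_wise_config tuples it is translated into. The tuple layout
# (is_perchannel, algorithm, is_asymmetric, weight_bit) follows the assignments
# in that method; node names and dtype values are illustrative assumptions.
tuning_cfg = {
    "calib_iteration": 10,
    "calib_sampling_size": 100,
    "op": {
        ("resnet50/conv1", "conv2d"): {
            "weight": {"dtype": "int8", "granularity": "per_channel"},
            "activation": {"dtype": "uint8", "algorithm": "minmax", "scheme": "sym"},
        },
        ("resnet50/fc", "matmul"): {
            "activation": {"dtype": "fp32"},   # dispatched to fp32_ops instead
        },
    },
}

op_wise_config, fp32_ops, bf16_ops = {}, [], []
for (name, _), cfg in tuning_cfg["op"].items():
    act = cfg["activation"]
    if act["dtype"] in ("fp32", "bf16"):
        (fp32_ops if act["dtype"] == "fp32" else bf16_ops).append(name)
        continue
    is_perchannel = cfg.get("weight", {}).get("granularity") == "per_channel"
    is_asymmetric = act.get("scheme") == "asym"
    op_wise_config[name] = (is_perchannel, act["algorithm"], is_asymmetric, 7.0)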
""" - self.quantize_config['calib_iteration'] = tuning_cfg['calib_iteration'] - self.quantize_config['device'] = self.device - self.quantize_config['advance'] = deep_get(tuning_cfg, 'advance') + self.quantize_config["calib_iteration"] = tuning_cfg["calib_iteration"] + self.quantize_config["device"] = self.device + self.quantize_config["advance"] = deep_get(tuning_cfg, "advance") fp32_ops = [] bf16_ops = [] - dispatched_op_names = [j[0] for j in tuning_cfg['op']] + dispatched_op_names = [j[0] for j in tuning_cfg["op"]] - invalid_op_names = [i for i in self.quantize_config['op_wise_config'] - if i not in dispatched_op_names] + invalid_op_names = [i for i in self.quantize_config["op_wise_config"] if i not in dispatched_op_names] for op_name in invalid_op_names: - self.quantize_config['op_wise_config'].pop(op_name) + self.quantize_config["op_wise_config"].pop(op_name) - for each_op_info in tuning_cfg['op']: + for each_op_info in tuning_cfg["op"]: op_name = each_op_info[0] - if tuning_cfg['op'][each_op_info]['activation']['dtype'] in ['fp32', 'bf16']: - if op_name in self.quantize_config['op_wise_config']: - self.quantize_config['op_wise_config'].pop(op_name) - if tuning_cfg['op'][each_op_info]['activation']['dtype'] == 'fp32': + if tuning_cfg["op"][each_op_info]["activation"]["dtype"] in ["fp32", "bf16"]: + if op_name in self.quantize_config["op_wise_config"]: + self.quantize_config["op_wise_config"].pop(op_name) + if tuning_cfg["op"][each_op_info]["activation"]["dtype"] == "fp32": fp32_ops.append(op_name) - if tuning_cfg['op'][each_op_info]['activation']['dtype'] == 'bf16': + if tuning_cfg["op"][each_op_info]["activation"]["dtype"] == "bf16": bf16_ops.append(op_name) continue is_perchannel = False bit = None - if 'weight' in tuning_cfg['op'][each_op_info]: - is_perchannel = tuning_cfg['op'][each_op_info]['weight'][ - 'granularity'] == 'per_channel' - #bit = tuning_cfg['op'][each_op_info]['weight']['bit'] + if "weight" in tuning_cfg["op"][each_op_info]: + is_perchannel = tuning_cfg["op"][each_op_info]["weight"]["granularity"] == "per_channel" + # bit = tuning_cfg['op'][each_op_info]['weight']['bit'] weight_bit = bit if bit else 7.0 - algorithm = tuning_cfg['op'][each_op_info]['activation']['algorithm'] + algorithm = tuning_cfg["op"][each_op_info]["activation"]["algorithm"] is_asymmetric = False - if 'activation' in tuning_cfg['op'][each_op_info]: - is_asymmetric = tuning_cfg['op'][each_op_info]['activation']['scheme'] == 'asym' - self.quantize_config['op_wise_config'][op_name] = (is_perchannel, - algorithm, - is_asymmetric, - weight_bit) + if "activation" in tuning_cfg["op"][each_op_info]: + is_asymmetric = tuning_cfg["op"][each_op_info]["activation"]["scheme"] == "asym" + self.quantize_config["op_wise_config"][op_name] = (is_perchannel, algorithm, is_asymmetric, weight_bit) self.fp32_ops = fp32_ops self.bf16_ops = bf16_ops @@ -546,26 +600,30 @@ def quantize(self, tune_cfg, model, data_loader, q_func=None): Returns: tf.compat.v1.GraphDef: the quantized model """ - assert self.approach != "post_training_dynamic_quant", \ - "Dynamic quantization is not supported on TensorFlow framework now!" + assert ( + self.approach != "post_training_dynamic_quant" + ), "Dynamic quantization is not supported on TensorFlow framework now!" 
- if self.approach == "quant_aware_training": # pragma: no cover - assert q_func is not None, "quantization aware training mode \ + if self.approach == "quant_aware_training": # pragma: no cover + assert ( + q_func is not None + ), "quantization aware training mode \ is not configured correctly" from neural_compressor.model import Model + qat_model = q_func(model) - return self.convert(Model(qat_model), 'QAT', 'default') + return self.convert(Model(qat_model), "QAT", "default") - assert q_func is None, \ - "post-training quantization mode is not support calibration function for Tensorflow!" + assert q_func is None, "post-training quantization mode is not support calibration function for Tensorflow!" self._tuning_cfg_to_fw(tune_cfg) self.bf16_ops.extend(self.smooth_quant_mul_ops) logger.debug("Dump quantization configurations:") logger.debug(self.quantize_config) from .tf_utils.graph_converter import GraphConverter - calib_sampling_size = tune_cfg.get('calib_sampling_size', 1) + + calib_sampling_size = tune_cfg.get("calib_sampling_size", 1) if isinstance(data_loader, BaseDataLoader): batch_size = data_loader.batch_size try: @@ -573,68 +631,75 @@ def quantize(self, tune_cfg, model, data_loader, q_func=None): if calib_sampling_size % (batch_size - i) == 0: calib_batch_size = batch_size - i if i != 0: # pragma: no cover - logger.warning("Reset `calibration.dataloader.batch_size` field " - "to {}".format(calib_batch_size) + - " to make sure the sampling_size is " - "divisible exactly by batch size") + logger.warning( + "Reset `calibration.dataloader.batch_size` field " + "to {}".format(calib_batch_size) + " to make sure the sampling_size is " + "divisible exactly by batch size" + ) break tmp_iterations = int(math.ceil(calib_sampling_size / calib_batch_size)) data_loader.batch(calib_batch_size) - self.quantize_config['calib_iteration'] = tmp_iterations - converted_model = GraphConverter(model, - qt_config=self.quantize_config, - recipes=self.recipes, - int8_sequences=self.op_wise_sequences, - fp32_ops=self.fp32_ops, - bf16_ops=self.bf16_ops, - data_loader=data_loader, - qdq_enabled=self.qdq_enabled, - new_api=self.new_api, - performance_only = self.performance_only, - use_bf16=self.use_bf16).convert() - except Exception: # pragma: no cover + self.quantize_config["calib_iteration"] = tmp_iterations + converted_model = GraphConverter( + model, + qt_config=self.quantize_config, + recipes=self.recipes, + int8_sequences=self.op_wise_sequences, + fp32_ops=self.fp32_ops, + bf16_ops=self.bf16_ops, + data_loader=data_loader, + qdq_enabled=self.qdq_enabled, + new_api=self.new_api, + performance_only=self.performance_only, + use_bf16=self.use_bf16, + ).convert() + except Exception: # pragma: no cover from .tf_utils.util import get_model_input_shape + batch_size = get_model_input_shape(model) logger.warning( - "Fail to forward with batch size={}, set to {} now.". 
- format(data_loader.batch_size, batch_size)) + "Fail to forward with batch size={}, set to {} now.".format(data_loader.batch_size, batch_size) + ) data_loader.batch(batch_size) - self.quantize_config['calib_iteration'] = calib_sampling_size - converted_model = GraphConverter(model, - qt_config=self.quantize_config, - recipes=self.recipes, - int8_sequences=self.op_wise_sequences, - fp32_ops=self.fp32_ops, - bf16_ops=self.bf16_ops, - data_loader=data_loader, - qdq_enabled=self.qdq_enabled, - new_api=self.new_api, - performance_only = self.performance_only, - use_bf16=self.use_bf16).convert() - else: # pragma: no cover - if hasattr(data_loader, 'batch_size') and \ - calib_sampling_size % data_loader.batch_size != 0: - iter = self.quantize_config['calib_iteration'] + self.quantize_config["calib_iteration"] = calib_sampling_size + converted_model = GraphConverter( + model, + qt_config=self.quantize_config, + recipes=self.recipes, + int8_sequences=self.op_wise_sequences, + fp32_ops=self.fp32_ops, + bf16_ops=self.bf16_ops, + data_loader=data_loader, + qdq_enabled=self.qdq_enabled, + new_api=self.new_api, + performance_only=self.performance_only, + use_bf16=self.use_bf16, + ).convert() + else: # pragma: no cover + if hasattr(data_loader, "batch_size") and calib_sampling_size % data_loader.batch_size != 0: + iter = self.quantize_config["calib_iteration"] logger.warning( - "Please note that calibration sampling size {} " \ - "isn't divisible exactly by batch size {}. " \ - "So the real sampling size is {}.". - format(calib_sampling_size, data_loader.batch_size, - data_loader.batch_size * iter)) - converted_model = GraphConverter(model, - qt_config=self.quantize_config, - recipes=self.recipes, - int8_sequences=self.op_wise_sequences, - fp32_ops=self.fp32_ops, - bf16_ops=self.bf16_ops, - data_loader=data_loader, - qdq_enabled=self.qdq_enabled, - new_api=self.new_api, - performance_only = self.performance_only, - use_bf16=self.use_bf16).convert() - #just save framework_specific_info feature for recover - converted_model.q_config.update({'framework_specific_info': \ - self.framework_specific_info}) + "Please note that calibration sampling size {} " + "isn't divisible exactly by batch size {}. 
" + "So the real sampling size is {}.".format( + calib_sampling_size, data_loader.batch_size, data_loader.batch_size * iter + ) + ) + converted_model = GraphConverter( + model, + qt_config=self.quantize_config, + recipes=self.recipes, + int8_sequences=self.op_wise_sequences, + fp32_ops=self.fp32_ops, + bf16_ops=self.bf16_ops, + data_loader=data_loader, + qdq_enabled=self.qdq_enabled, + new_api=self.new_api, + performance_only=self.performance_only, + use_bf16=self.use_bf16, + ).convert() + # just save framework_specific_info feature for recover + converted_model.q_config.update({"framework_specific_info": self.framework_specific_info}) self._dump_model_op_stats(converted_model.graph_def) @@ -642,75 +707,79 @@ def quantize(self, tune_cfg, model, data_loader, q_func=None): def _dump_model_op_stats(self, model_graphdef): """Dump the whole model's OPs statistics information for analysis.""" - fp32_op_list_uint8 = copy.deepcopy( - self.query_handler.get_op_types_by_precision(precision='uint8')) - fp32_op_list_int8 = copy.deepcopy( - self.query_handler.get_op_types_by_precision(precision='int8')) - fp32_op_list=list(set(fp32_op_list_uint8).union(set(fp32_op_list_int8))) - - - int8_op_prefix_list = ['QuantizedConv2D', '_FusedQuantizedConv3D', 'QuantizedDepthwise', - 'QuantizedMaxPool', 'QuantizedAvgPool', - 'QuantizedConcatV2', 'QuantizedMatMul', - '_QuantizedFusedBatchNorm', '_QuantizedMatMul', - '_QuantizedBatchMatMul', '_QuantizedFusedInstanceNorm', - '_FusedQuantizedDeconv2D', '_FusedQuantizedDeconv3D'] + fp32_op_list_uint8 = copy.deepcopy(self.query_handler.get_op_types_by_precision(precision="uint8")) + fp32_op_list_int8 = copy.deepcopy(self.query_handler.get_op_types_by_precision(precision="int8")) + fp32_op_list = list(set(fp32_op_list_uint8).union(set(fp32_op_list_int8))) + + int8_op_prefix_list = [ + "QuantizedConv2D", + "_FusedQuantizedConv3D", + "QuantizedDepthwise", + "QuantizedMaxPool", + "QuantizedAvgPool", + "QuantizedConcatV2", + "QuantizedMatMul", + "_QuantizedFusedBatchNorm", + "_QuantizedMatMul", + "_QuantizedBatchMatMul", + "_QuantizedFusedInstanceNorm", + "_FusedQuantizedDeconv2D", + "_FusedQuantizedDeconv3D", + ] from tensorflow.python.framework import dtypes res = {} for op_type in fp32_op_list: - res[op_type] = {'INT8': 0, 'BF16': 0, 'FP32': 0} - res['QuantizeV2'] = {'INT8': 0, 'BF16': 0, 'FP32': 0} - res['Dequantize'] = {'INT8': 0, 'BF16': 0, 'FP32': 0} - res['Cast'] = {'INT8': 0, 'BF16': 0, 'FP32': 0} - fp32_op_list.extend(['QuantizeV2', 'Dequantize', 'Cast']) + res[op_type] = {"INT8": 0, "BF16": 0, "FP32": 0} + res["QuantizeV2"] = {"INT8": 0, "BF16": 0, "FP32": 0} + res["Dequantize"] = {"INT8": 0, "BF16": 0, "FP32": 0} + res["Cast"] = {"INT8": 0, "BF16": 0, "FP32": 0} + fp32_op_list.extend(["QuantizeV2", "Dequantize", "Cast"]) for i in model_graphdef.node: - if i.op == 'Const': + if i.op == "Const": continue possible_int8_res = [name for name in int8_op_prefix_list if i.op.find(name) != -1] if any(possible_int8_res): - origin_op_type = possible_int8_res[0].split('Quantized')[-1] - if origin_op_type == 'FusedBatchNorm': - origin_op_type = 'FusedBatchNormV3' - if origin_op_type == 'FusedInstanceNorm': - origin_op_type = '_MklFusedInstanceNorm' - if origin_op_type == 'Depthwise': - origin_op_type = 'DepthwiseConv2dNative' - if origin_op_type == 'BatchMatMul': - origin_op_type = 'BatchMatMulV2' - if origin_op_type == 'FusedBatchMatMulV2': - origin_op_type = '_MklFusedBatchMatMulV2' - if origin_op_type == 'Deconv2D': - origin_op_type = 'Conv2DBackpropInput' - if 
origin_op_type == 'Deconv3D': - origin_op_type = 'Conv3DBackpropInputV2' - res[origin_op_type]['INT8'] += 1 + origin_op_type = possible_int8_res[0].split("Quantized")[-1] + if origin_op_type == "FusedBatchNorm": + origin_op_type = "FusedBatchNormV3" + if origin_op_type == "FusedInstanceNorm": + origin_op_type = "_MklFusedInstanceNorm" + if origin_op_type == "Depthwise": + origin_op_type = "DepthwiseConv2dNative" + if origin_op_type == "BatchMatMul": + origin_op_type = "BatchMatMulV2" + if origin_op_type == "FusedBatchMatMulV2": + origin_op_type = "_MklFusedBatchMatMulV2" + if origin_op_type == "Deconv2D": + origin_op_type = "Conv2DBackpropInput" + if origin_op_type == "Deconv3D": + origin_op_type = "Conv3DBackpropInputV2" + res[origin_op_type]["INT8"] += 1 if i.op in fp32_op_list: - if 'T' not in i.attr and i.op != 'Cast': + if "T" not in i.attr and i.op != "Cast": continue - if i.attr['T'].type == dtypes.bfloat16: - res[i.op]['BF16'] += 1 - elif i.attr['T'].type in (dtypes.quint8,dtypes.qint8): - res[i.op]['INT8'] += 1 - elif i.op == 'Cast': - if i.attr['DstT'].type == dtypes.bfloat16: - res[i.op]['BF16'] += 1 - elif i.attr['DstT'].type == dtypes.float32: - res[i.op]['FP32'] += 1 + if i.attr["T"].type == dtypes.bfloat16: + res[i.op]["BF16"] += 1 + elif i.attr["T"].type in (dtypes.quint8, dtypes.qint8): + res[i.op]["INT8"] += 1 + elif i.op == "Cast": + if i.attr["DstT"].type == dtypes.bfloat16: + res[i.op]["BF16"] += 1 + elif i.attr["DstT"].type == dtypes.float32: + res[i.op]["FP32"] += 1 else: - res[i.op]['FP32'] += 1 + res[i.op]["FP32"] += 1 field_names = ["Op Type", "Total", "INT8", "BF16", "FP32"] - output_data = [[ - op_type, sum(res[op_type].values()), - res[op_type]['INT8'], res[op_type]['BF16'], res[op_type]['FP32']] - for op_type in fp32_op_list] - - Statistics(output_data, - header='Mixed Precision Statistics', - field_names=field_names).print_stat() + output_data = [ + [op_type, sum(res[op_type].values()), res[op_type]["INT8"], res[op_type]["BF16"], res[op_type]["FP32"]] + for op_type in fp32_op_list + ] + + Statistics(output_data, header="Mixed Precision Statistics", field_names=field_names).print_stat() self.optype_statistics = field_names, output_data def _query_bf16_ops(self, matched_nodes): @@ -718,15 +787,15 @@ def _query_bf16_ops(self, matched_nodes): self.bf16_op_details = OrderedDict() valid_precision = self.query_handler.get_mixed_precision_combination() - if ('bf16' in valid_precision and CpuInfo().bf16) or os.getenv('FORCE_BF16') == '1': + if ("bf16" in valid_precision and CpuInfo().bf16) or os.getenv("FORCE_BF16") == "1": for details in matched_nodes: node_op = details[-1][0] node_name = details[0] - self.bf16_op_details[(node_name, node_op)] = [{'weight': {'dtype': ['bf16']}, \ - 'activation': {'dtype': ['bf16']}},\ - {'weight': {'dtype': 'fp32'}, \ - 'activation': {'dtype': 'fp32'}}] + self.bf16_op_details[(node_name, node_op)] = [ + {"weight": {"dtype": ["bf16"]}, "activation": {"dtype": ["bf16"]}}, + {"weight": {"dtype": "fp32"}, "activation": {"dtype": "fp32"}}, + ] def _query_quantizable_ops(self, matched_nodes): """Collect the op-wise configuration for quantization. @@ -734,111 +803,123 @@ def _query_quantizable_ops(self, matched_nodes): Returns: OrderDict: op-wise configuration. 
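# A hedged sketch of the OrderedDict that _query_quantizable_ops below builds:
# each key is a (node_name, unified_op_type) pair and each value lists the
# candidate configurations in priority order (int8 with its fusion pattern,
# optionally bf16, then the fp32 fallback). Node names and capability fields
# are illustrative assumptions, not values taken from the framework yaml.
from collections import OrderedDict

fp32_common_config = {"weight": {"dtype": "fp32"}, "activation": {"dtype": "fp32"}}
quantizable_op_details = OrderedDict()
quantizable_op_details[("resnet_model/conv2d/Conv2D", "conv2d")] = [
    {
        "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel"]},
        "activation": {"dtype": ["uint8"], "algorithm": ["minmax", "kl"], "scheme": ["sym"]},
        "pattern": {"sequence": ["Conv2D,BiasAdd,Relu"], "precision": ["int8"]},
    },
    fp32_common_config,
]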
""" - bf16_common_config = {'weight': {'dtype': 'bf16'}, 'activation': {'dtype': 'bf16'}} - fp32_common_config = {'weight': {'dtype': 'fp32'}, 'activation': {'dtype': 'fp32'}} - uint8_type = self.query_handler.get_op_types_by_precision(precision='uint8') - int8_type = self.query_handler.get_op_types_by_precision(precision='int8') - bf16_type = self.query_handler.get_op_types_by_precision(precision='bf16') + bf16_common_config = {"weight": {"dtype": "bf16"}, "activation": {"dtype": "bf16"}} + fp32_common_config = {"weight": {"dtype": "fp32"}, "activation": {"dtype": "fp32"}} + uint8_type = self.query_handler.get_op_types_by_precision(precision="uint8") + int8_type = self.query_handler.get_op_types_by_precision(precision="int8") + bf16_type = self.query_handler.get_op_types_by_precision(precision="bf16") tf_quantizable_op_type = list(set(uint8_type).union(set(int8_type))) valid_precision = self.query_handler.get_mixed_precision_combination() op_capability = self.query_handler.get_quantization_capability() - conv_config = copy.deepcopy(op_capability['Conv2D']) - conv3d_config = copy.deepcopy(op_capability['Conv3D']) if 'Conv3D' in op_capability else None - matmul_config = copy.deepcopy(op_capability['MatMul']) - other_config = copy.deepcopy(op_capability['default']) + conv_config = copy.deepcopy(op_capability["Conv2D"]) + conv3d_config = copy.deepcopy(op_capability["Conv3D"]) if "Conv3D" in op_capability else None + matmul_config = copy.deepcopy(op_capability["MatMul"]) + other_config = copy.deepcopy(op_capability["default"]) self.quantizable_op_details = OrderedDict() self.recipes_ops = {} self._init_op_stat = {i: [] for i in tf_quantizable_op_type} - exclude_first_quantizable_op = True if 'first_conv_or_matmul_quantization' in \ - self.recipes and not self.recipes['first_conv_or_matmul_quantization'] \ - else False + exclude_first_quantizable_op = ( + True + if "first_conv_or_matmul_quantization" in self.recipes + and not self.recipes["first_conv_or_matmul_quantization"] + else False + ) for details in matched_nodes: node_op = details[-1][0] node_name = details[0] patterns = details[-1] pat_length = len(patterns) pattern_info = { - 'sequence': [[','.join(patterns[:pat_length - i]) for i in range(pat_length)][0]], - 'precision': ['int8'] + "sequence": [[",".join(patterns[: pat_length - i]) for i in range(pat_length)][0]], + "precision": ["int8"], } first_conv_or_matmul_node = [] - if node_op in tf_quantizable_op_type and node_name not in self.exclude_node_names and ( - node_name, self.unify_op_type_mapping[node_op]) not in self.quantizable_op_details: - if (self.unify_op_type_mapping[node_op].find("conv2d") != -1 or \ - self.unify_op_type_mapping[node_op].find("matmul") != -1) and \ - len(first_conv_or_matmul_node) == 0: - first_conv_or_matmul_node.append((node_name, \ - self.unify_op_type_mapping[node_op])) - self.recipes_ops['first_conv_or_matmul_quantization'] = \ - first_conv_or_matmul_node - if exclude_first_quantizable_op and \ - (self.unify_op_type_mapping[node_op].find("conv2d") != -1 or \ - self.unify_op_type_mapping[node_op].find("matmul") != -1): + if ( + node_op in tf_quantizable_op_type + and node_name not in self.exclude_node_names + and (node_name, self.unify_op_type_mapping[node_op]) not in self.quantizable_op_details + ): + if ( + self.unify_op_type_mapping[node_op].find("conv2d") != -1 + or self.unify_op_type_mapping[node_op].find("matmul") != -1 + ) and len(first_conv_or_matmul_node) == 0: + first_conv_or_matmul_node.append((node_name, self.unify_op_type_mapping[node_op])) 
+ self.recipes_ops["first_conv_or_matmul_quantization"] = first_conv_or_matmul_node + if exclude_first_quantizable_op and ( + self.unify_op_type_mapping[node_op].find("conv2d") != -1 + or self.unify_op_type_mapping[node_op].find("matmul") != -1 + ): exclude_first_quantizable_op = False self.exclude_node_names.append(node_name) continue self._init_op_stat[node_op].append(node_name) if self.unify_op_type_mapping[node_op].find("conv2d") != -1: conv2d_int8_config = copy.deepcopy(conv_config) - conv2d_int8_config['pattern'] = pattern_info - self.quantizable_op_details[( - node_name, self.unify_op_type_mapping[node_op] - )] = [conv2d_int8_config, fp32_common_config] + conv2d_int8_config["pattern"] = pattern_info + self.quantizable_op_details[(node_name, self.unify_op_type_mapping[node_op])] = [ + conv2d_int8_config, + fp32_common_config, + ] elif self.unify_op_type_mapping[node_op].find("conv3d") != -1: conv3d_int8_config = copy.deepcopy(conv3d_config) - conv3d_int8_config['pattern'] = pattern_info - self.quantizable_op_details[( - node_name, self.unify_op_type_mapping[node_op] - )] = [conv3d_int8_config, fp32_common_config] + conv3d_int8_config["pattern"] = pattern_info + self.quantizable_op_details[(node_name, self.unify_op_type_mapping[node_op])] = [ + conv3d_int8_config, + fp32_common_config, + ] elif self.unify_op_type_mapping[node_op].find("matmul") != -1: matmul_int8_config = copy.deepcopy(matmul_config) - matmul_int8_config['pattern'] = pattern_info + matmul_int8_config["pattern"] = pattern_info # TODO enable the sym mode once the tf fixed the mkldequantize_op.cc bug. # is_positive_input = self.pre_optimizer_handle.has_positive_input(node_name) # matmul_scheme = 'sym' if is_positive_input else 'asym' - matmul_scheme = ['asym'] - matmul_int8_config['activation']['scheme'] = matmul_scheme - self.quantizable_op_details[( - node_name, self.unify_op_type_mapping[node_op] - )] = [matmul_int8_config, fp32_common_config] + matmul_scheme = ["asym"] + matmul_int8_config["activation"]["scheme"] = matmul_scheme + self.quantizable_op_details[(node_name, self.unify_op_type_mapping[node_op])] = [ + matmul_int8_config, + fp32_common_config, + ] else: - self.quantizable_op_details[( - node_name, self.unify_op_type_mapping[node_op] - )] = [copy.deepcopy(other_config), fp32_common_config] - if node_op in bf16_type and (('bf16' in valid_precision and CpuInfo().bf16) \ - or os.getenv('FORCE_BF16') == '1'): - self.quantizable_op_details[( - node_name, self.unify_op_type_mapping[node_op] - )].insert(1, bf16_common_config) - - self.quantize_config['op_wise_config'][node_name] = (False, "minmax", False) + self.quantizable_op_details[(node_name, self.unify_op_type_mapping[node_op])] = [ + copy.deepcopy(other_config), + fp32_common_config, + ] + if node_op in bf16_type and ( + ("bf16" in valid_precision and CpuInfo().bf16) or os.getenv("FORCE_BF16") == "1" + ): + self.quantizable_op_details[(node_name, self.unify_op_type_mapping[node_op])].insert( + 1, bf16_common_config + ) + + self.quantize_config["op_wise_config"][node_name] = (False, "minmax", False) return self.quantizable_op_details def _filter_unquantizable_concat(self, matched_nodes): """Filter out unquantizable ConcatV2 Ops based on the positive input rule.""" - target_concat_nodes = [i[0] for i in matched_nodes if i[-1][0] == 'ConcatV2'] - from neural_compressor.adaptor.tf_utils.util import GraphAnalyzer + target_concat_nodes = [i[0] for i in matched_nodes if i[-1][0] == "ConcatV2"] from neural_compressor.adaptor.tf_utils.graph_util import 
GraphRewriterHelper + from neural_compressor.adaptor.tf_utils.util import GraphAnalyzer g = GraphAnalyzer() g.graph = self.pre_optimized_model.graph_def graph_info = g.parse_graph() - concat_nodes = g.query_fusion_pattern_nodes([['ConcatV2']]) + concat_nodes = g.query_fusion_pattern_nodes([["ConcatV2"]]) for i in concat_nodes: concat_node_name = i[0] if concat_node_name not in target_concat_nodes: continue input_positive_status = [] - for index in range(graph_info[concat_node_name].node.attr['N'].i): + for index in range(graph_info[concat_node_name].node.attr["N"].i): each_input_name = GraphRewriterHelper.node_name_from_input( - graph_info[concat_node_name].node.input[index]) + graph_info[concat_node_name].node.input[index] + ) each_input_node = graph_info[each_input_name].node positive_input = False - if each_input_node.op in ('Relu', 'Relu6'): + if each_input_node.op in ("Relu", "Relu6"): positive_input = True else: positive_input = g.has_positive_input(each_input_node.name) @@ -848,26 +929,27 @@ def _filter_unquantizable_concat(self, matched_nodes): def _filter_unquantizable_concat_performance_only(self, matched_nodes): """OOB filter out unquantizable ConcatV2 OPs by checking the control flow rule.""" - target_concat_nodes = [i[0] for i in matched_nodes if i[-1][0] == 'ConcatV2'] - from neural_compressor.adaptor.tf_utils.util import GraphAnalyzer + target_concat_nodes = [i[0] for i in matched_nodes if i[-1][0] == "ConcatV2"] from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper + from neural_compressor.adaptor.tf_utils.util import GraphAnalyzer g = GraphAnalyzer() g.graph = self.pre_optimized_model.graph_def graph_info = g.parse_graph() - concat_nodes = g.query_fusion_pattern_nodes([['ConcatV2']]) + concat_nodes = g.query_fusion_pattern_nodes([["ConcatV2"]]) for i in concat_nodes: concat_node_name = i[0] if concat_node_name not in target_concat_nodes: continue input_positive_status = [] control_flow = False - for index in range(graph_info[concat_node_name].node.attr['N'].i): + for index in range(graph_info[concat_node_name].node.attr["N"].i): each_input_name = GraphRewriterHelper.node_name_from_input( - graph_info[concat_node_name].node.input[index]) + graph_info[concat_node_name].node.input[index] + ) each_input_node = graph_info[each_input_name].node - if each_input_node.op in ('Switch'): - control_flow = True + if each_input_node.op in ("Switch"): + control_flow = True if control_flow: matched_nodes.remove(i) @@ -882,6 +964,7 @@ def query_fw_capability(self, model): """ if self.pre_optimized_model is None: from .tf_utils.graph_rewriter.generic.pre_optimize import PreOptimization + self.pre_optimizer_handle = PreOptimization(model, self.new_api, self.device) self.pre_optimized_model = self.pre_optimizer_handle.get_optimized_model(self.itex_mode) model.graph_def = self.pre_optimized_model.graph_def @@ -892,27 +975,28 @@ def query_fw_capability(self, model): matched_nodes = self.pre_optimizer_handle.get_matched_nodes(patterns) matched_bf16_nodes = self.pre_optimizer_handle.get_matched_nodes(bf16_patterns) original_graph_node_name = [i.name for i in model.graph_def.node] - matched_nodes = sorted(matched_nodes, reverse=True, key=lambda i: ( - original_graph_node_name.index(i[0]), len(i[-1]))) + matched_nodes = sorted( + matched_nodes, reverse=True, key=lambda i: (original_graph_node_name.index(i[0]), len(i[-1])) + ) def check_match(patterns, input_pattern): for i in patterns: - if input_pattern == [i for i in i.replace('+', ' ').strip().split(' ') if i]: + if 
input_pattern == [i for i in i.replace("+", " ").strip().split(" ") if i]: return True return False - if (self.new_api and self.performance_only) or self.itex_mode or \ - os.getenv('TF_FORCE_CONCAT_OPTS') == '1': + if (self.new_api and self.performance_only) or self.itex_mode or os.getenv("TF_FORCE_CONCAT_OPTS") == "1": self._filter_unquantizable_concat_performance_only(matched_nodes) else: self._filter_unquantizable_concat(matched_nodes) copied_matched_nodes = copy.deepcopy(matched_nodes) for i in copied_matched_nodes: - if i[-1][0] in self.query_handler.get_op_types()['int8']: + if i[-1][0] in self.query_handler.get_op_types()["int8"]: continue - if not self.pre_optimizer_handle.has_positive_input(i[0]) and \ - not check_match(self.query_handler.get_fuse_patterns()['int8'], i[-1]): + if not self.pre_optimizer_handle.has_positive_input(i[0]) and not check_match( + self.query_handler.get_fuse_patterns()["int8"], i[-1] + ): matched_nodes.remove(i) del copied_matched_nodes @@ -927,12 +1011,9 @@ def check_match(patterns, input_pattern): self._query_quantizable_ops(matched_nodes) self._query_bf16_ops(matched_bf16_nodes) - capability = { - 'optypewise': self.get_optype_wise_ability(), - 'recipes_ops': self.recipes_ops - } - capability['opwise'] = copy.deepcopy(self.quantizable_op_details) - capability['opwise'].update(self.bf16_op_details) + capability = {"optypewise": self.get_optype_wise_ability(), "recipes_ops": self.recipes_ops} + capability["opwise"] = copy.deepcopy(self.quantizable_op_details) + capability["opwise"].update(self.bf16_op_details) logger.debug("Dump framework quantization capability:") logger.debug(capability) @@ -941,6 +1022,7 @@ def check_match(patterns, input_pattern): def set_tensor(self, model, tensor_dict): """Quantize the bias and weight tensors in tensor_dict.""" from .tf_utils.graph_util import GraphAnalyzer + g = GraphAnalyzer() g.graph = model.graph_def graph_info = g.parse_graph() @@ -951,7 +1033,6 @@ def _get_fp32_op_name(model, tensor_name): last_node_name = None current_node_name = None for each_node in model.graph_def.node: - if tensor_name in each_node.input: tensor_index = list(each_node.input).index(tensor_name) if each_node.op.find("Quantized") != -1 and tensor_index == 2: @@ -964,76 +1045,77 @@ def _get_fp32_op_name(model, tensor_name): return is_weight, is_biasadd, current_node_name, last_node_name - from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper - from tensorflow.python.framework import dtypes - from tensorflow.python.framework import tensor_util from tensorflow.core.framework import attr_value_pb2 + from tensorflow.python.framework import dtypes, tensor_util + + from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper + qint32_type = dtypes.qint32.as_datatype_enum for tensor_name, tensor_content in tensor_dict.items(): - is_weight, is_biasadd, current_node_name, last_node_name = \ - _get_fp32_op_name(model, tensor_name) + is_weight, is_biasadd, current_node_name, last_node_name = _get_fp32_op_name(model, tensor_name) if is_biasadd: - is_biasadd_dtype_is_fp32 = graph_info[\ - current_node_name].node.attr['Tbias'] == attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum) + is_biasadd_dtype_is_fp32 = graph_info[current_node_name].node.attr["Tbias"] == attr_value_pb2.AttrValue( + type=dtypes.float32.as_datatype_enum + ) current_node = graph_info[current_node_name].node bias_add_node = graph_info[current_node.input[2]].node if is_biasadd_dtype_is_fp32: 
bias_add_node.attr["value"].CopyFrom( attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto(tensor_content, - dtypes.float32, tensor_content.shape))) + tensor=tensor_util.make_tensor_proto(tensor_content, dtypes.float32, tensor_content.shape) + ) + ) else: last_node = graph_info[last_node_name].node - min_input = graph_info[\ - last_node.input[-2]].node.attr['value'].tensor.float_val[0] - max_input = graph_info[\ - last_node.input[-1]].node.attr['value'].tensor.float_val[0] + min_input = graph_info[last_node.input[-2]].node.attr["value"].tensor.float_val[0] + max_input = graph_info[last_node.input[-1]].node.attr["value"].tensor.float_val[0] channel_size = tensor_content.shape[0] max_filter_node = graph_info[current_node.input[6]].node min_filter_node = graph_info[current_node.input[5]].node - if max_filter_node.attr['value'].tensor.float_val: + if max_filter_node.attr["value"].tensor.float_val: max_filter_tensor = [] min_filter_tensor = [] - max_filter_tensor.append(\ - (max_filter_node.attr['value'].tensor.float_val)[0]) - min_filter_tensor.append(\ - (min_filter_node.attr['value'].tensor.float_val)[0]) + max_filter_tensor.append((max_filter_node.attr["value"].tensor.float_val)[0]) + min_filter_tensor.append((min_filter_node.attr["value"].tensor.float_val)[0]) else: - max_filter_tensor = tensor_util.MakeNdarray(\ - min_filter_node.attr['value'].tensor) - min_filter_tensor = tensor_util.MakeNdarray(\ - min_filter_node.attr['value'].tensor) - activation_range = 127.0 if \ - current_node.attr["Tinput"].type == dtypes.qint8 else 255.0 - updated_bias = Helper.generate_int32_bias_for_conv(\ - tensor_content, channel_size, max_input, min_input, \ - max_filter_tensor, min_filter_tensor, activation_range) - - bias_add_node.attr['dtype'].CopyFrom(\ - attr_value_pb2.AttrValue(type=qint32_type)) - bias_add_node.attr["value"].CopyFrom(\ + max_filter_tensor = tensor_util.MakeNdarray(min_filter_node.attr["value"].tensor) + min_filter_tensor = tensor_util.MakeNdarray(min_filter_node.attr["value"].tensor) + activation_range = 127.0 if current_node.attr["Tinput"].type == dtypes.qint8 else 255.0 + updated_bias = Helper.generate_int32_bias_for_conv( + tensor_content, + channel_size, + max_input, + min_input, + max_filter_tensor, + min_filter_tensor, + activation_range, + ) + + bias_add_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=qint32_type)) + bias_add_node.attr["value"].CopyFrom( attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto(updated_bias, - dtypes.int32, tensor_content.shape))) - bias_add_node.attr['value'].tensor.dtype = qint32_type + tensor=tensor_util.make_tensor_proto(updated_bias, dtypes.int32, tensor_content.shape) + ) + ) + bias_add_node.attr["value"].tensor.dtype = qint32_type current_node.attr["Tbias"].CopyFrom(attr_value_pb2.AttrValue(type=qint32_type)) if is_weight: - tmp_const_node = Helper.create_constant_node(\ - current_node.name + '_weights_tmp', - tensor_content.transpose(2,3,1,0), dtypes.float32) + tmp_const_node = Helper.create_constant_node( + current_node.name + "_weights_tmp", tensor_content.transpose(2, 3, 1, 0), dtypes.float32 + ) min_filter_node = graph_info[current_node.input[5]].node - per_channel = True if min_filter_node.attr['value'].tensor.tensor_shape else False + per_channel = True if min_filter_node.attr["value"].tensor.tensor_shape else False from .tf_utils.quantize_graph_common import QuantizeGraphHelper + original_fp32_op = current_node.op.split("With")[0].split("Quantized")[-1] if original_fp32_op.find("Depthwise") != -1: 
original_fp32_op = "DepthwiseConv2dNative" - qint8_const_node, min_node, max_node = \ - QuantizeGraphHelper.generate_quantized_weight_node( - original_fp32_op, tmp_const_node, per_channel) + qint8_const_node, min_node, max_node = QuantizeGraphHelper.generate_quantized_weight_node( + original_fp32_op, tmp_const_node, per_channel + ) g.add_node(qint8_const_node, [], [current_node.name]) g.add_node(min_node, [], [current_node.name]) g.add_node(max_node, [], [current_node.name]) @@ -1043,38 +1125,41 @@ def _get_fp32_op_name(model, tensor_name): def inspect_weight_and_bias(self, node_list, graph_def, graph_info, graph_node_name_mapping): """Inspect the weights and biases.""" - from neural_compressor.utils.utility import dequantize_weight + import tensorflow as tf + from neural_compressor.adaptor.tf_utils.util import get_tensor_val_from_graph_node + from neural_compressor.utils.utility import dequantize_weight + from .tf_utils.util import int8_node_name_reverse - import tensorflow as tf + weights_result = {} inspect_nodes = [] node_set = set(node_list) for node in graph_def.node: node_name = node.name - if 'Quantized' in node.op: + if "Quantized" in node.op: node_name = int8_node_name_reverse(node) - if node_name in node_set and ('Conv' in node.op or 'Mul' in node.op): + if node_name in node_set and ("Conv" in node.op or "Mul" in node.op): inspect_nodes.append(node) - logger.debug(f'Start to inspect weight and bias for: {[node.name for node in inspect_nodes]}.') + logger.debug(f"Start to inspect weight and bias for: {[node.name for node in inspect_nodes]}.") for node in inspect_nodes: # inspect weights and bias node_name = node.name weight_node_name = node.input[1] weight_node = graph_node_name_mapping[weight_node_name] - if weight_node.op != 'Const': # skip the matmul whose two inputs are previous output + if weight_node.op != "Const": # skip the matmul whose two inputs are previous output continue weight_node_val = get_tensor_val_from_graph_node(graph_node_name_mapping, weight_node_name) - weight_node_val = weight_node_val.astype('float32') + weight_node_val = weight_node_val.astype("float32") # dequantize the weight for quantized model - if 'Quantized' in node.op: + if "Quantized" in node.op: node_name = int8_node_name_reverse(node) - weight_node_name_pre = weight_node_name.split('_qint8_const')[0] - min_filter_node = weight_node_name_pre + '_min' - max_filter_node = weight_node_name_pre + '_max' - if graph_info[min_filter_node].node.attr['value'].tensor.float_val: - min_filter_val = graph_info[min_filter_node].node.attr['value'].tensor.float_val - max_filter_val = graph_info[max_filter_node].node.attr['value'].tensor.float_val + weight_node_name_pre = weight_node_name.split("_qint8_const")[0] + min_filter_node = weight_node_name_pre + "_min" + max_filter_node = weight_node_name_pre + "_max" + if graph_info[min_filter_node].node.attr["value"].tensor.float_val: + min_filter_val = graph_info[min_filter_node].node.attr["value"].tensor.float_val + max_filter_val = graph_info[max_filter_node].node.attr["value"].tensor.float_val else: min_filter_val = get_tensor_val_from_graph_node(graph_node_name_mapping, min_filter_node) max_filter_val = get_tensor_val_from_graph_node(graph_node_name_mapping, max_filter_node) @@ -1097,7 +1182,7 @@ def fused_node_mapping(self, node_list, pattern_mapping, graph_info, graph_node_ fused_mapping = {} fused_mapping_reverse = {} for node_name in node_list: - fused_seq = pattern_mapping[node_name]['sequence'].split(',') + fused_seq = 
pattern_mapping[node_name]["sequence"].split(",") # for the node not fused with others if len(fused_seq) == 1: fused_mapping[node_name] = node_name @@ -1107,25 +1192,25 @@ def fused_node_mapping(self, node_list, pattern_mapping, graph_info, graph_node_ for _next_node_op_type in fused_seq[1:]: node_details = graph_info[_next_node_name] for node_output_name in node_details.outputs: - if graph_node_name_mapping[node_output_name].op == 'Cast': + if graph_node_name_mapping[node_output_name].op == "Cast": cast_node = graph_node_name_mapping[node_output_name] node_output_name = graph_info[cast_node.name].outputs[0] - if graph_node_name_mapping[node_output_name].op in [_next_node_op_type, 'Cast']: + if graph_node_name_mapping[node_output_name].op in [_next_node_op_type, "Cast"]: _next_node_name = node_output_name fused_mapping[node_name] = _next_node_name fused_mapping_reverse[_next_node_name] = node_name return fused_mapping, fused_mapping_reverse - def _inspect_tensor_inference(self, inspect_node_dict, model, dataloader, iteration_list): + def _inspect_tensor_inference(self, inspect_node_dict, model, dataloader, iteration_list): """Do inference for inspect activation.""" out_tensor_lst = [] - out_tensor_lst += [{n : [n + ':' + str(i) for i in range(3)]} for n in inspect_node_dict['qreq_node']] - out_tensor_lst += [{n : n + ':0'} for n in inspect_node_dict['qdq_node']] - out_tensor_lst += [{n : n + ':0'} for n in inspect_node_dict['f_node']] + out_tensor_lst += [{n: [n + ":" + str(i) for i in range(3)]} for n in inspect_node_dict["qreq_node"]] + out_tensor_lst += [{n: n + ":0"} for n in inspect_node_dict["qdq_node"]] + out_tensor_lst += [{n: n + ":0"} for n in inspect_node_dict["f_node"]] out_cnt = len(out_tensor_lst) iteration_list = set(iteration_list) input_tensor = model.input_tensor - logger.info('Start to do inference for inspect activation.') + logger.info("Start to do inference for inspect activation.") activation_result = [] for idx, (inputs, labels) in enumerate(dataloader): model_out = [] @@ -1136,49 +1221,51 @@ def _inspect_tensor_inference(self, inspect_node_dict, model, dataloader, itera if len(input_tensor) == 1: feed_dict = {input_tensor[0]: inputs} # get raw tensor using index [0] else: - assert len(input_tensor) == len(inputs), \ - 'inputs len must equal with input_tensor' + assert len(input_tensor) == len(inputs), "inputs len must equal with input_tensor" feed_dict = dict(zip(input_tensor, inputs)) - #TODO find an optimized method to avoid multiple runs + # TODO find an optimized method to avoid multiple runs for i, out_t in enumerate(out_tensor_lst): - logger.debug(f'Finished inspect {i}/{out_cnt} nodes, current inspect node {out_t.keys()}.') + logger.debug(f"Finished inspect {i}/{out_cnt} nodes, current inspect node {out_t.keys()}.") model_out.append(model.sess.run(out_t, feed_dict)) activation_result.append(model_out) return activation_result - def inspect_activation(self, node_list, graph_def, graph_node_name_mapping, quantization_cfg, - dataloader, iteration_list, graph_info): + def inspect_activation( + self, node_list, graph_def, graph_node_name_mapping, quantization_cfg, dataloader, iteration_list, graph_info + ): """Inspect the activation.""" from neural_compressor.model import Model + original_graph_node_mapping = {} for node in graph_def.node: original_graph_node_mapping[node.name] = node - inspect_node_dict = {'qdq_node':[], 'qreq_node':[], 'f_node':[]} + inspect_node_dict = {"qdq_node": [], "qreq_node": [], "f_node": []} for node_name in node_list: node = 
graph_node_name_mapping[node_name] - if 'Quantized' in node.op and 'Dequantize' in node.op: - inspect_node_dict['qdq_node'].append(node.name) - elif 'Quantized' in node.op or '_Quantized' in node.op or 'Requantize' in node.op: - inspect_node_dict['qreq_node'].append(node.name) + if "Quantized" in node.op and "Dequantize" in node.op: + inspect_node_dict["qdq_node"].append(node.name) + elif "Quantized" in node.op or "_Quantized" in node.op or "Requantize" in node.op: + inspect_node_dict["qreq_node"].append(node.name) else: - inspect_node_dict['f_node'].append(node_name) + inspect_node_dict["f_node"].append(node_name) pattern_mapping = {} - node_dict = quantization_cfg['op'] + node_dict = quantization_cfg["op"] for node_name_and_type in node_dict.keys(): node_name, _ = node_name_and_type - if 'pattern' in node_dict[node_name_and_type]: - pattern_mapping[node_name] = node_dict[node_name_and_type]['pattern'] + if "pattern" in node_dict[node_name_and_type]: + pattern_mapping[node_name] = node_dict[node_name_and_type]["pattern"] else: - pattern_mapping[node_name] = {'sequence': node_name} - if inspect_node_dict['f_node']: - fuse_map, fuse_map_reverse = self.fused_node_mapping(inspect_node_dict['f_node'], pattern_mapping, - graph_info, graph_node_name_mapping) - inspect_node_dict['f_node'] = [fuse_map[n] for n in inspect_node_dict['f_node']] + pattern_mapping[node_name] = {"sequence": node_name} + if inspect_node_dict["f_node"]: + fuse_map, fuse_map_reverse = self.fused_node_mapping( + inspect_node_dict["f_node"], pattern_mapping, graph_info, graph_node_name_mapping + ) + inspect_node_dict["f_node"] = [fuse_map[n] for n in inspect_node_dict["f_node"]] # build model and do inference model = Model(graph_def) activation_result = self._inspect_tensor_inference(inspect_node_dict, model, dataloader, iteration_list) final_result = [] - int8_postfix = '_eightbit' + int8_postfix = "_eightbit" for iter_res in activation_result: tmp_iter_result = {} for res in iter_res: @@ -1194,8 +1281,17 @@ def inspect_activation(self, node_list, graph_def, graph_node_name_mapping, quan final_result.append(tmp_iter_result) return final_result - def inspect_tensor(self, model, dataloader=None, op_list=[], iteration_list=[], - inspect_type='activation', save_to_disk=False, save_path=None, quantization_cfg=None): + def inspect_tensor( + self, + model, + dataloader=None, + op_list=[], + iteration_list=[], + inspect_type="activation", + save_to_disk=False, + save_path=None, + quantization_cfg=None, + ): """Dump the weight and activation(output) to local disk. 1. 
create the correspondence between query node name and the actually output node name in graph_def @@ -1233,30 +1329,33 @@ def inspect_tensor(self, model, dataloader=None, op_list=[], iteration_list=[], ] } """ - from neural_compressor.model.tensorflow_model import TensorflowBaseModel - from neural_compressor.utils.utility import load_data_from_pkl, dump_data_to_local + import tensorflow as tf + from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer + from neural_compressor.model.tensorflow_model import TensorflowBaseModel + from neural_compressor.utils.utility import dump_data_to_local, load_data_from_pkl + from .tf_utils.util import int8_node_name_reverse - import tensorflow as tf + if isinstance(model, TensorflowBaseModel): model = model.graph_def if not quantization_cfg: # TODO get config from graph if config is None - quantization_cfg = load_data_from_pkl('./nc_workspace/', 'cfg.pkl') + quantization_cfg = load_data_from_pkl("./nc_workspace/", "cfg.pkl") node_list = op_list # create the mapping between node name and node, key: node_name, val: node graph_node_name_mapping = {} quan_model_flag = False for node in model.node: node_name = int8_node_name_reverse(node) - if 'Quantized' in node.op: + if "Quantized" in node.op: quan_model_flag = True node_name = int8_node_name_reverse(node) - if node.attr['value'].tensor.dtype == tf.dtypes.bfloat16.as_datatype_enum: + if node.attr["value"].tensor.dtype == tf.dtypes.bfloat16.as_datatype_enum: quan_model_flag = True graph_node_name_mapping[node_name] = node if quan_model_flag: - logger.info('Dump the tensor for quantized model.') + logger.info("Dump the tensor for quantized model.") # create the mapping between node name and node detail g = GraphAnalyzer() @@ -1265,27 +1364,27 @@ def inspect_tensor(self, model, dataloader=None, op_list=[], iteration_list=[], inspect_result = {} # inspect weight - if inspect_type == 'weight' or inspect_type == 'all': - logger.info('Start to inspect weight and bias.') + if inspect_type == "weight" or inspect_type == "all": + logger.info("Start to inspect weight and bias.") weights_result = self.inspect_weight_and_bias(node_list, model, graph_info, graph_node_name_mapping) - inspect_result['weight'] = weights_result + inspect_result["weight"] = weights_result # inspect activation - if inspect_type == 'activation' or inspect_type == 'all': - logger.info('Start to inspect activation.') - activation_result = self.inspect_activation(node_list, model, graph_node_name_mapping, quantization_cfg, - dataloader, iteration_list, graph_info) - inspect_result['activation'] = activation_result + if inspect_type == "activation" or inspect_type == "all": + logger.info("Start to inspect activation.") + activation_result = self.inspect_activation( + node_list, model, graph_node_name_mapping, quantization_cfg, dataloader, iteration_list, graph_info + ) + inspect_result["activation"] = activation_result # save to disk if save_to_disk: if not save_path: - save_path = './nc_workspace/tmp/' - dump_data_to_local(inspect_result, save_path, 'inspect_result.pkl') - logger.info(f'Dumped the inspect tensor to {save_path}') + save_path = "./nc_workspace/tmp/" + dump_data_to_local(inspect_result, save_path, "inspect_result.pkl") + logger.info(f"Dumped the inspect tensor to {save_path}") return inspect_result - def quantize_input(self, model): """Quantize the model to be able to take quantized input. 
@@ -1302,7 +1401,8 @@ def quantize_input(self, model): scale = None # quantize input only support tensorflow version > 2.1.0 import tensorflow as tf - if version1_lt_version2(tf.version.VERSION, '2.1.0'): + + if version1_lt_version2(tf.version.VERSION, "2.1.0"): logger.warning("Quantize input needs tensorflow 2.1.0 and newer.") return model, scale @@ -1311,43 +1411,44 @@ def quantize_input(self, model): quantize_nodes = [] for node in graph_def.node: node_name_mapping[node.name] = node - if node.op == 'QuantizeV2': + if node.op == "QuantizeV2": quantize_nodes.append(node) target_quantize_nodes = [] for node in quantize_nodes: # only support Quantizev2 input op Pad and Placeholder - if (node_name_mapping[node.input[0]].op == 'Pad' and node_name_mapping[ - node_name_mapping[node.input[0]].input[0]].op == 'Placeholder') or \ - node_name_mapping[node.input[0]].op == 'Placeholder': + if ( + node_name_mapping[node.input[0]].op == "Pad" + and node_name_mapping[node_name_mapping[node.input[0]].input[0]].op == "Placeholder" + ) or node_name_mapping[node.input[0]].op == "Placeholder": target_quantize_nodes.append(node) - assert len(target_quantize_nodes) == 1, 'only support 1 QuantizeV2 from Placeholder' + assert len(target_quantize_nodes) == 1, "only support 1 QuantizeV2 from Placeholder" quantize_node = target_quantize_nodes[0] quantize_node_input = node_name_mapping[quantize_node.input[0]] - quantize_node_outputs = [node for node in graph_def.node - if quantize_node.name in node.input] + quantize_node_outputs = [node for node in graph_def.node if quantize_node.name in node.input] from .tf_utils.graph_util import GraphRewriterHelper - if quantize_node_input.op == 'Pad': + + if quantize_node_input.op == "Pad": pad_node_input = node_name_mapping[quantize_node_input.input[0]] - assert pad_node_input.op == 'Placeholder', \ - 'only support Pad between QuantizeV2 and Placeholder' + assert pad_node_input.op == "Placeholder", "only support Pad between QuantizeV2 and Placeholder" from tensorflow.python.framework import tensor_util - paddings_tensor = tensor_util.MakeNdarray(node_name_mapping[ - quantize_node_input.input[1]].attr['value'].tensor).flatten() + + paddings_tensor = tensor_util.MakeNdarray( + node_name_mapping[quantize_node_input.input[1]].attr["value"].tensor + ).flatten() quantize_node.input[0] = quantize_node_input.input[0] for conv_node in quantize_node_outputs: - assert 'Conv2D' in conv_node.op, 'only support QuantizeV2 to Conv2D' + assert "Conv2D" in conv_node.op, "only support QuantizeV2 to Conv2D" - GraphRewriterHelper.set_attr_int_list(conv_node, - "padding_list", paddings_tensor) + GraphRewriterHelper.set_attr_int_list(conv_node, "padding_list", paddings_tensor) graph_def.node.remove(quantize_node_input) from tensorflow.python.framework import dtypes - GraphRewriterHelper.set_attr_dtype(node_name_mapping[quantize_node.input[0]], - "dtype", dtypes.qint8) + + GraphRewriterHelper.set_attr_dtype(node_name_mapping[quantize_node.input[0]], "dtype", dtypes.qint8) for conv_node in quantize_node_outputs: for index, conv_input in enumerate(conv_node.input): @@ -1361,16 +1462,16 @@ def quantize_input(self, model): # get the input's min-max value and calculate scale max_node = node_name_mapping[quantize_node.input[2]] min_node = node_name_mapping[quantize_node.input[1]] - max_value = max_node.attr['value'].tensor.float_val[0] - min_value = min_node.attr['value'].tensor.float_val[0] - scale = 127. 
/ max(abs(max_value), abs(min_value)) + max_value = max_node.attr["value"].tensor.float_val[0] + min_value = min_node.attr["value"].tensor.float_val[0] + scale = 127.0 / max(abs(max_value), abs(min_value)) # remove QuantizeV2 node graph_def.node.remove(quantize_node) graph = tensorflow.Graph() with graph.as_default(): # use name='' to avoid 'import/' to name scope - tensorflow.import_graph_def(graph_def, name='') + tensorflow.import_graph_def(graph_def, name="") return graph, scale def get_optype_wise_ability(self): @@ -1383,12 +1484,12 @@ def get_optype_wise_ability(self): res = OrderedDict() for op in self.quantizable_op_details: if op[1] not in res: - res[op[1]] = {'activation': self.quantizable_op_details[op][0]['activation']} - if 'weight' in self.quantizable_op_details[op][0]: - res[op[1]]['weight'] = self.quantizable_op_details[op][0]['weight'] + res[op[1]] = {"activation": self.quantizable_op_details[op][0]["activation"]} + if "weight" in self.quantizable_op_details[op][0]: + res[op[1]]["weight"] = self.quantizable_op_details[op][0]["weight"] for op in self.bf16_op_details: if op[1] not in res: - res[op[1]] = {'activation': {'dtype': ['bf16']}, 'weight': {'dtype': ['bf16']}} + res[op[1]] = {"activation": {"dtype": ["bf16"]}, "weight": {"dtype": ["bf16"]}} return res def _pre_hook_for_qat(self, dataloader=None): @@ -1414,7 +1515,7 @@ def save(self, model, path): # this function is used to convert keras QAT model to pb in old QAT implementation, # and it's not used in refactored QAT - def convert(self, model, source, destination): # pragma: no cover + def convert(self, model, source, destination): # pragma: no cover """The function is used to convert a source model format to another. Args: @@ -1422,41 +1523,46 @@ def convert(self, model, source, destination): # pragma: no cover source (string): The source model format. destination (string): The destination model format. 
""" - assert source.lower() == 'qat' and destination.lower() == 'default' + assert source.lower() == "qat" and destination.lower() == "default" capability = self.query_fw_capability(model) - quantize_config = {'op_wise_config': {}} - for each_op_info in capability['opwise']: + quantize_config = {"op_wise_config": {}} + for each_op_info in capability["opwise"]: is_perchannel = False weight_bit = 7.0 - for op_cap in capability['opwise'][each_op_info]: - if'activation'in op_cap and 'quant_mode' in op_cap['activation']: - activation = op_cap['activation'] - if 'weight' in op_cap: - weight = op_cap['weight'] - is_perchannel = True if weight[ - 'granularity'][0] == 'per_channel' else False - algorithm = activation['algorithm'][0] + for op_cap in capability["opwise"][each_op_info]: + if "activation" in op_cap and "quant_mode" in op_cap["activation"]: + activation = op_cap["activation"] + if "weight" in op_cap: + weight = op_cap["weight"] + is_perchannel = True if weight["granularity"][0] == "per_channel" else False + algorithm = activation["algorithm"][0] is_asymmetric = False - if 'activation' in op_cap: - is_asymmetric = True if activation['scheme'][0] == 'asym' else False - - quantize_config['op_wise_config'][each_op_info[0]] = (is_perchannel, - algorithm, - is_asymmetric, - weight_bit) + if "activation" in op_cap: + is_asymmetric = True if activation["scheme"][0] == "asym" else False + + quantize_config["op_wise_config"][each_op_info[0]] = ( + is_perchannel, + algorithm, + is_asymmetric, + weight_bit, + ) from .tf_utils.graph_converter import GraphConverter + tmp_graphdef = copy.deepcopy(model.graph_def) for i in tmp_graphdef.node: - if i.op == 'Const' and i.input: - i.ClearField('input') + if i.op == "Const" and i.input: + i.ClearField("input") model.graph_def = tmp_graphdef - converter = GraphConverter(model, - qt_config=quantize_config, - int8_sequences=self.op_wise_sequences, - fake_quant=True, new_api=self.new_api, - performance_only=self.performance_only, - use_bf16=self.use_bf16) + converter = GraphConverter( + model, + qt_config=quantize_config, + int8_sequences=self.op_wise_sequences, + fake_quant=True, + new_api=self.new_api, + performance_only=self.performance_only, + use_bf16=self.use_bf16, + ) return converter.convert() @@ -1471,16 +1577,22 @@ def qat_convert(self, model, quantize_recipe=None): converted_model (tf.keras.Model): Quantized model with fake quant nodes inserted. """ import tensorflow as tf - assert isinstance(model, tf.keras.Model), ("The model to be converted is expected to be " - "a `tf.keras.Model` instance. You should not pass an instance of type: {input}.".format( - input=model.__class__.__name__)) - assert ( - model.__class__.__name__ in ['Functional', 'Sequential'] - ), "Only `Functional` or `Sequential` keras model is supported for QAT." + assert isinstance(model, tf.keras.Model), ( + "The model to be converted is expected to be " + "a `tf.keras.Model` instance. You should not pass an instance of type: {input}.".format( + input=model.__class__.__name__ + ) + ) + + assert model.__class__.__name__ in [ + "Functional", + "Sequential", + ], "Only `Functional` or `Sequential` keras model is supported for QAT." 
from .tf_utils.quantize_graph.qat.quantize_config import global_config from .tf_utils.quantize_graph.qat.quantize_helper import init_quantize_config, qat_clone_function + config = init_quantize_config(model, quantize_recipe) q_model = tf.keras.models.clone_model(model, input_tensors=None, clone_function=qat_clone_function) global_config.clear() @@ -1500,22 +1612,27 @@ def recover_tuned_model(self, model, q_config): tf.compat.v1.GraphDef: the quantized model """ from .tf_utils.graph_rewriter.generic.pre_optimize import PreOptimization + self.pre_optimizer_handle = PreOptimization(model, self.new_api, self.device) self.pre_optimized_model = self.pre_optimizer_handle.get_optimized_model(self.itex_mode) model.graph_def = self.pre_optimized_model.graph_def from .tf_utils.graph_converter_without_calib import GraphConverterWithoutCalib - converter = GraphConverterWithoutCalib(model, - recover_config=q_config, - new_api=self.new_api, - performance_only=self.performance_only, - use_bf16=self.use_bf16) + + converter = GraphConverterWithoutCalib( + model, + recover_config=q_config, + new_api=self.new_api, + performance_only=self.performance_only, + use_bf16=self.use_bf16, + ) return converter.convert_without_calib() def diagnosis_helper(self, fp32_model, quan_model, tune_cfg, save_path): """Tensorflow diagnosis helper function.""" from .tf_utils.util import tf_diagnosis_helper + return tf_diagnosis_helper(fp32_model, quan_model, tune_cfg, save_path) def get_output_op_names(self, qmodel): @@ -1527,11 +1644,11 @@ def get_output_op_names(self, qmodel): def _add_output_op_name(opname): if opname.endswith("_dequantize"): - output_op_names.add(opname[:-len("_dequantize")]) # pylint: disable=no-member + output_op_names.add(opname[: -len("_dequantize")]) # pylint: disable=no-member elif opname.endswith("__dequant"): pass else: - output_op_names.add(opname) # pylint: disable=no-member + output_op_names.add(opname) # pylint: disable=no-member for output_opname in qmodel.output_node_names: op_count = 0 @@ -1543,7 +1660,7 @@ def _add_output_op_name(opname): if opname not in graph_def: break op = graph_def[opname] - if op.node.op == 'Dequantize': + if op.node.op == "Dequantize": _add_output_op_name(opname) break next_opnames = op.node.input @@ -1558,8 +1675,9 @@ def _add_output_op_name(opname): logger.debug(f"output op names: {output_op_names}") return output_op_names - def calculate_op_sensitivity(self, model, dataloader, tune_cfg, output_op_names, - confidence_batches, fallback=True, requantize_cfgs=None): + def calculate_op_sensitivity( + self, model, dataloader, tune_cfg, output_op_names, confidence_batches, fallback=True, requantize_cfgs=None + ): """Compute the op sensitivity. 
The sensitivity metric is the mse between the output of the last quantized op of @@ -1582,22 +1700,27 @@ def calculate_op_sensitivity(self, model, dataloader, tune_cfg, output_op_names, """ from copy import deepcopy - fp32_op_cfg = {'activation': {'dtype': 'fp32', 'quant_mode': 'fp32'}, - 'weight': {'dtype': 'fp32'}} + fp32_op_cfg = {"activation": {"dtype": "fp32", "quant_mode": "fp32"}, "weight": {"dtype": "fp32"}} if fallback: - ops_list = [op for op, config in tune_cfg['op'].items() - if config['activation']['quant_mode'] in ('static', 'dynamic')] - replace_cfgs = {op : fp32_op_cfg for op in tune_cfg['op']} + ops_list = [ + op + for op, config in tune_cfg["op"].items() + if config["activation"]["quant_mode"] in ("static", "dynamic") + ] + replace_cfgs = {op: fp32_op_cfg for op in tune_cfg["op"]} else: - ops_list = [op for op, config in tune_cfg['op'].items() - if config['activation']['quant_mode'] == 'fp32' and op in requantize_cfgs] + ops_list = [ + op + for op, config in tune_cfg["op"].items() + if config["activation"]["quant_mode"] == "fp32" and op in requantize_cfgs + ] replace_cfgs = requantize_cfgs # Step2. compute mse mse_result = self._get_mse_order( - model, deepcopy(tune_cfg), replace_cfgs, ops_list, dataloader, - output_op_names, confidence_batches) + model, deepcopy(tune_cfg), replace_cfgs, ops_list, dataloader, output_op_names, confidence_batches + ) # Step3. sort mse_order = [op for op, _ in sorted(mse_result.items(), key=lambda i: i[1])] @@ -1606,15 +1729,15 @@ def calculate_op_sensitivity(self, model, dataloader, tune_cfg, output_op_names, logger.debug(f"{op}: {mse_result[op]}") return mse_order - def _get_mse_order(self, fp32_model, tune_cfg, replace_cfgs, ops_lst, dataloader, - output_op_names, confidence_batches): + def _get_mse_order( + self, fp32_model, tune_cfg, replace_cfgs, ops_lst, dataloader, output_op_names, confidence_batches + ): """Compute MSE.""" - op_cfg = tune_cfg['op'] + op_cfg = tune_cfg["op"] mse_result = {} partial_dataloader = self._partial_dataloader(dataloader, confidence_batches) - fp32_output = self._inference_model_on_batches( - fp32_model, tune_cfg, partial_dataloader, output_op_names) + fp32_output = self._inference_model_on_batches(fp32_model, tune_cfg, partial_dataloader, output_op_names) for op in ops_lst: # backup and set replace tuning config @@ -1623,8 +1746,7 @@ def _get_mse_order(self, fp32_model, tune_cfg, replace_cfgs, ops_lst, dataloader # quantize and inference the model q_model = self.quantize(tune_cfg, fp32_model, partial_dataloader) - q_output = self._inference_model_on_batches( - q_model, tune_cfg, partial_dataloader, output_op_names) + q_output = self._inference_model_on_batches(q_model, tune_cfg, partial_dataloader, output_op_names) mse_result[op] = self._calculate_mse(fp32_output, q_output) @@ -1635,10 +1757,11 @@ def _get_mse_order(self, fp32_model, tune_cfg, replace_cfgs, ops_lst, dataloader def _partial_dataset_of(self, dataloader, confidence_batches): """Partial dataset.""" - from neural_compressor.experimental.data.datasets.dummy_dataset import DummyDataset from neural_compressor.data.datasets.dummy_dataset import DummyDataset as DummyDataset_v2_x + from neural_compressor.experimental.data.datasets.dummy_dataset import DummyDataset + if isinstance(dataloader.dataset, DummyDataset) or isinstance(dataloader.dataset, DummyDataset_v2_x): - assert(isinstance(confidence_batches, int)) + assert isinstance(confidence_batches, int) ds = copy.deepcopy(dataloader.dataset) ds.dataset = ds.dataset[:confidence_batches] return ds @@ 
-1657,7 +1780,8 @@ def _partial_dataloader(self, dataloader, confidence_batches): num_workers=dataloader.num_workers, pin_memory=dataloader.pin_memory, shuffle=dataloader.shuffle, - distributed=dataloader.distributed) + distributed=dataloader.distributed, + ) def _calculate_mse(self, fp32_output, q_output): """MSE calculation.""" @@ -1666,8 +1790,7 @@ def _calculate_mse(self, fp32_output, q_output): result.append(np.square(i - j).mean()) return np.array(result).mean() - def _inference_model_on_batches(self, model, tune_cfg, dataloader, - output_op_names): + def _inference_model_on_batches(self, model, tune_cfg, dataloader, output_op_names): """Inference model on batches.""" from .tf_utils.util import generate_feed_dict @@ -1687,9 +1810,19 @@ def _inference_model_on_batches(self, model, tune_cfg, dataloader, return predictions - def smooth_quant(self, model, dataloader, calib_iter=1, tune_cfg=None, alpha=0.5, folding=False, - percentile=99.999, op_types=['MatMul', 'Conv2D'], scales_per_op=True, - record_max_info=False): + def smooth_quant( + self, + model, + dataloader, + calib_iter=1, + tune_cfg=None, + alpha=0.5, + folding=False, + percentile=99.999, + op_types=["MatMul", "Conv2D"], + scales_per_op=True, + record_max_info=False, + ): """Convert the model by smooth quant. Args: @@ -1714,6 +1847,7 @@ def smooth_quant(self, model, dataloader, calib_iter=1, tune_cfg=None, alpha=0.5 # Do a pre-optimization before smooth quant from .tf_utils.graph_rewriter.generic.pre_optimize import PreOptimization + self.pre_optimizer_handle = PreOptimization(model, self.new_api, self.device) self.pre_optimized_model = self.pre_optimizer_handle.get_optimized_model(self.itex_mode) model.graph_def = self.pre_optimized_model.graph_def @@ -1722,27 +1856,31 @@ def smooth_quant(self, model, dataloader, calib_iter=1, tune_cfg=None, alpha=0.5 black_nodes = [] if tune_cfg is not None: self._tuning_cfg_to_fw(tune_cfg) - black_nodes = [node for node in self.quantize_config if self.quantize_config[node] == 'fp32'] + black_nodes = [node for node in self.quantize_config if self.quantize_config[node] == "fp32"] # Run calibration to get max values per channel from .tf_utils.smooth_quant_calibration import SmoothQuantCalibration + calibration = SmoothQuantCalibration(model, dataloader, calib_iter, op_types, percentile, black_nodes) max_vals_per_channel, sq_weight_node_names = calibration() # Get weight tensors and weight nodes based on the input tensor from neural_compressor.adaptor.tf_utils.util import get_weight_from_input_tensor - sq_weight_tensors, sq_weights_nodes = get_weight_from_input_tensor( - model, max_vals_per_channel.keys(), op_types) + + sq_weight_tensors, sq_weights_nodes = get_weight_from_input_tensor(model, max_vals_per_channel.keys(), op_types) # Calculate the smooth quant scaler and insert Mul op into the graph from .tf_utils.smooth_quant_scaler import SmoothQuantScaler + scaler = SmoothQuantScaler(model, dataloader, alpha, scales_per_op) - model, mul_list = scaler.transform(max_vals_per_channel, sq_weight_tensors, - sq_weights_nodes, sq_weight_node_names) + model, mul_list = scaler.transform( + max_vals_per_channel, sq_weight_tensors, sq_weights_nodes, sq_weight_node_names + ) self.smooth_quant_mul_ops.extend(mul_list) self.smooth_quant_model = model return self.smooth_quant_model + @adaptor_registry class Tensorflow_ITEXAdaptor(TensorFlowAdaptor): """Tensorflow ITEX Adaptor Class.""" @@ -1771,10 +1909,11 @@ def quantize(self, tune_cfg, model, data_loader, q_func=None): """ assert q_func is None, 
"quantization aware training mode is not support on tensorflow" self._tuning_cfg_to_fw(tune_cfg) - logger.debug('Dump quantization configurations:') + logger.debug("Dump quantization configurations:") logger.debug(self.quantize_config) from .tf_utils.graph_converter import GraphConverter - calib_sampling_size = tune_cfg.get('calib_sampling_size', 1) + + calib_sampling_size = tune_cfg.get("calib_sampling_size", 1) if isinstance(data_loader, BaseDataLoader): batch_size = data_loader.batch_size try: @@ -1782,76 +1921,86 @@ def quantize(self, tune_cfg, model, data_loader, q_func=None): if calib_sampling_size % (batch_size - i) == 0: calib_batch_size = batch_size - i if i != 0: # pragma: no cover - logger.warning("Reset `calibration.dataloader.batch_size` field " - "to {}".format(calib_batch_size) + - " to make sure the sampling_size is " - "divisible exactly by batch size") + logger.warning( + "Reset `calibration.dataloader.batch_size` field " + "to {}".format(calib_batch_size) + " to make sure the sampling_size is " + "divisible exactly by batch size" + ) break tmp_iterations = int(math.ceil(calib_sampling_size / calib_batch_size)) data_loader.batch(calib_batch_size) - self.quantize_config['calib_iteration'] = tmp_iterations - converted_model = GraphConverter(model, - qt_config=self.quantize_config, - recipes=self.recipes, - int8_sequences=self.op_wise_sequences, - fp32_ops=self.fp32_ops, - bf16_ops=self.bf16_ops, - data_loader=data_loader, - itex_mode=self.itex_mode, - qdq_enabled=self.qdq_enabled, - new_api=self.new_api, - performance_only = self.performance_only, - use_bf16=self.use_bf16).convert() - except Exception: # pragma: no cover + self.quantize_config["calib_iteration"] = tmp_iterations + converted_model = GraphConverter( + model, + qt_config=self.quantize_config, + recipes=self.recipes, + int8_sequences=self.op_wise_sequences, + fp32_ops=self.fp32_ops, + bf16_ops=self.bf16_ops, + data_loader=data_loader, + itex_mode=self.itex_mode, + qdq_enabled=self.qdq_enabled, + new_api=self.new_api, + performance_only=self.performance_only, + use_bf16=self.use_bf16, + ).convert() + except Exception: # pragma: no cover from .tf_utils.util import get_model_input_shape + batch_size = get_model_input_shape(model) logger.warning( - "Fail to forward with batch size={}, set to {} now.". 
- format(data_loader.batch_size, batch_size)) + "Fail to forward with batch size={}, set to {} now.".format(data_loader.batch_size, batch_size) + ) data_loader.batch(batch_size) - self.quantize_config['calib_iteration'] = calib_sampling_size - converted_model = GraphConverter(model, - qt_config=self.quantize_config, - recipes=self.recipes, - int8_sequences=self.op_wise_sequences, - fp32_ops=self.fp32_ops, - bf16_ops=self.bf16_ops, - data_loader=data_loader, - itex_mode=self.itex_mode, - qdq_enabled=self.qdq_enabled, - new_api=self.new_api, - performance_only = self.performance_only, - use_bf16=self.use_bf16).convert() - else: # pragma: no cover - if hasattr(data_loader, 'batch_size') and \ - calib_sampling_size % data_loader.batch_size != 0: - iter = self.quantize_config['calib_iteration'] + self.quantize_config["calib_iteration"] = calib_sampling_size + converted_model = GraphConverter( + model, + qt_config=self.quantize_config, + recipes=self.recipes, + int8_sequences=self.op_wise_sequences, + fp32_ops=self.fp32_ops, + bf16_ops=self.bf16_ops, + data_loader=data_loader, + itex_mode=self.itex_mode, + qdq_enabled=self.qdq_enabled, + new_api=self.new_api, + performance_only=self.performance_only, + use_bf16=self.use_bf16, + ).convert() + else: # pragma: no cover + if hasattr(data_loader, "batch_size") and calib_sampling_size % data_loader.batch_size != 0: + iter = self.quantize_config["calib_iteration"] logger.warning( - "Please note that calibration sampling size {} " \ - "isn't divisible exactly by batch size {}. " \ - "So the real sampling size is {}.". - format(calib_sampling_size, data_loader.batch_size, - data_loader.batch_size * iter)) - converted_model = GraphConverter(model, - qt_config=self.quantize_config, - recipes=self.recipes, - int8_sequences=self.op_wise_sequences, - fp32_ops=self.fp32_ops, - bf16_ops=self.bf16_ops, - data_loader=data_loader, - itex_mode=self.itex_mode, - qdq_enabled=self.qdq_enabled, - new_api=self.new_api, - performance_only = self.performance_only, - use_bf16=self.use_bf16).convert() + "Please note that calibration sampling size {} " + "isn't divisible exactly by batch size {}. " + "So the real sampling size is {}.".format( + calib_sampling_size, data_loader.batch_size, data_loader.batch_size * iter + ) + ) + converted_model = GraphConverter( + model, + qt_config=self.quantize_config, + recipes=self.recipes, + int8_sequences=self.op_wise_sequences, + fp32_ops=self.fp32_ops, + bf16_ops=self.bf16_ops, + data_loader=data_loader, + itex_mode=self.itex_mode, + qdq_enabled=self.qdq_enabled, + new_api=self.new_api, + performance_only=self.performance_only, + use_bf16=self.use_bf16, + ).convert() self._dump_model_op_stats(converted_model.graph_def) return converted_model + class TensorflowQuery(QueryBackendCapability): """Tensorflow Query Capability Class.""" - def __init__(self, local_config_file=None, performance_only=False, itex_mode=False, quant_mode='static'): + + def __init__(self, local_config_file=None, performance_only=False, itex_mode=False, quant_mode="static"): """Initialization. Args: @@ -1884,7 +2033,9 @@ def _get_specified_version_cfg(self, data): [dictionary]: the content for specific version. 
""" from functools import cmp_to_key + from pkg_resources import parse_version + config = None def _compare(version1, version2): @@ -1897,33 +2048,31 @@ def _compare(version1, version2): fallback_list = [] for sub_data in data: - if 'default' in sub_data['version']['name']: - assert config == None, "Only one default config " \ - "is allowed in framework yaml file." + if "default" in sub_data["version"]["name"]: + assert config is None, "Only one default config " "is allowed in framework yaml file." config = sub_data - if self.version in sub_data['version']['name']: + if self.version in sub_data["version"]["name"]: return sub_data else: - if sub_data['version']['name'] == ['2.11.0202242', '2.11.0202250', \ - '2.11.0202317', '2.11.0202323']: + if sub_data["version"]["name"] == ["2.11.0202242", "2.11.0202250", "2.11.0202317", "2.11.0202323"]: continue - sorted_list = copy.deepcopy(sub_data['version']['name']) - sorted_list.remove('default') if 'default' in sorted_list else None + sorted_list = copy.deepcopy(sub_data["version"]["name"]) + sorted_list.remove("default") if "default" in sorted_list else None if isinstance(sorted_list, list): # TensorFlow 1.15.0-up1/up2/up3 release versions are abnoraml release naming # convention. Replacing them with dot for version comparision. - sorted_list = [i.replace('-up', '.') for i in sorted_list] + sorted_list = [i.replace("-up", ".") for i in sorted_list] sorted_list = sorted(sorted_list, key=cmp_to_key(_compare), reverse=True) else: assert isinstance(sorted_list, str) - sorted_list = list(sorted_list.replace('-up', '.').split()) + sorted_list = list(sorted_list.replace("-up", ".").split()) for i in sorted_list: if parse_version(self.version) >= parse_version(i): fallback_list.append([i, sub_data]) break - assert config != None, "The default config in framework yaml must exist." + assert config is not None, "The default config in framework yaml must exist." nearest_version = str(0) for fallback in fallback_list: if parse_version(fallback[0]) > parse_version(nearest_version): @@ -1940,24 +2089,30 @@ def _one_shot_query(self): try: self.cur_config = self._get_specified_version_cfg(content) if not self.performance_only: - remove_int8_ops = ['FusedBatchNorm', 'FusedBatchNormV2', 'FusedBatchNormV3', - '_MklFusedInstanceNorm'] + remove_int8_ops = [ + "FusedBatchNorm", + "FusedBatchNormV2", + "FusedBatchNormV3", + "_MklFusedInstanceNorm", + ] for op_type in remove_int8_ops: - while op_type in self.cur_config['int8'][self.quant_mode].keys(): - self.cur_config['int8'][self.quant_mode].pop(op_type, None) + while op_type in self.cur_config["int8"][self.quant_mode].keys(): + self.cur_config["int8"][self.quant_mode].pop(op_type, None) except Exception as e: logger.info("Fail to parse {} due to {}.".format(self.cfg, str(e))) self.cur_config = None - raise ValueError("Please check if the format of {} follows Neural Compressor yaml schema.". - format(self.cfg)) + raise ValueError( + "Please check if the format of {} follows Neural Compressor yaml schema.".format(self.cfg) + ) self._update_cfg_with_usr_definition() def _update_cfg_with_usr_definition(self): """Add user defined precesion configuration.""" from neural_compressor.conf.pythonic_config import tensorflow_config + if tensorflow_config.precisions is not None: - self.cur_config['precisions']['names'] = ','.join(tensorflow_config.precisions) + self.cur_config["precisions"]["names"] = ",".join(tensorflow_config.precisions) def get_version(self): """Get the current backend version infomation. 
@@ -1965,7 +2120,7 @@ def get_version(self): Returns: [string]: version string. """ - return self.cur_config['version']['name'] + return self.cur_config["version"]["name"] def get_op_types(self): """Get the supported op types by all precisions. @@ -1974,9 +2129,11 @@ def get_op_types(self): [dictionary list]: A list composed of dictionary which key is precision and value is the op types. """ - return {'int8': self.get_op_types_by_precision('int8'), - 'uint8': self.get_op_types_by_precision('uint8'), - 'bf16': self.get_op_types_by_precision('bf16')} + return { + "int8": self.get_op_types_by_precision("int8"), + "uint8": self.get_op_types_by_precision("uint8"), + "bf16": self.get_op_types_by_precision("bf16"), + } def get_fuse_patterns(self): """Get supported patterns by low precisions. @@ -1986,230 +2143,224 @@ def get_fuse_patterns(self): and value is the supported patterns. """ spr_int8_pattern_list = [ - 'Conv2D + BiasAdd', - 'Conv2D + BiasAdd + Add + Relu6 + Mul + Mul', - 'Conv2D + Add + Relu6 + Mul + Mul', - 'Conv2D + BiasAdd + swish_f32', - 'Conv2D + Add + swish_f32', - 'Conv2D + AddV2 + swish_f32', - 'Conv2D + swish_f32', - 'Conv2D + BiasAdd + Relu', - 'Conv2D + Relu', - 'Conv2D + BiasAdd + Elu', - 'Conv2D + Elu', - 'Conv2D + BiasAdd + Relu6', - 'Conv2D + Relu6', - 'Conv2D + BiasAdd + LeakyRelu', - 'Conv2D + BiasAdd + Add + LeakyRelu', - 'Conv2D + BiasAdd + AddV2 + LeakyRelu', - 'Conv2D + Add + LeakyRelu', - 'Conv2D + AddV2 + LeakyRelu', - 'Conv2D + LeakyRelu', - 'Conv2D + BiasAdd + Sigmoid', - 'Conv2D + Sigmoid', - 'Conv2D + BiasAdd + LeakyRelu + AddV2', - 'Conv2D + BiasAdd + LeakyRelu + Add', - 'Conv2D + LeakyRelu + AddV2', - 'Conv2D + LeakyRelu + Add', - 'Conv2D + BiasAdd + Relu + AddV2', - 'Conv2D + BiasAdd + Relu + Add', - 'Conv2D + Relu + AddV2', - 'Conv2D + Relu + Add', - 'Conv2D + Add', - 'Conv2D + AddV2', - 'Conv2D + AddV2 + Add', - 'Conv2D + Add + Add', - 'Conv2D + BiasAdd + Add', - 'Conv3D + Add', - 'Conv3D + AddV2', - 'Conv3D + BiasAdd', - 'Conv3D + BiasAdd + Add', - 'Conv3D + BiasAdd + AddV2', - 'Conv3D + AddV2 + AddV2', - 'DepthwiseConv2dNative + BiasAdd + Add + Relu6 + Mul + Mul', - 'DepthwiseConv2dNative + Add + Relu6 + Mul + Mul', - 'DepthwiseConv2dNative + BiasAdd + swish_f32', - 'DepthwiseConv2dNative + Add + swish_f32', - 'DepthwiseConv2dNative + AddV2 + swish_f32', - 'DepthwiseConv2dNative + swish_f32', - 'DepthwiseConv2dNative + BiasAdd + LeakyRelu', - 'DepthwiseConv2dNative + LeakyRelu', - 'DepthwiseConv2dNative + BiasAdd + Relu6', - 'DepthwiseConv2dNative + Relu6', - 'DepthwiseConv2dNative + BiasAdd + Relu', - 'DepthwiseConv2dNative + Relu', - 'DepthwiseConv2dNative + Add + Relu6', - 'DepthwiseConv2dNative + BiasAdd', - 'FusedBatchNormV3 + Relu', - 'FusedBatchNormV3 + LeakyRelu', - '_MklFusedInstanceNorm + Relu', - '_MklFusedInstanceNorm + LeakyRelu', - 'Conv2DBackpropInput + BiasAdd', - 'Conv3DBackpropInputV2 + BiasAdd' + "Conv2D + BiasAdd", + "Conv2D + BiasAdd + Add + Relu6 + Mul + Mul", + "Conv2D + Add + Relu6 + Mul + Mul", + "Conv2D + BiasAdd + swish_f32", + "Conv2D + Add + swish_f32", + "Conv2D + AddV2 + swish_f32", + "Conv2D + swish_f32", + "Conv2D + BiasAdd + Relu", + "Conv2D + Relu", + "Conv2D + BiasAdd + Elu", + "Conv2D + Elu", + "Conv2D + BiasAdd + Relu6", + "Conv2D + Relu6", + "Conv2D + BiasAdd + LeakyRelu", + "Conv2D + BiasAdd + Add + LeakyRelu", + "Conv2D + BiasAdd + AddV2 + LeakyRelu", + "Conv2D + Add + LeakyRelu", + "Conv2D + AddV2 + LeakyRelu", + "Conv2D + LeakyRelu", + "Conv2D + BiasAdd + Sigmoid", + "Conv2D + Sigmoid", + "Conv2D + BiasAdd + 
LeakyRelu + AddV2", + "Conv2D + BiasAdd + LeakyRelu + Add", + "Conv2D + LeakyRelu + AddV2", + "Conv2D + LeakyRelu + Add", + "Conv2D + BiasAdd + Relu + AddV2", + "Conv2D + BiasAdd + Relu + Add", + "Conv2D + Relu + AddV2", + "Conv2D + Relu + Add", + "Conv2D + Add", + "Conv2D + AddV2", + "Conv2D + AddV2 + Add", + "Conv2D + Add + Add", + "Conv2D + BiasAdd + Add", + "Conv3D + Add", + "Conv3D + AddV2", + "Conv3D + BiasAdd", + "Conv3D + BiasAdd + Add", + "Conv3D + BiasAdd + AddV2", + "Conv3D + AddV2 + AddV2", + "DepthwiseConv2dNative + BiasAdd + Add + Relu6 + Mul + Mul", + "DepthwiseConv2dNative + Add + Relu6 + Mul + Mul", + "DepthwiseConv2dNative + BiasAdd + swish_f32", + "DepthwiseConv2dNative + Add + swish_f32", + "DepthwiseConv2dNative + AddV2 + swish_f32", + "DepthwiseConv2dNative + swish_f32", + "DepthwiseConv2dNative + BiasAdd + LeakyRelu", + "DepthwiseConv2dNative + LeakyRelu", + "DepthwiseConv2dNative + BiasAdd + Relu6", + "DepthwiseConv2dNative + Relu6", + "DepthwiseConv2dNative + BiasAdd + Relu", + "DepthwiseConv2dNative + Relu", + "DepthwiseConv2dNative + Add + Relu6", + "DepthwiseConv2dNative + BiasAdd", + "FusedBatchNormV3 + Relu", + "FusedBatchNormV3 + LeakyRelu", + "_MklFusedInstanceNorm + Relu", + "_MklFusedInstanceNorm + LeakyRelu", + "Conv2DBackpropInput + BiasAdd", + "Conv3DBackpropInputV2 + BiasAdd", ] spr_uint8_pattern_list = [ - 'Conv2D + BiasAdd + AddN + Relu', - 'Conv2D + AddN + Relu', - 'Conv2D + BiasAdd + AddN + Relu6', - 'Conv2D + AddN + Relu6', - 'Conv2D + BiasAdd + AddV2 + Relu', - 'Conv2D + AddV2 + Relu', - 'Conv2D + BiasAdd + AddV2 + Relu6', - 'Conv2D + AddV2 + Relu6', - 'Conv2D + BiasAdd + Add + Relu', - 'Conv2D + Add + Relu', - 'Conv2D + BiasAdd + Add + Relu6', - 'Conv2D + Add + Relu6', - 'Conv2D + BiasAdd + Relu', - 'Conv2D + BiasAdd + Relu6', - 'Conv2D + Relu', - 'Conv2D + Relu6', - 'Conv2D + BiasAdd', - 'Conv2D + Add + Add + Relu', - 'DepthwiseConv2dNative + BiasAdd + Relu6', - 'DepthwiseConv2dNative + Relu6', - 'DepthwiseConv2dNative + BiasAdd + Relu', - 'DepthwiseConv2dNative + Relu', - 'DepthwiseConv2dNative + Add + Relu6', - 'DepthwiseConv2dNative + BiasAdd', - 'MatMul + BiasAdd', - 'MatMul + BiasAdd + Add', - 'MatMul + BiasAdd + AddV2', - 'MatMul + BiasAdd + Relu', - 'MatMul + BiasAdd + Relu6', - 'MatMul + BiasAdd + LeakyRelu', - 'MatMul + BiasAdd + Gelu', - 'MatMul + BiasAdd + Elu', - 'MatMul + BiasAdd + Tanh', - 'MatMul + BiasAdd + Sigmoid', - 'MatMul + Add', - 'MatMul + AddV2', - 'MatMul + Relu', - 'MatMul + Relu6', - 'MatMul + LeakyRelu', - 'MatMul + Gelu', - 'MatMul + Elu', - 'MatMul + Tanh', - 'MatMul + Sigmoid', - 'BatchMatMul + Mul', - 'BatchMatMulV2 + Mul', - 'BatchMatMul + Add', - 'BatchMatMulV2 + Add', - 'BatchMatMul + AddV2', - 'BatchMatMulV2 + AddV2', - 'BatchMatMul + Mul + Add', - 'BatchMatMulV2 + Mul + Add', - 'BatchMatMul + Mul + AddV2', - 'BatchMatMulV2 + Mul + AddV2', - 'Conv3D + AddV2 + AddV2 + Relu', - 'Conv3D + Add + Relu', - 'Conv3D + AddV2 + Relu', - 'Conv3D + Relu', - 'Conv3D + Relu6', - 'Conv3D + Add + Relu6', - 'Conv3D + AddV2 + Relu6', - 'Conv3D + Elu', - 'Conv3D + LeakyRelu', - 'Conv3D + BiasAdd + Relu', - 'Conv3D + BiasAdd + Relu6', - 'Conv3D + BiasAdd + Elu', - 'Conv3D + BiasAdd + LeakyRelu', - 'Conv3D + Add + Elu', - 'Conv3D + Add + LeakyRelu', - 'Conv2DBackpropInput + BiasAdd', - 'Conv3DBackpropInputV2 + BiasAdd' + "Conv2D + BiasAdd + AddN + Relu", + "Conv2D + AddN + Relu", + "Conv2D + BiasAdd + AddN + Relu6", + "Conv2D + AddN + Relu6", + "Conv2D + BiasAdd + AddV2 + Relu", + "Conv2D + AddV2 + Relu", + "Conv2D + BiasAdd + 
AddV2 + Relu6", + "Conv2D + AddV2 + Relu6", + "Conv2D + BiasAdd + Add + Relu", + "Conv2D + Add + Relu", + "Conv2D + BiasAdd + Add + Relu6", + "Conv2D + Add + Relu6", + "Conv2D + BiasAdd + Relu", + "Conv2D + BiasAdd + Relu6", + "Conv2D + Relu", + "Conv2D + Relu6", + "Conv2D + BiasAdd", + "Conv2D + Add + Add + Relu", + "DepthwiseConv2dNative + BiasAdd + Relu6", + "DepthwiseConv2dNative + Relu6", + "DepthwiseConv2dNative + BiasAdd + Relu", + "DepthwiseConv2dNative + Relu", + "DepthwiseConv2dNative + Add + Relu6", + "DepthwiseConv2dNative + BiasAdd", + "MatMul + BiasAdd", + "MatMul + BiasAdd + Add", + "MatMul + BiasAdd + AddV2", + "MatMul + BiasAdd + Relu", + "MatMul + BiasAdd + Relu6", + "MatMul + BiasAdd + LeakyRelu", + "MatMul + BiasAdd + Gelu", + "MatMul + BiasAdd + Elu", + "MatMul + BiasAdd + Tanh", + "MatMul + BiasAdd + Sigmoid", + "MatMul + Add", + "MatMul + AddV2", + "MatMul + Relu", + "MatMul + Relu6", + "MatMul + LeakyRelu", + "MatMul + Gelu", + "MatMul + Elu", + "MatMul + Tanh", + "MatMul + Sigmoid", + "BatchMatMul + Mul", + "BatchMatMulV2 + Mul", + "BatchMatMul + Add", + "BatchMatMulV2 + Add", + "BatchMatMul + AddV2", + "BatchMatMulV2 + AddV2", + "BatchMatMul + Mul + Add", + "BatchMatMulV2 + Mul + Add", + "BatchMatMul + Mul + AddV2", + "BatchMatMulV2 + Mul + AddV2", + "Conv3D + AddV2 + AddV2 + Relu", + "Conv3D + Add + Relu", + "Conv3D + AddV2 + Relu", + "Conv3D + Relu", + "Conv3D + Relu6", + "Conv3D + Add + Relu6", + "Conv3D + AddV2 + Relu6", + "Conv3D + Elu", + "Conv3D + LeakyRelu", + "Conv3D + BiasAdd + Relu", + "Conv3D + BiasAdd + Relu6", + "Conv3D + BiasAdd + Elu", + "Conv3D + BiasAdd + LeakyRelu", + "Conv3D + Add + Elu", + "Conv3D + Add + LeakyRelu", + "Conv2DBackpropInput + BiasAdd", + "Conv3DBackpropInputV2 + BiasAdd", ] - tf_int8_pattern_list = [ - 'Conv2D + BiasAdd', - 'Conv2D + BiasAdd + Relu', - 'Conv2D + BiasAdd + Relu6' - ] + tf_int8_pattern_list = ["Conv2D + BiasAdd", "Conv2D + BiasAdd + Relu", "Conv2D + BiasAdd + Relu6"] tf_uint8_pattern_list = [ - 'Conv2D + BiasAdd + AddN + Relu', - 'Conv2D + BiasAdd + AddN + Relu6', - 'Conv2D + BiasAdd + AddV2 + Relu', - 'Conv2D + BiasAdd + AddV2 + Relu6', - 'Conv2D + BiasAdd + Add + Relu', - 'Conv2D + BiasAdd + Add + Relu6', - 'Conv2D + BiasAdd + Relu', - 'Conv2D + BiasAdd + Relu6', - 'Conv2D + Add + Relu', - 'Conv2D + Add + Relu6', - 'Conv2D + Relu', - 'Conv2D + Relu6', - 'Conv2D + BiasAdd', - 'DepthwiseConv2dNative + BiasAdd + Relu6', - 'DepthwiseConv2dNative + BiasAdd + Relu', - 'DepthwiseConv2dNative + Add + Relu6', - 'DepthwiseConv2dNative + BiasAdd', - 'MatMul + BiasAdd + Relu', - 'MatMul + BiasAdd' + "Conv2D + BiasAdd + AddN + Relu", + "Conv2D + BiasAdd + AddN + Relu6", + "Conv2D + BiasAdd + AddV2 + Relu", + "Conv2D + BiasAdd + AddV2 + Relu6", + "Conv2D + BiasAdd + Add + Relu", + "Conv2D + BiasAdd + Add + Relu6", + "Conv2D + BiasAdd + Relu", + "Conv2D + BiasAdd + Relu6", + "Conv2D + Add + Relu", + "Conv2D + Add + Relu6", + "Conv2D + Relu", + "Conv2D + Relu6", + "Conv2D + BiasAdd", + "DepthwiseConv2dNative + BiasAdd + Relu6", + "DepthwiseConv2dNative + BiasAdd + Relu", + "DepthwiseConv2dNative + Add + Relu6", + "DepthwiseConv2dNative + BiasAdd", + "MatMul + BiasAdd + Relu", + "MatMul + BiasAdd", + ] + tf1_15_up3_int8_pattern_list = [ + "Conv2D + BiasAdd", + "Conv2D + BiasAdd + Relu", + "Conv2D + BiasAdd + LeakyRelu", + "Conv2D + BiasAdd + LeakyRelu + AddV2", + "Conv2D + BiasAdd + Relu6", ] - tf1_15_up3_int8_pattern_list= [ - 'Conv2D + BiasAdd', - 'Conv2D + BiasAdd + Relu', - 'Conv2D + BiasAdd + LeakyRelu', - 'Conv2D + 
BiasAdd + LeakyRelu + AddV2', - 'Conv2D + BiasAdd + Relu6' - ] tf1_15_up3_uint8_pattern_list = [ - 'Conv2D + BiasAdd + AddN + Relu', - 'Conv2D + BiasAdd + AddN + Relu6', - 'Conv2D + BiasAdd + AddV2 + Relu', - 'Conv2D + BiasAdd + AddV2 + Relu6', - 'Conv2D + BiasAdd + Add + Relu', - 'Conv2D + BiasAdd + Add + Relu6', - 'Conv2D + BiasAdd + Relu', - 'Conv2D + BiasAdd + Relu6', - 'Conv2D + Add + Relu', - 'Conv2D + Add + Relu6', - 'Conv2D + Relu', - 'Conv2D + Relu6', - 'Conv2D + BiasAdd', - 'DepthwiseConv2dNative + BiasAdd + Relu6', - 'DepthwiseConv2dNative + Add + Relu6', - 'DepthwiseConv2dNative + BiasAdd', - 'MatMul + BiasAdd + Relu', - 'MatMul + BiasAdd', + "Conv2D + BiasAdd + AddN + Relu", + "Conv2D + BiasAdd + AddN + Relu6", + "Conv2D + BiasAdd + AddV2 + Relu", + "Conv2D + BiasAdd + AddV2 + Relu6", + "Conv2D + BiasAdd + Add + Relu", + "Conv2D + BiasAdd + Add + Relu6", + "Conv2D + BiasAdd + Relu", + "Conv2D + BiasAdd + Relu6", + "Conv2D + Add + Relu", + "Conv2D + Add + Relu6", + "Conv2D + Relu", + "Conv2D + Relu6", + "Conv2D + BiasAdd", + "DepthwiseConv2dNative + BiasAdd + Relu6", + "DepthwiseConv2dNative + Add + Relu6", + "DepthwiseConv2dNative + BiasAdd", + "MatMul + BiasAdd + Relu", + "MatMul + BiasAdd", ] - old_tf_int8_pattern_list = [ - 'MatMul + BiasAdd + Relu', - 'MatMul + BiasAdd' - ] + old_tf_int8_pattern_list = ["MatMul + BiasAdd + Relu", "MatMul + BiasAdd"] old_tf_uint8_pattern_list = [ - 'Conv2D + BiasAdd + AddN + Relu', - 'Conv2D + BiasAdd + AddN + Relu6', - 'Conv2D + BiasAdd + AddV2 + Relu', - 'Conv2D + BiasAdd + AddV2 + Relu6', - 'Conv2D + BiasAdd + Add + Relu', - 'Conv2D + BiasAdd + Add + Relu6', - 'Conv2D + BiasAdd + Relu', - 'Conv2D + BiasAdd + Relu6', - 'Conv2D + Add + Relu', - 'Conv2D + Add + Relu6', - 'Conv2D + Relu', - 'Conv2D + Relu6', - 'Conv2D + BiasAdd', - 'DepthwiseConv2dNative + BiasAdd + Relu6', - 'DepthwiseConv2dNative + Add + Relu6', - 'DepthwiseConv2dNative + BiasAdd', - 'MatMul + BiasAdd + Relu', - 'MatMul + BiasAdd' + "Conv2D + BiasAdd + AddN + Relu", + "Conv2D + BiasAdd + AddN + Relu6", + "Conv2D + BiasAdd + AddV2 + Relu", + "Conv2D + BiasAdd + AddV2 + Relu6", + "Conv2D + BiasAdd + Add + Relu", + "Conv2D + BiasAdd + Add + Relu6", + "Conv2D + BiasAdd + Relu", + "Conv2D + BiasAdd + Relu6", + "Conv2D + Add + Relu", + "Conv2D + Add + Relu6", + "Conv2D + Relu", + "Conv2D + Relu6", + "Conv2D + BiasAdd", + "DepthwiseConv2dNative + BiasAdd + Relu6", + "DepthwiseConv2dNative + Add + Relu6", + "DepthwiseConv2dNative + BiasAdd", + "MatMul + BiasAdd + Relu", + "MatMul + BiasAdd", ] for index, pattern in enumerate(spr_int8_pattern_list): - spr_int8_pattern_list[index] = 'Dequantize + ' + pattern + ' + QuantizeV2' + spr_int8_pattern_list[index] = "Dequantize + " + pattern + " + QuantizeV2" for index, pattern in enumerate(spr_uint8_pattern_list): - spr_uint8_pattern_list[index] = 'Dequantize + ' + pattern + ' + QuantizeV2' + spr_uint8_pattern_list[index] = "Dequantize + " + pattern + " + QuantizeV2" if not self.performance_only: - remove_int8_ops = ['FusedBatchNorm', 'FusedBatchNormV2', 'FusedBatchNormV3', - '_MklFusedInstanceNorm'] + remove_int8_ops = ["FusedBatchNorm", "FusedBatchNormV2", "FusedBatchNormV3", "_MklFusedInstanceNorm"] for op_type in remove_int8_ops: - patterns = [f'Dequantize + {op_type} + Relu + QuantizeV2', - f'Dequantize + {op_type} + LeakyRelu + QuantizeV2'] + patterns = [ + f"Dequantize + {op_type} + Relu + QuantizeV2", + f"Dequantize + {op_type} + LeakyRelu + QuantizeV2", + ] for pattern in patterns: while pattern in spr_int8_pattern_list: 
spr_int8_pattern_list.remove(pattern) @@ -2218,25 +2369,25 @@ def get_fuse_patterns(self): patterns = {} import tensorflow as tf + if tf.version.VERSION in spr_base_verions or self.itex_mode: - patterns['int8'] = spr_int8_pattern_list - patterns['uint8'] = spr_uint8_pattern_list - elif version1_gte_version2(tf.version.VERSION, '2.1.0'): - patterns['int8'] = tf_int8_pattern_list - patterns['uint8'] = tf_uint8_pattern_list + patterns["int8"] = spr_int8_pattern_list + patterns["uint8"] = spr_uint8_pattern_list + elif version1_gte_version2(tf.version.VERSION, "2.1.0"): + patterns["int8"] = tf_int8_pattern_list + patterns["uint8"] = tf_uint8_pattern_list if self.itex_mode: - patterns['int8'].append("FusedBatchNormV3 + Relu") - patterns['int8'].append("FusedBatchNormV3 + LeakyRelu") - elif version1_eq_version2(tf.version.VERSION, '1.15.0-up3'): - patterns['int8'] = tf1_15_up3_int8_pattern_list - patterns['uint8'] = tf1_15_up3_uint8_pattern_list + patterns["int8"].append("FusedBatchNormV3 + Relu") + patterns["int8"].append("FusedBatchNormV3 + LeakyRelu") + elif version1_eq_version2(tf.version.VERSION, "1.15.0-up3"): + patterns["int8"] = tf1_15_up3_int8_pattern_list + patterns["uint8"] = tf1_15_up3_uint8_pattern_list else: - patterns['int8'] = old_tf_int8_pattern_list - patterns['uint8'] = old_tf_uint8_pattern_list + patterns["int8"] = old_tf_int8_pattern_list + patterns["uint8"] = old_tf_uint8_pattern_list return patterns - def get_quantization_capability(self): """Get the supported op types' quantization capability. @@ -2244,9 +2395,9 @@ def get_quantization_capability(self): [dictionary list]: A list composed of dictionary which key is precision and value is a dict that describes all op types' quantization capability. """ - for op_type, _ in self.cur_config['int8'][self.quant_mode].items(): - self.cur_config['int8'][self.quant_mode][op_type]['activation']['quant_mode'] = self.quant_mode - return self.cur_config['int8'][self.quant_mode] + for op_type, _ in self.cur_config["int8"][self.quant_mode].items(): + self.cur_config["int8"][self.quant_mode][op_type]["activation"]["quant_mode"] = self.quant_mode + return self.cur_config["int8"][self.quant_mode] def get_op_types_by_precision(self, precision): """Get op types per precision. @@ -2257,36 +2408,43 @@ def get_op_types_by_precision(self, precision): Returns: [string list]: A list composed of op type. 
""" - assert precision in ('bf16', 'uint8', 'int8') + assert precision in ("bf16", "uint8", "int8") import tensorflow as tf - if precision == 'int8': + + if precision == "int8": if tf.version.VERSION in spr_base_verions or self.itex_mode: - op_type_list = [key for key in self.cur_config['int8'][self.quant_mode].keys()] + op_type_list = [key for key in self.cur_config["int8"][self.quant_mode].keys()] if not self.performance_only and not self.itex_mode: - remove_int8_ops = ['FusedBatchNorm', 'FusedBatchNormV2', 'FusedBatchNormV3', - '_MklFusedInstanceNorm'] + remove_int8_ops = [ + "FusedBatchNorm", + "FusedBatchNormV2", + "FusedBatchNormV3", + "_MklFusedInstanceNorm", + ] for op_type in remove_int8_ops: while op_type in op_type_list: op_type_list.remove(op_type) return op_type_list - if version1_gte_version2(tf.version.VERSION, '2.1.0') or \ - version1_eq_version2(tf.version.VERSION, '1.15.0-up3'): - return ['Conv2D', 'MatMul', 'ConcatV2', 'MaxPool', 'AvgPool'] - return ['MatMul', 'ConcatV2', 'MaxPool', 'AvgPool'] - if precision == 'uint8': + if version1_gte_version2(tf.version.VERSION, "2.1.0") or version1_eq_version2( + tf.version.VERSION, "1.15.0-up3" + ): + return ["Conv2D", "MatMul", "ConcatV2", "MaxPool", "AvgPool"] + return ["MatMul", "ConcatV2", "MaxPool", "AvgPool"] + if precision == "uint8": if tf.version.VERSION in spr_base_verions: - return [key for key in self.cur_config['int8'][self.quant_mode].keys() if 'Norm' not in key] - if version1_gte_version2(tf.version.VERSION, '2.1.0') or \ - version1_eq_version2(tf.version.VERSION, '1.15.0-up3'): - return ['Conv2D', 'MatMul', 'ConcatV2', 'MaxPool', - 'AvgPool', 'DepthwiseConv2dNative'] - return ['Conv2D', 'MatMul', 'ConcatV2', 'MaxPool', 'AvgPool'] - if precision == 'bf16': + return [key for key in self.cur_config["int8"][self.quant_mode].keys() if "Norm" not in key] + if version1_gte_version2(tf.version.VERSION, "2.1.0") or version1_eq_version2( + tf.version.VERSION, "1.15.0-up3" + ): + return ["Conv2D", "MatMul", "ConcatV2", "MaxPool", "AvgPool", "DepthwiseConv2dNative"] + return ["Conv2D", "MatMul", "ConcatV2", "MaxPool", "AvgPool"] + if precision == "bf16": if tf.version.VERSION in spr_base_verions: return self.cur_config[precision] - if version1_gte_version2(tf.version.VERSION, '2.1.0') or \ - version1_eq_version2(tf.version.VERSION, '1.15.0-up3'): + if version1_gte_version2(tf.version.VERSION, "2.1.0") or version1_eq_version2( + tf.version.VERSION, "1.15.0-up3" + ): return self.cur_config[precision] return [] @@ -2297,10 +2455,10 @@ def get_mixed_precision_combination(self): [string list]: valid precision list. """ import tensorflow as tf - if version1_gte_version2(tf.version.VERSION, '2.1.0') or \ - version1_eq_version2(tf.version.VERSION, '1.15.0-up3'): - return ['int8', 'uint8', 'bf16', 'fp32'] - return ['uint8', 'fp32'] + + if version1_gte_version2(tf.version.VERSION, "2.1.0") or version1_eq_version2(tf.version.VERSION, "1.15.0-up3"): + return ["int8", "uint8", "bf16", "fp32"] + return ["uint8", "fp32"] def get_bf16_patterns(self): """Get BF16 pattern list. @@ -2308,7 +2466,7 @@ def get_bf16_patterns(self): Returns: [List]: bf16 pattern list. """ - bf16_op_types = [i for i in self.get_op_types_by_precision('bf16')] + bf16_op_types = [i for i in self.get_op_types_by_precision("bf16")] res = [] for i in bf16_op_types: res.append([[i]]) @@ -2322,15 +2480,15 @@ def get_eightbit_patterns(self, qdq_enabled=False): [dictionary]: key is the op type while value is the list of sequences start with the op type same as key value. 
""" - quantizable_op_types = self.get_op_types_by_precision('int8') + \ - self.get_op_types_by_precision('uint8') - int8_patterns = [i.replace( - '+', ' ').split() for i in list(set(self.get_fuse_patterns()['int8'] + - self.get_fuse_patterns()['uint8']))] + quantizable_op_types = self.get_op_types_by_precision("int8") + self.get_op_types_by_precision("uint8") + int8_patterns = [ + i.replace("+", " ").split() + for i in list(set(self.get_fuse_patterns()["int8"] + self.get_fuse_patterns()["uint8"])) + ] res = {} for i in quantizable_op_types: if qdq_enabled: - res[i] = [['Dequantize', i, 'QuantizeV2']] + res[i] = [["Dequantize", i, "QuantizeV2"]] else: res[i] = [[i]] @@ -2346,9 +2504,10 @@ def get_eightbit_patterns(self, qdq_enabled=False): def generate_internal_patterns(self): """Translate the patterns defined in the yaml to internal pattern expression.""" + def _generate_pattern(data): length = [len(i) for i in data] - res=[] + res = [] for index in range(max(length)): if index <= min(length) - 1: tmp = [i[index] for i in data] @@ -2370,7 +2529,7 @@ def _generate_pattern(data): last_len = 1 each_combination = [] for index, value in enumerate(sorted_sequences): - if len(value) >= last_len: + if len(value) >= last_len: last_len = len(value) each_combination.append(value) else: @@ -2383,7 +2542,7 @@ def _generate_pattern(data): op_level_sequences[k].append(copy.deepcopy(each_combination)) final_out = [] - for _ , op_level_sequences in op_level_sequences.items(): + for _, op_level_sequences in op_level_sequences.items(): for similar_sequences in op_level_sequences: final_out.append(_generate_pattern(similar_sequences)) diff --git a/neural_compressor/adaptor/tensorflow.yaml b/neural_compressor/adaptor/tensorflow.yaml index f8443bbdd3e..d2a635dd3f6 100644 --- a/neural_compressor/adaptor/tensorflow.yaml +++ b/neural_compressor/adaptor/tensorflow.yaml @@ -37,7 +37,7 @@ "Where","Unpack","ZerosLike" #clear list ] fp32: ['*'] # '*' means all op types - int8: { + int8: { 'static': { 'Conv2D': { 'weight': { @@ -243,4 +243,3 @@ 'dynamic': { } } - diff --git a/neural_compressor/adaptor/tf_utils/graph_converter.py b/neural_compressor/adaptor/tf_utils/graph_converter.py index 98714e07075..ec1390ea4eb 100644 --- a/neural_compressor/adaptor/tf_utils/graph_converter.py +++ b/neural_compressor/adaptor/tf_utils/graph_converter.py @@ -18,82 +18,97 @@ """Graph Converter Class.""" import copy -import os import logging +import os import tempfile -import tensorflow as tf - from collections import OrderedDict, UserDict + +import tensorflow as tf from tensorflow.core.framework import graph_pb2 from tensorflow.python.platform import gfile -from neural_compressor.utils.utility import get_all_fp32_data -from neural_compressor.utils.utility import get_tensor_histogram -from neural_compressor.utils.utility import combine_histogram -from neural_compressor.utils.utility import CaptureOutputToFile, CpuInfo + +from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.insert_print_node import InsertPrintMinMaxNode from neural_compressor.conf.dotdict import deep_get from neural_compressor.model import Model from neural_compressor.model.tensorflow_model import TensorflowSavedModelModel -from .transform_graph.insert_logging import InsertLogging -from .transform_graph.rerange_quantized_concat import RerangeQuantizedConcat -from .transform_graph.bias_correction import BiasCorrection -from .util import generate_feed_dict, iterator_sess_run,version1_gt_version2,version1_eq_version2 -from .util import 
version1_gte_version2,version1_lte_version2,version1_lt_version2 -from .util import TF_SPR_BASE_VERSIONS -from .quantize_graph.quantize_graph_for_intel_cpu import QuantizeGraphForIntel -from .quantize_graph_common import QuantizeGraphHelper -from .quantize_graph.qdq.optimize_qdq import OptimizeQDQGraph +from neural_compressor.utils.utility import ( + CaptureOutputToFile, + CpuInfo, + combine_histogram, + get_all_fp32_data, + get_tensor_histogram, +) -from .graph_util import GraphAnalyzer -from .graph_rewriter.generic.remove_training_nodes import RemoveTrainingNodesOptimizer -from .graph_rewriter.generic.strip_unused_nodes import StripUnusedNodesOptimizer +from .graph_rewriter.bf16.bf16_convert import BF16Convert from .graph_rewriter.generic.fold_batch_norm import FoldBatchNormNodesOptimizer from .graph_rewriter.generic.fuse_pad_with_conv import FusePadWithConv2DOptimizer -from .graph_rewriter.generic.strip_equivalent_nodes import StripEquivalentNodesOptimizer from .graph_rewriter.generic.fuse_pad_with_fp32_conv import FusePadWithFP32Conv2DOptimizer - -from .graph_rewriter.int8.freeze_value import FreezeValueTransformer +from .graph_rewriter.generic.remove_training_nodes import RemoveTrainingNodesOptimizer +from .graph_rewriter.generic.strip_equivalent_nodes import StripEquivalentNodesOptimizer +from .graph_rewriter.generic.strip_unused_nodes import StripUnusedNodesOptimizer from .graph_rewriter.int8.freeze_fake_quant import FreezeFakeQuantOpOptimizer -from .graph_rewriter.int8.fuse_conv_requantize import FuseConvRequantizeTransformer -from .graph_rewriter.int8.fuse_matmul_requantize import FuseMatMulRequantizeTransformer -from .graph_rewriter.int8.fuse_matmul_requantize import FuseMatMulRequantizeDequantizeTransformer -from .graph_rewriter.int8.fuse_matmul_requantize import FuseMatMulRequantizeNewAPITransformer -from .graph_rewriter.int8.fuse_matmul_requantize import FuseMatMulRequantizeDequantizeNewAPITransformer +from .graph_rewriter.int8.freeze_value import FreezeValueTransformer from .graph_rewriter.int8.fuse_conv_redundant_dequantize import FuseConvRedundantDequantizeTransformer +from .graph_rewriter.int8.fuse_conv_requantize import FuseConvRequantizeTransformer from .graph_rewriter.int8.fuse_matmul_redundant_dequantize import FuseMatMulRedundantDequantizeTransformer -from .graph_rewriter.int8.scale_propagation import ScaleProPagationTransformer -from .graph_rewriter.bf16.bf16_convert import BF16Convert -from .graph_rewriter.int8.post_quantized_op_cse import PostCseOptimizer -from .graph_rewriter.int8.post_hostconst_converter import PostHostConstConverter +from .graph_rewriter.int8.fuse_matmul_requantize import ( + FuseMatMulRequantizeDequantizeNewAPITransformer, + FuseMatMulRequantizeDequantizeTransformer, + FuseMatMulRequantizeNewAPITransformer, + FuseMatMulRequantizeTransformer, +) from .graph_rewriter.int8.meta_op_optimizer import MetaInfoChangingMemOpOptimizer +from .graph_rewriter.int8.post_hostconst_converter import PostHostConstConverter +from .graph_rewriter.int8.post_quantized_op_cse import PostCseOptimizer +from .graph_rewriter.int8.scale_propagation import ScaleProPagationTransformer from .graph_rewriter.qdq.insert_qdq_pattern import GenerateGraphWithQDQPattern -from .graph_rewriter.qdq.share_qdq_y_pattern import ShareQDQForItexYPatternOptimizer from .graph_rewriter.qdq.merge_duplicated_qdq import MergeDuplicatedQDQOptimizer -from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.insert_print_node import InsertPrintMinMaxNode +from 
.graph_rewriter.qdq.share_qdq_y_pattern import ShareQDQForItexYPatternOptimizer +from .graph_util import GraphAnalyzer from .graph_util import GraphRewriterHelper as Helper - - -TF_SUPPORTED_MAX_VERSION = '2.12.0' -TF_SUPPORTED_MIN_VERSION = '1.14.0' +from .quantize_graph.qdq.optimize_qdq import OptimizeQDQGraph +from .quantize_graph.quantize_graph_for_intel_cpu import QuantizeGraphForIntel +from .quantize_graph_common import QuantizeGraphHelper +from .transform_graph.bias_correction import BiasCorrection +from .transform_graph.insert_logging import InsertLogging +from .transform_graph.rerange_quantized_concat import RerangeQuantizedConcat +from .util import ( + TF_SPR_BASE_VERSIONS, + generate_feed_dict, + iterator_sess_run, + version1_eq_version2, + version1_gt_version2, + version1_gte_version2, + version1_lt_version2, + version1_lte_version2, +) + +TF_SUPPORTED_MAX_VERSION = "2.12.0" +TF_SUPPORTED_MIN_VERSION = "1.14.0" logger = logging.getLogger("neural_compressor") debug = bool(logger.level == logging.DEBUG) + class GraphConverter: """Graph Converter Class is used to generate the quantization graph.""" - def __init__(self, - model, - qt_config={}, - recipes={}, - int8_sequences={}, - fp32_ops=[], - bf16_ops=[], - data_loader=None, - fake_quant=False, - itex_mode=False, - qdq_enabled=False, - new_api=False, - performance_only=False, - use_bf16=False): + + def __init__( + self, + model, + qt_config={}, + recipes={}, + int8_sequences={}, + fp32_ops=[], + bf16_ops=[], + data_loader=None, + fake_quant=False, + itex_mode=False, + qdq_enabled=False, + new_api=False, + performance_only=False, + use_bf16=False, + ): """Convert graph. :param model: input tensorflow model. @@ -104,14 +119,14 @@ def __init__(self, :param fake_quant: for quantization-aware training model conversion to default model """ self.model = model - #(TODO) does it right to make the internal model format as graph_def + # (TODO) does it right to make the internal model format as graph_def self.output_tensor_names = self.model.output_tensor_names self.input_tensor_names = self.model.input_tensor_names # quantize specific config - self.calib_iteration = qt_config['calib_iteration'] if not fake_quant else 0 - self.op_wise_config = qt_config['op_wise_config'] - self.advance_config = deep_get(qt_config, 'advance') - self.device = qt_config['device'] if 'device' in qt_config else 'cpu' + self.calib_iteration = qt_config["calib_iteration"] if not fake_quant else 0 + self.op_wise_config = qt_config["op_wise_config"] + self.advance_config = deep_get(qt_config, "advance") + self.device = qt_config["device"] if "device" in qt_config else "cpu" self.int8_sequences = int8_sequences self.fp32_ops = fp32_ops self.bf16_ops = bf16_ops @@ -130,10 +145,11 @@ def __init__(self, if "backend" in self.model.kwargs: self._fp32_model = Model(self.model._model, **self.model.kwargs) else: - self._fp32_model = Model(self.model._model, - **self.model.kwargs, - backend="itex" if itex_mode and not \ - isinstance(self.model, TensorflowSavedModelModel) else "default") + self._fp32_model = Model( + self.model._model, + **self.model.kwargs, + backend="itex" if itex_mode and not isinstance(self.model, TensorflowSavedModelModel) else "default" + ) self._fp32_model.graph_def = self.model.graph_def self._fp32_model.output_tensor_names = self.output_tensor_names self._fp32_model.input_tensor_names = self.input_tensor_names @@ -142,23 +158,22 @@ def __init__(self, self._kl_op_dict = {} self._kl_keys = [] self._print_node_mapping = {} - self._enable_kl_op_names = [ 
- k for k in self.op_wise_config if self.op_wise_config[k][1] == 'kl' - ] + self._enable_kl_op_names = [k for k in self.op_wise_config if self.op_wise_config[k][1] == "kl"] self.scale_info = {} self.scale_info.update(qt_config) - self.scale_info.update({'recipes': self.recipes}) - self.scale_info.update({'int8_sequences': self.int8_sequences}) - self.scale_info.update({'bf16_ops': self.bf16_ops}) - self.scale_info.update({'fp32_ops': self.fp32_ops}) + self.scale_info.update({"recipes": self.recipes}) + self.scale_info.update({"int8_sequences": self.int8_sequences}) + self.scale_info.update({"bf16_ops": self.bf16_ops}) + self.scale_info.update({"fp32_ops": self.fp32_ops}) if "backend" in self.model.kwargs: self._sampling_model = Model(self.model._model, **self.model.kwargs) else: - self._sampling_model = Model(self.model._model, - **self.model.kwargs, - backend="itex" if itex_mode and not \ - isinstance(self.model, TensorflowSavedModelModel) else "default") + self._sampling_model = Model( + self.model._model, + **self.model.kwargs, + backend="itex" if itex_mode and not isinstance(self.model, TensorflowSavedModelModel) else "default" + ) self._sampling_model.output_tensor_names = self.output_tensor_names self._sampling_model.input_tensor_names = self.input_tensor_names @@ -188,21 +203,22 @@ def _inference(self, model): output_tensor = model.output_tensor # TF table initialization: https://github.com/tensorflow/tensorflow/issues/8665 node_names = [node.name for node in sess.graph.as_graph_def().node] - if 'init_all_tables' in node_names: - init_table_op = sess.graph.get_operation_by_name('init_all_tables') + if "init_all_tables" in node_names: + init_table_op = sess.graph.get_operation_by_name("init_all_tables") sess.run(init_table_op) logger.info("Start sampling on calibration dataset.") if hasattr(self.data_loader, "__len__") and len(self.data_loader) == 0: feed_dict = {} - _ = sess.run(output_tensor, feed_dict) if iter_op==[] \ - else iterator_sess_run(sess, iter_op, \ - feed_dict, output_tensor, self.calib_iteration) + _ = ( + sess.run(output_tensor, feed_dict) + if iter_op == [] + else iterator_sess_run(sess, iter_op, feed_dict, output_tensor, self.calib_iteration) + ) for idx, (inputs, labels) in enumerate(self.data_loader): if len(input_tensor) == 1: feed_dict = {} - if isinstance(inputs, dict) or isinstance(inputs, OrderedDict) \ - or isinstance(inputs, UserDict): + if isinstance(inputs, dict) or isinstance(inputs, OrderedDict) or isinstance(inputs, UserDict): for name in inputs: for tensor in input_tensor: pos = tensor.name.rfind(":") @@ -213,11 +229,9 @@ def _inference(self, model): else: feed_dict = {input_tensor[0]: inputs} # get raw tensor using index [0] else: - assert len(input_tensor) == len(inputs), \ - 'inputs len must equal with input_tensor' + assert len(input_tensor) == len(inputs), "inputs len must equal with input_tensor" feed_dict = {} - if isinstance(inputs, dict) or isinstance(inputs, OrderedDict) \ - or isinstance(inputs, UserDict): + if isinstance(inputs, dict) or isinstance(inputs, OrderedDict) or isinstance(inputs, UserDict): for name in inputs: for tensor in input_tensor: pos = tensor.name.rfind(":") @@ -230,9 +244,7 @@ def _inference(self, model): # we should check and pair them def check_shape(tensor, data): # scalar or 1 dim default True - if tensor.shape == None or \ - len(tensor.shape.dims) == 1 or \ - not hasattr(data, 'shape'): + if tensor.shape is None or len(tensor.shape.dims) == 1 or not hasattr(data, "shape"): return True tensor_shape = 
tuple(tensor.shape) data_shape = tuple(data.shape) @@ -255,9 +267,11 @@ def check_shape(tensor, data): if check_shape(dis_tensor, dis_input): feed_dict.update({dis_tensor: dis_input}) break - _ = sess.run(output_tensor, feed_dict) if iter_op==[] \ - else iterator_sess_run(sess, iter_op, \ - feed_dict, output_tensor, self.calib_iteration) + _ = ( + sess.run(output_tensor, feed_dict) + if iter_op == [] + else iterator_sess_run(sess, iter_op, feed_dict, output_tensor, self.calib_iteration) + ) if idx + 1 == self.calib_iteration: break os.environ["ITEX_REMAPPER"] = "1" @@ -268,8 +282,8 @@ def _check_tf_version(self): is_sprbase_version = False try: from tensorflow import python - if (hasattr(python, "pywrap_tensorflow") - and hasattr(python.pywrap_tensorflow, "IsMklEnabled")): + + if hasattr(python, "pywrap_tensorflow") and hasattr(python.pywrap_tensorflow, "IsMklEnabled"): from tensorflow.python.pywrap_tensorflow import IsMklEnabled elif hasattr(python.util, "_pywrap_util_port"): from tensorflow.python.util._pywrap_util_port import IsMklEnabled @@ -278,13 +292,13 @@ def _check_tf_version(self): if IsMklEnabled() and (version1_lte_version2(TF_SUPPORTED_MIN_VERSION, tf.version.VERSION)): is_supported_version = True - if version1_gte_version2(tf.version.VERSION, '2.6.0') and os.getenv('TF_ENABLE_ONEDNN_OPTS') == '1': + if version1_gte_version2(tf.version.VERSION, "2.6.0") and os.getenv("TF_ENABLE_ONEDNN_OPTS") == "1": is_supported_version = True - if version1_gte_version2(tf.version.VERSION, '2.9.0'): + if version1_gte_version2(tf.version.VERSION, "2.9.0"): is_supported_version = True - if tf.version.VERSION == '1.15.0-up3': + if tf.version.VERSION == "1.15.0-up3": is_supported_version = True if tf.version.VERSION in TF_SPR_BASE_VERSIONS: @@ -296,44 +310,53 @@ def _check_tf_version(self): finally: if version1_gt_version2(tf.version.VERSION, TF_SUPPORTED_MAX_VERSION) and not is_sprbase_version: logger.warning( - str('Please note the {} version of TensorFlow is not fully verified! ' - 'Suggest to use the versions between {} and {} if meet problem.') - .format(tf.version.VERSION, TF_SUPPORTED_MIN_VERSION, TF_SUPPORTED_MAX_VERSION)) - - if version1_eq_version2(tf.version.VERSION, '2.5.0') and os.getenv('TF_ENABLE_MKL_NATIVE_FORMAT') != '0': - logger.fatal("Please set environment variable TF_ENABLE_MKL_NATIVE_FORMAT=0 " - "when TensorFlow 2.5.0 installed.") - - if version1_gte_version2(tf.version.VERSION, '2.6.0') and \ - version1_lt_version2(tf.version.VERSION, '2.9.0') and \ - os.getenv('TF_ENABLE_ONEDNN_OPTS') != '1': - logger.fatal("Please set environment variable TF_ENABLE_ONEDNN_OPTS=1 " - "when TensorFlow >= 2.6.0 and < 2.9.0 installed.") + str( + "Please note the {} version of TensorFlow is not fully verified! " + "Suggest to use the versions between {} and {} if meet problem." + ).format(tf.version.VERSION, TF_SUPPORTED_MIN_VERSION, TF_SUPPORTED_MAX_VERSION) + ) + + if version1_eq_version2(tf.version.VERSION, "2.5.0") and os.getenv("TF_ENABLE_MKL_NATIVE_FORMAT") != "0": + logger.fatal( + "Please set environment variable TF_ENABLE_MKL_NATIVE_FORMAT=0 " "when TensorFlow 2.5.0 installed." + ) + + if ( + version1_gte_version2(tf.version.VERSION, "2.6.0") + and version1_lt_version2(tf.version.VERSION, "2.9.0") + and os.getenv("TF_ENABLE_ONEDNN_OPTS") != "1" + ): + logger.fatal( + "Please set environment variable TF_ENABLE_ONEDNN_OPTS=1 " + "when TensorFlow >= 2.6.0 and < 2.9.0 installed." 
+ ) if not is_supported_version: raise ValueError( - str('Please install TensorFlow within version >={} and <={}.') - .format(TF_SUPPORTED_MIN_VERSION, TF_SUPPORTED_MAX_VERSION)) + str("Please install TensorFlow within version >={} and <={}.").format( + TF_SUPPORTED_MIN_VERSION, TF_SUPPORTED_MAX_VERSION + ) + ) def _check_args(self): """Check model's arguments.""" - if self.model.workspace_path and not os.path.isdir(self.model.workspace_path) \ - and not os.path.exists(os.path.dirname(self.model.workspace_path)): + if ( + self.model.workspace_path + and not os.path.isdir(self.model.workspace_path) + and not os.path.exists(os.path.dirname(self.model.workspace_path)) + ): raise ValueError('"output_graph" directory does not exist.') self._output_path = self.model.workspace_path def _gen_tmp_filenames(self): """Generate the temporary file names.""" - self._int8_dynamic_range_model_path = os.path.join(self._output_path, \ - 'int8_dynamic_range_graph') - self._int8_logged_model_path = os.path.join(self._output_path, 'int8_logged_graph') - self._fp32_logged_model_path = os.path.join(self._output_path, 'fp32_logged_graph') - self._int8_frozen_range_model_path = os.path.join(self._output_path, - 'int8_frozen_range_graph') - self._bf16_mixed_precision_model_path = os.path.join(self._output_path, - 'int8_bf16_mixed_precision_graph') - - self.output_graph = os.path.join(self._output_path, 'int8_final_fused_graph') + self._int8_dynamic_range_model_path = os.path.join(self._output_path, "int8_dynamic_range_graph") + self._int8_logged_model_path = os.path.join(self._output_path, "int8_logged_graph") + self._fp32_logged_model_path = os.path.join(self._output_path, "fp32_logged_graph") + self._int8_frozen_range_model_path = os.path.join(self._output_path, "int8_frozen_range_graph") + self._bf16_mixed_precision_model_path = os.path.join(self._output_path, "int8_bf16_mixed_precision_graph") + + self.output_graph = os.path.join(self._output_path, "int8_final_fused_graph") if self.performance_only: # reuse the fp32 model for performance only mode self._tmp_model = self._fp32_model @@ -342,10 +365,13 @@ def _gen_tmp_filenames(self): if "backend" in self.model.kwargs: self._tmp_model = Model(self.model._model, **self.model.kwargs) else: - self._tmp_model = Model(self.model._model, - **self.model.kwargs, - backend="itex" if self.itex_mode and not \ - isinstance(self.model, TensorflowSavedModelModel) else "default") + self._tmp_model = Model( + self.model._model, + **self.model.kwargs, + backend="itex" + if self.itex_mode and not isinstance(self.model, TensorflowSavedModelModel) + else "default" + ) self._tmp_model.graph_def = self.model.graph_def self._tmp_model.output_tensor_names = self.output_tensor_names self._tmp_model.input_tensor_names = self.input_tensor_names @@ -370,8 +396,7 @@ def convert(self): model = self.quantize() if self.itex_mode: - host_const_graph_def = \ - PostHostConstConverter(self._tmp_model.graph_def).do_transformation() + host_const_graph_def = PostHostConstConverter(self._tmp_model.graph_def).do_transformation() host_const_graph_def.library.CopyFrom(self.model.graph_def.library) self._tmp_model.graph_def = host_const_graph_def @@ -380,8 +405,11 @@ def convert(self): if self.exclude_node_names: self.bf16_ops.extend(self.exclude_node_names) - if len(self.bf16_ops) > 0 and (self.use_bf16 or self.performance_only) and \ - (CpuInfo().bf16 or os.getenv('FORCE_BF16') == '1'): + if ( + len(self.bf16_ops) > 0 + and (self.use_bf16 or self.performance_only) + and (CpuInfo().bf16 or 
os.getenv("FORCE_BF16") == "1") + ): model = self.bf16_convert() if self.new_api: @@ -410,55 +438,45 @@ def _get_fp32_print_node_names(self, specified_op_list): } target_conv_op = [] sorted_graph = QuantizeGraphHelper().get_sorted_graph( - self._fp32_model.graph_def, - self._fp32_model.input_node_names, - self._fp32_model.output_node_names) + self._fp32_model.graph_def, self._fp32_model.input_node_names, self._fp32_model.output_node_names + ) - node_name_mapping = { - node.name: node for node in self._tmp_graph_def.node if node.op != "Const" - } + node_name_mapping = {node.name: node for node in self._tmp_graph_def.node if node.op != "Const"} for node in self._tmp_graph_def.node: if node.op in offset_map: - target_conv_op.append(node.name.split('_eightbit_')[0]) - fp32_node_name_mapping = { - node.name: node - for node in sorted_graph.node if node.op != "Const" - } + target_conv_op.append(node.name.split("_eightbit_")[0]) + fp32_node_name_mapping = {node.name: node for node in sorted_graph.node if node.op != "Const"} sorted_node_names = [i.name for i in sorted_graph.node if i.op != "Const"] output_node_names = [] for i in target_conv_op: if specified_op_list and i not in specified_op_list: continue - if node_name_mapping[i + "_eightbit_quantized_conv"].op == \ - 'QuantizedConv2DWithBiasSumAndRelu': + if node_name_mapping[i + "_eightbit_quantized_conv"].op == "QuantizedConv2DWithBiasSumAndRelu": start_index = sorted_node_names.index(i) for index, value in enumerate(sorted_node_names[start_index:]): - if fp32_node_name_mapping[value].op.startswith( - "Add") and fp32_node_name_mapping[sorted_node_names[start_index + - index + - 1]].op == "Relu": + if ( + fp32_node_name_mapping[value].op.startswith("Add") + and fp32_node_name_mapping[sorted_node_names[start_index + index + 1]].op == "Relu" + ): output_node_names.append(sorted_node_names[start_index + index + 1]) self._print_node_mapping[sorted_node_names[start_index + index + 1]] = i elif i in sorted_node_names: start_index = sorted_node_names.index(i) - end_index = start_index + offset_map[node_name_mapping[ - i + "_eightbit_quantized_conv"].op] + end_index = start_index + offset_map[node_name_mapping[i + "_eightbit_quantized_conv"].op] output_node_names.append(sorted_node_names[end_index]) self._print_node_mapping[sorted_node_names[end_index]] = i for i in output_node_names: - self._kl_keys.append(';' + i + '__print__;__KL') + self._kl_keys.append(";" + i + "__print__;__KL") fp32_graph_def = graph_pb2.GraphDef() fp32_graph_def.CopyFrom(self._fp32_model.graph_def) - self._fp32_model.graph_def = InsertLogging(self._fp32_model.graph_def, - node_name_list=output_node_names, - message="__KL:", - summarize=-1, - dump_fp32=True).do_transformation() + self._fp32_model.graph_def = InsertLogging( + self._fp32_model.graph_def, node_name_list=output_node_names, message="__KL:", summarize=-1, dump_fp32=True + ).do_transformation() self._fp32_model.save(self._fp32_logged_model_path) self._fp32_model.graph_def = fp32_graph_def @@ -469,8 +487,8 @@ def _search_y_pattern_for_itex(self): g = GraphAnalyzer() g.graph = self._fp32_model.graph_def g.parse_graph() - y_pattern = [['Conv2D', 'MatMul'], ['BiasAdd'], ['Add', 'AddV2', 'AddN'], ('Relu',)] - y_pattern_variant = [['MaxPool', 'AvgPool'], ['Add', 'AddV2', 'AddN'], ('Relu',)] + y_pattern = [["Conv2D", "MatMul"], ["BiasAdd"], ["Add", "AddV2", "AddN"], ("Relu",)] + y_pattern_variant = [["MaxPool", "AvgPool"], ["Add", "AddV2", "AddN"], ("Relu",)] target_nodes = g.query_fusion_pattern_nodes(y_pattern) 
target_nodes_variant = g.query_fusion_pattern_nodes(y_pattern_variant) @@ -507,9 +525,7 @@ def quantize(self): else: if self._enable_kl_op_names: self._get_fp32_print_node_names(self._enable_kl_op_names) - self._generate_calibration_data(self._fp32_logged_model_path, - self._fp32_print_data, - True) + self._generate_calibration_data(self._fp32_logged_model_path, self._fp32_print_data, True) output_tensor_names = copy.deepcopy(self.model.output_tensor_names) sampling_graph_def = copy.deepcopy(self._fp32_model.graph_def) @@ -518,21 +534,19 @@ def quantize(self): # after enabling pad+conv2d in new API. non_pad_ops = list(list(set(self.fp32_ops).union(set(self.bf16_ops)))) sampling_graph_def = FusePadWithFP32Conv2DOptimizer( - sampling_graph_def, - non_pad_ops, - self._tmp_model.input_node_names, - self.op_wise_config, - self.new_api).do_transformation() + sampling_graph_def, non_pad_ops, self._tmp_model.input_node_names, self.op_wise_config, self.new_api + ).do_transformation() for i in self.quantized_node_info: sampling_graph_def, output_names = InsertPrintMinMaxNode( - sampling_graph_def, i[0], i[-1], self.new_api).do_transformation() + sampling_graph_def, i[0], i[-1], self.new_api + ).do_transformation() output_tensor_names.extend(output_names) if self.quantized_node_info: sampling_graph_def.library.CopyFrom(self.model.graph_def.library) self._sampling_model.graph_def = sampling_graph_def self._sampling_model.output_tensor_names = output_tensor_names - tmp_dump_file = tempfile.mkstemp(suffix='.log')[1] + tmp_dump_file = tempfile.mkstemp(suffix=".log")[1] with CaptureOutputToFile(tmp_dump_file): self._inference(self._sampling_model) self._calibration_data = Helper.gen_valid_sampling_log(tmp_dump_file) @@ -541,6 +555,7 @@ def quantize(self): del sampling_graph_def del self._sampling_model import gc + gc.collect() if len(self._calibration_data) > 0: @@ -553,6 +568,7 @@ def quantize(self): raise except Exception as e: import traceback + traceback.print_exc() self._tmp_model = None logger.error("Fail to quantize graph due to {}.".format(str(e))) @@ -567,12 +583,12 @@ def bf16_convert(self): try: logger.info("Start BF16 conversion.") self._tmp_model.graph_def = BF16Convert( - self._tmp_model.graph_def, - self.fp32_ops, - self.bf16_ops).do_transformation() + self._tmp_model.graph_def, self.fp32_ops, self.bf16_ops + ).do_transformation() except Exception as e: import traceback + traceback.print_exc() self._tmp_model = None logger.error("Fail to convert graph due to {}.".format(str(e))) @@ -587,16 +603,12 @@ def _quantize_graph(self): non_pad_ops = list(list(set(self.fp32_ops).union(set(self.bf16_ops)))) self._tmp_graph_def = FusePadWithConv2DOptimizer( - self._tmp_graph_def, - non_pad_ops, - self._tmp_model.input_node_names, - self.op_wise_config, - self.new_api).do_transformation() + self._tmp_graph_def, non_pad_ops, self._tmp_model.input_node_names, self.op_wise_config, self.new_api + ).do_transformation() self._tmp_graph_def = QuantizeGraphHelper().get_sorted_graph( - self._tmp_graph_def, - self._tmp_model.input_node_names, - self._tmp_model.output_node_names) + self._tmp_graph_def, self._tmp_model.input_node_names, self._tmp_model.output_node_names + ) self._tmp_graph_def, self.quantized_node_info, exclude_node_names = QuantizeGraphForIntel( self._tmp_graph_def, @@ -608,7 +620,8 @@ def _quantize_graph(self): self.fake_quant, self.new_api, self.performance_only, - self.itex_mode).do_transform() + self.itex_mode, + ).do_transform() self.exclude_node_names = exclude_node_names 
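Note (illustrative sketch, not part of the patch): the calibration flow in quantize() above inserts print-min/max nodes, runs a few batches, and captures the dumped ranges from a temporary log before freezing requantization ranges. Conceptually this reduces to tracking per-tensor min/max over the calibration iterations; the standalone NumPy sketch below shows that idea only — MinMaxObserver is a hypothetical name and does not reflect the actual InsertPrintMinMaxNode/CaptureOutputToFile machinery.

import numpy as np

class MinMaxObserver:
    """Tracks the running min/max of one tensor over calibration batches."""

    def __init__(self):
        self.min = None
        self.max = None

    def update(self, tensor):
        batch_min, batch_max = float(np.min(tensor)), float(np.max(tensor))
        self.min = batch_min if self.min is None else min(self.min, batch_min)
        self.max = batch_max if self.max is None else max(self.max, batch_max)

    def scale_zero_point(self, num_bits=8):
        # Asymmetric uint8-style quantization parameters derived from the range.
        qmax = 2 ** num_bits - 1
        scale = (self.max - self.min) / qmax if self.max > self.min else 1.0
        zero_point = int(np.clip(round(-self.min / scale), 0, qmax))
        return scale, zero_point

# Hypothetical calibration loop; random arrays stand in for one op's activations.
observer = MinMaxObserver()
for batch in [np.random.randn(4, 8) for _ in range(10)]:
    observer.update(batch)
print(observer.scale_zero_point())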
self._tmp_graph_def.library.CopyFrom(self.model.graph_def.library) if debug and not self.performance_only: @@ -617,29 +630,33 @@ def _quantize_graph(self): def _generate_calibration_data(self, tmp_path, output_data, enable_kl_algo=False): """Generate the calibration data.""" - tmp_dump_file = os.path.join(os.path.dirname(self.output_graph), 'requant_min_max.log') + tmp_dump_file = os.path.join(os.path.dirname(self.output_graph), "requant_min_max.log") logger.debug("Generate calibration data and save to {}.".format(tmp_dump_file)) if "backend" in self._tmp_model.kwargs: model = Model(tmp_path, **self._tmp_model.kwargs) else: - model = Model(tmp_path, **self._tmp_model.kwargs, backend="itex" if self.itex_mode and \ - not isinstance(self._tmp_model, TensorflowSavedModelModel) else "default") + model = Model( + tmp_path, + **self._tmp_model.kwargs, + backend="itex" + if self.itex_mode and not isinstance(self._tmp_model, TensorflowSavedModelModel) + else "default" + ) model.output_tensor_names = self.output_tensor_names model.input_tensor_names = self.input_tensor_names with CaptureOutputToFile(tmp_dump_file): self._inference(model) - with open(tmp_dump_file, errors='ignore') as f: + with open(tmp_dump_file, errors="ignore") as f: output_data.extend(f.readlines()) for line in output_data: - if enable_kl_algo and line.rsplit(':')[0] in self._kl_keys: - fp32_data = get_all_fp32_data(line.rsplit(':')[-1]) - key = self._print_node_mapping[line[1:].split('__print') - [0]] + '_eightbit_requant_range' + if enable_kl_algo and line.rsplit(":")[0] in self._kl_keys: + fp32_data = get_all_fp32_data(line.rsplit(":")[-1]) + key = self._print_node_mapping[line[1:].split("__print")[0]] + "_eightbit_requant_range" if key not in self._kl_op_dict: self._kl_op_dict[key] = get_tensor_histogram(fp32_data) else: @@ -648,29 +665,25 @@ def _generate_calibration_data(self, tmp_path, output_data, enable_kl_algo=False def _freeze_requantization_ranges(self, additional_data=None): """Freeze requantization ranges after doing quantization.""" self._tmp_graph_def, quantizev2_max = FreezeValueTransformer( - self._tmp_graph_def, - self._calibration_data, - '__max:', device=self.device).do_transformation() + self._tmp_graph_def, self._calibration_data, "__max:", device=self.device + ).do_transformation() self._tmp_graph_def, quantizev2_min = FreezeValueTransformer( + self._tmp_graph_def, self._calibration_data, "__min:", device=self.device + ).do_transformation() + self._tmp_graph_def, requant_min_max = FreezeValueTransformer( self._tmp_graph_def, self._calibration_data, - '__min:', device=self.device).do_transformation() - self._tmp_graph_def, requant_min_max= FreezeValueTransformer( - self._tmp_graph_def, - self._calibration_data, - '__requant_min_max', - tensor_data= additional_data, + "__requant_min_max", + tensor_data=additional_data, device=self.device, - ).do_transformation() + ).do_transformation() self.scale_info.update(quantizev2_max) self.scale_info.update(quantizev2_min) self.scale_info.update(requant_min_max) - if 'scale_propagation_max_pooling' in self.recipes and \ - self.recipes['scale_propagation_max_pooling']: - self._tmp_graph_def = ScaleProPagationTransformer( - self._tmp_graph_def).do_transformation() + if "scale_propagation_max_pooling" in self.recipes and self.recipes["scale_propagation_max_pooling"]: + self._tmp_graph_def = ScaleProPagationTransformer(self._tmp_graph_def).do_transformation() if debug and not self.new_api: self._tmp_graph_def.library.CopyFrom(self.model.graph_def.library) @@ -679,56 
+692,53 @@ def _freeze_requantization_ranges(self, additional_data=None): def _fuse_requantize_with_fused_quantized_node(self): """Fuse the Requantize/Dequantize with fused quantized Ops.""" - if self.fake_quant: # pragma: no cover - self._tmp_graph_def = FreezeFakeQuantOpOptimizer( - self._tmp_graph_def).do_transformation() + if self.fake_quant: # pragma: no cover + self._tmp_graph_def = FreezeFakeQuantOpOptimizer(self._tmp_graph_def).do_transformation() self._tmp_graph_def = FuseConvRequantizeTransformer( - self._tmp_graph_def, - self.device, self.new_api).do_transformation() + self._tmp_graph_def, self.device, self.new_api + ).do_transformation() if not self.fake_quant: if self.qdq_enabled: - self._tmp_graph_def = FuseMatMulRequantizeNewAPITransformer( - self._tmp_graph_def).do_transformation() + self._tmp_graph_def = FuseMatMulRequantizeNewAPITransformer(self._tmp_graph_def).do_transformation() self._tmp_graph_def = FuseMatMulRequantizeDequantizeNewAPITransformer( - self._tmp_graph_def).do_transformation() + self._tmp_graph_def + ).do_transformation() else: - self._tmp_graph_def = FuseMatMulRequantizeTransformer( - self._tmp_graph_def).do_transformation() + self._tmp_graph_def = FuseMatMulRequantizeTransformer(self._tmp_graph_def).do_transformation() - self._tmp_graph_def = FuseMatMulRequantizeDequantizeTransformer( - self._tmp_graph_def).do_transformation() + self._tmp_graph_def = FuseMatMulRequantizeDequantizeTransformer(self._tmp_graph_def).do_transformation() self._tmp_graph_def = StripUnusedNodesOptimizer( - self._tmp_graph_def, - self._tmp_model.input_node_names, - self._tmp_model.output_node_names).do_transformation() + self._tmp_graph_def, self._tmp_model.input_node_names, self._tmp_model.output_node_names + ).do_transformation() input_output_names = self._tmp_model.input_node_names + self._tmp_model.output_node_names self._tmp_graph_def = RemoveTrainingNodesOptimizer( - self._tmp_graph_def, - protected_nodes=input_output_names).do_transformation() + self._tmp_graph_def, protected_nodes=input_output_names + ).do_transformation() - self._tmp_graph_def = FoldBatchNormNodesOptimizer( - self._tmp_graph_def).do_transformation() + self._tmp_graph_def = FoldBatchNormNodesOptimizer(self._tmp_graph_def).do_transformation() - if self.performance_only or ('scale_propagation_concat' in self.recipes \ - and self.recipes['scale_propagation_concat']): - self._tmp_graph_def = RerangeQuantizedConcat(self._tmp_graph_def, - self.device, performance_only=self.performance_only).do_transformation() + if self.performance_only or ( + "scale_propagation_concat" in self.recipes and self.recipes["scale_propagation_concat"] + ): + self._tmp_graph_def = RerangeQuantizedConcat( + self._tmp_graph_def, self.device, performance_only=self.performance_only + ).do_transformation() - self._tmp_graph_def = MetaInfoChangingMemOpOptimizer( - self._tmp_graph_def).do_transformation() + self._tmp_graph_def = MetaInfoChangingMemOpOptimizer(self._tmp_graph_def).do_transformation() self._tmp_graph_def = StripEquivalentNodesOptimizer( - self._tmp_graph_def, self._tmp_model.output_node_names).do_transformation() + self._tmp_graph_def, self._tmp_model.output_node_names + ).do_transformation() - if self.advance_config is not None and \ - deep_get(self.advance_config, 'bias_correction') is not None: + if self.advance_config is not None and deep_get(self.advance_config, "bias_correction") is not None: self._tmp_graph_def = BiasCorrection( - self._tmp_graph_def, self.model.graph_def, self.new_api).do_transformation() + 
self._tmp_graph_def, self.model.graph_def, self.new_api + ).do_transformation() self._tmp_graph_def.library.CopyFrom(self.model.graph_def.library) @@ -739,13 +749,13 @@ def _post_clean(self): :return: None """ - if os.path.exists(self._int8_logged_model_path) and \ - os.path.isdir(self._int8_logged_model_path): + if os.path.exists(self._int8_logged_model_path) and os.path.isdir(self._int8_logged_model_path): import shutil + shutil.rmtree(self._int8_logged_model_path) - elif gfile.Exists(self._int8_logged_model_path + '.pb'): - os.remove(self._int8_logged_model_path + '.pb') + elif gfile.Exists(self._int8_logged_model_path + ".pb"): + os.remove(self._int8_logged_model_path + ".pb") def quantize_with_qdq_pattern(self): """Quantize model by inserting QDQ. @@ -763,6 +773,7 @@ def quantize_with_qdq_pattern(self): raise except Exception as e: import traceback + traceback.print_exc() self._tmp_model = None logger.error("Fail to quantize graph due to {}.".format(str(e))) @@ -777,41 +788,36 @@ def _insert_qdq_pairs(self): # Fuse Pad into Conv2D, Conv3D, DepthwiseConv2dNative non_pad_ops = list(list(set(self.fp32_ops).union(set(self.bf16_ops)))) self._tmp_graph_def = FusePadWithConv2DOptimizer( - self._tmp_graph_def, - non_pad_ops, - self._tmp_model.input_node_names, - self.op_wise_config, - self.new_api, - True).do_transformation() + self._tmp_graph_def, non_pad_ops, self._tmp_model.input_node_names, self.op_wise_config, self.new_api, True + ).do_transformation() # Sort graph self._tmp_graph_def = QuantizeGraphHelper().get_sorted_graph( - self._tmp_graph_def, - self._tmp_model.input_node_names, - self._tmp_model.output_node_names) + self._tmp_graph_def, self._tmp_model.input_node_names, self._tmp_model.output_node_names + ) self._tmp_graph_def.library.CopyFrom(self.model.graph_def.library) # Find out the quantized nodes - self.quantized_node_info = OptimizeQDQGraph(self._tmp_graph_def, - self._tmp_model.input_node_names, - self._tmp_model.output_node_names, - self.op_wise_config, - self.int8_sequences, - self.device, - self.fake_quant, - self.new_api, - self.performance_only, - self.itex_mode).get_quantized_nodes() + self.quantized_node_info = OptimizeQDQGraph( + self._tmp_graph_def, + self._tmp_model.input_node_names, + self._tmp_model.output_node_names, + self.op_wise_config, + self.int8_sequences, + self.device, + self.fake_quant, + self.new_api, + self.performance_only, + self.itex_mode, + ).get_quantized_nodes() if self.itex_mode: self.quantized_node_info.extend(self._search_y_pattern_for_itex()) if self._enable_kl_op_names: self._get_fp32_print_node_names(self._enable_kl_op_names) - self._generate_calibration_data(self._fp32_logged_model_path, - self._fp32_print_data, - True) + self._generate_calibration_data(self._fp32_logged_model_path, self._fp32_print_data, True) # Calibration using sampling model output_tensor_names = copy.deepcopy(self.model.output_tensor_names) @@ -820,24 +826,20 @@ def _insert_qdq_pairs(self): # after enabling pad+conv2d in new API. 
non_pad_ops = list(list(set(self.fp32_ops).union(set(self.bf16_ops)))) sampling_graph_def = FusePadWithFP32Conv2DOptimizer( - sampling_graph_def, - non_pad_ops, - self._tmp_model.input_node_names, - self.op_wise_config, - self.new_api, - True).do_transformation() + sampling_graph_def, non_pad_ops, self._tmp_model.input_node_names, self.op_wise_config, self.new_api, True + ).do_transformation() for i in self.quantized_node_info: sampling_graph_def, output_names = InsertPrintMinMaxNode( - sampling_graph_def, i[0], i[-1], self.new_api).do_transformation() + sampling_graph_def, i[0], i[-1], self.new_api + ).do_transformation() output_tensor_names.extend(output_names) - if self.quantized_node_info: sampling_graph_def.library.CopyFrom(self.model.graph_def.library) self._sampling_model.graph_def = sampling_graph_def self._sampling_model.output_tensor_names = output_tensor_names - tmp_dump_file = tempfile.mkstemp(suffix='.log')[1] + tmp_dump_file = tempfile.mkstemp(suffix=".log")[1] with CaptureOutputToFile(tmp_dump_file): self._inference(self._sampling_model) self._calibration_data = Helper.gen_valid_sampling_log(tmp_dump_file) @@ -846,43 +848,48 @@ def _insert_qdq_pairs(self): del output_tensor_names del self._sampling_model import gc + gc.collect() # Insert QDQ pattern self._tmp_graph_def = GenerateGraphWithQDQPattern( - self._tmp_graph_def, self._calibration_data, self.op_wise_config, - self.fake_quant, self.fp32_ops, self.bf16_ops, self.quantized_node_info, - self.device, self.performance_only, self.itex_mode).do_transformation() + self._tmp_graph_def, + self._calibration_data, + self.op_wise_config, + self.fake_quant, + self.fp32_ops, + self.bf16_ops, + self.quantized_node_info, + self.device, + self.performance_only, + self.itex_mode, + ).do_transformation() def _convert_qdq(self): """Convert Dequantize + Op + QuantizeV2 into QuantizedOps.""" if self.itex_mode: self._tmp_graph_def, quantizev2_max = FreezeValueTransformer( - self._tmp_graph_def, - self._calibration_data, - '__max:', - self.itex_mode).do_transformation() + self._tmp_graph_def, self._calibration_data, "__max:", self.itex_mode + ).do_transformation() self._tmp_graph_def, quantizev2_min = FreezeValueTransformer( + self._tmp_graph_def, self._calibration_data, "__min:", self.itex_mode + ).do_transformation() + self._tmp_graph_def, requant_min_max = FreezeValueTransformer( self._tmp_graph_def, self._calibration_data, - '__min:', - self.itex_mode).do_transformation() - self._tmp_graph_def, requant_min_max= FreezeValueTransformer( - self._tmp_graph_def, - self._calibration_data, - '__requant_min_max', - tensor_data= self._kl_op_dict, + "__requant_min_max", + tensor_data=self._kl_op_dict, device=self.device, - itex_mode=self.itex_mode).do_transformation() + itex_mode=self.itex_mode, + ).do_transformation() self.scale_info.update(quantizev2_max) self.scale_info.update(quantizev2_min) self.scale_info.update(requant_min_max) self._tmp_graph_def = StripUnusedNodesOptimizer( - self._tmp_graph_def, - self._tmp_model.input_node_names, - self._tmp_model.output_node_names).do_transformation() + self._tmp_graph_def, self._tmp_model.input_node_names, self._tmp_model.output_node_names + ).do_transformation() self._tmp_graph_def = ShareQDQForItexYPatternOptimizer(self._tmp_graph_def).do_transformation() self._tmp_graph_def = MergeDuplicatedQDQOptimizer(self._tmp_graph_def).do_transformation() @@ -891,17 +898,19 @@ def _convert_qdq(self): self._tmp_model.graph_def = self._tmp_graph_def 
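Note (illustrative sketch, not part of the patch): a QDQ pair (QuantizeV2 followed by Dequantize) leaves tensor values approximately unchanged while baking the quantization rounding into the graph, which is what allows a later pass to fold a Dequantize + Op + QuantizeV2 sequence into a single quantized op. The rough NumPy illustration below shows only those numerics; fake_quant is a hypothetical helper and not TensorFlow's actual QuantizeV2/Dequantize kernels.

import numpy as np

def fake_quant(x, num_bits=8):
    """Quantize to a uint8-style grid over [min, max], then dequantize back."""
    qmin, qmax = 0, 2 ** num_bits - 1
    x_min, x_max = float(x.min()), float(x.max())
    scale = (x_max - x_min) / (qmax - qmin) or 1.0
    q = np.clip(np.round((x - x_min) / scale), qmin, qmax)  # "QuantizeV2" step
    return q * scale + x_min                                 # "Dequantize" step

x = np.random.randn(3, 3).astype(np.float32)
x_qdq = fake_quant(x)
print(np.max(np.abs(x - x_qdq)))  # small quantization error, roughly bounded by scale / 2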
self._tmp_model.graph_def.library.CopyFrom(self.model.graph_def.library) else: - self._tmp_graph_def, exclude_node_names = OptimizeQDQGraph(self._tmp_graph_def, - self._tmp_model.input_node_names, - self._tmp_model.output_node_names, - self.op_wise_config, - self.int8_sequences, - self.device, - self.fake_quant, - self.new_api, - self.performance_only, - self.itex_mode).do_transform() - self.exclude_node_names=exclude_node_names + self._tmp_graph_def, exclude_node_names = OptimizeQDQGraph( + self._tmp_graph_def, + self._tmp_model.input_node_names, + self._tmp_model.output_node_names, + self.op_wise_config, + self.int8_sequences, + self.device, + self.fake_quant, + self.new_api, + self.performance_only, + self.itex_mode, + ).do_transform() + self.exclude_node_names = exclude_node_names if len(self._calibration_data) > 0: self._freeze_requantization_ranges(self._kl_op_dict) diff --git a/neural_compressor/adaptor/tf_utils/graph_converter_without_calib.py b/neural_compressor/adaptor/tf_utils/graph_converter_without_calib.py index 88e2d1caaa3..e4d85815b0b 100644 --- a/neural_compressor/adaptor/tf_utils/graph_converter_without_calib.py +++ b/neural_compressor/adaptor/tf_utils/graph_converter_without_calib.py @@ -18,52 +18,57 @@ """Without calibration Graph Converter Class.""" import copy -import os import logging -import tensorflow as tf +import os +import tensorflow as tf from tensorflow.python.platform import gfile + from neural_compressor.conf.dotdict import deep_get from neural_compressor.model import Model -from .transform_graph.rerange_quantized_concat import RerangeQuantizedConcat -from .transform_graph.bias_correction import BiasCorrection -from .quantize_graph.quantize_graph_for_intel_cpu import QuantizeGraphForIntel -from .quantize_graph_common import QuantizeGraphHelper -from .graph_util import GraphAnalyzer -from .graph_rewriter.generic.remove_training_nodes import RemoveTrainingNodesOptimizer -from .graph_rewriter.generic.strip_unused_nodes import StripUnusedNodesOptimizer +from .graph_rewriter.bf16.bf16_convert import BF16Convert from .graph_rewriter.generic.fold_batch_norm import FoldBatchNormNodesOptimizer from .graph_rewriter.generic.fuse_pad_with_conv import FusePadWithConv2DOptimizer - +from .graph_rewriter.generic.remove_training_nodes import RemoveTrainingNodesOptimizer +from .graph_rewriter.generic.strip_unused_nodes import StripUnusedNodesOptimizer from .graph_rewriter.int8.freeze_value_without_calib import FreezeValueWithoutCalibTransformer from .graph_rewriter.int8.fuse_conv_requantize import FuseConvRequantizeTransformer -from .graph_rewriter.int8.fuse_matmul_requantize import FuseMatMulRequantizeTransformer -from .graph_rewriter.int8.fuse_matmul_requantize import FuseMatMulRequantizeDequantizeTransformer -from .graph_rewriter.int8.scale_propagation import ScaleProPagationTransformer -from .graph_rewriter.bf16.bf16_convert import BF16Convert -from .graph_rewriter.int8.post_quantized_op_cse import PostCseOptimizer +from .graph_rewriter.int8.fuse_matmul_requantize import ( + FuseMatMulRequantizeDequantizeTransformer, + FuseMatMulRequantizeTransformer, +) from .graph_rewriter.int8.meta_op_optimizer import MetaInfoChangingMemOpOptimizer +from .graph_rewriter.int8.post_quantized_op_cse import PostCseOptimizer from .graph_rewriter.int8.rnn_convert import QuantizedRNNConverter -from .util import version1_gte_version2,version1_gt_version2,version1_eq_version2 -from .util import version1_lt_version2, version1_lte_version2 -from .util import TF_SPR_BASE_VERSIONS - 
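Note (illustrative sketch, not part of the patch): the version1_* helpers imported here exist because TensorFlow version strings such as "2.12.0" or the patched "1.15.0-up3" must be compared numerically rather than lexically. A simplified comparison under that assumption is sketched below; _version_key and version_gte are hypothetical names, and the real helpers in .util may handle vendor suffixes differently.

def _version_key(version):
    """Split a version string like '1.15.0-up3' into a comparable tuple."""
    numeric = version.split("-")[0]  # drop vendor suffixes such as '-up3'
    return tuple(int(part) for part in numeric.split(".") if part.isdigit())

def version_gte(v1, v2):
    """True if v1 is greater than or equal to v2, comparing numeric components."""
    return _version_key(v1) >= _version_key(v2)

assert version_gte("2.12.0", "2.9.0")
assert not version_gte("1.15.0-up3", "2.1.0")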
-TF_SUPPORTED_MAX_VERSION = '2.12.0' -TF_SUPPORTED_MIN_VERSION = '1.14.0' +from .graph_rewriter.int8.scale_propagation import ScaleProPagationTransformer +from .graph_util import GraphAnalyzer +from .quantize_graph.quantize_graph_for_intel_cpu import QuantizeGraphForIntel +from .quantize_graph_common import QuantizeGraphHelper +from .transform_graph.bias_correction import BiasCorrection +from .transform_graph.rerange_quantized_concat import RerangeQuantizedConcat +from .util import ( + TF_SPR_BASE_VERSIONS, + version1_eq_version2, + version1_gt_version2, + version1_gte_version2, + version1_lt_version2, + version1_lte_version2, +) + +TF_SUPPORTED_MAX_VERSION = "2.12.0" +TF_SUPPORTED_MIN_VERSION = "1.14.0" logger = logging.getLogger("neural_compressor") debug = bool(logger.level == logging.DEBUG) + class GraphConverterWithoutCalib: """Graph Converter without calibration Class is used to generate the quantization graph without calibration.""" - def __init__(self, - model, - data_loader=None, - recover_config=None, - new_api=False, - performance_only=False, - use_bf16=False): + + def __init__( + self, model, data_loader=None, recover_config=None, new_api=False, performance_only=False, use_bf16=False + ): """Convert graph without calibration. :param model: input tensorflow model. @@ -75,17 +80,17 @@ def __init__(self, """ # Logger initial self.model = model - #(TODO) does it right to make the internal model format as graph_def + # (TODO) does it right to make the internal model format as graph_def self.output_tensor_names = self.model.output_tensor_names self.input_tensor_names = self.model.input_tensor_names # quantize specific config - self.op_wise_config = recover_config['op_wise_config'] - self.advance_config = deep_get(recover_config, 'advance') - self.device = recover_config['device'] if 'device' in recover_config else 'cpu' - self.int8_sequences = recover_config['int8_sequences'] - self.fp32_ops = recover_config['fp32_ops'] - self.bf16_ops = recover_config['bf16_ops'] - self.recipes = recover_config['recipes'] + self.op_wise_config = recover_config["op_wise_config"] + self.advance_config = deep_get(recover_config, "advance") + self.device = recover_config["device"] if "device" in recover_config else "cpu" + self.int8_sequences = recover_config["int8_sequences"] + self.fp32_ops = recover_config["fp32_ops"] + self.bf16_ops = recover_config["bf16_ops"] + self.recipes = recover_config["recipes"] self.quantized_node_info = [] self._calibration_data = [] self._fp32_print_data = [] @@ -106,8 +111,10 @@ def _check_tf_version(self): is_sprbase_version = False try: from tensorflow import python - if (hasattr(python, "pywrap_tensorflow") - and hasattr(python.pywrap_tensorflow, "IsMklEnabled")):# pragma: no cover + + if hasattr(python, "pywrap_tensorflow") and hasattr( + python.pywrap_tensorflow, "IsMklEnabled" + ): # pragma: no cover from tensorflow.python.pywrap_tensorflow import IsMklEnabled elif hasattr(python.util, "_pywrap_util_port"): from tensorflow.python.util._pywrap_util_port import IsMklEnabled @@ -115,14 +122,14 @@ def _check_tf_version(self): from tensorflow.python._pywrap_util_port import IsMklEnabled if IsMklEnabled() and (version1_lte_version2(TF_SUPPORTED_MIN_VERSION, tf.version.VERSION)): is_supported_version = True - - if version1_gte_version2(tf.version.VERSION, '2.6.0') and os.getenv('TF_ENABLE_ONEDNN_OPTS') == '1': + + if version1_gte_version2(tf.version.VERSION, "2.6.0") and os.getenv("TF_ENABLE_ONEDNN_OPTS") == "1": is_supported_version = True - if 
version1_gte_version2(tf.version.VERSION, '2.9.0'): + if version1_gte_version2(tf.version.VERSION, "2.9.0"): is_supported_version = True - if tf.version.VERSION == '1.15.0-up3': + if tf.version.VERSION == "1.15.0-up3": is_supported_version = True if tf.version.VERSION in TF_SPR_BASE_VERSIONS: @@ -131,47 +138,56 @@ def _check_tf_version(self): except Exception as e: raise ValueError(e) - finally:# pragma: no cover + finally: # pragma: no cover if version1_gt_version2(tf.version.VERSION, TF_SUPPORTED_MAX_VERSION) and not is_sprbase_version: logger.warning( - str('Please note the {} version of TensorFlow is not fully verified! ' - 'Suggest to use the versions between {} and {} if meet problem.') - .format(tf.version.VERSION, TF_SUPPORTED_MIN_VERSION, TF_SUPPORTED_MAX_VERSION)) - - if version1_eq_version2(tf.version.VERSION, '2.5.0') and os.getenv('TF_ENABLE_MKL_NATIVE_FORMAT') != '0': - logger.fatal("Please set environment variable TF_ENABLE_MKL_NATIVE_FORMAT=0 " - "when TensorFlow 2.5.0 installed.") - - if version1_gte_version2(tf.version.VERSION, '2.6.0') and \ - version1_lt_version2(tf.version.VERSION, '2.9.0') and \ - os.getenv('TF_ENABLE_ONEDNN_OPTS') != '1': - logger.fatal("Please set environment variable TF_ENABLE_ONEDNN_OPTS=1 " - "when TensorFlow >= 2.6.0 and < 2.9.0 installed.") + str( + "Please note the {} version of TensorFlow is not fully verified! " + "Suggest to use the versions between {} and {} if meet problem." + ).format(tf.version.VERSION, TF_SUPPORTED_MIN_VERSION, TF_SUPPORTED_MAX_VERSION) + ) + + if version1_eq_version2(tf.version.VERSION, "2.5.0") and os.getenv("TF_ENABLE_MKL_NATIVE_FORMAT") != "0": + logger.fatal( + "Please set environment variable TF_ENABLE_MKL_NATIVE_FORMAT=0 " "when TensorFlow 2.5.0 installed." + ) + + if ( + version1_gte_version2(tf.version.VERSION, "2.6.0") + and version1_lt_version2(tf.version.VERSION, "2.9.0") + and os.getenv("TF_ENABLE_ONEDNN_OPTS") != "1" + ): + logger.fatal( + "Please set environment variable TF_ENABLE_ONEDNN_OPTS=1 " + "when TensorFlow >= 2.6.0 and < 2.9.0 installed." 
+ ) if not is_supported_version: raise ValueError( - str('Please install TensorFlow within version >={} and <={}.') - .format(TF_SUPPORTED_MIN_VERSION, TF_SUPPORTED_MAX_VERSION)) + str("Please install TensorFlow within version >={} and <={}.").format( + TF_SUPPORTED_MIN_VERSION, TF_SUPPORTED_MAX_VERSION + ) + ) def _check_args(self): """Check model's arguments.""" - if self.model.workspace_path and not os.path.isdir(self.model.workspace_path) \ - and not os.path.exists(os.path.dirname(self.model.workspace_path)): + if ( + self.model.workspace_path + and not os.path.isdir(self.model.workspace_path) + and not os.path.exists(os.path.dirname(self.model.workspace_path)) + ): raise ValueError('"output_graph" directory does not exist.') self._output_path = self.model.workspace_path def _gen_tmp_filenames(self): """Generate the temporary file names.""" - self._int8_dynamic_range_model_path = os.path.join(self._output_path, \ - 'int8_dynamic_range_graph') - self._int8_logged_model_path = os.path.join(self._output_path, 'int8_logged_graph') - self._fp32_logged_model_path = os.path.join(self._output_path, 'fp32_logged_graph') - self._int8_frozen_range_model_path = os.path.join(self._output_path, - 'int8_frozen_range_graph') - self._bf16_mixed_precision_model_path = os.path.join(self._output_path, - 'int8_bf16_mixed_precision_graph') - - self.output_graph = os.path.join(self._output_path, 'int8_final_fused_graph') + self._int8_dynamic_range_model_path = os.path.join(self._output_path, "int8_dynamic_range_graph") + self._int8_logged_model_path = os.path.join(self._output_path, "int8_logged_graph") + self._fp32_logged_model_path = os.path.join(self._output_path, "fp32_logged_graph") + self._int8_frozen_range_model_path = os.path.join(self._output_path, "int8_frozen_range_graph") + self._bf16_mixed_precision_model_path = os.path.join(self._output_path, "int8_bf16_mixed_precision_graph") + + self.output_graph = os.path.join(self._output_path, "int8_final_fused_graph") # to keep temp model self._tmp_model = Model(self.model._model, **self.model.kwargs) self._tmp_model.output_tensor_names = self.output_tensor_names @@ -201,13 +217,12 @@ def _analysis_rnn_model(self): g = GraphAnalyzer() g.graph = self._tmp_graph_def graph_info = g.parse_graph() - rnn_pattern = [['TensorArrayV3'], ['Enter'], ['TensorArrayReadV3'], \ - ['MatMul'], ['BiasAdd']] + rnn_pattern = [["TensorArrayV3"], ["Enter"], ["TensorArrayReadV3"], ["MatMul"], ["BiasAdd"]] target_nodes = g.query_fusion_pattern_nodes(rnn_pattern) res = {} for i in target_nodes: if i[-3] not in self.bf16_ops and i[-3] not in self.fp32_ops: - res[(i[-3], i[-2])] = graph_info[i[1]].node.attr['frame_name'].s.decode() + res[(i[-3], i[-2])] = graph_info[i[1]].node.attr["frame_name"].s.decode() return res @@ -227,9 +242,10 @@ def quantize_without_calib(self): self._fuse_requantize_with_fused_quantized_node() except Exception as e: import traceback + traceback.print_exc() self._tmp_model = None - logger.error('Fail to quantize graph due to {}.'.format(str(e))) + logger.error("Fail to quantize graph due to {}.".format(str(e))) finally: if not debug: self._post_clean() @@ -239,13 +255,12 @@ def bf16_convert(self): """Convert fp32 nodes in bf16_node to bf16 dtype based on FP32 + INT8 mixed precision graph.""" try: self._tmp_model.graph_def = BF16Convert( - self._tmp_model.graph_def, - self.fp32_ops, - self.bf16_ops).do_transformation() + self._tmp_model.graph_def, self.fp32_ops, self.bf16_ops + ).do_transformation() except Exception as e: self._tmp_model = None - 
logger.error('Fail to convert graph due to {}.'.format(str(e))) + logger.error("Fail to convert graph due to {}.".format(str(e))) finally: if debug: self._tmp_model.save(self._bf16_mixed_precision_model_path) @@ -256,16 +271,12 @@ def _quantize_graph(self): """Quantize graph.""" non_pad_ops = list(list(set(self.fp32_ops).union(set(self.bf16_ops)))) self._tmp_graph_def = FusePadWithConv2DOptimizer( - self._tmp_graph_def, - non_pad_ops, - self._tmp_model.input_node_names, - self.op_wise_config, - self.new_api).do_transformation() + self._tmp_graph_def, non_pad_ops, self._tmp_model.input_node_names, self.op_wise_config, self.new_api + ).do_transformation() self._tmp_graph_def = QuantizeGraphHelper().get_sorted_graph( - self._tmp_graph_def, - self._tmp_model.input_node_names, - self._tmp_model.output_node_names) + self._tmp_graph_def, self._tmp_model.input_node_names, self._tmp_model.output_node_names + ) self._tmp_graph_def, self.quantized_node_info, _ = QuantizeGraphForIntel( self._tmp_graph_def, @@ -276,7 +287,8 @@ def _quantize_graph(self): self.device, False, self.new_api, - self.performance_only).do_transform() + self.performance_only, + ).do_transform() self._tmp_graph_def.library.CopyFrom(self.model.graph_def.library) if debug: @@ -286,26 +298,21 @@ def _quantize_graph(self): def _freeze_requantization_ranges_without_calib(self): """Freeze requantization ranges after doing quantization.""" self._tmp_graph_def = FreezeValueWithoutCalibTransformer( - self._tmp_graph_def, - self.recover_config, - postfix='__min').do_transformation_without_calib() + self._tmp_graph_def, self.recover_config, postfix="__min" + ).do_transformation_without_calib() self._tmp_graph_def = FreezeValueWithoutCalibTransformer( - self._tmp_graph_def, - self.recover_config, - postfix='__max').do_transformation_without_calib() + self._tmp_graph_def, self.recover_config, postfix="__max" + ).do_transformation_without_calib() self._tmp_graph_def = FreezeValueWithoutCalibTransformer( - self._tmp_graph_def, - self.recover_config, - postfix='__requant_min_max', - device = self.device).do_transformation_without_calib() + self._tmp_graph_def, self.recover_config, postfix="__requant_min_max", device=self.device + ).do_transformation_without_calib() self._tmp_graph_def = QuantizedRNNConverter( - self._tmp_graph_def, self._calibration_data, self._rnn_details).do_transformation() + self._tmp_graph_def, self._calibration_data, self._rnn_details + ).do_transformation() - if 'scale_propagation_max_pooling' in self.recipes and \ - self.recipes['scale_propagation_max_pooling']: - self._tmp_graph_def = ScaleProPagationTransformer( - self._tmp_graph_def).do_transformation() + if "scale_propagation_max_pooling" in self.recipes and self.recipes["scale_propagation_max_pooling"]: + self._tmp_graph_def = ScaleProPagationTransformer(self._tmp_graph_def).do_transformation() if debug: self._tmp_graph_def.library.CopyFrom(self.model.graph_def.library) @@ -315,39 +322,30 @@ def _freeze_requantization_ranges_without_calib(self): def _fuse_requantize_with_fused_quantized_node(self): """Fuse the Requantize/Dequantize with fused quantized Ops.""" self._tmp_graph_def = FuseConvRequantizeTransformer( - self._tmp_graph_def, - self.device, - self.new_api).do_transformation() + self._tmp_graph_def, self.device, self.new_api + ).do_transformation() - self._tmp_graph_def = FuseMatMulRequantizeTransformer( - self._tmp_graph_def).do_transformation() + self._tmp_graph_def = FuseMatMulRequantizeTransformer(self._tmp_graph_def).do_transformation() - 
self._tmp_graph_def = FuseMatMulRequantizeDequantizeTransformer( - self._tmp_graph_def).do_transformation() + self._tmp_graph_def = FuseMatMulRequantizeDequantizeTransformer(self._tmp_graph_def).do_transformation() self._tmp_graph_def = StripUnusedNodesOptimizer( - self._tmp_graph_def, - self._tmp_model.input_node_names, - self._tmp_model.output_node_names).do_transformation() + self._tmp_graph_def, self._tmp_model.input_node_names, self._tmp_model.output_node_names + ).do_transformation() self._tmp_graph_def = RemoveTrainingNodesOptimizer( - self._tmp_graph_def, - protected_nodes=self._tmp_model.output_node_names).do_transformation() + self._tmp_graph_def, protected_nodes=self._tmp_model.output_node_names + ).do_transformation() - self._tmp_graph_def = FoldBatchNormNodesOptimizer( - self._tmp_graph_def).do_transformation() + self._tmp_graph_def = FoldBatchNormNodesOptimizer(self._tmp_graph_def).do_transformation() - if 'scale_propagation_concat' in self.recipes and self.recipes['scale_propagation_concat']: - self._tmp_graph_def = RerangeQuantizedConcat(self._tmp_graph_def, - self.device).do_transformation() + if "scale_propagation_concat" in self.recipes and self.recipes["scale_propagation_concat"]: + self._tmp_graph_def = RerangeQuantizedConcat(self._tmp_graph_def, self.device).do_transformation() - self._tmp_graph_def = MetaInfoChangingMemOpOptimizer( - self._tmp_graph_def).do_transformation() + self._tmp_graph_def = MetaInfoChangingMemOpOptimizer(self._tmp_graph_def).do_transformation() - if self.advance_config is not None and \ - deep_get(self.advance_config, 'bias_correction') is not None: - self._tmp_graph_def = BiasCorrection( - self._tmp_graph_def, self.model.graph_def).do_transformation() + if self.advance_config is not None and deep_get(self.advance_config, "bias_correction") is not None: + self._tmp_graph_def = BiasCorrection(self._tmp_graph_def, self.model.graph_def).do_transformation() self._tmp_graph_def.library.CopyFrom(self.model.graph_def.library) @@ -358,10 +356,10 @@ def _post_clean(self): :return: None """ - if os.path.exists(self._int8_logged_model_path) and \ - os.path.isdir(self._int8_logged_model_path): + if os.path.exists(self._int8_logged_model_path) and os.path.isdir(self._int8_logged_model_path): import shutil + shutil.rmtree(self._int8_logged_model_path) - elif gfile.Exists(self._int8_logged_model_path + '.pb'): - os.remove(self._int8_logged_model_path + '.pb') + elif gfile.Exists(self._int8_logged_model_path + ".pb"): + os.remove(self._int8_logged_model_path + ".pb") diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/bf16/bf16_convert.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/bf16/bf16_convert.py index d2f03c3e288..8cc85f8f761 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/bf16/bf16_convert.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/bf16/bf16_convert.py @@ -17,36 +17,32 @@ # """Graph rewriter BF16 Converter Class.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function -import logging import copy +import logging + +import tensorflow as tf from tensorflow.core.framework import attr_value_pb2 -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import tensor_util -from tensorflow.python.framework import op_def_registry +from tensorflow.python.framework import dtypes, op_def_registry, tensor_util from tensorflow.python.framework.kernels import 
get_registered_kernels_for_op -from ..graph_base import GraphRewriterBase from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper -from ..generic.graph_cse_optimizer import GraphCseOptimizer -from ..generic.dequantize_cast_optimizer import DequantizeCastOptimizer -import tensorflow as tf from neural_compressor.adaptor.tf_utils.util import TF_SPR_BASE_VERSIONS -DT_FLOAT32 = attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum) +from ..generic.dequantize_cast_optimizer import DequantizeCastOptimizer +from ..generic.graph_cse_optimizer import GraphCseOptimizer +from ..graph_base import GraphRewriterBase + +DT_FLOAT32 = attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum) DT_BFLOAT16 = attr_value_pb2.AttrValue(type=dtypes.bfloat16.as_datatype_enum) + class BF16Convert(GraphRewriterBase): """BF16 node convert transformation.""" - def __init__(self, - model, - fp32_ops=[], - bf16_ops=[]): + def __init__(self, model, fp32_ops=[], bf16_ops=[]): """Initilization. Args: model: the model to be converted to BF16. @@ -60,7 +56,7 @@ def __init__(self, self.fp32_ops = fp32_ops self.bf16_ops = bf16_ops self.converted_ops = [] - self.device = ["CPU", "DEFAULT"] #TODO support differnt device types, such as GPU + self.device = ["CPU", "DEFAULT"] # TODO support differnt device types, such as GPU def _dtype(self, node): """Get the dtype of the node.""" @@ -71,14 +67,14 @@ def _dtype(self, node): inputs_num = node.attr[i.number_attr].i if i.number_attr else 1 for j in range(inputs_num): if i.type: - inputs_dt.append('') + inputs_dt.append("") else: inputs_dt.append(i.type_attr) for i in op_def.output_arg: outputs_num = node.attr[i.number_attr].i if i.number_attr else 1 for j in range(outputs_num): if i.type: - outputs_dt.append('') + outputs_dt.append("") else: outputs_dt.append(i.type_attr) return inputs_dt, outputs_dt @@ -110,7 +106,7 @@ def _allowed_dtype_val(self, node): allowed_dt_val = {} for attr_def in op_def.attr: if attr_def.type != "type": - continue + continue if attr_def.HasField("allowed_values"): allowed_dt_val[attr_def.name] = attr_def.allowed_values.list.type # The supported data type in op_def may be different with registered kernels. 
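# A minimal standalone sketch of the dtype check used above: read an op's
# OpDef from the registry and collect, for every "type"-valued attr, the
# DataType enums it may take -- the same information _allowed_dtype_val()
# gathers before deciding whether a node can run in bfloat16. "AvgPool" is
# just an arbitrary example op, not one the rewriter treats specially.
from tensorflow.python.framework import dtypes, op_def_registry


def allowed_dtype_values(op_name):
    """Map each 'type' attr of the op to the DataType enums it allows."""
    op_def = op_def_registry.get(op_name)
    allowed = {}
    for attr_def in op_def.attr:
        if attr_def.type != "type":
            continue
        if attr_def.HasField("allowed_values"):
            allowed[attr_def.name] = list(attr_def.allowed_values.list.type)
    return allowed


allowed = allowed_dtype_values("AvgPool")
print(dtypes.bfloat16.as_datatype_enum in allowed.get("T", []))  # True if BF16 is allowed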
@@ -141,77 +137,87 @@ def _bf16_convert(self, bf16_node_name): if bf16_node.name in self.converted_ops: return - elif 'Dequantize' in bf16_node.op: + elif "Dequantize" in bf16_node.op: return else: self.converted_ops.append(bf16_node.name) - + inputs_dt, outputs_dt = self._dtype(bf16_node) inputs_dt_val, outputs_dt_val = self._dtype_val(bf16_node) allowed_dt_val = self._allowed_dtype_val(bf16_node) for index, input_name in enumerate(bf16_node.input): - if input_name.startswith('^'): + if input_name.startswith("^"): continue - input_detail = self.cur_graph.node_name_details[Helper.node_name_from_input( - input_name)] + input_detail = self.cur_graph.node_name_details[Helper.node_name_from_input(input_name)] input_node = input_detail.node input_node_outputs = input_detail.outputs - if inputs_dt[index] in allowed_dt_val and \ - dtypes.bfloat16.as_datatype_enum not in allowed_dt_val[inputs_dt[index]]: + if ( + inputs_dt[index] in allowed_dt_val + and dtypes.bfloat16.as_datatype_enum not in allowed_dt_val[inputs_dt[index]] + ): continue if inputs_dt_val[index] != DT_FLOAT32: continue - if input_node.op == 'Cast' and \ - input_node.attr["SrcT"] == DT_BFLOAT16 and \ - input_node.attr["DstT"] == DT_FLOAT32 and len(input_node_outputs) == 1: - parent_input_name = Helper.node_name_from_input(input_node.input[0]) - bf16_node.input[index] = input_node.input[0] - outputs = self.cur_graph.node_name_details[parent_input_name].outputs - outputs = list(map(lambda x: x.replace(input_name, bf16_node.name), outputs)) - self.cur_graph.remove_node(input_name) - elif input_node.op == 'Cast' and \ - input_node.attr["DstT"] == DT_FLOAT32 and len(input_node_outputs) == 1: + if ( + input_node.op == "Cast" + and input_node.attr["SrcT"] == DT_BFLOAT16 + and input_node.attr["DstT"] == DT_FLOAT32 + and len(input_node_outputs) == 1 + ): + parent_input_name = Helper.node_name_from_input(input_node.input[0]) + bf16_node.input[index] = input_node.input[0] + outputs = self.cur_graph.node_name_details[parent_input_name].outputs + outputs = list(map(lambda x: x.replace(input_name, bf16_node.name), outputs)) + self.cur_graph.remove_node(input_name) + elif input_node.op == "Cast" and input_node.attr["DstT"] == DT_FLOAT32 and len(input_node_outputs) == 1: input_node.attr["DstT"].CopyFrom(DT_BFLOAT16) elif input_node.op == "Const" and len(input_node_outputs) == 1: - fp32_value = tensor_util.MakeNdarray(input_node.attr.get('value').tensor) + fp32_value = tensor_util.MakeNdarray(input_node.attr.get("value").tensor) Helper.set_attr_dtype(input_node, "dtype", dtypes.bfloat16) - input_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - fp32_value, dtypes.bfloat16, fp32_value.shape))) - elif 'Dequantize' == input_node.op and len(input_node_outputs) == 1 \ - and input_node.attr['mode'].s != b'MIN_FIRST' \ - and tf.version.VERSION in TF_SPR_BASE_VERSIONS: + input_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(fp32_value, dtypes.bfloat16, fp32_value.shape) + ) + ) + elif ( + "Dequantize" == input_node.op + and len(input_node_outputs) == 1 + and input_node.attr["mode"].s != b"MIN_FIRST" + and tf.version.VERSION in TF_SPR_BASE_VERSIONS + ): # Dequantize with mode MIN_FIRST does not support bf16 in both eigen and mkl _, outputs_dt_input_node = self._dtype(input_node) allowed_input_node_dt_val = self._allowed_dtype_val(input_node) - if outputs_dt_input_node[0] in allowed_input_node_dt_val and \ - dtypes.bfloat16.as_datatype_enum in 
allowed_input_node_dt_val[outputs_dt_input_node[0]]: + if ( + outputs_dt_input_node[0] in allowed_input_node_dt_val + and dtypes.bfloat16.as_datatype_enum in allowed_input_node_dt_val[outputs_dt_input_node[0]] + ): input_node.attr[outputs_dt_input_node[0]].CopyFrom(DT_BFLOAT16) # ResizeBilinear input can be of different types but output is always float - elif input_node.name in self.bf16_ops and "Dequantize" not in input_node.op and \ - input_node.op != 'ResizeBilinear': + elif ( + input_node.name in self.bf16_ops + and "Dequantize" not in input_node.op + and input_node.op != "ResizeBilinear" + ): self._bf16_convert(input_node.name) else: - cast_node_name = input_name.replace(':', '_') + "/" + bf16_node_name + "_FP32toBF16" + cast_node_name = input_name.replace(":", "_") + "/" + bf16_node_name + "_FP32toBF16" if cast_node_name not in list(self.cur_graph.node_name_details.keys()): - input_cast_node = Helper.create_node( - "Cast", cast_node_name, [input_name]) + input_cast_node = Helper.create_node("Cast", cast_node_name, [input_name]) Helper.set_attr_dtype(input_cast_node, "DstT", dtypes.bfloat16) Helper.set_attr_dtype(input_cast_node, "SrcT", dtypes.float32) Helper.set_attr_bool(input_cast_node, "Truncate", False) bf16_node.input[index] = cast_node_name - outputs = self.cur_graph.node_name_details[ \ - Helper.node_name_from_input(input_name)].outputs + outputs = self.cur_graph.node_name_details[Helper.node_name_from_input(input_name)].outputs outputs = list(map(lambda x: x.replace(bf16_node.name, cast_node_name), outputs)) self.cur_graph.add_node(input_cast_node, input_name, [bf16_node_name]) - bf16_node.attr[inputs_dt[index]].CopyFrom( - attr_value_pb2.AttrValue(type=dtypes.bfloat16.as_datatype_enum)) + bf16_node.attr[inputs_dt[index]].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.bfloat16.as_datatype_enum)) for output_name in bf16_node_outputs: - if bf16_node.op == 'ResizeBilinear': + if bf16_node.op == "ResizeBilinear": continue output_detail = self.cur_graph.node_name_details[output_name] output_node = output_detail.node @@ -219,33 +225,38 @@ def _bf16_convert(self, bf16_node_name): allowed_output_node_dt_val = self._allowed_dtype_val(output_node) for i, input_name in enumerate(output_node.input): - if input_name.startswith('^'): + if input_name.startswith("^"): continue - if bf16_node.name != input_name.split(':')[0]: + if bf16_node.name != input_name.split(":")[0]: continue - index = int(input_name.split(':')[-1]) if ':' in input_name else 0 - if outputs_dt[index] in allowed_dt_val and \ - dtypes.bfloat16.as_datatype_enum not in allowed_dt_val[outputs_dt[index]]: + index = int(input_name.split(":")[-1]) if ":" in input_name else 0 + if ( + outputs_dt[index] in allowed_dt_val + and dtypes.bfloat16.as_datatype_enum not in allowed_dt_val[outputs_dt[index]] + ): continue if outputs_dt_val[index] != DT_FLOAT32: continue - if output_node.op == 'Cast': + if output_node.op == "Cast": output_node.attr["SrcT"].CopyFrom(DT_BFLOAT16) - elif output_node.op == 'QuantizeV2' and 'dtype' in output_node.attr: - if 'dtype' in allowed_output_node_dt_val and \ - dtypes.bfloat16.as_datatype_enum in allowed_output_node_dt_val['dtype']: + elif output_node.op == "QuantizeV2" and "dtype" in output_node.attr: + if ( + "dtype" in allowed_output_node_dt_val + and dtypes.bfloat16.as_datatype_enum in allowed_output_node_dt_val["dtype"] + ): output_node.attr["dtype"].CopyFrom(DT_BFLOAT16) - elif output_node.name not in self.bf16_ops or \ - inputs_dt_input_node[i] in allowed_output_node_dt_val and \ - 
dtypes.bfloat16.as_datatype_enum not in allowed_output_node_dt_val[inputs_dt_input_node[i]]: + elif ( + output_node.name not in self.bf16_ops + or inputs_dt_input_node[i] in allowed_output_node_dt_val + and dtypes.bfloat16.as_datatype_enum not in allowed_output_node_dt_val[inputs_dt_input_node[i]] + ): cast_node_name = bf16_node_name + "/" + output_node.name + "_BF16toFP32" if cast_node_name in self.cur_graph.node_name_details.keys(): continue - output_cast_node = Helper.create_node( - "Cast", cast_node_name, [input_name]) + output_cast_node = Helper.create_node("Cast", cast_node_name, [input_name]) Helper.set_attr_dtype(output_cast_node, "DstT", dtypes.float32) Helper.set_attr_dtype(output_cast_node, "SrcT", dtypes.bfloat16) Helper.set_attr_bool(output_cast_node, "Truncate", False) diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_add_to_biasadd.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_add_to_biasadd.py index b6bf6dc1c69..7f58bdbd0a2 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_add_to_biasadd.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_add_to_biasadd.py @@ -17,16 +17,18 @@ """Convert Add OP to BiasAdd OP Graph Rewriter.""" import numpy as np -from tensorflow.python.framework import dtypes -from neural_compressor.utils.utility import dump_elapsed_time +from tensorflow.python.framework import dtypes, tensor_util -from ..graph_base import GraphRewriterBase from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper -from tensorflow.python.framework import tensor_util +from neural_compressor.utils.utility import dump_elapsed_time + +from ..graph_base import GraphRewriterBase + class ConvertAddToBiasAddOptimizer(GraphRewriterBase): """Convert MatMul/Conv2D + Add(AddV2) to MatMul + BiasAdd.""" + @dump_elapsed_time("Pass ConvertAddToBiasAddOptimizer") def do_transformation(self): """Execute convertion Add to BiasAdd.""" @@ -35,23 +37,24 @@ def do_transformation(self): graph_info = g.parse_graph() import tensorflow as tf - if tf.version.VERSION not in ('2.11.0202242', '2.11.0202250', '2.11.0202317', '2.11.0202323'): - target_nodes = g.query_fusion_pattern_nodes([['MatMul', 'Conv2D'], ['Add', 'AddV2']]) + + if tf.version.VERSION not in ("2.11.0202242", "2.11.0202250", "2.11.0202317", "2.11.0202323"): + target_nodes = g.query_fusion_pattern_nodes([["MatMul", "Conv2D"], ["Add", "AddV2"]]) else: - target_nodes = g.query_fusion_pattern_nodes([['MatMul'], ['Add', 'AddV2']]) + target_nodes = g.query_fusion_pattern_nodes([["MatMul"], ["Add", "AddV2"]]) for i in target_nodes: successor_node_names = graph_info[i[1]].outputs matmul_input_name = graph_info[i[0]].node.input[0] matmul_input_node = graph_info[Helper.node_name_from_input(matmul_input_name)].node - #Fixme below two lines was added due to MatMul kernel limitation for matmul input type + # Fixme below two lines was added due to MatMul kernel limitation for matmul input type # should be quint8. 
- if matmul_input_node.op == 'Const': + if matmul_input_node.op == "Const": continue add_second_input_name = graph_info[i[1]].node.input[1] add_second_const_node = graph_info[add_second_input_name].node - if add_second_const_node.op != 'Const': + if add_second_const_node.op != "Const": continue - bias_tensor = tensor_util.MakeNdarray(add_second_const_node.attr['value'].tensor) + bias_tensor = tensor_util.MakeNdarray(add_second_const_node.attr["value"].tensor) if bias_tensor.ndim > 2: continue @@ -63,13 +66,12 @@ def do_transformation(self): bias_node_name = i[1] bias_const_node_name = add_second_const_node.name + "_flattern" - bias_const_node = Helper.create_constant_node( - bias_const_node_name, new_bias_tensor, dtypes.float32) + bias_const_node = Helper.create_constant_node(bias_const_node_name, new_bias_tensor, dtypes.float32) - bias_node = Helper.create_node('BiasAdd', bias_node_name, [i[0], bias_const_node_name]) + bias_node = Helper.create_node("BiasAdd", bias_node_name, [i[0], bias_const_node_name]) Helper.set_attr_dtype(bias_node, "T", dtypes.float32) g.add_node(bias_const_node, None, [bias_node_name]) - g.replace_single_node(bias_node, [i[0]], i[1], successor_node_names, i[1]) + g.replace_single_node(bias_node, [i[0]], i[1], successor_node_names, i[1]) return g.dump_graph() diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_layout.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_layout.py index 038d04ecdd2..d28a12e41da 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_layout.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_layout.py @@ -17,14 +17,15 @@ """Convert Layout Graph Rewriter.""" import tensorflow as tf -from tensorflow.python.training import saver as saver_lib -from tensorflow.core.protobuf import config_pb2 -from tensorflow.core.protobuf import rewriter_config_pb2 +from tensorflow.core.protobuf import config_pb2, meta_graph_pb2, rewriter_config_pb2 from tensorflow.python.grappler import tf_optimizer -from tensorflow.core.protobuf import meta_graph_pb2 +from tensorflow.python.training import saver as saver_lib + +from neural_compressor.adaptor.tf_utils.util import version1_gt_version2 from neural_compressor.utils.utility import dump_elapsed_time + from ..graph_base import GraphRewriterBase -from neural_compressor.adaptor.tf_utils.util import version1_gt_version2 + class ConvertLayoutOptimizer(GraphRewriterBase): """The layout convertion optimizer, convert NCHW to NHWC format. 
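# A small numpy check of why MatMul/Conv2D + Add(const) can become
# MatMul/Conv2D + BiasAdd, as in ConvertAddToBiasAddOptimizer above: BiasAdd
# needs a 1-D bias, so a broadcastable constant with ndim <= 2 is flattened.
# The reshape(-1) is an assumption about what the "_flattern" constant holds;
# numerically the two forms agree.
import numpy as np

x = np.random.rand(4, 8).astype(np.float32)      # MatMul/Conv2D output
bias = np.random.rand(1, 8).astype(np.float32)   # Add's constant second input

add_result = x + bias                    # original Add / AddV2
biasadd_result = x + bias.reshape(-1)    # BiasAdd with the flattened 1-D bias
assert np.allclose(add_result, biasadd_result)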
@@ -36,6 +37,7 @@ class ConvertLayoutOptimizer(GraphRewriterBase): Return: converted graph_def """ + def __init__(self, model, outputs): """Initilization.""" super().__init__(model) @@ -46,26 +48,24 @@ def do_transformation(self): """Execute converting layout.""" convert = False for node in self.model.node: - if 'Conv' in node.op and \ - 'data_format' in node.attr and \ - node.attr['data_format'].s == b'NCHW': + if "Conv" in node.op and "data_format" in node.attr and node.attr["data_format"].s == b"NCHW": convert = True break - if convert and version1_gt_version2(tf.version.VERSION, '2.3.0'): + if convert and version1_gt_version2(tf.version.VERSION, "2.3.0"): g = tf.Graph() - with g.as_default(): # pylint: disable=not-context-manager - g = tf.compat.v1.import_graph_def(self.model, name='') - meta_graph = saver_lib.export_meta_graph( - graph_def=self.model, graph=g, clear_devices=False) + with g.as_default(): # pylint: disable=not-context-manager + g = tf.compat.v1.import_graph_def(self.model, name="") + meta_graph = saver_lib.export_meta_graph(graph_def=self.model, graph=g, clear_devices=False) fetch_collection = meta_graph_pb2.CollectionDef() for fetch in self.outputs: - fetch_collection.node_list.value.append(fetch) # pylint: disable=no-member - meta_graph.collection_def["train_op"].CopyFrom( # pylint: disable=no-member - fetch_collection) # pylint: disable=no-member + fetch_collection.node_list.value.append(fetch) # pylint: disable=no-member + meta_graph.collection_def["train_op"].CopyFrom( # pylint: disable=no-member + fetch_collection + ) # pylint: disable=no-member config = config_pb2.ConfigProto() - convert = rewriter_config_pb2.RewriterConfig.NCHW_TO_NHWC # pylint: disable=no-member - config.graph_options.rewrite_options.CopyFrom( # pylint: disable=no-member + convert = rewriter_config_pb2.RewriterConfig.NCHW_TO_NHWC # pylint: disable=no-member + config.graph_options.rewrite_options.CopyFrom( # pylint: disable=no-member rewriter_config_pb2.RewriterConfig( disable_model_pruning=True, constant_folding=rewriter_config_pb2.RewriterConfig.OFF, @@ -77,7 +77,9 @@ def do_transformation(self): function_optimization=rewriter_config_pb2.RewriterConfig.OFF, remapping=rewriter_config_pb2.RewriterConfig.OFF, implementation_selector=rewriter_config_pb2.RewriterConfig.OFF, - cpu_layout_conversion=convert)) + cpu_layout_conversion=convert, + ) + ) optimized_graph = tf_optimizer.OptimizeGraph(config, meta_graph) return optimized_graph diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_leakyrelu.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_leakyrelu.py index 701b3c1a47e..abfe15c8f08 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_leakyrelu.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_leakyrelu.py @@ -16,13 +16,13 @@ # limitations under the License. 
"""Convert LeakyRelu Graph Rewriter.""" -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import tensor_util -from neural_compressor.utils.utility import dump_elapsed_time +from tensorflow.python.framework import dtypes, tensor_util -from ..graph_base import GraphRewriterBase from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper +from neural_compressor.utils.utility import dump_elapsed_time + +from ..graph_base import GraphRewriterBase class ConvertLeakyReluOptimizer(GraphRewriterBase): @@ -37,13 +37,14 @@ class ConvertLeakyReluOptimizer(GraphRewriterBase): Maximum LeakyRelu Note, the coefficient of Mul should be less than 1 or the conversion is not valid. """ + @dump_elapsed_time("Pass ConvertLeakyReluOptimizer") def do_transformation(self): """Fuse small ops to LeakyRelu.""" g = GraphAnalyzer() g.graph = self.model graph_info = g.parse_graph() - target_nodes = g.query_fusion_pattern_nodes([['Mul'], ['Maximum']]) + target_nodes = g.query_fusion_pattern_nodes([["Mul"], ["Maximum"]]) for i in target_nodes: successor_node_names = graph_info[i[1]].outputs @@ -55,17 +56,17 @@ def do_transformation(self): continue mul_coeff_node_name = list(set(mul_input_names).difference(set(max_input_names)))[0] mul_coeff_node = graph_info[mul_coeff_node_name].node - if mul_coeff_node.op != 'Const': + if mul_coeff_node.op != "Const": continue - nd = tensor_util.MakeNdarray(mul_coeff_node.attr['value'].tensor).ndim + nd = tensor_util.MakeNdarray(mul_coeff_node.attr["value"].tensor).ndim if nd > 1: continue - alpha_value = float(tensor_util.MakeNdarray(mul_coeff_node.attr['value'].tensor)) + alpha_value = float(tensor_util.MakeNdarray(mul_coeff_node.attr["value"].tensor)) if alpha_value > 1.0: continue - leaky_relu_node_name = i[1] + '_leakyrelu' - leaky_relu_node = Helper.create_node('LeakyRelu', leaky_relu_node_name, common_input) + leaky_relu_node_name = i[1] + "_leakyrelu" + leaky_relu_node = Helper.create_node("LeakyRelu", leaky_relu_node_name, common_input) Helper.set_attr_dtype(leaky_relu_node, "T", dtypes.float32) Helper.set_attr_float(leaky_relu_node, "alpha", alpha_value) diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_nan_to_random.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_nan_to_random.py index 9acdc773c13..3de10686d69 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_nan_to_random.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_nan_to_random.py @@ -17,11 +17,12 @@ """Convert NAN to random Graph Rewriter.""" import numpy as np -from ..graph_base import GraphRewriterBase -from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import tensor_util from tensorflow.core.framework import attr_value_pb2 +from tensorflow.python.framework import dtypes, tensor_util + +from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer + +from ..graph_base import GraphRewriterBase class ConvertNanToRandom(GraphRewriterBase): @@ -38,10 +39,14 @@ def do_transformation(self): for i in target_nodes: const_node = graph_info[i[0]].node - const_content = tensor_util.MakeNdarray(const_node.attr['value'].tensor) + const_content = tensor_util.MakeNdarray(const_node.attr["value"].tensor) if const_content.dtype == np.float32 and np.any(np.isnan(const_content)): - 
const_node.attr['value'].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - np.random.rand(*const_content.shape), dtypes.float32, const_content.shape))) + const_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + np.random.rand(*const_content.shape), dtypes.float32, const_content.shape + ) + ) + ) return cur_graph.dump_graph() diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_placeholder_to_const.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_placeholder_to_const.py index e4559f38411..9e9c675a973 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_placeholder_to_const.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_placeholder_to_const.py @@ -16,17 +16,19 @@ # limitations under the License. """Convert placeholder to const Graph Rewriter.""" -from neural_compressor.utils.utility import dump_elapsed_time -from ..graph_base import GraphRewriterBase +from tensorflow.core.framework import attr_value_pb2, node_def_pb2 +from tensorflow.python.framework import dtypes, tensor_util + from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper -from tensorflow.core.framework import attr_value_pb2 -from tensorflow.core.framework import node_def_pb2 -from tensorflow.python.framework import tensor_util -from tensorflow.python.framework import dtypes +from neural_compressor.utils.utility import dump_elapsed_time + +from ..graph_base import GraphRewriterBase + class ConvertPlaceholderToConst(GraphRewriterBase): """Convert placeholder to const for removing training nodes.""" + @dump_elapsed_time("Pass ConvertPlaceholderToConst") def do_transformation(self): """Rename the PlaceHolderWithDefault node to constant. 
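# The NaN-to-random rewrite above in miniature: unpack a float32 Const tensor
# with MakeNdarray, detect NaNs with numpy, and wrap a random tensor of the
# same shape back into an AttrValue, just as ConvertNanToRandom does to the
# matched Const nodes. The toy array is illustrative only.
import numpy as np
from tensorflow.core.framework import attr_value_pb2
from tensorflow.python.framework import dtypes, tensor_util

const_content = np.array([[1.0, np.nan], [3.0, 4.0]], dtype=np.float32)
if const_content.dtype == np.float32 and np.any(np.isnan(const_content)):
    new_value = attr_value_pb2.AttrValue(
        tensor=tensor_util.make_tensor_proto(
            np.random.rand(*const_content.shape), dtypes.float32, const_content.shape
        )
    )
    print(tensor_util.MakeNdarray(new_value.tensor))  # NaN-free replacement values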
@@ -47,37 +49,40 @@ def do_transformation(self): graph_info = cur_graph.parse_graph() - target_nodes = cur_graph.query_fusion_pattern_nodes([ - ["PlaceholderWithDefault"]]) + target_nodes = cur_graph.query_fusion_pattern_nodes([["PlaceholderWithDefault"]]) for i in target_nodes: placeholder_node = graph_info[i[0]].node new_node = node_def_pb2.NodeDef() - if dtypes.bool.as_datatype_enum == placeholder_node.attr['dtype'].type: + if dtypes.bool.as_datatype_enum == placeholder_node.attr["dtype"].type: placeholder_input_node = None if placeholder_node.input: - placeholder_input_node = graph_info[ - Helper.node_name_from_input(placeholder_node.input[0])].node + placeholder_input_node = graph_info[Helper.node_name_from_input(placeholder_node.input[0])].node - if placeholder_input_node and placeholder_input_node.op != 'Const': + if placeholder_input_node and placeholder_input_node.op != "Const": continue if placeholder_input_node: - new_val_str = placeholder_input_node.attr['value'].tensor.bool_val + new_val_str = placeholder_input_node.attr["value"].tensor.bool_val else: continue new_node.op = "Const" - new_node.name = placeholder_node.name + '_const' + new_node.name = placeholder_node.name + "_const" new_node.attr["dtype"].CopyFrom(placeholder_node.attr["dtype"]) new_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - self.strtobool(new_val_str), dtype=dtypes.bool,shape=[]))) + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(self.strtobool(new_val_str), dtype=dtypes.bool, shape=[]) + ) + ) cur_graph.add_node(new_node, None, graph_info[placeholder_node.name].outputs) for each_output in graph_info[placeholder_node.name].outputs: for i, input_name in enumerate(graph_info[each_output].node.input): if input_name == placeholder_node.name: - new_input = graph_info[each_output].node.input[:i] + [new_node.name] +\ - graph_info[each_output].node.input[i+1:] - graph_info[each_output].node.ClearField('input') + new_input = ( + graph_info[each_output].node.input[:i] + + [new_node.name] + + graph_info[each_output].node.input[i + 1 :] + ) + graph_info[each_output].node.ClearField("input") graph_info[each_output].node.input.extend(new_input) cur_graph.remove_node(placeholder_node.name) else: diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/dequantize_cast_optimizer.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/dequantize_cast_optimizer.py index a341d81b054..0776854fd34 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/dequantize_cast_optimizer.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/dequantize_cast_optimizer.py @@ -16,14 +16,16 @@ # limitations under the License. """Dequantize Cast Graph Rerewriter.""" +import tensorflow as tf from tensorflow.core.framework import attr_value_pb2 from tensorflow.python.framework import dtypes -from ..graph_base import GraphRewriterBase from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer -from neural_compressor.utils.utility import dump_elapsed_time -import tensorflow as tf from neural_compressor.adaptor.tf_utils.util import TF_SPR_BASE_VERSIONS +from neural_compressor.utils.utility import dump_elapsed_time + +from ..graph_base import GraphRewriterBase + class DequantizeCastOptimizer(GraphRewriterBase): """Remove the Cast OP and set Dequantize output to B16 if the Cast OP output is BF16.""" @@ -40,7 +42,7 @@ def do_transformation(self): """ # stock TF _MklDequantize doesn't support BF16 currently. 
# TODO remove this when spr-base upstream to stock TF. - if not tf.version.VERSION in TF_SPR_BASE_VERSIONS: + if tf.version.VERSION not in TF_SPR_BASE_VERSIONS: return self.model DT_BFLOAT16 = attr_value_pb2.AttrValue(type=dtypes.bfloat16.as_datatype_enum) @@ -54,18 +56,18 @@ def do_transformation(self): if len(dq_outputs) > 1: continue - if dq_node.attr['mode'].s == b'MIN_FIRST': + if dq_node.attr["mode"].s == b"MIN_FIRST": continue cast_node = graph_info[i[1]].node cast_outputs = graph_info[i[1]].outputs all_cast_outputs_bf16 = True for cast_output in cast_outputs: cast_output_node = graph_info[cast_output].node - if cast_output_node.attr['T'] != DT_BFLOAT16:# des dtype of the cast must be bfloat16 + if cast_output_node.attr["T"] != DT_BFLOAT16: # des dtype of the cast must be bfloat16 all_cast_outputs_bf16 = False if not all_cast_outputs_bf16: continue - dq_node.attr['dtype'].CopyFrom(DT_BFLOAT16) + dq_node.attr["dtype"].CopyFrom(DT_BFLOAT16) for cast_output in cast_outputs: successor_node = graph_info[cast_output].node replace_index = None diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/dilated_contraction.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/dilated_contraction.py index a8751d15320..85294066673 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/dilated_contraction.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/dilated_contraction.py @@ -17,17 +17,18 @@ """Dilated Contraction Graph Rewriter.""" from tensorflow.core.framework import attr_value_pb2 -from tensorflow.python.framework import tensor_util -from tensorflow.python.framework import dtypes -from neural_compressor.utils.utility import dump_elapsed_time +from tensorflow.python.framework import dtypes, tensor_util -from ..graph_base import GraphRewriterBase from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper +from neural_compressor.utils.utility import dump_elapsed_time + +from ..graph_base import GraphRewriterBase class DilatedContraction(GraphRewriterBase): """Fuse the SpaceToBatchND + Conv + BatchToSpaceND pattern.""" + @dump_elapsed_time("Pass DilatedContraction") def do_transformation(self): """Dilated Contraction fusion.""" @@ -36,7 +37,8 @@ def do_transformation(self): graph_info = cur_graph.parse_graph() target_nodes = cur_graph.query_fusion_pattern_nodes( - ["SpaceToBatchND", ["Conv2D", "DepthwiseConv2dNative"], "BatchToSpaceND"]) + ["SpaceToBatchND", ["Conv2D", "DepthwiseConv2dNative"], "BatchToSpaceND"] + ) for node_combination in target_nodes: stob_node = graph_info[node_combination[0]].node @@ -47,26 +49,23 @@ def do_transformation(self): block_shape_node = graph_info[btos_node.input[1]].node crops_node = graph_info[btos_node.input[2]].node - block_value = [i for i in tensor_util.MakeNdarray( - block_shape_node.attr['value'].tensor).flat] + block_value = [i for i in tensor_util.MakeNdarray(block_shape_node.attr["value"].tensor).flat] new_dilation = [1, block_value[0], block_value[1], 1] # if padding input of SpaceToBatchND can't be directly fetched, we continue - if stob_padding_node.op != 'Const': + if stob_padding_node.op != "Const": continue - padding_value = [i for i in tensor_util.MakeNdarray( - stob_padding_node.attr['value'].tensor).flat] - crops_value = [i for i in tensor_util.MakeNdarray( - crops_node.attr['value'].tensor).flat] + padding_value = [i for i in 
tensor_util.MakeNdarray(stob_padding_node.attr["value"].tensor).flat] + crops_value = [i for i in tensor_util.MakeNdarray(crops_node.attr["value"].tensor).flat] contraction_node.input[0] = stob_node.input[0] - Helper.set_attr_int_list(contraction_node, 'dilations', new_dilation) + Helper.set_attr_int_list(contraction_node, "dilations", new_dilation) real_padding = [padding_value[i] - crops_value[i] for i in range(4)] explict_padding = [0, 0, 0, 0, 0, 0, 0, 0] - data_format = contraction_node.attr['data_format'].s.decode() + data_format = contraction_node.attr["data_format"].s.decode() if any(real_padding): - contraction_node.attr['padding'].s = "EXPLICIT".encode() - assert data_format in ('NHWC', 'NCHW') + contraction_node.attr["padding"].s = "EXPLICIT".encode() + assert data_format in ("NHWC", "NCHW") if data_format == "NHWC": explict_padding[2] = real_padding[0] explict_padding[3] = real_padding[1] @@ -79,7 +78,7 @@ def do_transformation(self): explict_padding[7] = real_padding[3] Helper.set_attr_int_list(contraction_node, "explicit_paddings", explict_padding) - contraction_node.attr.pop('_output_shapes') + contraction_node.attr.pop("_output_shapes") cur_graph.remove_node(stob_node.name) following_node_name = graph_info[node_combination[2]].outputs[0] following_node = graph_info[following_node_name].node @@ -87,4 +86,4 @@ def do_transformation(self): following_node.input[0] = btos_node.input[0] cur_graph.remove_node(btos_node.name) - return cur_graph.dump_graph() \ No newline at end of file + return cur_graph.dump_graph() diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/dummy_biasadd.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/dummy_biasadd.py index 2a58f63710f..8a5a06a8a67 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/dummy_biasadd.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/dummy_biasadd.py @@ -18,16 +18,18 @@ import tensorflow as tf from tensorflow.python.framework import dtypes -from neural_compressor.utils.utility import dump_elapsed_time -from ..graph_base import GraphRewriterBase from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper from neural_compressor.adaptor.tf_utils.util import version1_gt_version2 +from neural_compressor.utils.utility import dump_elapsed_time + +from ..graph_base import GraphRewriterBase class InjectDummyBiasAddOptimizer(GraphRewriterBase): """Inject dummy BiasAdd for MatMul, Conv2D for pattern fusion.""" + def __init__(self, model, outputs): """Initilization.""" super().__init__(model) @@ -40,66 +42,86 @@ def do_transformation(self): g.graph = self.model graph_info = g.parse_graph() g.get_frame_info() - valid_ops = ('BiasAdd', 'Add', 'AddV2', 'AddN') - target_nodes = g.query_fusion_pattern_nodes([['MatMul', 'Conv2D'],]) + valid_ops = ("BiasAdd", "Add", "AddV2", "AddN") + target_nodes = g.query_fusion_pattern_nodes( + [ + ["MatMul", "Conv2D"], + ] + ) for i in target_nodes: # only apply this pass for tensorflow old quantization API, pre_optimize does this check - # use conv+dummy_biasadd+relu because TF do not support conv+relu now. + # use conv+dummy_biasadd+relu because TF do not support conv+relu now. 
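# The arithmetic behind the SpaceToBatchND + Conv2D/DepthwiseConv2dNative +
# BatchToSpaceND fusion in DilatedContraction: the block shape becomes the
# convolution's dilation rate, and any padding SpaceToBatchND adds beyond what
# BatchToSpaceND crops back is kept as explicit padding on the fused conv.
# All values below are made-up examples; indices 2..5 of explicit_paddings
# hold the H/W padding for the NHWC layout.
block_shape = [2, 2]         # SpaceToBatchND block_shape (flattened)
paddings = [2, 2, 2, 2]      # SpaceToBatchND paddings (flattened)
crops = [0, 0, 0, 0]         # BatchToSpaceND crops (flattened)

new_dilations = [1, block_shape[0], block_shape[1], 1]
real_padding = [paddings[i] - crops[i] for i in range(4)]

explicit_paddings = [0] * 8
if any(real_padding):
    explicit_paddings[2:6] = real_padding  # padding mode becomes "EXPLICIT"
print(new_dilations, explicit_paddings)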
if i[0] in self.outputs: continue next_node_names = graph_info[i[0]].outputs - if next_node_names and len(next_node_names) == 1 and \ - graph_info[Helper.node_name_from_input(next_node_names[0])].node.op in valid_ops: + if ( + next_node_names + and len(next_node_names) == 1 + and graph_info[Helper.node_name_from_input(next_node_names[0])].node.op in valid_ops + ): continue - bias_node_name = i[0] + '_dummy_biasadd' - bias_const_node_name = i[0] + '_dummy_biasadd_const' + bias_node_name = i[0] + "_dummy_biasadd" + bias_const_node_name = i[0] + "_dummy_biasadd_const" matmul_a_node_name = Helper.node_name_from_input(graph_info[i[0]].node.input[0]) matmul_a_node = graph_info[matmul_a_node_name].node matmul_b_node_name = Helper.node_name_from_input(graph_info[i[0]].node.input[1]) matmul_b_node = graph_info[matmul_b_node_name].node - if matmul_a_node.op == 'Const' or matmul_b_node.op not in ['Const', 'Enter']: + if matmul_a_node.op == "Const" or matmul_b_node.op not in ["Const", "Enter"]: continue - if matmul_b_node.op == 'Enter': # pragma: no cover + if matmul_b_node.op == "Enter": # pragma: no cover parent_node = graph_info[Helper.node_name_from_input(matmul_b_node.input[0])].node - if parent_node.op != 'Const': + if parent_node.op != "Const": continue else: matmul_b_node = parent_node matmul_b_node_name = matmul_b_node.name - if graph_info[i[0]].node.op == 'MatMul': - t_b_index = 0 if graph_info[i[0]].node.attr['transpose_b'].b else 1 - elif graph_info[i[0]].node.op == 'Conv2D' and graph_info[i[0]].node.attr['data_format'].s == b'NHWC': + if graph_info[i[0]].node.op == "MatMul": + t_b_index = 0 if graph_info[i[0]].node.attr["transpose_b"].b else 1 + elif graph_info[i[0]].node.op == "Conv2D" and graph_info[i[0]].node.attr["data_format"].s == b"NHWC": t_b_index = 3 - elif graph_info[i[0]].node.op == 'Conv2D' and graph_info[i[0]].node.attr['data_format'].s == b'NCHW': + elif graph_info[i[0]].node.op == "Conv2D" and graph_info[i[0]].node.attr["data_format"].s == b"NCHW": t_b_index = 1 else: continue - bias_add_length = matmul_b_node.attr['value'].tensor.tensor_shape.dim[t_b_index].size + bias_add_length = matmul_b_node.attr["value"].tensor.tensor_shape.dim[t_b_index].size - bias_add_content = [0.] 
* bias_add_length + bias_add_content = [0.0] * bias_add_length bias_const_node = Helper.create_constant_node( - bias_const_node_name, bias_add_content, dtypes.float32, shape=[bias_add_length]) + bias_const_node_name, bias_add_content, dtypes.float32, shape=[bias_add_length] + ) - if i[0] in g.parent_frame_details and g.parent_frame_details[i[0]]: # pragma: no cover + if i[0] in g.parent_frame_details and g.parent_frame_details[i[0]]: # pragma: no cover bias_const_enter_node = Helper.create_node( - 'Enter', bias_const_node_name+'_enter', [bias_const_node_name]) - Helper.set_attr_string(bias_const_enter_node, - 'frame_name', g.parent_frame_details[i[0]].attr['frame_name'].s) - Helper.set_attr_dtype(bias_const_enter_node, 'T', dtypes.float32) - Helper.set_attr_bool(bias_const_enter_node, 'is_constant', True) - Helper.set_attr_int(bias_const_enter_node, 'parallel_iterations', \ - g.parent_frame_details[i[0]].attr['parallel_iterations'].i) + "Enter", bias_const_node_name + "_enter", [bias_const_node_name] + ) + Helper.set_attr_string( + bias_const_enter_node, "frame_name", g.parent_frame_details[i[0]].attr["frame_name"].s + ) + Helper.set_attr_dtype(bias_const_enter_node, "T", dtypes.float32) + Helper.set_attr_bool(bias_const_enter_node, "is_constant", True) + Helper.set_attr_int( + bias_const_enter_node, + "parallel_iterations", + g.parent_frame_details[i[0]].attr["parallel_iterations"].i, + ) - bias_node = Helper.create_node('BiasAdd', bias_node_name, \ - [i[0], bias_const_enter_node.name if i[0] in g.parent_frame_details \ - and g.parent_frame_details[i[0]] else bias_const_node_name]) + bias_node = Helper.create_node( + "BiasAdd", + bias_node_name, + [ + i[0], + bias_const_enter_node.name + if i[0] in g.parent_frame_details and g.parent_frame_details[i[0]] + else bias_const_node_name, + ], + ) Helper.set_attr_dtype(bias_node, "T", dtypes.float32) g.add_node(bias_node, i[0], next_node_names) - if i[0] in g.parent_frame_details and g.parent_frame_details[i[0]]: # pragma: no cover + if i[0] in g.parent_frame_details and g.parent_frame_details[i[0]]: # pragma: no cover g.add_node(bias_const_node, None, [bias_const_enter_node.name]) g.add_node(bias_const_enter_node, bias_const_node_name, [bias_node_name]) else: diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/expanddims_optimizer.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/expanddims_optimizer.py index a9b9d1d9a2b..4e2e6e5dc2e 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/expanddims_optimizer.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/expanddims_optimizer.py @@ -17,15 +17,19 @@ """ExpandDims Graph Rewriter.""" -from ..graph_base import GraphRewriterBase +import numpy as np +from tensorflow.python.framework import dtypes + from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper from neural_compressor.utils.utility import dump_elapsed_time -from tensorflow.python.framework import dtypes -import numpy as np + +from ..graph_base import GraphRewriterBase + class ExpandDimsOptimizer(GraphRewriterBase): """Calculate ExpandDims and remove it if its input is weight and next node is Conv2D.""" + @dump_elapsed_time("Pass ExpandDimsOptimizer") def do_transformation(self): """Handle all ExpandDims ops whose input is weight and output is Conv2D. 
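# Why the injected dummy BiasAdd is safe, in plain numpy: a zero bias whose
# length equals the contraction's output-channel dimension leaves the result
# unchanged, but it completes the MatMul/Conv2D + BiasAdd pattern the old
# quantization API knows how to fuse. Shapes are arbitrary; t_b_index = 1
# corresponds to a MatMul with transpose_b=False, as in the branch above.
import numpy as np

a = np.random.rand(4, 16).astype(np.float32)
b = np.random.rand(16, 32).astype(np.float32)    # MatMul weight

t_b_index = 1                                    # output-channel dim of the weight
bias_add_length = b.shape[t_b_index]
dummy_bias = np.zeros(bias_add_length, dtype=np.float32)

assert np.allclose(a @ b, a @ b + dummy_bias)    # numerically a no-op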
@@ -44,15 +48,15 @@ def do_transformation(self): for node_combination in target_nodes: expanddims_node = graph_info[node_combination[0]].node - dims_node = graph_info[expanddims_node.input[1]].node - next_node = graph_info[graph_info[node_combination[0]].outputs[0]].node + dims_node = graph_info[expanddims_node.input[1]].node + next_node = graph_info[graph_info[node_combination[0]].outputs[0]].node # to solve the case that input 0 of ExpandDims is a tensor, not a node if expanddims_node.input[0] in graph_info: - weight_node = graph_info[expanddims_node.input[0]].node + weight_node = graph_info[expanddims_node.input[0]].node else: continue - if weight_node.op == 'Const' and next_node.op == 'Conv2D': + if weight_node.op == "Const" and next_node.op == "Conv2D": dims = Helper.values_from_const(dims_node) weight_value = np.array(Helper.values_from_const(weight_node)) new_weight_value = np.expand_dims(weight_value, axis=dims) @@ -66,9 +70,9 @@ def do_transformation(self): if value == expanddims_node.name: replace_index = index break - # weight->conv2d + # weight->conv2d cur_graph.add_node(new_weight_node, None, [successor_node.name]) - successor_node.input[replace_index] = new_weight_node.name + successor_node.input[replace_index] = new_weight_node.name # remove ExpandDims and weight_node cur_graph.remove_node(expanddims_node.name) else: diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fetch_weight_from_reshape.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fetch_weight_from_reshape.py index 9cae616f65e..9e327d98098 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fetch_weight_from_reshape.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fetch_weight_from_reshape.py @@ -17,12 +17,15 @@ """Featch Weight from Reshape Graph Rewriter.""" -from ..graph_base import GraphRewriterBase +import numpy as np +from tensorflow.python.framework import dtypes + from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper from neural_compressor.utils.utility import dump_elapsed_time -from tensorflow.python.framework import dtypes -import numpy as np + +from ..graph_base import GraphRewriterBase + class FetchWeightFromReshapeOptimizer(GraphRewriterBase): """Handle the Pack + Reshape + Conv2D fusion pattern.""" @@ -40,38 +43,40 @@ def do_transformation(self): cur_graph.graph = self.model graph_info = cur_graph.parse_graph() - target_nodes = cur_graph.query_fusion_pattern_nodes([["Pack"],["Reshape"],["Conv2D"]]) + target_nodes = cur_graph.query_fusion_pattern_nodes([["Pack"], ["Reshape"], ["Conv2D"]]) for i, node_combination in enumerate(target_nodes): pack_node = graph_info[node_combination[0]].node reshape_node = graph_info[node_combination[1]].node - shape_node = graph_info[reshape_node.input[1]].node - conv_node = graph_info[node_combination[2]].node - if not (pack_node.op == 'Pack' and reshape_node.op == 'Reshape' and conv_node.op == 'Conv2D'): + shape_node = graph_info[reshape_node.input[1]].node + conv_node = graph_info[node_combination[2]].node + if not (pack_node.op == "Pack" and reshape_node.op == "Reshape" and conv_node.op == "Conv2D"): continue reshape_outputs_length = len(graph_info[node_combination[1]].outputs) unpack_values = [] for index in range(pack_node.attr["N"].i): - values_node = graph_info[pack_node.input[index]].node - if values_node.op == 'Const': + values_node = graph_info[pack_node.input[index]].node + if 
values_node.op == "Const": unpack_values.append(Helper.values_from_const(values_node)) input_reshape = np.stack(unpack_values, axis=pack_node.attr["axis"].i) - if shape_node.op != 'Const': + if shape_node.op != "Const": continue shape = Helper.values_from_const(shape_node) weight = np.reshape(input_reshape, shape) - weight_node = Helper.create_constant_node(reshape_node.name+'/weight'+'_'+str(i), weight, dtypes.float32) + weight_node = Helper.create_constant_node( + reshape_node.name + "/weight" + "_" + str(i), weight, dtypes.float32 + ) if i > 0: - conv_node_j = graph_info[target_nodes[i-1][2]].node + conv_node_j = graph_info[target_nodes[i - 1][2]].node graph_info[node_combination[1]].outputs.remove(conv_node_j.name) for output in graph_info[node_combination[1]].outputs: successor_node = graph_info[output].node replace_index = None for index, value in enumerate(successor_node.input): - if value == reshape_node.name or value == reshape_node.name+'/weight'+'_'+str(i - 1): + if value == reshape_node.name or value == reshape_node.name + "/weight" + "_" + str(i - 1): replace_index = index break - # weight->conv2d + # weight->conv2d cur_graph.add_node(weight_node, None, [successor_node.name]) successor_node.input[replace_index] = weight_node.name diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fold_batch_norm.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fold_batch_norm.py index 70397d0b4c8..61a7c6d6115 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fold_batch_norm.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fold_batch_norm.py @@ -17,34 +17,35 @@ """Folding BatchNorm Graph Rewriter.""" import math -import numpy as np -from tensorflow.core.framework import node_def_pb2 -from tensorflow.core.framework import attr_value_pb2 +import numpy as np +from tensorflow.core.framework import attr_value_pb2, node_def_pb2 from tensorflow.python.framework import tensor_util -from neural_compressor.utils.utility import dump_elapsed_time -from ..graph_base import GraphRewriterBase from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper +from neural_compressor.utils.utility import dump_elapsed_time + +from ..graph_base import GraphRewriterBase + class FoldBatchNormNodesOptimizer(GraphRewriterBase): """Folding BatchNorm nodes into Conv.""" + INPUT_ORDER = { # Order of inputs for BatchNormWithGlobalNormalization. - "BatchNormWithGlobalNormalization": - ["conv_op", "mean_op", "var_op", "beta_op", "gamma_op"], + "BatchNormWithGlobalNormalization": ["conv_op", "mean_op", "var_op", "beta_op", "gamma_op"], # Order of inputs for FusedBatchNorm. "FusedBatchNorm": ["conv_op", "gamma_op", "beta_op", "mean_op", "var_op"], "FusedBatchNormV3": ["conv_op", "gamma_op", "beta_op", "mean_op", "var_op"], - "_FusedBatchNormEx": ["conv_op", "gamma_op", "beta_op", "mean_op", "var_op"] + "_FusedBatchNormEx": ["conv_op", "gamma_op", "beta_op", "mean_op", "var_op"], } # Name of the attribute epsilon value is stored in. 
EPSILON_ATTR = { "BatchNormWithGlobalNormalization": "variance_epsilon", "FusedBatchNorm": "epsilon", "FusedBatchNormV3": "epsilon", - "_FusedBatchNormEx": "epsilon" + "_FusedBatchNormEx": "epsilon", } def scale_after_normalization(self, node): @@ -88,14 +89,17 @@ def do_transformation(self): graph_info = cur_graph.parse_graph() target_nodes = cur_graph.query_fusion_pattern_nodes( - [["Conv2D", "DepthwiseConv2dNative"], ("BiasAdd", "Add", "AddV2"), - ["BatchNormWithGlobalNormalization", "FusedBatchNorm", "FusedBatchNormV3", "_FusedBatchNormEx"]]) + [ + ["Conv2D", "DepthwiseConv2dNative"], + ("BiasAdd", "Add", "AddV2"), + ["BatchNormWithGlobalNormalization", "FusedBatchNorm", "FusedBatchNormV3", "_FusedBatchNormEx"], + ] + ) for node_combination in target_nodes: matched_node = node_combination[:-1] has_add_op = True if len(node_combination[-1]) == 3 else False conv_node = graph_info[Helper.node_name_from_input(matched_node[0])].node - weights_node_name = graph_info[Helper.node_name_from_input( - matched_node[0])].node.input[1] + weights_node_name = graph_info[Helper.node_name_from_input(matched_node[0])].node.input[1] weights_node = graph_info[Helper.node_name_from_input(weights_node_name)].node bn_node = graph_info[Helper.node_name_from_input(matched_node[-1])].node @@ -104,14 +108,17 @@ def do_transformation(self): if bn_node.op == "_FusedBatchNormEx": if bn_node.attr["num_side_inputs"].i != 0: continue - if not (bn_node.attr["activation_mode"].s == b"Identity" or - bn_node.attr["activation_mode"].s == b"Relu"): + if not ( + bn_node.attr["activation_mode"].s == b"Identity" or bn_node.attr["activation_mode"].s == b"Relu" + ): continue if weights_node.op != "Const": - self.logger.warning("Didn't find expected conv Constant input to '%s', " - "found %s instead. Maybe freeze_graph wasn't " - "run first?" % (bn_node.name, weights_node_name)) + self.logger.warning( + "Didn't find expected conv Constant input to '%s', " + "found %s instead. Maybe freeze_graph wasn't " + "run first?" 
% (bn_node.name, weights_node_name) + ) continue weights = Helper.values_from_const(weights_node) @@ -120,8 +127,7 @@ def do_transformation(self): elif conv_node.op == "DepthwiseConv2dNative": channel_count = weights.shape[2] * weights.shape[3] - mean_node_name = Helper.node_name_from_input( - bn_node.input[self.INPUT_ORDER[bn_node.op].index("mean_op")]) + mean_node_name = Helper.node_name_from_input(bn_node.input[self.INPUT_ORDER[bn_node.op].index("mean_op")]) mean_node = graph_info[mean_node_name].node if mean_node.op != "Const": @@ -130,82 +136,74 @@ def do_transformation(self): mean_value = Helper.values_from_const(mean_node) if has_add_op: - bias_node_name = graph_info[Helper.node_name_from_input( - matched_node[1])].node.input[1] + bias_node_name = graph_info[Helper.node_name_from_input(matched_node[1])].node.input[1] bias_node = graph_info[Helper.node_name_from_input(bias_node_name)].node if bias_node.op != "Const": continue - if mean_value.shape != (channel_count, ): + if mean_value.shape != (channel_count,): continue mean_value = mean_value - Helper.values_from_const(bias_node) cur_graph.remove_node(bias_node.name) cur_graph.remove_node(matched_node[1]) - if mean_value.shape != (channel_count, ): - self.logger.warning("Incorrect shape for mean, found {}, expected {}, " - "for node {}.".format(str(mean_value.shape), str( - (channel_count, )), conv_node.name)) + if mean_value.shape != (channel_count,): + self.logger.warning( + "Incorrect shape for mean, found {}, expected {}, " + "for node {}.".format(str(mean_value.shape), str((channel_count,)), conv_node.name) + ) continue - var_node_name = Helper.node_name_from_input( - bn_node.input[self.INPUT_ORDER[bn_node.op].index("var_op")]) + var_node_name = Helper.node_name_from_input(bn_node.input[self.INPUT_ORDER[bn_node.op].index("var_op")]) var_node = graph_info[var_node_name].node if var_node.op != "Const": continue var_value = Helper.values_from_const(var_node) - if var_value.shape != (channel_count, ): + if var_value.shape != (channel_count,): continue - beta_node_name = Helper.node_name_from_input( - bn_node.input[self.INPUT_ORDER[bn_node.op].index("beta_op")]) + beta_node_name = Helper.node_name_from_input(bn_node.input[self.INPUT_ORDER[bn_node.op].index("beta_op")]) beta_node = graph_info[beta_node_name].node if beta_node.op != "Const": continue beta_value = Helper.values_from_const(beta_node) - if beta_value.shape != (channel_count, ): + if beta_value.shape != (channel_count,): continue - gamma_node_name = Helper.node_name_from_input( - bn_node.input[self.INPUT_ORDER[bn_node.op].index("gamma_op")]) + gamma_node_name = Helper.node_name_from_input(bn_node.input[self.INPUT_ORDER[bn_node.op].index("gamma_op")]) gamma_node = graph_info[gamma_node_name].node if gamma_node.op != "Const": continue gamma_value = Helper.values_from_const(gamma_node) - if gamma_value.shape != (channel_count, ): + if gamma_value.shape != (channel_count,): continue variance_epsilon_value = bn_node.attr[self.EPSILON_ATTR[bn_node.op]].f if self.scale_after_normalization(bn_node): - scale_value = ( - (1.0 / np.vectorize(math.sqrt)(var_value + variance_epsilon_value)) * - gamma_value) + scale_value = (1.0 / np.vectorize(math.sqrt)(var_value + variance_epsilon_value)) * gamma_value else: - scale_value = (1.0 / np.vectorize(math.sqrt)(var_value + variance_epsilon_value)) + scale_value = 1.0 / np.vectorize(math.sqrt)(var_value + variance_epsilon_value) offset_value = (-mean_value * scale_value) + beta_value - if conv_node.op == "Conv2D": - original_shape 
=weights.shape - tmp_shape = (original_shape[-1], int(weights.size/original_shape[-1])) + original_shape = weights.shape + tmp_shape = (original_shape[-1], int(weights.size / original_shape[-1])) tmp_order = [weights.ndim - 1] + [i for i in range(weights.ndim - 1)] scaled_weights = np.copy(weights).transpose(tmp_order).ravel().reshape(tmp_shape) reshape_scale = np.array(scale_value).reshape(len(scale_value), 1) - scaled_weights = np.multiply( - scaled_weights, reshape_scale).transpose().reshape(original_shape) + scaled_weights = np.multiply(scaled_weights, reshape_scale).transpose().reshape(original_shape) elif conv_node.op == "DepthwiseConv2dNative": scaled_weights = np.copy(weights) it = np.nditer(scaled_weights, flags=["multi_index"], op_flags=["readwrite"]) channel_multiplier = weights.shape[3] while not it.finished: - current_scale = scale_value[it.multi_index[2] * channel_multiplier + - it.multi_index[3]] + current_scale = scale_value[it.multi_index[2] * channel_multiplier + it.multi_index[3]] it[0] *= current_scale it.iternext() @@ -214,8 +212,10 @@ def do_transformation(self): scaled_weights_node.name = weights_node_name + "_bn_offset" scaled_weights_node.attr["dtype"].CopyFrom(weights_node.attr["dtype"]) scaled_weights_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - scaled_weights, weights.dtype.type, weights.shape))) + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(scaled_weights, weights.dtype.type, weights.shape) + ) + ) cur_graph.replace_const_node(scaled_weights_node, [conv_node.name], weights_node_name) offset_node = node_def_pb2.NodeDef() @@ -223,8 +223,10 @@ def do_transformation(self): offset_node.name = conv_node.name + "_bn_offset" offset_node.attr["dtype"].CopyFrom(mean_node.attr["dtype"]) offset_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - offset_value, mean_value.dtype.type, offset_value.shape))) + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(offset_value, mean_value.dtype.type, offset_value.shape) + ) + ) bias_add_node = node_def_pb2.NodeDef() bias_add_node.op = "BiasAdd" bias_add_node.name = bn_node.name @@ -245,8 +247,9 @@ def do_transformation(self): relu_node.input.extend([bias_add_node.name]) cur_graph.add_node(offset_node, [], [bias_add_node.name]) - cur_graph.add_node(bias_add_node, conv_node.name, - graph_info[Helper.node_name_from_input(matched_node[-1])].outputs) + cur_graph.add_node( + bias_add_node, conv_node.name, graph_info[Helper.node_name_from_input(matched_node[-1])].outputs + ) if bn_node.op == "_FusedBatchNormEx" and bn_node.attr["activation_mode"].s == b"Relu": matchd_node_outputs = graph_info[Helper.node_name_from_input(matched_node[-1])].outputs cur_graph.add_node(offset_node, [], [bias_add_node.name]) @@ -254,8 +257,9 @@ def do_transformation(self): cur_graph.add_node(relu_node, bias_add_node.name, matchd_node_outputs) else: cur_graph.add_node(offset_node, [], [bias_add_node.name]) - cur_graph.add_node(bias_add_node, conv_node.name, - graph_info[Helper.node_name_from_input(matched_node[-1])].outputs) + cur_graph.add_node( + bias_add_node, conv_node.name, graph_info[Helper.node_name_from_input(matched_node[-1])].outputs + ) cur_graph.replace_const_node(scaled_weights_node, [conv_node.name], weights_node_name) cur_graph.remove_node(weights_node_name) diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fold_constant.py 
b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fold_constant.py index e8c36032a5a..0ed704b3060 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fold_constant.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fold_constant.py @@ -20,14 +20,16 @@ import numpy as np import tensorflow as tf from tensorflow.python.platform import tf_logging + +from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer, GraphRewriterHelper from neural_compressor.utils.utility import dump_elapsed_time from ..graph_base import GraphRewriterBase -from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer, GraphRewriterHelper class GraphFoldConstantOptimizer(GraphRewriterBase): """Folding all the sequences only consist of const and self.supported_op_type.""" + supported_op_type = ["Add", "AddV2", "Const", "Mul", "Rsqrt", "Sub"] def __init__(self, model=None): @@ -70,7 +72,7 @@ def can_broadcast(s1, s2): if end_node.op == "Mul": first_value = self._fold_value(list(end_node.input)[0]) first_type = first_value.dtype - fold_value = np.array(1.).astype(first_type) + fold_value = np.array(1.0).astype(first_type) for index, input in enumerate(end_node.input): # broadcast if needed input_value = self._fold_value(input) @@ -78,45 +80,46 @@ def can_broadcast(s1, s2): if can_broadcast(fold_value, input_value): fold_value = fold_value * input_value else: - raise ValueError("input {} of node {} can't be broadcast".format( - input.name, end_node.name)) + raise ValueError("input {} of node {} can't be broadcast".format(input.name, end_node.name)) return fold_value.astype(first_type) elif end_node.op == "Add" or end_node.op == "AddV2": first_value = self._fold_value(list(end_node.input)[0]) first_type = first_value.dtype - fold_value = np.array(0.).astype(first_type).reshape(()) + fold_value = np.array(0.0).astype(first_type).reshape(()) for index, input in enumerate(end_node.input): # broadcast if needed input_value = self._fold_value(input) if can_broadcast(fold_value, input_value): fold_value = fold_value + input_value else: - raise ValueError("input {} of node {} can't be broadcast".format( - input.name, end_node.name)) + raise ValueError("input {} of node {} can't be broadcast".format(input.name, end_node.name)) return fold_value.astype(first_type) elif end_node.op == "Rsqrt": return 1 / np.sqrt(self._fold_value(end_node.input[0])) elif end_node.op == "Sub": first_value = self._fold_value(list(end_node.input)[0]) first_type = first_value.dtype - fold_value = np.array(0., dtype=first_type) + fold_value = np.array(0.0, dtype=first_type) for index, input in enumerate(end_node.input): # broadcast if needed input_value = self._fold_value(input) if first_type != input_value.dtype: raise ValueError( "input of node {} must be in same dtype but get {}and {}".format( - input.name, first_type, input_value.dtype)) + input.name, first_type, input_value.dtype + ) + ) if can_broadcast(fold_value, input_value): - fold_value = fold_value + (-1)**index * input_value + fold_value = fold_value + (-1) ** index * input_value else: - raise ValueError("input {} of node {} can't be broadcast".format( - input.name, end_node.name)) + raise ValueError("input {} of node {} can't be broadcast".format(input.name, end_node.name)) return fold_value.astype(first_type) else: tf_logging.info( "Currently fold-constant only support limited ops {} but face {}".format( - self.supported_op_type, end_node.op)) + self.supported_op_type, end_node.op + ) + ) else: return 
GraphRewriterHelper.values_from_const(end_node) @@ -170,9 +173,9 @@ def do_transformation(self): fold_value = self._fold_value(node_name) fold_type = tf.as_dtype(fold_value.dtype) new_constant_node = GraphRewriterHelper.create_constant_node( - node_name + "_const", fold_value, fold_type) - self.graph_analyzer.replace_constant_graph_with_constant_node( - new_constant_node, node_name) + node_name + "_const", fold_value, fold_type + ) + self.graph_analyzer.replace_constant_graph_with_constant_node(new_constant_node, node_name) output_graph_def = self.graph_analyzer.dump_graph() diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_biasadd_add.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_biasadd_add.py index 619c8288361..bd547a2fc7f 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_biasadd_add.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_biasadd_add.py @@ -17,11 +17,12 @@ """Fuse BiasAdd and Add Graph Rewriter.""" import tensorflow as tf -from ..graph_base import GraphRewriterBase +from tensorflow.python.framework import dtypes, tensor_util + from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import tensor_util + +from ..graph_base import GraphRewriterBase class FuseBiasAddAndAddOptimizer(GraphRewriterBase): @@ -35,7 +36,8 @@ def do_transformation(self): graph_info = cur_graph.parse_graph() target_nodes = cur_graph.query_fusion_pattern_nodes( - [["Conv2D", "Conv3D"], "BiasAdd", ["Add", "AddV2"], ["Relu", "Relu6", "swish_f32"], ["Mul"], ["Mul"]]) + [["Conv2D", "Conv3D"], "BiasAdd", ["Add", "AddV2"], ["Relu", "Relu6", "swish_f32"], ["Mul"], ["Mul"]] + ) for i in target_nodes: biasadd_const_name = graph_info[i[1]].node.input[1] @@ -53,19 +55,16 @@ def do_transformation(self): add_const_node = graph_info[add_node_const_name].node - if add_const_node.op != 'Const': + if add_const_node.op != "Const": continue - value= tensor_util.MakeNdarray( - biasadd_const_node.attr['value'].tensor) - add_value = tensor_util.MakeNdarray( - add_const_node.attr['value'].tensor) - - new_bias_tensor = (value+add_value) - fused_const_node = Helper.create_constant_node( - i[2]+'_fused', new_bias_tensor, dtypes.float32) + value = tensor_util.MakeNdarray(biasadd_const_node.attr["value"].tensor) + add_value = tensor_util.MakeNdarray(add_const_node.attr["value"].tensor) + + new_bias_tensor = value + add_value + fused_const_node = Helper.create_constant_node(i[2] + "_fused", new_bias_tensor, dtypes.float32) cur_graph.remove_node(graph_info[i[1]].node.input[1]) - graph_info[i[1]].node.input[1] = i[2] + '_fused' + graph_info[i[1]].node.input[1] = i[2] + "_fused" cur_graph.remove_node(add_node_const_name) diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_column_wise_mul.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_column_wise_mul.py index 37ee8599066..f2f4e03f6a8 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_column_wise_mul.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_column_wise_mul.py @@ -17,13 +17,14 @@ """Fuse Columnwise Mul Graph Rewriter.""" from tensorflow.core.framework import attr_value_pb2 -from tensorflow.python.framework import tensor_util -from tensorflow.python.framework import dtypes -from neural_compressor.utils.utility 
import dump_elapsed_time +from tensorflow.python.framework import dtypes, tensor_util -from ..graph_base import GraphRewriterBase from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper +from neural_compressor.utils.utility import dump_elapsed_time + +from ..graph_base import GraphRewriterBase + class FuseColumnWiseMulOptimizer(GraphRewriterBase): """Fuse Mul op into Conv2D/DepthwiseConv2dNative/MatMul.""" @@ -35,8 +36,7 @@ def do_transformation(self): cur_graph.graph = self.model graph_info = cur_graph.parse_graph() - target_nodes = cur_graph.query_fusion_pattern_nodes( - [["Conv2D", "DepthwiseConv2dNative", "MatMul"], "Mul"]) + target_nodes = cur_graph.query_fusion_pattern_nodes([["Conv2D", "DepthwiseConv2dNative", "MatMul"], "Mul"]) for node_combination in target_nodes: upper_node = graph_info[node_combination[0]].node @@ -47,18 +47,22 @@ def do_transformation(self): mul_value_node = graph_info[graph_info[node_combination[1]].node.input[1]].node upper_node_type = upper_node.op - if upper_node_type == 'Conv2D': - weights_col = weights_node.attr['value'].tensor.tensor_shape.dim[3].size - elif upper_node_type == 'DepthwiseConv2dNative': - weights_col = weights_node.attr['value'].tensor.tensor_shape.dim[2].size * \ - weights_node.attr['value'].tensor.tensor_shape.dim[3].size + if upper_node_type == "Conv2D": + weights_col = weights_node.attr["value"].tensor.tensor_shape.dim[3].size + elif upper_node_type == "DepthwiseConv2dNative": + weights_col = ( + weights_node.attr["value"].tensor.tensor_shape.dim[2].size + * weights_node.attr["value"].tensor.tensor_shape.dim[3].size + ) else: - weights_col = weights_node.attr['value'].tensor.tensor_shape.dim[1].size + weights_col = weights_node.attr["value"].tensor.tensor_shape.dim[1].size - mul_value_node_tensor = mul_value_node.attr['value'].tensor - weights_node_tensor = weights_node.attr['value'].tensor - if len(mul_value_node_tensor.tensor_shape.dim - ) != 1 or mul_value_node_tensor.tensor_shape.dim[0].size != weights_col: + mul_value_node_tensor = mul_value_node.attr["value"].tensor + weights_node_tensor = weights_node.attr["value"].tensor + if ( + len(mul_value_node_tensor.tensor_shape.dim) != 1 + or mul_value_node_tensor.tensor_shape.dim[0].size != weights_col + ): self.logger.warning("Invalid Mul OP fusion.") return self.model @@ -68,10 +72,13 @@ def do_transformation(self): new_weights_value = i * mul_value_node_list[index % len(mul_value_node_list)] new_weights.append(new_weights_value) - weights_node.attr['value'].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - new_weights, dtypes.float32, - tensor_util.MakeNdarray(weights_node_tensor).shape))) + weights_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + new_weights, dtypes.float32, tensor_util.MakeNdarray(weights_node_tensor).shape + ) + ) + ) cur_graph.remove_node_with_single_input_output(mul_node.name) cur_graph.remove_node(mul_node.input[1]) diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_conv_with_math.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_conv_with_math.py index c6a509cb585..cfc60de139a 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_conv_with_math.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_conv_with_math.py @@ -17,14 +17,13 @@ """Fuse Conv with Math Graph Rewriter.""" import numpy as np +from 
tensorflow.python.framework import dtypes, tensor_util -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import tensor_util +from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer +from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper from neural_compressor.utils.utility import dump_elapsed_time from ..graph_base import GraphRewriterBase -from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer -from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper class FuseConvWithMathOptimizer(GraphRewriterBase): @@ -40,13 +39,14 @@ class FuseConvWithMathOptimizer(GraphRewriterBase): | | BiasAdd BiasAdd """ + @dump_elapsed_time("Pass FuseConvWithMathOptimizer") def do_transformation(self): """Fuse Conv + Sub + RealDiv + Mul + BiasAdd to Conv + BiasAdd.""" g = GraphAnalyzer() g.graph = self.model graph_info = g.parse_graph() - pattern_definition = [['Conv2D'], ['Sub'], ['RealDiv'], ['Mul'], ['BiasAdd']] + pattern_definition = [["Conv2D"], ["Sub"], ["RealDiv"], ["Mul"], ["BiasAdd"]] target_nodes = g.query_fusion_pattern_nodes(pattern_definition) for i in target_nodes: weights_node_name = graph_info[i[0]].node.input[1] @@ -55,44 +55,43 @@ def do_transformation(self): sub_input_names = list(graph_info[i[1]].node.input) sub_content_node_name = list(set(sub_input_names).difference([i[0]]))[0] sub_content_node = graph_info[sub_content_node_name].node - sub_tensor = tensor_util.MakeNdarray(sub_content_node.attr['value'].tensor) + sub_tensor = tensor_util.MakeNdarray(sub_content_node.attr["value"].tensor) real_div_input_names = list(graph_info[i[2]].node.input) real_div_content_node_name = list(set(real_div_input_names).difference([i[1]]))[0] real_div_node = graph_info[real_div_content_node_name].node - real_div_tensor = tensor_util.MakeNdarray(real_div_node.attr['value'].tensor) + real_div_tensor = tensor_util.MakeNdarray(real_div_node.attr["value"].tensor) mul_input_names = list(graph_info[i[3]].node.input) mul_content_node_name = list(set(mul_input_names).difference([i[2]]))[0] mul_content_node = graph_info[mul_content_node_name].node - mul_tensor = tensor_util.MakeNdarray(mul_content_node.attr['value'].tensor) + mul_tensor = tensor_util.MakeNdarray(mul_content_node.attr["value"].tensor) bias_input_names = list(graph_info[i[4]].node.input) bias_content_node_name = list(set(bias_input_names).difference([i[3]]))[0] bias_content_node = graph_info[bias_content_node_name].node - bias_tensor = tensor_util.MakeNdarray(bias_content_node.attr['value'].tensor) + bias_tensor = tensor_util.MakeNdarray(bias_content_node.attr["value"].tensor) - bias_offset_value = bias_tensor - sub_tensor*mul_tensor / real_div_tensor + bias_offset_value = bias_tensor - sub_tensor * mul_tensor / real_div_tensor weights_offset = mul_tensor / real_div_tensor weights = Helper.values_from_const(weights_node) original_shape = weights.shape - tmp_shape = (original_shape[-1], int(weights.size/original_shape[-1])) + tmp_shape = (original_shape[-1], int(weights.size / original_shape[-1])) tmp_order = [weights.ndim - 1] + [i for i in range(weights.ndim - 1)] scaled_weights = np.copy(weights).transpose(tmp_order).ravel().reshape(tmp_shape) reshape_scale = np.array(weights_offset).reshape(len(weights_offset), 1) - scaled_weights = np.multiply( - scaled_weights, reshape_scale).transpose().reshape(original_shape) + scaled_weights = np.multiply(scaled_weights, reshape_scale).transpose().reshape(original_shape) 
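A minimal NumPy sketch of what the transpose/reshape sequence in FuseConvWithMathOptimizer above (and the matching code in fold_batch_norm) computes: each output channel of an HWIO Conv2D kernel is multiplied by its own scale factor. The tensor shapes and the scale vector below are made up purely for illustration.

import numpy as np

weights = np.random.rand(3, 3, 16, 32).astype(np.float32)   # dummy H, W, in, out Conv2D kernel
scale = np.random.rand(32).astype(np.float32)                # one factor per output channel

original_shape = weights.shape
tmp_shape = (original_shape[-1], weights.size // original_shape[-1])
tmp_order = [weights.ndim - 1] + list(range(weights.ndim - 1))
rows = weights.transpose(tmp_order).ravel().reshape(tmp_shape)        # one row per output channel
scaled = (rows * scale.reshape(-1, 1)).transpose().reshape(original_shape)

# The transpose/reshape round trip is equivalent to broadcasting the scale
# over the last (output-channel) axis of the kernel.
assert np.allclose(scaled, weights * scale)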
scaled_weight_name = weights_node_name + "_conv_math_offset" - scaled_weights_node = Helper.create_constant_node(scaled_weight_name, - scaled_weights, dtypes.float32, shape=weights.shape) + scaled_weights_node = Helper.create_constant_node( + scaled_weight_name, scaled_weights, dtypes.float32, shape=weights.shape + ) g.add_node(scaled_weights_node, None, [i[0]]) g.replace_const_node(scaled_weights_node, [i[0]], weights_node_name) - offset_node = Helper.create_constant_node(i[0] + "_biasadd_math_offset", - bias_offset_value, dtypes.float32) + offset_node = Helper.create_constant_node(i[0] + "_biasadd_math_offset", bias_offset_value, dtypes.float32) g.add_node(offset_node, None, [i[4]]) graph_info[i[4]].node.input[0] = i[0] diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_decomposed_bn.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_decomposed_bn.py index 5617241205b..b79b35e3797 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_decomposed_bn.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_decomposed_bn.py @@ -21,20 +21,17 @@ import re import numpy as np - -from tensorflow.core.framework import attr_value_pb2 -from tensorflow.core.framework import graph_pb2 -from tensorflow.core.framework import node_def_pb2 -from tensorflow.python.framework import dtypes from tensorflow.compat.v1 import graph_util -from tensorflow.python.framework import tensor_util +from tensorflow.core.framework import attr_value_pb2, graph_pb2, node_def_pb2 +from tensorflow.python.framework import dtypes, tensor_util from tensorflow.python.platform import flags as flags_lib from tensorflow.python.platform import tf_logging from tensorflow.python.tools import strip_unused_lib from neural_compressor.utils.utility import dump_elapsed_time -class FuseDecomposedBNOptimizer(): + +class FuseDecomposedBNOptimizer: """Fuse decomposed small ops to BatchNormalization.""" def __init__(self, input_graph_def): @@ -149,8 +146,7 @@ def do_transformation(self): if "data_format" in node.attr.keys(): data_format = node.attr["data_format"] if data_format is not None and data_format.s != b"NHWC": - tf_logging.warn("%s in %s format, not candidate for batchnorm fusion." - % (node.name, data_format.s)) + tf_logging.warn("%s in %s format, not candidate for batchnorm fusion." % (node.name, data_format.s)) return self.input_graph_def else: continue @@ -177,21 +173,21 @@ def do_transformation(self): # Mul (input, Mul) input_data_op = node_from_map(input_node_map, data_scale_mul_op.input[0]) # Workaround for model ava-person-vehicle-detection-stage2-2_0_0 - # FusedBatchNorm requires a 4D Tensor for input data, + # FusedBatchNorm requires a 4D Tensor for input data, # but the MatMul before FusedBatchNorm only support 2D output. # Don't fuse the small ops to FusedBatchNorm when the upstream has MatMul. 
- if input_data_op.op == 'MatMul': + if input_data_op.op == "MatMul": continue - + # Workaround for DIEN_Deep-Interest-Evolution-Network - if input_data_op.op == 'ConcatV2' and input_data_op.name == 'concat_8': + if input_data_op.op == "ConcatV2" and input_data_op.name == "concat_8": continue - + if input_data_op.input: ancestor_input_data_op = node_from_map(input_node_map, input_data_op.input[0]) if ancestor_input_data_op.op == "MatMul": continue - + scale_op = node_from_map(input_node_map, data_scale_mul_op.input[1]) if scale_op.op == "Rsqrt": @@ -201,8 +197,7 @@ def do_transformation(self): elif scale_op.op == "Mul": # Mul (Rsqrt, Constant_gamma) rsqrt_op = node_from_map(input_node_map, scale_op.input[0]) - gamma_op, gamma_reshape_op = bypass_reshape(input_node_map, - scale_op.input[1]) + gamma_op, gamma_reshape_op = bypass_reshape(input_node_map, scale_op.input[1]) if rsqrt_op.op != "Rsqrt": continue if gamma_op.op != "Const" or get_const_dim_count(gamma_op) != 1: @@ -211,21 +206,18 @@ def do_transformation(self): continue # Sub (Constant_beta, Mul) - beta_op, beta_reshape_op = bypass_reshape(input_node_map, - bias_mean_sub_op.input[0]) + beta_op, beta_reshape_op = bypass_reshape(input_node_map, bias_mean_sub_op.input[0]) mean_scale_mul_op = node_from_map(input_node_map, bias_mean_sub_op.input[1]) if mean_scale_mul_op.op != "Mul": continue if beta_op.op != "Const" or get_const_dim_count(beta_op) != 1: continue - # Common scale applies to both input and running mean - if scale_op != node_from_map(input_node_map, - mean_scale_mul_op.input[1]): + # Common scale applies to both input and running mean + if scale_op != node_from_map(input_node_map, mean_scale_mul_op.input[1]): continue - mean_op, mean_reshape_op = bypass_reshape(input_node_map, - mean_scale_mul_op.input[0]) + mean_op, mean_reshape_op = bypass_reshape(input_node_map, mean_scale_mul_op.input[0]) if mean_op.op != "Const" or get_const_dim_count(mean_op) != 1: continue @@ -234,10 +226,8 @@ def do_transformation(self): if variance_epsilon_add_op.op != "Add": continue - variance_op, variance_reshape_op = bypass_reshape( - input_node_map, variance_epsilon_add_op.input[0]) - epsilon_op = node_from_map(input_node_map, - variance_epsilon_add_op.input[1]) + variance_op, variance_reshape_op = bypass_reshape(input_node_map, variance_epsilon_add_op.input[0]) + epsilon_op = node_from_map(input_node_map, variance_epsilon_add_op.input[1]) if epsilon_op.op != "Const" or get_const_dim_count(epsilon_op) != 0: continue if variance_op.op != "Const" or get_const_dim_count(variance_op) != 1: @@ -274,24 +264,25 @@ def do_transformation(self): gamma_op.attr["dtype"].CopyFrom(beta_op.attr["dtype"]) beta_value = values_from_const(beta_op) gamma_op.attr["value"].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - 1, beta_value.dtype.type, beta_value.shape, - allow_broadcast=True))) + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + 1, beta_value.dtype.type, beta_value.shape, allow_broadcast=True + ) + ) + ) new_ops.append(gamma_op) new_fused_batchnorm_op = node_def_pb2.NodeDef() new_fused_batchnorm_op.op = "FusedBatchNorm" new_fused_batchnorm_op.name = node.name new_fused_batchnorm_op.attr["T"].CopyFrom(node.attr["T"]) - new_fused_batchnorm_op.attr["is_training"].CopyFrom( - attr_value_pb2.AttrValue(b=False)) - new_fused_batchnorm_op.attr["epsilon"].CopyFrom( - attr_value_pb2.AttrValue(f=epsilon.tolist())) + new_fused_batchnorm_op.attr["is_training"].CopyFrom(attr_value_pb2.AttrValue(b=False)) + 
new_fused_batchnorm_op.attr["epsilon"].CopyFrom(attr_value_pb2.AttrValue(f=epsilon.tolist())) if data_format is not None: new_fused_batchnorm_op.attr["data_format"].CopyFrom(data_format) - new_fused_batchnorm_op.input.extend([input_data_op.name, gamma_op.name, - beta_op.name, mean_op.name, - variance_op.name]) + new_fused_batchnorm_op.input.extend( + [input_data_op.name, gamma_op.name, beta_op.name, mean_op.name, variance_op.name] + ) new_ops.append(new_fused_batchnorm_op) @@ -312,6 +303,7 @@ def do_transformation(self): result_graph_def.versions.CopyFrom(self.input_graph_def.versions) return result_graph_def + def node_name_from_input(node_name): """Strips off ports and other decorations to get the underlying node name.""" if node_name.startswith("^"): @@ -321,6 +313,7 @@ def node_name_from_input(node_name): node_name = m.group(1) return node_name + def node_from_map(node_map, name): """Pulls a node def from a dictionary for a given name. @@ -339,6 +332,7 @@ def node_from_map(node_map, name): raise ValueError("No node named '%s' found in map." % name) return node_map[stripped_name] + def values_from_const(node_def): """Extracts the values from a const NodeDef as a numpy ndarray. @@ -352,47 +346,47 @@ def values_from_const(node_def): ValueError: If the node isn't a Const. """ if node_def.op != "Const": - raise ValueError( - "Can not extract constant value from a node that is not Const. Got:\n" - f"{node_def}") + raise ValueError("Can not extract constant value from a node that is not Const. Got:\n" f"{node_def}") input_tensor = node_def.attr["value"].tensor tensor_value = tensor_util.MakeNdarray(input_tensor) return tensor_value + def valid_reshape_inputs(reshape_in0_ndef, reshape_in1_ndef): """Check if the inputs of the Reshape are valid.""" - if reshape_in0_ndef.op != "Const" or reshape_in1_ndef.op != "Const" \ - or get_const_dim_count(reshape_in0_ndef) != 1: + if reshape_in0_ndef.op != "Const" or reshape_in1_ndef.op != "Const" or get_const_dim_count(reshape_in0_ndef) != 1: return False input0_vec_size = values_from_const(reshape_in0_ndef).shape[0] const_value = values_from_const(reshape_in1_ndef) shape_ndims = const_value.ndim if shape_ndims != 1: - raise ValueError("Num of dims of the shape must be 1, got {}.".format( - shape_ndims)) + raise ValueError("Num of dims of the shape must be 1, got {}.".format(shape_ndims)) for value in const_value.tolist()[:-1]: if value != 1: return False - if (const_value.tolist()[-1] != input0_vec_size): + if const_value.tolist()[-1] != input0_vec_size: return False return True + def bypass_reshape(input_node_map, input_name): """Get Reshape input nodes.""" reshape_ndef = None maybe_reshape_ndef = node_from_map(input_node_map, input_name) input_ndef = maybe_reshape_ndef if maybe_reshape_ndef.op == "Reshape": - reshpae_input0_ndef = node_from_map(input_node_map, - maybe_reshape_ndef.input[0]) - reshpae_input1_ndef = node_from_map(input_node_map, - maybe_reshape_ndef.input[1]) - if reshpae_input0_ndef.op == "Const" and reshpae_input1_ndef.op == "Const" \ - and valid_reshape_inputs(reshpae_input0_ndef, reshpae_input1_ndef): + reshpae_input0_ndef = node_from_map(input_node_map, maybe_reshape_ndef.input[0]) + reshpae_input1_ndef = node_from_map(input_node_map, maybe_reshape_ndef.input[1]) + if ( + reshpae_input0_ndef.op == "Const" + and reshpae_input1_ndef.op == "Const" + and valid_reshape_inputs(reshpae_input0_ndef, reshpae_input1_ndef) + ): input_ndef = reshpae_input0_ndef reshape_ndef = maybe_reshape_ndef return input_ndef, reshape_ndef + def 
get_const_dim_count(node_def): """Get the number of dimensions for a Const node. diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_decomposed_in.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_decomposed_in.py index 5ef58a88d2e..ff30d9fd269 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_decomposed_in.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_decomposed_in.py @@ -18,16 +18,15 @@ import re -from tensorflow.core.framework import attr_value_pb2 -from tensorflow.core.framework import graph_pb2 -from tensorflow.core.framework import node_def_pb2 -from tensorflow.python.framework import tensor_util -from tensorflow.python.framework import dtypes +from tensorflow.core.framework import attr_value_pb2, graph_pb2, node_def_pb2 +from tensorflow.python.framework import dtypes, tensor_util from tensorflow.python.platform import tf_logging + from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper as helper from neural_compressor.utils.utility import dump_elapsed_time -class FuseDecomposedINOptimizer(): # pragma: no cover + +class FuseDecomposedINOptimizer: # pragma: no cover """Fuse decomposed small ops into InstanceNorm.""" def __init__(self, input_graph_def): @@ -125,8 +124,7 @@ def do_transformation(self): # Mul1 (Rsqrt, Constant_gamma) if scale_op.op == "Mul": rsqrt_op = node_from_map(input_node_map, scale_op.input[0]) - gamma_op, gamma_reshape_op = bypass_reshape(input_node_map, - scale_op.input[1]) + gamma_op, gamma_reshape_op = bypass_reshape(input_node_map, scale_op.input[1]) if rsqrt_op.op != "Rsqrt": continue if gamma_op.op != "Const": @@ -135,8 +133,7 @@ def do_transformation(self): continue # Sub (Constant_beta, Mul2) - beta_op, beta_reshape_op = bypass_reshape(input_node_map, - bias_mean_sub_op.input[0]) + beta_op, beta_reshape_op = bypass_reshape(input_node_map, bias_mean_sub_op.input[0]) mean_scale_mul_op = node_from_map(input_node_map, bias_mean_sub_op.input[1]) if mean_scale_mul_op.op != "Mul": continue @@ -144,12 +141,10 @@ def do_transformation(self): continue # Common scale applies to both input and running mean - if scale_op != node_from_map(input_node_map, - mean_scale_mul_op.input[1]): + if scale_op != node_from_map(input_node_map, mean_scale_mul_op.input[1]): continue - mean_op, mean_reshape_op = bypass_reshape(input_node_map, - mean_scale_mul_op.input[0]) + mean_op, mean_reshape_op = bypass_reshape(input_node_map, mean_scale_mul_op.input[0]) if mean_op.op != "Mean": continue @@ -158,10 +153,8 @@ def do_transformation(self): if variance_epsilon_add_op.op != "Add" and variance_epsilon_add_op.op != "AddV2": continue - variance_op, variance_reshape_op = bypass_reshape( - input_node_map, variance_epsilon_add_op.input[0]) - epsilon_op = node_from_map(input_node_map, - variance_epsilon_add_op.input[1]) + variance_op, variance_reshape_op = bypass_reshape(input_node_map, variance_epsilon_add_op.input[0]) + epsilon_op = node_from_map(input_node_map, variance_epsilon_add_op.input[1]) if epsilon_op.op != "Const": continue if variance_op.op != "Mean": @@ -172,9 +165,9 @@ def do_transformation(self): # Mean (SquaredDifference, Constant_r_indices0) squared_diff_op, squared_reshape_op = bypass_reshape(input_node_map, variance_op.input[0]) r_indices0_op = node_from_map(input_node_map, variance_op.input[1]) - if squared_diff_op.op != 'SquaredDifference': + if squared_diff_op.op != "SquaredDifference": continue - if r_indices0_op.op != 'Const': + if 
r_indices0_op.op != "Const": continue if input_data_op != node_from_map(input_node_map, squared_diff_op.input[0]): @@ -187,12 +180,16 @@ def do_transformation(self): continue r_indices1_op = node_from_map(input_node_map, mean_op.input[1]) - if r_indices1_op.op != 'Const': + if r_indices1_op.op != "Const": continue r_indices1 = values_from_const(r_indices1_op) - if r_indices1.tolist() != [1, 2] and r_indices1.tolist() != [2, 3] and \ - r_indices1.tolist() != [1, 2, 3] and r_indices1.tolist() != [2, 3, 4]: + if ( + r_indices1.tolist() != [1, 2] + and r_indices1.tolist() != [2, 3] + and r_indices1.tolist() != [1, 2, 3] + and r_indices1.tolist() != [2, 3, 4] + ): continue nodes_to_skip[node.name] = True @@ -226,33 +223,34 @@ def do_transformation(self): gamma_op.attr["dtype"].CopyFrom(beta_op.attr["dtype"]) beta_value = values_from_const(beta_op) gamma_op.attr["value"].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - 1, beta_value.dtype.type, beta_value.shape, - allow_broadcast=True))) + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + 1, beta_value.dtype.type, beta_value.shape, allow_broadcast=True + ) + ) + ) new_ops.append(gamma_op) new_fused_instancenorm_op = node_def_pb2.NodeDef() new_fused_instancenorm_op.op = "_MklFusedInstanceNorm" new_fused_instancenorm_op.name = node.name new_fused_instancenorm_op.attr["T"].CopyFrom(node.attr["T"]) - new_fused_instancenorm_op.attr["epsilon"].CopyFrom( - attr_value_pb2.AttrValue(f=epsilon.tolist())) + new_fused_instancenorm_op.attr["epsilon"].CopyFrom(attr_value_pb2.AttrValue(f=epsilon.tolist())) list_value = attr_value_pb2.AttrValue.ListValue(i=r_indices1.flatten()) - new_fused_instancenorm_op.attr["reduction_axes"].CopyFrom( - attr_value_pb2.AttrValue(list=list_value)) + new_fused_instancenorm_op.attr["reduction_axes"].CopyFrom(attr_value_pb2.AttrValue(list=list_value)) # Mean and variance values will be computed at runtime for fp32 & bf16 input. # Pass a "dummy" node for mean and variance. mean_variance_dim = tensor_util.MakeNdarray(gamma_op.attr["value"].tensor).shape[-1] - dummy_mean_node = \ - helper.create_constant_node(node.name + '_dummy_mean', - [0.]*mean_variance_dim, dtypes.float32) - dummy_variance_node = \ - helper.create_constant_node(node.name + '_dummy_variance', - [1.]*mean_variance_dim, dtypes.float32) - new_fused_instancenorm_op.input.extend([input_data_op.name, gamma_op.name, - beta_op.name, dummy_mean_node.name, - dummy_variance_node.name]) + dummy_mean_node = helper.create_constant_node( + node.name + "_dummy_mean", [0.0] * mean_variance_dim, dtypes.float32 + ) + dummy_variance_node = helper.create_constant_node( + node.name + "_dummy_variance", [1.0] * mean_variance_dim, dtypes.float32 + ) + new_fused_instancenorm_op.input.extend( + [input_data_op.name, gamma_op.name, beta_op.name, dummy_mean_node.name, dummy_variance_node.name] + ) new_ops.append(dummy_mean_node) new_ops.append(dummy_variance_node) new_ops.append(new_fused_instancenorm_op) @@ -274,6 +272,7 @@ def do_transformation(self): result_graph_def.versions.CopyFrom(self.input_graph_def.versions) return result_graph_def + def node_name_from_input(node_name): """Strips off ports and other decorations to get the underlying node name.""" if node_name.startswith("^"): @@ -283,6 +282,7 @@ def node_name_from_input(node_name): node_name = m.group(1) return node_name + def node_from_map(node_map, name): """Pulls a node def from a dictionary for a given name. 
@@ -301,6 +301,7 @@ def node_from_map(node_map, name): raise ValueError("No node named '%s' found in map." % name) return node_map[stripped_name] + def values_from_const(node_def): """Extracts the values from a const NodeDef as a numpy ndarray. @@ -314,47 +315,47 @@ def values_from_const(node_def): ValueError: If the node isn't a Const. """ if node_def.op != "Const": - raise ValueError( - "Can not extract constant value from a node that is not Const. Got:\n" - f"{node_def}") + raise ValueError("Can not extract constant value from a node that is not Const. Got:\n" f"{node_def}") input_tensor = node_def.attr["value"].tensor tensor_value = tensor_util.MakeNdarray(input_tensor) return tensor_value + def valid_reshape_inputs(reshape_in0_ndef, reshape_in1_ndef): """Check if the inputs of the Reshape are valid.""" - if reshape_in0_ndef.op != "Const" or reshape_in1_ndef.op != "Const" \ - or get_const_dim_count(reshape_in0_ndef) != 1: + if reshape_in0_ndef.op != "Const" or reshape_in1_ndef.op != "Const" or get_const_dim_count(reshape_in0_ndef) != 1: return False input0_vec_size = values_from_const(reshape_in0_ndef).shape[0] const_value = values_from_const(reshape_in1_ndef) shape_ndims = const_value.ndim if shape_ndims != 1: - raise ValueError("Num of dims of the shape must be 1, got {}.".format( - shape_ndims)) + raise ValueError("Num of dims of the shape must be 1, got {}.".format(shape_ndims)) for value in const_value.tolist()[:-1]: if value != 1: return False - if (const_value.tolist()[-1] != input0_vec_size): + if const_value.tolist()[-1] != input0_vec_size: return False return True + def bypass_reshape(input_node_map, input_name): """Get Reshape input nodes.""" reshape_ndef = None maybe_reshape_ndef = node_from_map(input_node_map, input_name) input_ndef = maybe_reshape_ndef if maybe_reshape_ndef.op == "Reshape": - reshpae_input0_ndef = node_from_map(input_node_map, - maybe_reshape_ndef.input[0]) - reshpae_input1_ndef = node_from_map(input_node_map, - maybe_reshape_ndef.input[1]) - if reshpae_input0_ndef.op == "Const" and reshpae_input1_ndef.op == "Const" \ - and valid_reshape_inputs(reshpae_input0_ndef, reshpae_input1_ndef): + reshpae_input0_ndef = node_from_map(input_node_map, maybe_reshape_ndef.input[0]) + reshpae_input1_ndef = node_from_map(input_node_map, maybe_reshape_ndef.input[1]) + if ( + reshpae_input0_ndef.op == "Const" + and reshpae_input1_ndef.op == "Const" + and valid_reshape_inputs(reshpae_input0_ndef, reshpae_input1_ndef) + ): input_ndef = reshpae_input0_ndef reshape_ndef = maybe_reshape_ndef return input_ndef, reshape_ndef + def get_const_dim_count(node_def): """Get the number of dimensions for a Const node. 
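The algebraic identity that both FuseDecomposedBNOptimizer and FuseDecomposedINOptimizer rely on, written out as a small NumPy check; the shapes and epsilon below are made up for illustration. The decomposed subgraph reduces to x * scale + offset with scale and offset folded from the constants, which is exactly the expression a single fused normalization op computes.

import numpy as np

x = np.random.rand(2, 4, 4, 8).astype(np.float32)            # dummy NHWC input
gamma, beta = np.random.rand(8), np.random.rand(8)
mean, var = np.random.rand(8), np.random.rand(8) + 1.0
eps = 1e-3

# What the Rsqrt/Mul/Sub/Add small ops compute once their Const inputs are folded.
scale = (1.0 / np.sqrt(var + eps)) * gamma                    # Rsqrt -> Mul(gamma)
offset = beta - mean * scale                                  # Sub(beta, Mul(mean, scale))
decomposed = x * scale + offset

# The normalization expression the fused op implements.
fused = gamma * (x - mean) / np.sqrt(var + eps) + beta
assert np.allclose(decomposed, fused, atol=1e-5)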
diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_gelu.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_gelu.py index ea13e9bc3ec..5fc0a239962 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_gelu.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_gelu.py @@ -17,35 +17,38 @@ """Fuse small ops to Gelu Graph Rewriter.""" import tensorflow as tf -from ..graph_base import GraphRewriterBase +from tensorflow.python.framework import dtypes + from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper -from tensorflow.python.framework import dtypes from neural_compressor.adaptor.tf_utils.util import TF_SPR_BASE_VERSIONS +from ..graph_base import GraphRewriterBase + -class FuseGeluOptimizer(GraphRewriterBase): # pragma: no cover +class FuseGeluOptimizer(GraphRewriterBase): # pragma: no cover """Fuse Sqrt + RealDiv + Erf + AddV2 + Mul + Mul into Gelu op.""" def do_transformation(self): """Execute the fusion from small ops to Gelu.""" - if not (tf.version.VERSION in ('1.15.0-up2','1.15.0-up3') or \ - tf.version.VERSION in TF_SPR_BASE_VERSIONS): + if not (tf.version.VERSION in ("1.15.0-up2", "1.15.0-up3") or tf.version.VERSION in TF_SPR_BASE_VERSIONS): return self.model cur_graph = GraphAnalyzer() cur_graph.graph = self.model graph_info = cur_graph.parse_graph() - #Below code is relative to expression on + # Below code is relative to expression on # https://github.com/IntelAI/models/blob/master/models/language_modeling/tensorflow/ # bert_large/inference/generic_ops.py#L105 target_nodes = cur_graph.query_fusion_pattern_nodes( - [["Pow"], ["Mul"], ["AddV2"], ["Mul"], ["Tanh"], ["AddV2"], ["Mul"], ['Mul']]) + [["Pow"], ["Mul"], ["AddV2"], ["Mul"], ["Tanh"], ["AddV2"], ["Mul"], ["Mul"]] + ) if not target_nodes: target_nodes = cur_graph.query_fusion_pattern_nodes( - [["Pow"], ["Mul"], ["AddV2"], ["Mul"], ["Tanh"], ["AddV2"], ["Mul"]]) + [["Pow"], ["Mul"], ["AddV2"], ["Mul"], ["Tanh"], ["AddV2"], ["Mul"]] + ) for node_combination in target_nodes: match_node_length = len(node_combination) @@ -66,12 +69,12 @@ def do_transformation(self): for i in pow_node.input: node_name = Helper.node_name_from_input(i) - if graph_info[node_name].node.op != 'Const': + if graph_info[node_name].node.op != "Const": gelu_input_name = i - if graph_info[node_name].node.op == 'Const': + if graph_info[node_name].node.op == "Const": pow_const_node_name = i - pow_value = graph_info[node_name].node.attr['value'].tensor.float_val[0] + pow_value = graph_info[node_name].node.attr["value"].tensor.float_val[0] break if pow_value != 3: @@ -80,9 +83,9 @@ def do_transformation(self): mul_1_const_node_name = None for i in mul_1_node.input: i = Helper.node_name_from_input(i) - if i != pow_node.name and graph_info[i].node.op == 'Const': + if i != pow_node.name and graph_info[i].node.op == "Const": mul_1_const_node_name = i - mul_1_value = graph_info[i].node.attr['value'].tensor.float_val[0] + mul_1_value = graph_info[i].node.attr["value"].tensor.float_val[0] break if mul_1_value != 0.044714998453855515: continue @@ -91,9 +94,9 @@ def do_transformation(self): mul_2_const_node_name = None for i in mul_2_node.input: i = Helper.node_name_from_input(i) - if i != addv2_1_node.name and graph_info[i].node.op == 'Const': + if i != addv2_1_node.name and graph_info[i].node.op == "Const": mul_2_const_node_name = i - mul_2_value = 
graph_info[i].node.attr['value'].tensor.float_val[0] + mul_2_value = graph_info[i].node.attr["value"].tensor.float_val[0] break if mul_2_value != 0.7978845834732056: continue @@ -102,9 +105,9 @@ def do_transformation(self): addv2_2_const_node_name = None for i in addv2_2_node.input: i = Helper.node_name_from_input(i) - if i != tanh_node.name and graph_info[i].node.op == 'Const': + if i != tanh_node.name and graph_info[i].node.op == "Const": addv2_2_const_node_name = i - addv2_2_value = graph_info[i].node.attr['value'].tensor.float_val[0] + addv2_2_value = graph_info[i].node.attr["value"].tensor.float_val[0] break if addv2_2_value != 1: continue @@ -117,7 +120,7 @@ def do_transformation(self): rest_mul_node = graph_info[i].node break - if not rest_mul_node or rest_mul_node.op != 'Mul': + if not rest_mul_node or rest_mul_node.op != "Mul": continue else: rest_mul_node = graph_info[node_combination[6]].node @@ -126,8 +129,7 @@ def do_transformation(self): rest_mul_const_node_name = None for i in rest_mul_node.input: i = Helper.node_name_from_input(i) - if graph_info[i].node.op == 'Const' and \ - graph_info[i].node.attr['value'].tensor.float_val[0] == 0.5: + if graph_info[i].node.op == "Const" and graph_info[i].node.attr["value"].tensor.float_val[0] == 0.5: rest_mul_const_node_name = i rest_mul_valid = True break @@ -157,10 +159,10 @@ def do_transformation(self): cur_graph.add_node(gelu_node, gelu_input_name, original_last) target_nodes = cur_graph.query_fusion_pattern_nodes( - [["Sqrt"], ["RealDiv"], ["Erf"], ["AddV2"], ["Mul"], ["Mul"]]) + [["Sqrt"], ["RealDiv"], ["Erf"], ["AddV2"], ["Mul"], ["Mul"]] + ) for node_combination in target_nodes: - sqrt_node = graph_info[node_combination[0]].node realdiv_node = graph_info[node_combination[1]].node erf_node = graph_info[node_combination[2]].node @@ -169,7 +171,7 @@ def do_transformation(self): mul2_node = graph_info[node_combination[5]].node sqrt_input_name = Helper.node_name_from_input(sqrt_node.input[0]) - sqrt_value = graph_info[sqrt_input_name].node.attr['value'].tensor.float_val[0] + sqrt_value = graph_info[sqrt_input_name].node.attr["value"].tensor.float_val[0] if sqrt_value != 2: continue @@ -188,7 +190,7 @@ def do_transformation(self): for i in addv2_node.input: i = Helper.node_name_from_input(i) if i != erf_node.name: - addv2_value = graph_info[i].node.attr['value'].tensor.float_val[0] + addv2_value = graph_info[i].node.attr["value"].tensor.float_val[0] addv2_const_name = i break @@ -199,7 +201,7 @@ def do_transformation(self): for i in mul1_node.input: i = Helper.node_name_from_input(i) if i != addv2_node.name: - mul1_value = graph_info[i].node.attr['value'].tensor.float_val[0] + mul1_value = graph_info[i].node.attr["value"].tensor.float_val[0] mul1_const_name = i break diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_layer_norm.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_layer_norm.py index 62c5feb1b65..7b87fc1b0f3 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_layer_norm.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_layer_norm.py @@ -17,19 +17,20 @@ """Fuse samll ops to LayerNorm Graph Rewriter.""" import re -from tensorflow.core.framework import attr_value_pb2 -from tensorflow.core.framework import graph_pb2 -from tensorflow.core.framework import node_def_pb2 + +from tensorflow.core.framework import attr_value_pb2, graph_pb2, node_def_pb2 from tensorflow.python.framework import tensor_util from neural_compressor.utils.utility 
import dump_elapsed_time -class FuseLayerNormOptimizer(): # pragma: no cover + +class FuseLayerNormOptimizer: # pragma: no cover """Remap smaller ops into fused LayerNorm. Current fusion is only for the case, when LayerNormalization uses FusedBatcNormV3. And further restrict it to only 2D or 3D tensor inputs to keras LayerNormalization api. """ + def __init__(self, input_graph_def): """Constructor.""" self.input_graph_def = input_graph_def @@ -123,21 +124,21 @@ def do_transformation(self): # FusedBatchNormV3(Reshape, Fill, Fill, Mean, Variance) pre_reshape_op = node_from_map(input_node_map, fused_batch_norm_op.input[0]) - if pre_reshape_op.op != 'Reshape': + if pre_reshape_op.op != "Reshape": continue fill_scale_op = node_from_map(input_node_map, fused_batch_norm_op.input[1]) - if fill_scale_op.op != 'Fill': + if fill_scale_op.op != "Fill": continue fill_offset_op = node_from_map(input_node_map, fused_batch_norm_op.input[2]) - if fill_offset_op.op != 'Fill': + if fill_offset_op.op != "Fill": continue # FusedBatchNorm node should have mean/variance as empty constant mean_op = node_from_map(input_node_map, fused_batch_norm_op.input[3]) - if mean_op.op != 'Const': + if mean_op.op != "Const": continue variance_op = node_from_map(input_node_map, fused_batch_norm_op.input[4]) - if variance_op.op != 'Const': + if variance_op.op != "Const": continue mean_value = values_from_const(mean_op) if mean_value.any(): @@ -153,13 +154,13 @@ def do_transformation(self): # Fill Scale(*dims_fill_scale, unit_gamma) dims_fill_scale_op = node_from_map(input_node_map, fill_scale_op.input[0]) unit_gamma_op = node_from_map(input_node_map, fill_scale_op.input[1]) - if unit_gamma_op.op != 'Const': + if unit_gamma_op.op != "Const": continue # Fill Offset(*dims_fill_scale, unit_gamma) dims_fill_offset_op = node_from_map(input_node_map, fill_offset_op.input[0]) zero_beta_op = node_from_map(input_node_map, fill_offset_op.input[1]) - if zero_beta_op.op != 'Const': + if zero_beta_op.op != "Const": continue nodes_to_skip[node.name] = True @@ -169,13 +170,11 @@ def do_transformation(self): nodes_to_skip[fill_scale_op.name] = True nodes_to_skip[fill_offset_op.name] = True - new_fused_layernorm_op = node_def_pb2.NodeDef() new_fused_layernorm_op.op = "_MklLayerNorm" new_fused_layernorm_op.name = node.name new_fused_layernorm_op.attr["T"].CopyFrom(node.attr["T"]) - new_fused_layernorm_op.input.extend([input_op.name, gamma_op.name, - beta_op.name]) + new_fused_layernorm_op.input.extend([input_op.name, gamma_op.name, beta_op.name]) new_ops.append(new_fused_layernorm_op) @@ -196,6 +195,7 @@ def do_transformation(self): result_graph_def.versions.CopyFrom(self.input_graph_def.versions) return result_graph_def + def node_name_from_input(node_name): # pragma: no cover """Strips off ports and other decorations to get the underlying node name.""" if node_name.startswith("^"): @@ -205,6 +205,7 @@ def node_name_from_input(node_name): # pragma: no cover node_name = m.group(1) return node_name + def node_from_map(node_map, name): # pragma: no cover """Pulls a node def from a dictionary for a given name. @@ -223,6 +224,7 @@ def node_from_map(node_map, name): # pragma: no cover raise ValueError("No node named '%s' found in map." % name) return node_map[stripped_name] + def values_from_const(node_def): # pragma: no cover """Extracts the values from a const NodeDef as a numpy ndarray. @@ -236,9 +238,7 @@ def values_from_const(node_def): # pragma: no cover ValueError: If the node isn't a Const. 
""" if node_def.op != "Const": - raise ValueError( - "Can not extract constant value from a node that is not Const. Got:\n" - f"{node_def}") + raise ValueError("Can not extract constant value from a node that is not Const. Got:\n" f"{node_def}") input_tensor = node_def.attr["value"].tensor tensor_value = tensor_util.MakeNdarray(input_tensor) return tensor_value diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_pad_with_conv.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_pad_with_conv.py index 77261de574c..9ce35348787 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_pad_with_conv.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_pad_with_conv.py @@ -17,14 +17,15 @@ """Fuse Pad into Conv Graph Rewriter.""" import tensorflow as tf - from tensorflow.python.framework import tensor_util -from ..graph_base import GraphRewriterBase from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper from neural_compressor.adaptor.tf_utils.util import version1_gt_version2 +from ..graph_base import GraphRewriterBase + + class FusePadWithConv2DOptimizer(GraphRewriterBase): """Fuse Pad op into Conv2D/DepthwiseConv2dNative/Conv3D.""" @@ -45,7 +46,8 @@ def do_transformation(self): graph_info = cur_graph.parse_graph() target_nodes = cur_graph.query_fusion_pattern_nodes( - [["Pad"], ["Conv2D", "Conv3D", "DepthwiseConv2dNative"], ('BiasAdd', 'Add', 'AddV2')]) + [["Pad"], ["Conv2D", "Conv3D", "DepthwiseConv2dNative"], ("BiasAdd", "Add", "AddV2")] + ) padding_tensor_dict = {} for node_combination in target_nodes: @@ -61,11 +63,11 @@ def do_transformation(self): # Line 55 to line 65 should be removed once the TFDO enabling the single quantized # conv2D supporting. if len(pattern) == 2: - #TODO we need to enable single quantizedconv2d with s8 input. + # TODO we need to enable single quantizedconv2d with s8 input. if not is_perchannel and not cur_graph.has_positive_input(conv_name): continue - # TFDO has the limitation that the single QuantizedConv2DPerchannel doesn't - # support padding_list filed. + # TFDO has the limitation that the single QuantizedConv2DPerchannel doesn't + # support padding_list filed. 
if is_perchannel: continue @@ -76,35 +78,36 @@ def do_transformation(self): pad_node = None if node_combination[0] not in padding_tensor_dict: pad_node = graph_info[node_combination[0]].node - if graph_info[pad_node.input[1]].node.op != 'Const': + if graph_info[pad_node.input[1]].node.op != "Const": input_node = graph_info[pad_node.input[1]].node - if input_node.op == 'DataFormatVecPermute': + if input_node.op == "DataFormatVecPermute": parent_input_node = graph_info[input_node.input[0]].node - if parent_input_node.op == 'Const': - padding_tensor = tensor_util.MakeNdarray( \ - parent_input_node.attr["value"].tensor).flatten() + if parent_input_node.op == "Const": + padding_tensor = tensor_util.MakeNdarray(parent_input_node.attr["value"].tensor).flatten() else: continue else: continue else: padding_tensor = tensor_util.MakeNdarray( - graph_info[pad_node.input[1]].node.attr["value"].tensor).flatten() + graph_info[pad_node.input[1]].node.attr["value"].tensor + ).flatten() padding_tensor_dict[node_combination[0]] = padding_tensor else: padding_tensor = padding_tensor_dict[node_combination[0]] if self.itex_qdq_mode: - enabled_pad_conv2d = bool(tf.version.VERSION == '1.15.0-up3' or \ - version1_gt_version2(tf.version.VERSION, '2.7')) + enabled_pad_conv2d = bool( + tf.version.VERSION == "1.15.0-up3" or version1_gt_version2(tf.version.VERSION, "2.7") + ) else: - enabled_pad_conv2d = bool(tf.version.VERSION == '1.15.0-up3' or self.new_api) + enabled_pad_conv2d = bool(tf.version.VERSION == "1.15.0-up3" or self.new_api) - if any(padding_tensor) and not enabled_pad_conv2d: # pragma: no cover + if any(padding_tensor) and not enabled_pad_conv2d: # pragma: no cover continue if pad_node: - if graph_info[pad_node.input[1]].node.op != 'Const': + if graph_info[pad_node.input[1]].node.op != "Const": cur_graph.node_name_details[pad_node.name].node.input.remove(pad_node.input[1]) cur_graph.remove_node_with_single_input_output(pad_node.name) else: @@ -112,12 +115,12 @@ def do_transformation(self): cur_graph.remove_node(pad_node.input[1]) conv_node = graph_info[node_combination[1]].node if self.itex_qdq_mode: - if any(padding_tensor) and enabled_pad_conv2d: # pragma: no cover - Helper.set_attr_string(conv_node, 'padding', b'EXPLICIT') + if any(padding_tensor) and enabled_pad_conv2d: # pragma: no cover + Helper.set_attr_string(conv_node, "padding", b"EXPLICIT") Helper.set_attr_int_list(conv_node, "explicit_paddings", padding_tensor) else: Helper.set_attr_int_list(conv_node, "padding_list", padding_tensor) - if any(padding_tensor) and enabled_pad_conv2d: # pragma: no cover - Helper.set_attr_string(conv_node, 'padding', b'EXPLICIT') + if any(padding_tensor) and enabled_pad_conv2d: # pragma: no cover + Helper.set_attr_string(conv_node, "padding", b"EXPLICIT") return cur_graph.dump_graph() diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_pad_with_fp32_conv.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_pad_with_fp32_conv.py index 5995cca65e8..bf086bb9de5 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_pad_with_fp32_conv.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_pad_with_fp32_conv.py @@ -17,14 +17,14 @@ """Fuse Pad into Conv Graph Rewriter.""" import tensorflow as tf - from tensorflow.python.framework import tensor_util -from ..graph_base import GraphRewriterBase from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as 
Helper from neural_compressor.adaptor.tf_utils.util import version1_gt_version2 +from ..graph_base import GraphRewriterBase + class FusePadWithFP32Conv2DOptimizer(GraphRewriterBase): """Fuse Pad op into Conv.""" @@ -46,7 +46,8 @@ def do_transformation(self): graph_info = cur_graph.parse_graph() target_nodes = cur_graph.query_fusion_pattern_nodes( - [["Pad"], ["Conv2D", "DepthwiseConv2dNative"], ('BiasAdd', 'Add', 'AddV2')]) + [["Pad"], ["Conv2D", "DepthwiseConv2dNative"], ("BiasAdd", "Add", "AddV2")] + ) padding_tensor_dict = {} for node_combination in target_nodes: @@ -62,11 +63,11 @@ def do_transformation(self): # Line 55 to line 65 should be removed once the TFDO enabling the single quantized # conv2D supporting. if len(pattern) == 2: - #TODO we need to enable single quantizedconv2d with s8 input. + # TODO we need to enable single quantizedconv2d with s8 input. if not is_perchannel and not cur_graph.has_positive_input(conv_name): continue - # TFDO has the limitation that the single QuantizedConv2DPerchannel doesn't - # support padding_list filed. + # TFDO has the limitation that the single QuantizedConv2DPerchannel doesn't + # support padding_list filed. if is_perchannel: continue @@ -77,35 +78,36 @@ def do_transformation(self): pad_node = None if node_combination[0] not in padding_tensor_dict: pad_node = graph_info[node_combination[0]].node - if graph_info[pad_node.input[1]].node.op != 'Const': + if graph_info[pad_node.input[1]].node.op != "Const": input_node = graph_info[pad_node.input[1]].node - if input_node.op == 'DataFormatVecPermute': + if input_node.op == "DataFormatVecPermute": parent_input_node = graph_info[input_node.input[0]].node - if parent_input_node.op == 'Const': - padding_tensor = tensor_util.MakeNdarray( \ - parent_input_node.attr["value"].tensor).flatten() + if parent_input_node.op == "Const": + padding_tensor = tensor_util.MakeNdarray(parent_input_node.attr["value"].tensor).flatten() else: continue else: continue else: padding_tensor = tensor_util.MakeNdarray( - graph_info[pad_node.input[1]].node.attr["value"].tensor).flatten() + graph_info[pad_node.input[1]].node.attr["value"].tensor + ).flatten() padding_tensor_dict[node_combination[0]] = padding_tensor else: padding_tensor = padding_tensor_dict[node_combination[0]] if self.itex_qdq_mode: - enabled_pad_conv2d = bool(tf.version.VERSION == '1.15.0-up3' or \ - version1_gt_version2(tf.version.VERSION, '2.7')) + enabled_pad_conv2d = bool( + tf.version.VERSION == "1.15.0-up3" or version1_gt_version2(tf.version.VERSION, "2.7") + ) else: - enabled_pad_conv2d = bool(tf.version.VERSION == '1.15.0-up3' or self.new_api) + enabled_pad_conv2d = bool(tf.version.VERSION == "1.15.0-up3" or self.new_api) - if any(padding_tensor) and not enabled_pad_conv2d: # pragma: no cover + if any(padding_tensor) and not enabled_pad_conv2d: # pragma: no cover continue if pad_node: - if graph_info[pad_node.input[1]].node.op != 'Const': + if graph_info[pad_node.input[1]].node.op != "Const": cur_graph.node_name_details[pad_node.name].node.input.remove(pad_node.input[1]) cur_graph.remove_node_with_single_input_output(pad_node.name) else: @@ -116,12 +118,12 @@ def do_transformation(self): # only when padding attr is explicit, the explicit_paddings is not empty if self.itex_qdq_mode: - if any(padding_tensor) and enabled_pad_conv2d: # pragma: no cover - Helper.set_attr_string(conv_node, 'padding', b'EXPLICIT') + if any(padding_tensor) and enabled_pad_conv2d: # pragma: no cover + Helper.set_attr_string(conv_node, "padding", b"EXPLICIT") 
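Why folding the Pad node into the convolution (as the two Pad-fusion rewriters here do) preserves the result, sketched with TensorFlow eager ops. The input, kernel, and padding values are made up for illustration, and this assumes a TF 2.x runtime where tf.nn.conv2d accepts an explicit padding list.

import numpy as np
import tensorflow as tf

x = tf.constant(np.random.rand(1, 5, 5, 3), dtype=tf.float32)
w = tf.constant(np.random.rand(3, 3, 3, 4), dtype=tf.float32)
paddings = [[0, 0], [1, 1], [1, 1], [0, 0]]                   # NHWC: pad height and width by 1

# Pad followed by a VALID convolution ...
padded_then_conv = tf.nn.conv2d(tf.pad(x, paddings), w, strides=1, padding="VALID")
# ... matches a single convolution carrying the same explicit zero padding.
explicit_conv = tf.nn.conv2d(x, w, strides=1, padding=paddings)
assert np.allclose(padded_then_conv.numpy(), explicit_conv.numpy(), atol=1e-5)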
Helper.set_attr_int_list(conv_node, "explicit_paddings", padding_tensor) else: - if any(padding_tensor) and enabled_pad_conv2d: # pragma: no cover - Helper.set_attr_string(conv_node, 'padding', b'EXPLICIT') + if any(padding_tensor) and enabled_pad_conv2d: # pragma: no cover + Helper.set_attr_string(conv_node, "padding", b"EXPLICIT") Helper.set_attr_int_list(conv_node, "explicit_paddings", padding_tensor) return cur_graph.dump_graph() diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_reshape_transpose.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_reshape_transpose.py index 2e45d8cdb65..cef2ae7407f 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_reshape_transpose.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_reshape_transpose.py @@ -16,13 +16,13 @@ # limitations under the License. """Fuse Transpose and Reshape Graph Rewriter.""" -from tensorflow.python.framework import tensor_util -from tensorflow.python.framework import dtypes -from neural_compressor.utils.utility import dump_elapsed_time +from tensorflow.python.framework import dtypes, tensor_util -from ..graph_base import GraphRewriterBase from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper +from neural_compressor.utils.utility import dump_elapsed_time + +from ..graph_base import GraphRewriterBase class FuseTransposeReshapeOptimizer(GraphRewriterBase): @@ -35,8 +35,11 @@ def do_transformation(self): g.graph = self.model graph_info = g.parse_graph() - patterns = [['Transpose'], ['Reshape'], - ['MatMul', 'DepthwiseConv2dNative', 'Conv2D', 'BatchMatMul', 'BatchMatMulV2']] + patterns = [ + ["Transpose"], + ["Reshape"], + ["MatMul", "DepthwiseConv2dNative", "Conv2D", "BatchMatMul", "BatchMatMulV2"], + ] matched_nodes = g.query_fusion_pattern_nodes(patterns) @@ -44,13 +47,13 @@ def do_transformation(self): for i in matched_nodes: transpose_input_node_name = graph_info[i[0]].node.input[0] transpose_input_node = graph_info[transpose_input_node_name].node - if transpose_input_node.op == 'Const': + if transpose_input_node.op == "Const": valid_match.append(i) - elif transpose_input_node.op == 'Enter': + elif transpose_input_node.op == "Enter": if transpose_input_node.input: enter_input_node_name = transpose_input_node.input[0] enter_input_node = graph_info[enter_input_node_name].node - if enter_input_node.op == 'Const': + if enter_input_node.op == "Const": valid_match.append(i) else: continue @@ -61,58 +64,57 @@ def do_transformation(self): transpose_input_perm = graph_info[transpose_node.input[1]].node reshape_node = graph_info[i[1]].node reshape_shape_node = graph_info[reshape_node.input[1]].node - if transpose_input_node.op == 'Const': - transpose_input_node_content = tensor_util.MakeNdarray( - transpose_input_node.attr['value'].tensor) - elif transpose_input_node.op == 'Enter': + if transpose_input_node.op == "Const": + transpose_input_node_content = tensor_util.MakeNdarray(transpose_input_node.attr["value"].tensor) + elif transpose_input_node.op == "Enter": enter_input_node_name = transpose_input_node.input[0] enter_input_node = graph_info[enter_input_node_name].node - transpose_input_node_content = tensor_util.MakeNdarray( - enter_input_node.attr['value'].tensor) + transpose_input_node_content = tensor_util.MakeNdarray(enter_input_node.attr["value"].tensor) else: continue - if transpose_input_perm.op == 'Const': - 
transpose_perm_node_content = tensor_util.MakeNdarray( - transpose_input_perm.attr['value'].tensor) - elif transpose_input_perm.op == 'Enter': + if transpose_input_perm.op == "Const": + transpose_perm_node_content = tensor_util.MakeNdarray(transpose_input_perm.attr["value"].tensor) + elif transpose_input_perm.op == "Enter": enter_transpose_input_perm = transpose_input_perm.input[0] enter_transpose_input_perm_node = graph_info[enter_transpose_input_perm].node transpose_perm_node_content = tensor_util.MakeNdarray( - enter_transpose_input_perm_node.attr['value'].tensor) + enter_transpose_input_perm_node.attr["value"].tensor + ) else: continue - if reshape_shape_node.op == 'Const': - reshape_shape_node_content = tensor_util.MakeNdarray( - reshape_shape_node.attr['value'].tensor) - elif reshape_shape_node.op == 'Enter': + if reshape_shape_node.op == "Const": + reshape_shape_node_content = tensor_util.MakeNdarray(reshape_shape_node.attr["value"].tensor) + elif reshape_shape_node.op == "Enter": enter_reshape_shape = reshape_shape_node.input[0] enter_reshape_shape_node = graph_info[enter_reshape_shape].node - reshape_shape_node_content = tensor_util.MakeNdarray( - enter_reshape_shape_node.attr['value'].tensor) + reshape_shape_node_content = tensor_util.MakeNdarray(enter_reshape_shape_node.attr["value"].tensor) else: continue - converted_node = transpose_input_node_content.transpose( - transpose_perm_node_content).reshape(reshape_shape_node_content) + converted_node = transpose_input_node_content.transpose(transpose_perm_node_content).reshape( + reshape_shape_node_content + ) g.remove_node(i[0]) - if transpose_input_node.op == 'Const': + if transpose_input_node.op == "Const": g.remove_node(transpose_input_node.name) g.remove_node(transpose_input_perm.name) - new_node_name = transpose_input_node.name + '_converted' + new_node_name = transpose_input_node.name + "_converted" new_node = Helper.create_constant_node( - new_node_name, converted_node, dtype=dtypes.float32, shape=converted_node.shape) + new_node_name, converted_node, dtype=dtypes.float32, shape=converted_node.shape + ) g.replace_const_node(new_node, [i[2]], i[1]) g.remove_node(i[1]) g.remove_node(reshape_shape_node.name) else: g.remove_node(enter_input_node.name) g.remove_node(transpose_input_perm.name) - new_node_name = enter_input_node.name + '_converted' + new_node_name = enter_input_node.name + "_converted" new_node = Helper.create_constant_node( - new_node_name, converted_node, dtype=dtypes.float32, shape=converted_node.shape) + new_node_name, converted_node, dtype=dtypes.float32, shape=converted_node.shape + ) g.add_node(new_node, [], [transpose_input_node.name]) transpose_input_node.input[0] = new_node.name for index, node_name in enumerate(graph_info[i[2]].node.input): diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/graph_cse_optimizer.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/graph_cse_optimizer.py index ad60e54184e..76cd3cd9004 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/graph_cse_optimizer.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/graph_cse_optimizer.py @@ -17,11 +17,12 @@ """CSE Graph Rewriter.""" from tensorflow.core.framework import graph_pb2 -from neural_compressor.utils.utility import dump_elapsed_time -from ..graph_base import GraphRewriterBase from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper +from 
neural_compressor.utils.utility import dump_elapsed_time + +from ..graph_base import GraphRewriterBase class GraphCseOptimizer(GraphRewriterBase): @@ -67,12 +68,13 @@ class GraphCseOptimizer(GraphRewriterBase): Returns: [graphdef]: A optimized graphdef object. """ + computational_op_type = ("Conv2D", "Conv3D", "DepthwiseConv2dNative", "MatMul") @dump_elapsed_time("Pass GraphCseOptimizer") def do_transformation(self): """Optimize the graph contains multi output nodes. - + If those nodes' type are identical, those nodes should be elimated. Currently, we supported memory bound ops only. @@ -87,10 +89,11 @@ def do_transformation(self): graph_info = GraphAnalyzer().parse_graph() need_to_update_node = [] - #TODO Enhance below code snippet by using recursive method. + # TODO Enhance below code snippet by using recursive method. for _, i in graph_info.items(): candidate_node = [ - graph_info[child_name].node for child_name in i.outputs + graph_info[child_name].node + for child_name in i.outputs if graph_info[child_name].node.op not in self.computational_op_type ] candidate_node_unique_type = set([i.op for i in candidate_node]) @@ -98,7 +101,7 @@ def do_transformation(self): # it means each sub node has their own type. continue node_type_name_mapping = {} - #Created dict which key is op type and value is node has identical op type. + # Created dict which key is op type and value is node has identical op type. for each_node in candidate_node: node_type = each_node.op node_name = each_node.name @@ -111,11 +114,10 @@ def do_transformation(self): # ignore unique op type and node with multi-outputs if len(node_names) == 1 or len(graph_info[node_names[0]].outputs) > 1: continue - #TODO Need to enhance below algorithm before golden. + # TODO Need to enhance below algorithm before golden. filter_node = [node_names[0]] for sub_node_name in node_names[1:]: - if not Helper.compare_node_attr(graph_info[node_names[0]].node, - graph_info[sub_node_name].node): + if not Helper.compare_node_attr(graph_info[node_names[0]].node, graph_info[sub_node_name].node): continue filter_node.append(sub_node_name) @@ -127,14 +129,12 @@ def do_transformation(self): for removeable_node_name in lower_node_name[1:]: graph_info[upper_node_name].outputs.remove(removeable_node_name) for grand_child_node_name in graph_info[removeable_node_name].outputs: - - filter_input_name = [Helper.node_name_from_input( - i) for i in graph_info[grand_child_node_name].node.input] + filter_input_name = [ + Helper.node_name_from_input(i) for i in graph_info[grand_child_node_name].node.input + ] replace_index = filter_input_name.index(removeable_node_name) - graph_info[grand_child_node_name].node.input[ - replace_index] = keep_sub_node_name - graph_info[grand_child_node_name].node.input[ - replace_index] = keep_sub_node_name + graph_info[grand_child_node_name].node.input[replace_index] = keep_sub_node_name + graph_info[grand_child_node_name].node.input[replace_index] = keep_sub_node_name graph_info.pop(removeable_node_name) output_graph_def = graph_pb2.GraphDef() diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/grappler_pass.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/grappler_pass.py index 026e584a061..d371635cfe7 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/grappler_pass.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/grappler_pass.py @@ -16,16 +16,16 @@ # limitations under the License. 
"""Tensorflow Grappler Graph Rewriter.""" +import tensorflow as tf +from tensorflow.core.protobuf import config_pb2, meta_graph_pb2 +from tensorflow.python.grappler import tf_optimizer +from tensorflow.python.training import saver + +from neural_compressor.adaptor.tf_utils.util import version1_gt_version2 from neural_compressor.utils.utility import dump_elapsed_time from ..graph_base import GraphRewriterBase -from neural_compressor.adaptor.tf_utils.util import version1_gt_version2 -from tensorflow.python.training import saver -from tensorflow.core.protobuf import config_pb2 -from tensorflow.python.grappler import tf_optimizer -from tensorflow.core.protobuf import meta_graph_pb2 -import tensorflow as tf class GrapplerOptimizer(GraphRewriterBase): """A python wrapper that leverages the built-in tensorflow grappler API to optimize the graph.""" @@ -35,8 +35,8 @@ def __init__(self, model, input_output_names, opt_cfg): super().__init__(model) self.input_output_names = input_output_names self.opt_cfg = opt_cfg - self.generic_optimizer = ('pruning', 'shape', 'dependency', 'debug_stripper', 'loop') - self.tf_2_optimizer = ('constfold', 'arithmetic', 'min_graph_nodes') + self.generic_optimizer = ("pruning", "shape", "dependency", "debug_stripper", "loop") + self.tf_2_optimizer = ("constfold", "arithmetic", "min_graph_nodes") @dump_elapsed_time("Pass GrapplerOptimizer") def do_transformation(self): @@ -44,9 +44,8 @@ def do_transformation(self): try: g = tf.Graph() with g.as_default(): - g = tf.compat.v1.import_graph_def(self.model, name='') - meta_graph = saver.export_meta_graph( - graph_def=self.model, graph=g, clear_devices=True) + g = tf.compat.v1.import_graph_def(self.model, name="") + meta_graph = saver.export_meta_graph(graph_def=self.model, graph=g, clear_devices=True) fetch_collection = meta_graph_pb2.CollectionDef() for fetch in self.input_output_names: fetch_collection.node_list.value.append(fetch) @@ -57,7 +56,7 @@ def do_transformation(self): if optimizer in self.opt_cfg and self.opt_cfg[optimizer]: rewriter_config.optimizers.append(optimizer) - if version1_gt_version2(tf.version.VERSION,'2.2.0'): + if version1_gt_version2(tf.version.VERSION, "2.2.0"): for optimizer in self.tf_2_optimizer: if optimizer in self.opt_cfg and self.opt_cfg[optimizer]: rewriter_config.optimizers.append(optimizer) diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/insert_print_node.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/insert_print_node.py index e820cbdc084..0e9d11e7227 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/insert_print_node.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/insert_print_node.py @@ -1,4 +1,3 @@ - #!/usr/bin/env python # -*- coding: utf-8 -*- # @@ -21,11 +20,14 @@ from tensorflow.core.framework import attr_value_pb2 from tensorflow.python.framework import dtypes from tensorflow.python.framework import tensor_util as tu -from ..graph_base import GraphRewriterBase + from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper from neural_compressor.adaptor.tf_utils.util import version1_gt_version2 +from ..graph_base import GraphRewriterBase + + class InsertPrintMinMaxNode(GraphRewriterBase): """InsertPrintMinMaxNode Pass for tensorflow sampling.""" @@ -45,34 +47,34 @@ def do_transformation(self): graph_info = cur_graph.parse_graph() insert_node_pairs = [] top_node = graph_info[self.pre_node_name].node - if 
top_node.op == 'ConcatV2': - for i in range(top_node.attr['N'].i): + if top_node.op == "ConcatV2": + for i in range(top_node.attr["N"].i): insert_node_pairs.append([top_node.input[i], self.post_node_name]) - elif top_node.op in ('BatchMatMul', 'BatchMatMulV2'): + elif top_node.op in ("BatchMatMul", "BatchMatMulV2"): insert_node_pairs.append([top_node.input[0], self.post_node_name]) - if graph_info[top_node.input[1]].node.op != 'Const': + if graph_info[top_node.input[1]].node.op != "Const": insert_node_pairs.append([top_node.input[1], self.post_node_name]) - elif top_node.op in ('Conv2DBackpropInput', 'Conv3DBackpropInputV2'): + elif top_node.op in ("Conv2DBackpropInput", "Conv3DBackpropInputV2"): insert_node_pairs.append([top_node.input[2], self.post_node_name]) else: refresh_pre_node_name = graph_info[self.pre_node_name].node.input[0] # Check the Conv2D could be fused with previous Pad or not. # If so, we need to update the pre-node name correspondingly. refresh_pre_node = graph_info[Helper.node_name_from_input(refresh_pre_node_name)].node - if refresh_pre_node.op == 'Pad' and top_node.op in ('Conv2D', 'Conv3D'): + if refresh_pre_node.op == "Pad" and top_node.op in ("Conv2D", "Conv3D"): pad_const_node_name = refresh_pre_node.input[1] pad_const_node = graph_info[pad_const_node_name].node padding_tensor = None - if graph_info[pad_const_node_name].node.op != 'Const': - if pad_const_node.op == 'DataFormatVecPermute': + if graph_info[pad_const_node_name].node.op != "Const": + if pad_const_node.op == "DataFormatVecPermute": parent_input_node = graph_info[pad_const_node.input[0]].node - if parent_input_node.op == 'Const': - padding_tensor = tu.MakeNdarray( \ - parent_input_node.attr["value"].tensor).flatten() + if parent_input_node.op == "Const": + padding_tensor = tu.MakeNdarray(parent_input_node.attr["value"].tensor).flatten() else: padding_tensor = tu.MakeNdarray(pad_const_node.attr["value"].tensor).flatten() - if not any(padding_tensor) or \ - (any(padding_tensor) and (tf.version.VERSION == '1.15.0-up3' or self.new_api)): + if not any(padding_tensor) or ( + any(padding_tensor) and (tf.version.VERSION == "1.15.0-up3" or self.new_api) + ): insert_node_pairs.append([refresh_pre_node_name, self.post_node_name]) refresh_pre_node_name = refresh_pre_node.input[0] @@ -86,61 +88,60 @@ def do_transformation(self): reshape_dims_name = node_name_prefix + "_reshape_dims" reduction_dims_name = node_name_prefix + "_reduction_dims" - reshape_dims_node = Helper.create_constant_node( - reshape_dims_name, -1, dtypes.int32, [1]) + reshape_dims_node = Helper.create_constant_node(reshape_dims_name, -1, dtypes.int32, [1]) + + reduction_dims_node = Helper.create_constant_node(reduction_dims_name, 0, dtypes.int32, [1]) - reduction_dims_node = Helper.create_constant_node( - reduction_dims_name, 0, dtypes.int32, [1]) - # the training input QueueDequeueManyV2 has issue with implicit dependency # skip the input node of show_and_tell model - if not (Helper.node_name_from_input(each_node_name) == 'batch_and_pad' and \ - graph_info[Helper.node_name_from_input(each_node_name)].node.op == 'QueueDequeueManyV2'): + if not ( + Helper.node_name_from_input(each_node_name) == "batch_and_pad" + and graph_info[Helper.node_name_from_input(each_node_name)].node.op == "QueueDequeueManyV2" + ): reshape_dims_node.input.append("^" + Helper.node_name_from_input(each_node_name)) reduction_dims_node.input.append("^" + Helper.node_name_from_input(each_node_name)) reshape_input_name = node_name_prefix + "_reshape_" - reshape_input_node = 
Helper.create_node("Reshape", reshape_input_name, - [each_node_name, reshape_dims_name]) + reshape_input_node = Helper.create_node( + "Reshape", reshape_input_name, [each_node_name, reshape_dims_name] + ) min_input_name = node_name_prefix + "_min" - min_input_node = Helper.create_node( - "Min", min_input_name, [reshape_input_name, reduction_dims_name]) + min_input_node = Helper.create_node("Min", min_input_name, [reshape_input_name, reduction_dims_name]) Helper.set_attr_dtype(min_input_node, "Tidx", dtypes.int32) Helper.set_attr_bool(min_input_node, "keep_dims", False) max_input_name = node_name_prefix + "_max" - max_input_node = Helper.create_node( - "Max", max_input_name, [reshape_input_name, reduction_dims_name]) + max_input_node = Helper.create_node("Max", max_input_name, [reshape_input_name, reduction_dims_name]) Helper.set_attr_dtype(max_input_node, "Tidx", dtypes.int32) Helper.set_attr_bool(max_input_node, "keep_dims", False) max_print_node = Helper.create_node( - "Print", node_name_prefix + "_print_max__{}".format(index), - [max_input_name + ':0', max_input_name+':0']) + "Print", + node_name_prefix + "_print_max__{}".format(index), + [max_input_name + ":0", max_input_name + ":0"], + ) min_print_node = Helper.create_node( - "Print", node_name_prefix + "_print_min__{}".format(index), - [min_input_name+':0', min_input_name+':0']) + "Print", + node_name_prefix + "_print_min__{}".format(index), + [min_input_name + ":0", min_input_name + ":0"], + ) if index == 0: - max_msg = ';{}_eightbit_max_{}__print__;__max:'.format( - self.pre_node_name, each_node_name) - min_msg = ';{}_eightbit_min_{}__print__;__min:'.format( - self.pre_node_name, each_node_name) + max_msg = ";{}_eightbit_max_{}__print__;__max:".format(self.pre_node_name, each_node_name) + min_msg = ";{}_eightbit_min_{}__print__;__min:".format(self.pre_node_name, each_node_name) # workround for swish_f32, attribute T is not in the op definition - if 'swish_f32' in graph_info[self.pre_node_name].node.name: - src_dt=attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum) + if "swish_f32" in graph_info[self.pre_node_name].node.name: + src_dt = attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum) else: src_dt = graph_info[self.pre_node_name].node.attr["T"] else: - max_msg = ';{}_eightbit_requant_range__print__;__requant_max:'.format( - self.pre_node_name) - min_msg = ';{}_eightbit_requant_range__print__;__requant_min:'.format( - self.pre_node_name) + max_msg = ";{}_eightbit_requant_range__print__;__requant_max:".format(self.pre_node_name) + min_msg = ";{}_eightbit_requant_range__print__;__requant_min:".format(self.pre_node_name) # workround for swish_f32, attribute T is not in the op definition - if 'swish_f32' in graph_info[each_node_name].node.op: - src_dt=attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum) + if "swish_f32" in graph_info[each_node_name].node.op: + src_dt = attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum) else: src_dt = graph_info[each_node_name].node.attr["T"] @@ -159,34 +160,39 @@ def do_transformation(self): max_print_node.attr["summarize"].i = 1024 attr_u = [dtypes.as_dtype(src_dt.type).as_datatype_enum] - min_print_node.attr["U"].list.CopyFrom( - attr_value_pb2.AttrValue.ListValue(type=attr_u)) - max_print_node.attr["U"].list.CopyFrom( - attr_value_pb2.AttrValue.ListValue(type=attr_u)) + min_print_node.attr["U"].list.CopyFrom(attr_value_pb2.AttrValue.ListValue(type=attr_u)) + max_print_node.attr["U"].list.CopyFrom(attr_value_pb2.AttrValue.ListValue(type=attr_u)) 
post_node_names = graph_info[Helper.node_name_from_input(each_node_name)].outputs if post_node_names: for post_node_name in post_node_names: post_node = graph_info[post_node_name].node if each_node_name not in post_node.input: continue - if post_node.op == 'FusedBatchNormV3' and "_print_identity" not in \ - graph_info[Helper.node_name_from_input(post_node.name)].node.input[0]: - identity_node = Helper.create_node("Identity", post_node.name+'_print_identity', - [graph_info[Helper.node_name_from_input(post_node.name)].node.input[0]]) + if ( + post_node.op == "FusedBatchNormV3" + and "_print_identity" + not in graph_info[Helper.node_name_from_input(post_node.name)].node.input[0] + ): + identity_node = Helper.create_node( + "Identity", + post_node.name + "_print_identity", + [graph_info[Helper.node_name_from_input(post_node.name)].node.input[0]], + ) identity_node.attr["T"].CopyFrom(src_dt) - cur_graph.add_node(identity_node, - graph_info[Helper.node_name_from_input(post_node.name)].node.input[0], - [post_node.name]) + cur_graph.add_node( + identity_node, + graph_info[Helper.node_name_from_input(post_node.name)].node.input[0], + [post_node.name], + ) identity_node.input.append("^" + min_print_node.name) identity_node.input.append("^" + max_print_node.name) else: post_node.input.append("^" + min_print_node.name) post_node.input.append("^" + max_print_node.name) - + cur_graph.add_node(reshape_dims_node, None, [reshape_input_name]) cur_graph.add_node(reduction_dims_node, None, [max_input_name, min_input_name]) - cur_graph.add_node(reshape_input_node, each_node_name, - [max_input_name, min_input_name]) + cur_graph.add_node(reshape_input_node, each_node_name, [max_input_name, min_input_name]) cur_graph.add_node(max_input_node, reshape_input_name, [max_print_node.name]) cur_graph.add_node(min_input_node, reshape_input_name, [min_print_node.name]) @@ -194,24 +200,25 @@ def do_transformation(self): cur_graph.add_node(max_print_node, max_input_name, []) else: identity_node0 = Helper.create_node( - "Identity", min_print_node.name+'_identity', [min_print_node.name]) + "Identity", min_print_node.name + "_identity", [min_print_node.name] + ) identity_node0.attr["T"].CopyFrom(src_dt) identity_node1 = Helper.create_node( - "Identity", max_print_node.name+'_identity', [max_print_node.name]) + "Identity", max_print_node.name + "_identity", [max_print_node.name] + ) identity_node1.attr["T"].CopyFrom(src_dt) cur_graph.add_node(reshape_dims_node, None, [reshape_input_name]) cur_graph.add_node(reduction_dims_node, None, [max_input_name, min_input_name]) - cur_graph.add_node(reshape_input_node, each_node_name, - [max_input_name, min_input_name]) + cur_graph.add_node(reshape_input_node, each_node_name, [max_input_name, min_input_name]) cur_graph.add_node(max_input_node, reshape_input_name, [max_print_node.name]) cur_graph.add_node(min_input_node, reshape_input_name, [min_print_node.name]) cur_graph.add_node(min_print_node, min_input_name, [identity_node0.name]) cur_graph.add_node(max_print_node, max_input_name, [identity_node1.name]) cur_graph.add_node(identity_node0, min_print_node.name, []) cur_graph.add_node(identity_node1, max_print_node.name, []) - #identity_node0.input.append("^" + min_print_node.name) - #identity_node1.input.append("^" + max_print_node.name) + # identity_node0.input.append("^" + min_print_node.name) + # identity_node1.input.append("^" + max_print_node.name) output_names.append(identity_node0.name) output_names.append(identity_node1.name) return cur_graph.dump_graph(), output_names diff --git 
a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/move_squeeze_after_relu.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/move_squeeze_after_relu.py index daace422768..f8cdef025ab 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/move_squeeze_after_relu.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/move_squeeze_after_relu.py @@ -17,10 +17,11 @@ """Move Squeeze after Relu Graph Rewriter.""" import copy + +from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.utils.utility import dump_elapsed_time from ..graph_base import GraphRewriterBase -from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer class MoveSqueezeAfterReluOptimizer(GraphRewriterBase): @@ -29,7 +30,7 @@ class MoveSqueezeAfterReluOptimizer(GraphRewriterBase): def __init__(self, model): """Initilization.""" super().__init__(model) - self.op_list = ['Relu', 'Sigmoid', 'Relu6', 'LeakyRelu', 'Elu'] + self.op_list = ["Relu", "Sigmoid", "Relu6", "LeakyRelu", "Elu"] @dump_elapsed_time("Pass MoveSqueezeAfterReluOptimizer") def do_transformation(self): @@ -39,46 +40,52 @@ def do_transformation(self): graph_info = g.parse_graph() # For pattern Conv + Squeeze + BiasAdd + Relu(Sigmoid, Relu6, LeakyRelu, Elu) for node in self.model.node: - if node.op in self.op_list and \ - node.input[0] in graph_info and \ - graph_info[node.input[0]].node.op == 'BiasAdd': + if ( + node.op in self.op_list + and node.input[0] in graph_info + and graph_info[node.input[0]].node.op == "BiasAdd" + ): biasadd_node = graph_info[node.input[0]].node biasadd_input = graph_info[biasadd_node.name].node.input[0] squeeze_node = graph_info[biasadd_input].node relu_output = graph_info[node.name].outputs - if squeeze_node.op == 'Squeeze': - #biasadd + if squeeze_node.op == "Squeeze": + # biasadd for i, input in enumerate(biasadd_node.input): if input == biasadd_input: - new_input = biasadd_node.input[:i] + [squeeze_node.input[0]] + \ - biasadd_node.input[i+1:] - graph_info[biasadd_node.name].node.ClearField('input') + new_input = biasadd_node.input[:i] + [squeeze_node.input[0]] + biasadd_node.input[i + 1 :] + graph_info[biasadd_node.name].node.ClearField("input") graph_info[biasadd_node.name].node.input.extend(new_input) graph_info[squeeze_node.name].outputs.remove(biasadd_node.name) - #conv output + # conv output conv = squeeze_node.input[0] conv_outputs = graph_info[conv].outputs for i, output in enumerate(conv_outputs): if output == squeeze_node.name: graph_info[conv].outputs.remove(squeeze_node.name) graph_info[conv].outputs.append(biasadd_node.name) - #squeeze input - squeeze_node.ClearField('input') + # squeeze input + squeeze_node.ClearField("input") squeeze_node.input.extend([node.name]) - #expand input,squeeze output + # expand input,squeeze output for output in relu_output: for i, input in enumerate(graph_info[output].node.input): if input == node.name: - new_input = graph_info[output].node.input[:i] + [squeeze_node.name] +\ - graph_info[output].node.input[i+1:] + new_input = ( + graph_info[output].node.input[:i] + + [squeeze_node.name] + + graph_info[output].node.input[i + 1 :] + ) graph_info[squeeze_node.name].outputs.append(output) - graph_info[output].node.ClearField('input') + graph_info[output].node.ClearField("input") graph_info[output].node.input.extend(new_input) # For pattern x + Reshape + Relu(Sigmoid, Relu6, LeakyRelu, Elu) - if node.op in self.op_list and \ - node.input[0] in graph_info and \ - graph_info[node.input[0]].node.op 
== 'Reshape': + if ( + node.op in self.op_list + and node.input[0] in graph_info + and graph_info[node.input[0]].node.op == "Reshape" + ): reshape_node = graph_info[node.input[0]].node reshape_input = graph_info[reshape_node.name].node.input[0] x_node = graph_info[reshape_input].node @@ -88,26 +95,28 @@ def do_transformation(self): continue if len(graph_info[reshape_node.name].outputs) > 1: continue - #relu---->reshape + # relu---->reshape for i, input in enumerate(reshape_node.input): if input == reshape_input: - new_input = reshape_node.input[:i] + [node.name] + \ - reshape_node.input[i+1:] - graph_info[reshape_node.name].node.ClearField('input') + new_input = reshape_node.input[:i] + [node.name] + reshape_node.input[i + 1 :] + graph_info[reshape_node.name].node.ClearField("input") graph_info[reshape_node.name].node.input.extend(new_input) graph_info[x_node.name].outputs.remove(reshape_node.name) graph_info[x_node.name].outputs.append(node.name) - #x----->relu - node.ClearField('input') + # x----->relu + node.ClearField("input") node.input.extend([reshape_input]) - #expand input,squeeze output + # expand input,squeeze output for output in relu_output: for i, input in enumerate(graph_info[output].node.input): if input == node.name: - new_input = graph_info[output].node.input[:i] + [reshape_node.name] +\ - graph_info[output].node.input[i+1:] + new_input = ( + graph_info[output].node.input[:i] + + [reshape_node.name] + + graph_info[output].node.input[i + 1 :] + ) graph_info[reshape_node.name].outputs.append(output) - graph_info[output].node.ClearField('input') + graph_info[output].node.ClearField("input") graph_info[output].node.input.extend(new_input) graph_info[node.name].outputs.remove(output) graph_info[node.name].outputs.append(reshape_node.name) diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/pre_optimize.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/pre_optimize.py index 683960907ed..ba682e876ba 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/pre_optimize.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/pre_optimize.py @@ -16,63 +16,74 @@ # limitations under the License. 
"""Pre Optimization Entrance.""" -import logging import copy +import logging + import tensorflow as tf + from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer +from neural_compressor.adaptor.tf_utils.util import version1_eq_version2, version1_gte_version2, version1_lt_version2 from neural_compressor.utils.utility import dump_elapsed_time -from .fuse_column_wise_mul import FuseColumnWiseMulOptimizer -from .remove_training_nodes import RemoveTrainingNodesOptimizer -from .split_shared_input import SplitSharedInputOptimizer -from .strip_unused_nodes import StripUnusedNodesOptimizer -from .graph_cse_optimizer import GraphCseOptimizer -from .fold_constant import GraphFoldConstantOptimizer -from .fold_batch_norm import FoldBatchNormNodesOptimizer -from .rename_batch_norm import RenameBatchNormOptimizer + +from .convert_add_to_biasadd import ConvertAddToBiasAddOptimizer from .convert_layout import ConvertLayoutOptimizer -from .fuse_gelu import FuseGeluOptimizer -from .fuse_reshape_transpose import FuseTransposeReshapeOptimizer from .convert_leakyrelu import ConvertLeakyReluOptimizer -from .dummy_biasadd import InjectDummyBiasAddOptimizer -from .convert_add_to_biasadd import ConvertAddToBiasAddOptimizer -from .grappler_pass import GrapplerOptimizer -from .fuse_conv_with_math import FuseConvWithMathOptimizer -from .fuse_biasadd_add import FuseBiasAddAndAddOptimizer -from .switch_optimizer import SwitchOptimizer -from .move_squeeze_after_relu import MoveSqueezeAfterReluOptimizer from .convert_nan_to_random import ConvertNanToRandom +from .convert_placeholder_to_const import ConvertPlaceholderToConst +from .dilated_contraction import DilatedContraction +from .dummy_biasadd import InjectDummyBiasAddOptimizer from .expanddims_optimizer import ExpandDimsOptimizer from .fetch_weight_from_reshape import FetchWeightFromReshapeOptimizer +from .fold_batch_norm import FoldBatchNormNodesOptimizer +from .fold_constant import GraphFoldConstantOptimizer +from .fuse_biasadd_add import FuseBiasAddAndAddOptimizer +from .fuse_column_wise_mul import FuseColumnWiseMulOptimizer +from .fuse_conv_with_math import FuseConvWithMathOptimizer from .fuse_decomposed_bn import FuseDecomposedBNOptimizer from .fuse_decomposed_in import FuseDecomposedINOptimizer +from .fuse_gelu import FuseGeluOptimizer from .fuse_layer_norm import FuseLayerNormOptimizer +from .fuse_reshape_transpose import FuseTransposeReshapeOptimizer +from .graph_cse_optimizer import GraphCseOptimizer +from .grappler_pass import GrapplerOptimizer +from .move_squeeze_after_relu import MoveSqueezeAfterReluOptimizer +from .remove_training_nodes import RemoveTrainingNodesOptimizer +from .rename_batch_norm import RenameBatchNormOptimizer +from .split_shared_input import SplitSharedInputOptimizer from .strip_equivalent_nodes import StripEquivalentNodesOptimizer -from .dilated_contraction import DilatedContraction -from .convert_placeholder_to_const import ConvertPlaceholderToConst -from neural_compressor.adaptor.tf_utils.util import version1_gte_version2, version1_eq_version2 -from neural_compressor.adaptor.tf_utils.util import version1_lt_version2 +from .strip_unused_nodes import StripUnusedNodesOptimizer +from .switch_optimizer import SwitchOptimizer + -class PreOptimization(): +class PreOptimization: """Pre optimization for the FP32 models.""" def __init__(self, model, new_api, device): """Initilization.""" self.model = model - if version1_gte_version2(tf.version.VERSION, '2.1.0') or \ - version1_eq_version2(tf.version.VERSION, '1.15.0-up3'): - 
self.optimization = {'pruning': True, 'shape': True, - 'constfold': False, 'arithmetic': False, - 'dependency': True, 'debug_stripper': True, - 'loop': True} + if version1_gte_version2(tf.version.VERSION, "2.1.0") or version1_eq_version2(tf.version.VERSION, "1.15.0-up3"): + self.optimization = { + "pruning": True, + "shape": True, + "constfold": False, + "arithmetic": False, + "dependency": True, + "debug_stripper": True, + "loop": True, + } else: - self.optimization = {'pruning': True, 'shape': True, - 'dependency': True, 'debug_stripper': True, - 'loop': True} + self.optimization = { + "pruning": True, + "shape": True, + "dependency": True, + "debug_stripper": True, + "loop": True, + } # Table initialization should disable grappler dependency and pruning pass node_names = [node.name for node in model.graph_def.node] - if 'init_all_tables' in node_names: - self.optimization['dependency'] = False - self.optimization['pruning'] = False + if "init_all_tables" in node_names: + self.optimization["dependency"] = False + self.optimization["pruning"] = False self.new_api = new_api self.device = device self.analyzer = GraphAnalyzer() @@ -81,7 +92,6 @@ def __init__(self, model, new_api, device): self._tmp_graph_def = None self._excluded_node_names = [] - def get_excluded_node_names(self): """Get the excluded node name. @@ -120,50 +130,51 @@ def get_optimized_model(self, itex_mode=False): input_output_names = output_node_names + input_node_names # Add device info before convert layout - # Google in layout optimizer where all nodes in the graph are expected to have their device + # Google in layout optimizer where all nodes in the graph are expected to have their device # information set (earlier version < 2.10.0 this was not needed). - if version1_gte_version2(tf.version.VERSION, '2.10.0'): + if version1_gte_version2(tf.version.VERSION, "2.10.0"): cur_graph = GraphAnalyzer() cur_graph.graph = self.model.graph_def graph_info = cur_graph.parse_graph() - if self.device == 'cpu': + if self.device == "cpu": cpus = tf.config.list_physical_devices("CPU") - node_device = cpus[0].name.replace('physical_device:', '') + node_device = cpus[0].name.replace("physical_device:", "") else: gpus = tf.config.list_physical_devices("GPU") if len(gpus) == 0: xpus = tf.config.list_physical_devices("XPU") if len(xpus) == 0: cpus = tf.config.list_physical_devices("CPU") - node_device = cpus[0].name.replace('physical_device:', '') + node_device = cpus[0].name.replace("physical_device:", "") else: - node_device = xpus[0].name.replace('physical_device:', '') + node_device = xpus[0].name.replace("physical_device:", "") else: - node_device = gpus[0].name.replace('physical_device:', '') + node_device = gpus[0].name.replace("physical_device:", "") for node_name in list(graph_info.keys()): node = graph_info[node_name].node node.device = node_device self._tmp_graph_def = cur_graph.dump_graph() - self._tmp_graph_def = ConvertLayoutOptimizer( - self._tmp_graph_def, output_node_names).do_transformation() + self._tmp_graph_def = ConvertLayoutOptimizer(self._tmp_graph_def, output_node_names).do_transformation() else: - self._tmp_graph_def = ConvertLayoutOptimizer( - self.model.graph_def, output_node_names).do_transformation() + self._tmp_graph_def = ConvertLayoutOptimizer(self.model.graph_def, output_node_names).do_transformation() self._tmp_graph_def = ConvertPlaceholderToConst(self._tmp_graph_def).do_transformation() self._tmp_graph_def = SwitchOptimizer(self._tmp_graph_def).do_transformation() self._tmp_graph_def = GrapplerOptimizer( 
- self._tmp_graph_def, input_output_names, self.optimization).do_transformation() + self._tmp_graph_def, input_output_names, self.optimization + ).do_transformation() - self._tmp_graph_def = StripUnusedNodesOptimizer(self._tmp_graph_def, - input_node_names, output_node_names).do_transformation() + self._tmp_graph_def = StripUnusedNodesOptimizer( + self._tmp_graph_def, input_node_names, output_node_names + ).do_transformation() self._tmp_graph_def = RemoveTrainingNodesOptimizer( - self._tmp_graph_def, protected_nodes=input_output_names).do_transformation() + self._tmp_graph_def, protected_nodes=input_output_names + ).do_transformation() self._tmp_graph_def = SplitSharedInputOptimizer(self._tmp_graph_def).do_transformation() @@ -182,63 +193,52 @@ def get_optimized_model(self, itex_mode=False): self._tmp_graph_def = FuseColumnWiseMulOptimizer(self._tmp_graph_def).do_transformation() - self._tmp_graph_def = StripUnusedNodesOptimizer(self._tmp_graph_def, - input_node_names, output_node_names).do_transformation() + self._tmp_graph_def = StripUnusedNodesOptimizer( + self._tmp_graph_def, input_node_names, output_node_names + ).do_transformation() self._tmp_graph_def = FuseGeluOptimizer(self._tmp_graph_def).do_transformation() self._tmp_graph_def = GraphCseOptimizer(self._tmp_graph_def).do_transformation() - self._tmp_graph_def = FoldBatchNormNodesOptimizer( - self._tmp_graph_def).do_transformation() + self._tmp_graph_def = FoldBatchNormNodesOptimizer(self._tmp_graph_def).do_transformation() - self._tmp_graph_def = RenameBatchNormOptimizer( - self._tmp_graph_def).do_transformation() + self._tmp_graph_def = RenameBatchNormOptimizer(self._tmp_graph_def).do_transformation() - self._tmp_graph_def = ConvertLeakyReluOptimizer( - self._tmp_graph_def).do_transformation() + self._tmp_graph_def = ConvertLeakyReluOptimizer(self._tmp_graph_def).do_transformation() - self._tmp_graph_def = ConvertAddToBiasAddOptimizer( - self._tmp_graph_def).do_transformation() + self._tmp_graph_def = ConvertAddToBiasAddOptimizer(self._tmp_graph_def).do_transformation() - self._tmp_graph_def = FuseTransposeReshapeOptimizer( - self._tmp_graph_def).do_transformation() + self._tmp_graph_def = FuseTransposeReshapeOptimizer(self._tmp_graph_def).do_transformation() - self._tmp_graph_def = FuseConvWithMathOptimizer( - self._tmp_graph_def).do_transformation() + self._tmp_graph_def = FuseConvWithMathOptimizer(self._tmp_graph_def).do_transformation() - self._tmp_graph_def = ExpandDimsOptimizer( - self._tmp_graph_def).do_transformation() + self._tmp_graph_def = ExpandDimsOptimizer(self._tmp_graph_def).do_transformation() - self._tmp_graph_def = FetchWeightFromReshapeOptimizer( - self._tmp_graph_def).do_transformation() + self._tmp_graph_def = FetchWeightFromReshapeOptimizer(self._tmp_graph_def).do_transformation() - self._tmp_graph_def = MoveSqueezeAfterReluOptimizer( - self._tmp_graph_def).do_transformation() + self._tmp_graph_def = MoveSqueezeAfterReluOptimizer(self._tmp_graph_def).do_transformation() if not self.new_api and not itex_mode: - #TODO we need to remove below optimizer once the TF enabled the single + # TODO we need to remove below optimizer once the TF enabled the single # matmul op quantization self._tmp_graph_def = InjectDummyBiasAddOptimizer( - self._tmp_graph_def, output_node_names).do_transformation() - - self._tmp_graph_def = FuseBiasAddAndAddOptimizer( - self._tmp_graph_def).do_transformation() + self._tmp_graph_def, output_node_names + ).do_transformation() + self._tmp_graph_def = 
FuseBiasAddAndAddOptimizer(self._tmp_graph_def).do_transformation() - self._tmp_graph_def = ConvertNanToRandom( - self._tmp_graph_def).do_transformation() + self._tmp_graph_def = ConvertNanToRandom(self._tmp_graph_def).do_transformation() - self._tmp_graph_def = StripEquivalentNodesOptimizer( - self._tmp_graph_def, output_node_names).do_transformation() + self._tmp_graph_def = StripEquivalentNodesOptimizer(self._tmp_graph_def, output_node_names).do_transformation() if self.new_api or itex_mode: - self._tmp_graph_def = DilatedContraction( - self._tmp_graph_def).do_transformation() + self._tmp_graph_def = DilatedContraction(self._tmp_graph_def).do_transformation() # node device info will be removed by GrapplerOptimizer, insert it again. - if version1_lt_version2(tf.version.VERSION, '2.0.0'): # pragma: no cover + if version1_lt_version2(tf.version.VERSION, "2.0.0"): # pragma: no cover from tensorflow._api.v1.config import experimental + list_physical_devices = experimental.list_physical_devices else: list_physical_devices = tf.config.list_physical_devices @@ -246,20 +246,20 @@ def get_optimized_model(self, itex_mode=False): cur_graph.graph = self._tmp_graph_def graph_info = cur_graph.parse_graph() - if self.device == 'cpu': + if self.device == "cpu": cpus = list_physical_devices("CPU") - node_device = cpus[0].name.replace('physical_device:', '') + node_device = cpus[0].name.replace("physical_device:", "") else: gpus = list_physical_devices("GPU") if len(gpus) == 0: xpus = list_physical_devices("XPU") if len(xpus) == 0: cpus = list_physical_devices("CPU") - node_device = cpus[0].name.replace('physical_device:', '') + node_device = cpus[0].name.replace("physical_device:", "") else: - node_device = xpus[0].name.replace('physical_device:', '') + node_device = xpus[0].name.replace("physical_device:", "") else: - node_device = gpus[0].name.replace('physical_device:', '') + node_device = gpus[0].name.replace("physical_device:", "") for node_name in list(graph_info.keys()): node = graph_info[node_name].node node.device = node_device @@ -268,7 +268,7 @@ def get_optimized_model(self, itex_mode=False): self._tmp_graph_def.library.CopyFrom(self.model.graph_def.library) for function_def in self.model.graph_def.library.function: - if function_def.signature.name == 'swish_f32': + if function_def.signature.name == "swish_f32": self._tmp_graph_def.library.function.extend([copy.deepcopy(function_def)]) origin_model.graph_def = self._tmp_graph_def @@ -290,8 +290,7 @@ def get_matched_nodes(self, patterns): res = [] for sub_pattern in patterns: - res.extend([i for i in self.analyzer.query_fusion_pattern_nodes( - sub_pattern) if i not in res]) + res.extend([i for i in self.analyzer.query_fusion_pattern_nodes(sub_pattern) if i not in res]) return res def has_positive_input(self, node_name): diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/remove_training_nodes.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/remove_training_nodes.py index fb0792a3290..75080979216 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/remove_training_nodes.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/remove_training_nodes.py @@ -16,17 +16,16 @@ # limitations under the License. 
"""Remove training nodes Graph Rewriter.""" +from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.utils.utility import dump_elapsed_time from ..graph_base import GraphRewriterBase -from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer class RemoveTrainingNodesOptimizer(GraphRewriterBase): """Remove training nodes optimizer.""" - def __init__(self, model, protected_nodes=[], types_to_splice= - ['Identity', 'CheckNumerics', 'StopGradient']): + def __init__(self, model, protected_nodes=[], types_to_splice=["Identity", "CheckNumerics", "StopGradient"]): """Initilizaiton.""" super().__init__(model) self.protected_nodes = protected_nodes @@ -60,11 +59,7 @@ def do_transformation(self): names_to_splice[node_name] = v.node.input[0] # We also don't want to remove nodes which are used as control edge inputs. - names_to_splice = { - name: value - for name, value in names_to_splice.items() - if name not in control_input_names - } + names_to_splice = {name: value for name, value in names_to_splice.items() if name not in control_input_names} for k, _ in names_to_splice.items(): graph_handle.remove_node_with_single_input_output(k) diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/rename_batch_norm.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/rename_batch_norm.py index 5aac853ffd6..d59c2512b41 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/rename_batch_norm.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/rename_batch_norm.py @@ -17,16 +17,17 @@ """Rename FusedBatchNorm op to FusedBatchNormV2 Graph Rewriter.""" import math -import numpy as np -from tensorflow.core.framework import node_def_pb2 -from tensorflow.core.framework import attr_value_pb2 +import numpy as np +from tensorflow.core.framework import attr_value_pb2, node_def_pb2 from tensorflow.python.framework import tensor_util -from neural_compressor.utils.utility import dump_elapsed_time -from ..graph_base import GraphRewriterBase from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper +from neural_compressor.utils.utility import dump_elapsed_time + +from ..graph_base import GraphRewriterBase + class RenameBatchNormOptimizer(GraphRewriterBase): """Rename FusedBatchNorm op to FusedBatchNormV2.""" @@ -51,7 +52,7 @@ def do_transformation(self): graph_details = cur_graph.parse_graph() for _, v in graph_details.items(): - #for node in cur_graph.graph.node: + # for node in cur_graph.graph.node: if v.node.op == "FusedBatchNorm" or v.node.op == "FusedBatchNormV2": v.node.op = "FusedBatchNormV3" v.node.attr["U"].CopyFrom(v.node.attr["T"]) diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/split_shared_input.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/split_shared_input.py index 953be76d041..faf73c85977 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/split_shared_input.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/split_shared_input.py @@ -17,15 +17,17 @@ """Split shared input Graph Rewriter.""" from tensorflow.core.framework import node_def_pb2 -from neural_compressor.utils.utility import dump_elapsed_time -from ..graph_base import GraphRewriterBase from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper +from 
neural_compressor.utils.utility import dump_elapsed_time + +from ..graph_base import GraphRewriterBase class SplitSharedInputOptimizer(GraphRewriterBase): """Split the shared input if the input node is shared and const.""" + @dump_elapsed_time("Pass SplitSharedInputOptimizer") def do_transformation(self): """Execute splitting the shared input.""" @@ -40,9 +42,9 @@ def do_transformation(self): for node_name in list(graph_info.keys()): node = graph_info[node_name].node for _, input_node_name in enumerate(node.input): - if input_node_name.startswith('^'): + if input_node_name.startswith("^"): continue - if graph_info[Helper.node_name_from_input(input_node_name)].node.op == 'Const': + if graph_info[Helper.node_name_from_input(input_node_name)].node.op == "Const": # is shared and current node is not the first one # sharing the input if input_node_name in input_map: @@ -50,10 +52,8 @@ def do_transformation(self): input_map[input_node_name].append(node.name) new_input_node = node_def_pb2.NodeDef() new_input_node.CopyFrom(graph_info[input_node_name].node) - new_input_node.name = input_node_name + '_nc_share_' + str( - len(input_map[input_node_name])) - cur_graph.replace_const_node( - new_input_node, [node.name], input_node_name, False) + new_input_node.name = input_node_name + "_nc_share_" + str(len(input_map[input_node_name])) + cur_graph.replace_const_node(new_input_node, [node.name], input_node_name, False) else: input_map[input_node_name] = [node.name] diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/strip_equivalent_nodes.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/strip_equivalent_nodes.py index bed0d61e4dc..73b2f0ee020 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/strip_equivalent_nodes.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/strip_equivalent_nodes.py @@ -16,14 +16,16 @@ # limitations under the License. """Strip Equivalent Nodes Graph Rewriter.""" +from neural_compressor.adaptor.tf_utils.util import fix_ref_type_of_graph_def, strip_equivalent_nodes +from neural_compressor.utils import logger from neural_compressor.utils.utility import dump_elapsed_time + from ..graph_base import GraphRewriterBase -from neural_compressor.adaptor.tf_utils.util import fix_ref_type_of_graph_def -from neural_compressor.adaptor.tf_utils.util import strip_equivalent_nodes -from neural_compressor.utils import logger + class StripEquivalentNodesOptimizer(GraphRewriterBase): """Remove the equivalent nodes which have the same inputs and attributes.""" + def __init__(self, model, output_node_names): """Initilization.""" super().__init__(model) @@ -37,14 +39,12 @@ def do_transformation(self): replaced_nodes_type = True all_replaced_nodes_type = {} while replaced_nodes_type: - self.model, replaced_nodes_type = \ - strip_equivalent_nodes(self.model, self.output_node_names) + self.model, replaced_nodes_type = strip_equivalent_nodes(self.model, self.output_node_names) for k, v in replaced_nodes_type.items(): all_replaced_nodes_type[k] = all_replaced_nodes_type.get(k, 0) + v iter_num += 1 logger.debug( f"StripEquivalentNodes[Iter-{iter_num}]-Replaced equivalent node types are {replaced_nodes_type}" ) - logger.warning("All replaced equivalent node types are {}". 
\ - format(all_replaced_nodes_type)) + logger.warning("All replaced equivalent node types are {}".format(all_replaced_nodes_type)) return self.model diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/strip_unused_nodes.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/strip_unused_nodes.py index e145fdac44e..36ee3749f41 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/strip_unused_nodes.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/strip_unused_nodes.py @@ -20,8 +20,10 @@ from ..graph_base import GraphRewriterBase + class StripUnusedNodesOptimizer(GraphRewriterBase): """Remove the unused nodes in the graph.""" + def __init__(self, model, input_node_names, output_node_names): """Initilization.""" super().__init__(model) @@ -31,9 +33,7 @@ def __init__(self, model, input_node_names, output_node_names): @dump_elapsed_time("Pass StripUnusedNodesOptimizer") def do_transformation(self): """Execute stripping unused nodes.""" - from neural_compressor.adaptor.tf_utils.util import fix_ref_type_of_graph_def - from neural_compressor.adaptor.tf_utils.util import strip_unused_nodes + from neural_compressor.adaptor.tf_utils.util import fix_ref_type_of_graph_def, strip_unused_nodes + self.model = fix_ref_type_of_graph_def(self.model) - return strip_unused_nodes(self.model, - self.input_node_names, - self.output_node_names) + return strip_unused_nodes(self.model, self.input_node_names, self.output_node_names) diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/switch_optimizer.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/switch_optimizer.py index c0c6320bb07..567cabeac74 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/switch_optimizer.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/switch_optimizer.py @@ -17,10 +17,12 @@ """Switch Graph Rewriter.""" -from ..graph_base import GraphRewriterBase +from tensorflow.python.framework import tensor_util + from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.utils.utility import dump_elapsed_time -from tensorflow.python.framework import tensor_util + +from ..graph_base import GraphRewriterBase class SwitchOptimizer(GraphRewriterBase): @@ -44,18 +46,21 @@ def do_transformation(self): for node_combination in target_nodes: switch_node = graph_info[node_combination[0]].node - pred_node = graph_info[switch_node.input[1]].node - if (pred_node.op == 'Const' and tensor_util.MakeNdarray( \ - graph_info[pred_node.name].node.attr['value'].tensor)) or \ - (pred_node.op == 'PlaceholderWithDefault' and tensor_util.MakeNdarray( - graph_info[pred_node.input[0]].node.attr['value'].tensor)): + pred_node = graph_info[switch_node.input[1]].node + if ( + pred_node.op == "Const" + and tensor_util.MakeNdarray(graph_info[pred_node.name].node.attr["value"].tensor) + ) or ( + pred_node.op == "PlaceholderWithDefault" + and tensor_util.MakeNdarray(graph_info[pred_node.input[0]].node.attr["value"].tensor) + ): condition = [] for output in graph_info[node_combination[0]].outputs: successor_node = graph_info[output].node for index, value in enumerate(successor_node.input): - if value == node_combination[0] + ':1': + if value == node_combination[0] + ":1": condition.append(True) - elif value == node_combination[0] + ':0': + elif value == node_combination[0] + ":0": condition.append(False) if not all(condition): @@ -65,7 +70,7 @@ def do_transformation(self): successor_node = graph_info[output].node 
replace_index = None for index, value in enumerate(successor_node.input): - if value == node_combination[0] + ':1': + if value == node_combination[0] + ":1": replace_index = index break if not replace_index: @@ -73,8 +78,7 @@ def do_transformation(self): successor_node.input[replace_index] = switch_node.input[0] switch_node_outputs = list(graph_info[node_combination[0]].outputs) if switch_node_outputs.index(output) == len(switch_node_outputs) - 1: - cur_graph.remove_node_with_single_input_output( - node_combination[0]) + cur_graph.remove_node_with_single_input_output(node_combination[0]) else: continue diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/graph_base.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/graph_base.py index 03baf811ff3..f33f2fe0030 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/graph_base.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/graph_base.py @@ -16,11 +16,11 @@ # limitations under the License. """Graph Rewrite Base Class.""" -from abc import abstractmethod import logging +from abc import abstractmethod -class GraphRewriterBase(): +class GraphRewriterBase: """Graph Rewrite Base class. We abstract this base class and define the interface only. @@ -28,6 +28,7 @@ class GraphRewriterBase(): Args: object (model): the input model to be converted. """ + def __init__(self, model): """Initilization.""" self.model = model diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_fake_quant.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_fake_quant.py index dd97881bc18..c90e7990018 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_fake_quant.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_fake_quant.py @@ -16,14 +16,15 @@ # limitations under the License. 
"""Freeze FakeQuant op Graph Rewriter.""" +from tensorflow.python.framework import dtypes + +from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer, GraphRewriterHelper from neural_compressor.utils.utility import dump_elapsed_time from ..graph_base import GraphRewriterBase -from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer -from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper -from tensorflow.python.framework import dtypes -class FreezeFakeQuantOpOptimizer(GraphRewriterBase): # pragma: no cover + +class FreezeFakeQuantOpOptimizer(GraphRewriterBase): # pragma: no cover """Freeze fake_quant op to the following Quantize op and prioring Dequantize op.""" def __init__(self, model): @@ -36,14 +37,13 @@ def __init__(self, model): self.graph_info = self.graph_analyzer.parse_graph() self.freeze_patterns = { - str([['Requantize', 'RequantizePerChannel'], ["Dequantize"], \ - ['FakeQuantWithMinMaxVars']]): - self._freeze_requant_dequant_fakequant, - str([['FakeQuantWithMinMaxVars'], ["QuantizeV2"]]): - self._freeze_fakequant_quant, - str([['FakeQuantWithMinMaxVars'], ['Shape'], ['StridedSlice'], \ - ['Pack'], ['Reshape'], ["QuantizeV2"]]): - self._freeze_fakequant_metaop_quant + str( + [["Requantize", "RequantizePerChannel"], ["Dequantize"], ["FakeQuantWithMinMaxVars"]] + ): self._freeze_requant_dequant_fakequant, + str([["FakeQuantWithMinMaxVars"], ["QuantizeV2"]]): self._freeze_fakequant_quant, + str( + [["FakeQuantWithMinMaxVars"], ["Shape"], ["StridedSlice"], ["Pack"], ["Reshape"], ["QuantizeV2"]] + ): self._freeze_fakequant_metaop_quant, } def _freeze_requant_dequant_fakequant(self, pattern_nodes): @@ -56,17 +56,15 @@ def _freeze_requant_dequant_fakequant(self, pattern_nodes): # set the third input "requested_output_min" of RequantizePerChannel or Requantize op. requested_output_min_node = self.graph_info[requant_node.input[3]].node min_node = self.graph_info[fake_quant_node.input[1]].node - GraphRewriterHelper.set_attr_tensor(requested_output_min_node, - "value", - min_node.attr["value"].tensor, - dtypes.float32) + GraphRewriterHelper.set_attr_tensor( + requested_output_min_node, "value", min_node.attr["value"].tensor, dtypes.float32 + ) # set the fourth input "requested_output_max" of RequantizePerChannel or Requantize op. requested_output_max_node = self.graph_info[requant_node.input[4]].node max_node = self.graph_info[fake_quant_node.input[2]].node - GraphRewriterHelper.set_attr_tensor(requested_output_max_node, - "value", - max_node.attr["value"].tensor, - dtypes.float32) + GraphRewriterHelper.set_attr_tensor( + requested_output_max_node, "value", max_node.attr["value"].tensor, dtypes.float32 + ) def _freeze_fakequant_quant(self, pattern_nodes): """Freeze FakeQuant QuantizeV2 fusion.""" @@ -78,17 +76,11 @@ def _freeze_fakequant_quant(self, pattern_nodes): # set the second input "min_range" of QuantizeV2 op. min_node = self.graph_info[fake_quant_node.input[1]].node min_range_node = self.graph_info[quant_node.input[1]].node - GraphRewriterHelper.set_attr_tensor(min_range_node, - "value", - min_node.attr["value"].tensor, - dtypes.float32) + GraphRewriterHelper.set_attr_tensor(min_range_node, "value", min_node.attr["value"].tensor, dtypes.float32) # set the third input "max_range" of QuantizeV2 op. 
max_node = self.graph_info[fake_quant_node.input[2]].node max_range_node = self.graph_info[quant_node.input[2]].node - GraphRewriterHelper.set_attr_tensor(max_range_node, - "value", - max_node.attr["value"].tensor, - dtypes.float32) + GraphRewriterHelper.set_attr_tensor(max_range_node, "value", max_node.attr["value"].tensor, dtypes.float32) def _freeze_fakequant_metaop_quant(self, pattern_nodes): """Freeze FakeQuant Meta ops QuantizeV2 fusion.""" @@ -100,17 +92,11 @@ def _freeze_fakequant_metaop_quant(self, pattern_nodes): # set the second input "min_range" of QuantizeV2 op. min_node = self.graph_info[fake_quant_node.input[1]].node min_range_node = self.graph_info[quant_node.input[1]].node - GraphRewriterHelper.set_attr_tensor(min_range_node, - "value", - min_node.attr["value"].tensor, - dtypes.float32) + GraphRewriterHelper.set_attr_tensor(min_range_node, "value", min_node.attr["value"].tensor, dtypes.float32) # set the third input "max_range" of QuantizeV2 op. max_node = self.graph_info[fake_quant_node.input[2]].node max_range_node = self.graph_info[quant_node.input[2]].node - GraphRewriterHelper.set_attr_tensor(max_range_node, - "value", - max_node.attr["value"].tensor, - dtypes.float32) + GraphRewriterHelper.set_attr_tensor(max_range_node, "value", max_node.attr["value"].tensor, dtypes.float32) def _remove_all_fake_quants(self): """Remove all the fake quants.""" @@ -118,7 +104,7 @@ def _remove_all_fake_quants(self): for node_name in list(self.graph_info.keys()): node = self.graph_info[node_name].node - if node.op == 'FakeQuantWithMinMaxVars': + if node.op == "FakeQuantWithMinMaxVars": origin_outputs = list(self.graph_info[node_name].outputs) min_node_name = self.graph_info[node.input[1]].node.name max_node_name = self.graph_info[node.input[2]].node.name @@ -129,14 +115,17 @@ def _remove_all_fake_quants(self): for j in origin_outputs[1:]: output_node = self.graph_info[j].node - if len(output_node.input) == 1 and \ - output_node.op == 'Const' and output_node.input[0] == '^' + node.name: - self.graph_info[j].node.ClearField('input') - elif output_node.op == 'NoOp' : + if ( + len(output_node.input) == 1 + and output_node.op == "Const" + and output_node.input[0] == "^" + node.name + ): + self.graph_info[j].node.ClearField("input") + elif output_node.op == "NoOp": new_noop_input = [ - noop_input for noop_input in output_node.input \ - if noop_input != '^' + node.name] - output_node.ClearField('input') + noop_input for noop_input in output_node.input if noop_input != "^" + node.name + ] + output_node.ClearField("input") output_node.input.extend(new_noop_input) # remove those left const nodes used by FakeQuantWithMinMaxVars diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_value.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_value.py index 845175d1486..cb689be2f55 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_value.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_value.py @@ -16,21 +16,22 @@ # limitations under the License. 
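One note on the freeze_patterns reflow in freeze_fake_quant.py above: the handlers are keyed on str() renderings of nested op-name lists, so collapsing the multi-line str([...]) calls and switching the quote style in the source leaves the runtime dictionary keys byte-for-byte unchanged. A small illustrative lookup, with a made-up handler standing in for the real methods:

def _handle_fakequant_quant(pattern_nodes):
    # Hypothetical handler used only for this sketch.
    return "froze %d nodes" % len(pattern_nodes)


freeze_patterns = {
    str([["FakeQuantWithMinMaxVars"], ["QuantizeV2"]]): _handle_fakequant_quant,
}

# A pattern written with single quotes produces exactly the same str() key.
matched = [['FakeQuantWithMinMaxVars'], ['QuantizeV2']]
print(freeze_patterns[str(matched)](["fake_quant", "quantize_v2"]))  # froze 2 nodes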
"""Freeze value with calibration Graph Rewriter.""" -from tensorflow.core.framework import node_def_pb2 -from tensorflow.core.framework import attr_value_pb2 -from tensorflow.python.framework import tensor_util -from tensorflow.python.framework import dtypes +import re + +import numpy as np +from tensorflow.core.framework import attr_value_pb2, node_def_pb2 +from tensorflow.python.framework import dtypes, tensor_util -from ..graph_base import GraphRewriterBase from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper -import numpy as np -import re +from ..graph_base import GraphRewriterBase + class FreezeValueTransformer(GraphRewriterBase): """Freeze Value with calibration.""" - def __init__(self, model, max_min_data, postfix, tensor_data=None, th=1, device='gpu', itex_mode=False): + + def __init__(self, model, max_min_data, postfix, tensor_data=None, th=1, device="gpu", itex_mode=False): """Free Max/Min value into QuantizeV2 op. Args: @@ -48,8 +49,7 @@ def __init__(self, model, max_min_data, postfix, tensor_data=None, th=1, device= if 0.0 < th <= 1.0: self.threshold = th else: - self.logger.warning("The threshold value for clipping is invalid, " \ - "Reset it to 0.95 by default.") + self.logger.warning("The threshold value for clipping is invalid, " "Reset it to 0.95 by default.") self.threshold = 0.95 self.postfix = postfix self.device = device @@ -72,9 +72,9 @@ def _get_valid_log(self): """ output = [] - target_lines = [i.strip() for i in self.data if i.strip().find(';') != -1] + target_lines = [i.strip() for i in self.data if i.strip().find(";") != -1] for i in target_lines: - semi_count = i.count(';') + semi_count = i.count(";") if semi_count == 2: output.append(i) elif semi_count % 2 != 0: @@ -83,8 +83,8 @@ def _get_valid_log(self): loop_times = int(semi_count / 2) semi_index = [index for index, value in enumerate(i) if value == ";"] for index in range(loop_times - 1): - output.append(i[semi_index[index * 2]:semi_index[index * 2 + 2]]) - output.append(i[semi_index[loop_times * 2 - 2]:]) + output.append(i[semi_index[index * 2] : semi_index[index * 2 + 2]]) + output.append(i[semi_index[loop_times * 2 - 2] :]) return output def _parse_max_min_log(self): @@ -99,13 +99,12 @@ def _parse_max_min_log(self): temp = {} pattern_def = r"{};{}\[\-?\d+\.?\d*e?-?\+?\d*\]".format(print_suffix, self.postfix) for i in lines: - if not re.search(pattern_def, i): continue - max_line_data = i.split(';') - name = max_line_data[1][:-len(print_suffix)] - value = max_line_data[-1].split('[')[-1].split(']')[0] + max_line_data = i.split(";") + name = max_line_data[1][: -len(print_suffix)] + value = max_line_data[-1].split("[")[-1].split("]")[0] if "eightbit" in name and name not in temp: temp[name] = [] if "eightbit" in name: @@ -114,7 +113,7 @@ def _parse_max_min_log(self): target_index = int(len(temp[key]) * self.threshold) if target_index > len(temp[key]) - 1: target_index = len(temp[key]) - 1 - if self.postfix == '__min:': + if self.postfix == "__min:": res[key] = sorted(temp[key], reverse=True)[target_index] else: res[key] = sorted(temp[key])[target_index] @@ -137,9 +136,9 @@ def _parse_requantization_ranges(self): continue max_line_data = i.split(print_suffix + ";" + self.postfix)[-1] - min_value = max_line_data.split('][')[0].split('[')[1] - max_value = max_line_data.split('][')[1].split(']')[0] - name = i.split(';')[1].strip()[:-len(print_suffix)] + min_value = max_line_data.split("][")[0].split("[")[1] 
+ max_value = max_line_data.split("][")[1].split("]")[0] + name = i.split(";")[1].strip()[: -len(print_suffix)] if name not in temp_min: temp_min[name] = [] if name not in temp_max: @@ -185,44 +184,45 @@ def generate_output_graph(self, max_name_value): continue new_node = node_def_pb2.NodeDef() new_node.op = "Const" - new_node_postfix = "/frozen_{}_only".format(''.join( - [x for x in self.postfix if x.isalpha()])) + new_node_postfix = "/frozen_{}_only".format("".join([x for x in self.postfix if x.isalpha()])) new_node.name = node_name + new_node_postfix - new_node.attr["dtype"].CopyFrom( - attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + new_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) new_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto(float(value), - dtypes.float32, []))) + attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(float(value), dtypes.float32, [])) + ) output_node_name = self.graph_info[node_name].outputs[0] - if not self.itex_mode and node_name in self.cur_graph.parent_frame_details and \ - self.cur_graph.parent_frame_details[node_name]: # pragma: no cover - new_node_enter_node = Helper.create_node( - 'Enter', new_node.name+'_enter', [new_node.name]) - Helper.set_attr_string(new_node_enter_node, - 'frame_name', self.cur_graph.parent_frame_details[node_name].attr['frame_name'].s) - Helper.set_attr_dtype(new_node_enter_node, 'T', dtypes.float32) - Helper.set_attr_bool(new_node_enter_node, 'is_constant', True) - Helper.set_attr_int(new_node_enter_node, 'parallel_iterations', - self.cur_graph.parent_frame_details[node_name].attr['parallel_iterations'].i) + if ( + not self.itex_mode + and node_name in self.cur_graph.parent_frame_details + and self.cur_graph.parent_frame_details[node_name] + ): # pragma: no cover + new_node_enter_node = Helper.create_node("Enter", new_node.name + "_enter", [new_node.name]) + Helper.set_attr_string( + new_node_enter_node, + "frame_name", + self.cur_graph.parent_frame_details[node_name].attr["frame_name"].s, + ) + Helper.set_attr_dtype(new_node_enter_node, "T", dtypes.float32) + Helper.set_attr_bool(new_node_enter_node, "is_constant", True) + Helper.set_attr_int( + new_node_enter_node, + "parallel_iterations", + self.cur_graph.parent_frame_details[node_name].attr["parallel_iterations"].i, + ) self.cur_graph.add_node(new_node, None, [new_node_enter_node.name]) - #self.cur_graph.add_node(new_node_enter_node, new_node.name, + # self.cur_graph.add_node(new_node_enter_node, new_node.name, # [Helper.node_name_from_input(output_node_name)]) - self.cur_graph.replace_const_node(new_node_enter_node, - [Helper.node_name_from_input(output_node_name)], - node_name) + self.cur_graph.replace_const_node( + new_node_enter_node, [Helper.node_name_from_input(output_node_name)], node_name + ) self.cur_graph.remove_node(node_name) else: - self.cur_graph.replace_const_node(new_node, - [Helper.node_name_from_input(output_node_name)], - node_name) + self.cur_graph.replace_const_node(new_node, [Helper.node_name_from_input(output_node_name)], node_name) self.cur_graph.remove_node(node_name) - self.quantizeV2_min_max[node_name] = tensor_util.MakeNdarray( - new_node.attr["value"].tensor - ) + self.quantizeV2_min_max[node_name] = tensor_util.MakeNdarray(new_node.attr["value"].tensor) self.scale_info[self.postfix[:-1]] = self.quantizeV2_min_max return GraphAnalyzer().dump_graph(), self.scale_info @@ -234,26 +234,26 @@ def generate_output_graph_ranges(self, 
max_name_value): :return: transformed graph """ for node_name, value in max_name_value.items(): - bn_node_name = node_name.replace('eightbit_requant_range', 'eightbit_quantized_bn') - in_node_name = node_name.replace('eightbit_requant_range', 'eightbit_quantized_in') - if not self.graph_info.get(bn_node_name) or \ - not bn_node_name.endswith('_eightbit_quantized_bn'): + bn_node_name = node_name.replace("eightbit_requant_range", "eightbit_quantized_bn") + in_node_name = node_name.replace("eightbit_requant_range", "eightbit_quantized_in") + if not self.graph_info.get(bn_node_name) or not bn_node_name.endswith("_eightbit_quantized_bn"): bn_node_name = None - if not self.graph_info.get(in_node_name) or \ - not in_node_name.endswith('_eightbit_quantized_in'): + if not self.graph_info.get(in_node_name) or not in_node_name.endswith("_eightbit_quantized_in"): in_node_name = None - if self.itex_mode and 'BatchNorm' in node_name: - bn_node_name = node_name[:-len("_eightbit_requant_range")] + if self.itex_mode and "BatchNorm" in node_name: + bn_node_name = node_name[: -len("_eightbit_requant_range")] if bn_node_name not in self.graph_info: bn_node_name = None else: - if 'FusedBatchNorm' not in self.graph_info[bn_node_name].node.op: + if "FusedBatchNorm" not in self.graph_info[bn_node_name].node.op: bn_node_name = None - if node_name not in self.graph_info \ - and bn_node_name not in self.graph_info \ - and in_node_name not in self.graph_info: + if ( + node_name not in self.graph_info + and bn_node_name not in self.graph_info + and in_node_name not in self.graph_info + ): continue min_node = node_def_pb2.NodeDef() @@ -265,12 +265,10 @@ def generate_output_graph_ranges(self, max_name_value): min_node.name = in_node_name + "/frozen_in_output_min" else: min_node.name = node_name + min_node_postfix - min_node.attr["dtype"].CopyFrom( - attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + min_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) min_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto(float(value[0]), - dtypes.float32, []))) + attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(float(value[0]), dtypes.float32, [])) + ) max_node = node_def_pb2.NodeDef() max_node.op = "Const" @@ -281,100 +279,99 @@ def generate_output_graph_ranges(self, max_name_value): max_node.name = in_node_name + "/frozen_in_output_max" else: max_node.name = node_name + max_node_postfix - max_node.attr["dtype"].CopyFrom( - attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + max_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) max_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto(float(value[1]), - dtypes.float32, []))) + attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(float(value[1]), dtypes.float32, [])) + ) if bn_node_name: if self.itex_mode: self.cur_graph.replace_const_node( min_node, - [Helper.node_name_from_input(bn_node_name+'_eightbit_quantize_bn')], - bn_node_name + '_eightbit_input7_output_min' + [Helper.node_name_from_input(bn_node_name + "_eightbit_quantize_bn")], + bn_node_name + "_eightbit_input7_output_min", ) self.cur_graph.replace_const_node( max_node, - [Helper.node_name_from_input(bn_node_name+'_eightbit_quantize_bn')], - bn_node_name + '_eightbit_input8_output_max' + [Helper.node_name_from_input(bn_node_name + "_eightbit_quantize_bn")], + bn_node_name + "_eightbit_input8_output_max", ) 
else: self.cur_graph.replace_const_node( - min_node, - [Helper.node_name_from_input(bn_node_name)], - bn_node_name + '_input7_output_min' + min_node, [Helper.node_name_from_input(bn_node_name)], bn_node_name + "_input7_output_min" ) self.cur_graph.replace_const_node( - max_node, - [Helper.node_name_from_input(bn_node_name)], - bn_node_name + '_input8_output_max' + max_node, [Helper.node_name_from_input(bn_node_name)], bn_node_name + "_input8_output_max" ) elif in_node_name: self.cur_graph.replace_const_node( - min_node, - [Helper.node_name_from_input(in_node_name)], - in_node_name + '_input7_output_min' + min_node, [Helper.node_name_from_input(in_node_name)], in_node_name + "_input7_output_min" ) self.cur_graph.replace_const_node( - max_node, - [Helper.node_name_from_input(in_node_name)], - in_node_name + '_input8_output_max' + max_node, [Helper.node_name_from_input(in_node_name)], in_node_name + "_input8_output_max" ) - elif not self.itex_mode and node_name in self.cur_graph.parent_frame_details and \ - self.cur_graph.parent_frame_details[node_name]: # pragma: no cover + elif ( + not self.itex_mode + and node_name in self.cur_graph.parent_frame_details + and self.cur_graph.parent_frame_details[node_name] + ): # pragma: no cover output_node_name = self.graph_info[node_name].outputs[0] - min_node_enter_node = Helper.create_node( - 'Enter', min_node.name+'_enter', [min_node.name]) - Helper.set_attr_string(min_node_enter_node, - 'frame_name', self.cur_graph.parent_frame_details[node_name].attr['frame_name'].s) - Helper.set_attr_dtype(min_node_enter_node, 'T', dtypes.float32) - Helper.set_attr_bool(min_node_enter_node, 'is_constant', True) - Helper.set_attr_int(min_node_enter_node, 'parallel_iterations', - self.cur_graph.parent_frame_details[node_name].attr['parallel_iterations'].i) + min_node_enter_node = Helper.create_node("Enter", min_node.name + "_enter", [min_node.name]) + Helper.set_attr_string( + min_node_enter_node, + "frame_name", + self.cur_graph.parent_frame_details[node_name].attr["frame_name"].s, + ) + Helper.set_attr_dtype(min_node_enter_node, "T", dtypes.float32) + Helper.set_attr_bool(min_node_enter_node, "is_constant", True) + Helper.set_attr_int( + min_node_enter_node, + "parallel_iterations", + self.cur_graph.parent_frame_details[node_name].attr["parallel_iterations"].i, + ) self.cur_graph.add_node(min_node, None, [min_node_enter_node.name]) - #self.cur_graph.add_node(min_node_enter_node, min_node.name, + # self.cur_graph.add_node(min_node_enter_node, min_node.name, # [Helper.node_name_from_input(output_node_name)]) - self.cur_graph.replace_const_node(min_node_enter_node, - [Helper.node_name_from_input(output_node_name)], - node_name + ':0') - - max_node_enter_node = Helper.create_node( - 'Enter', max_node.name+'_enter', [max_node.name]) - Helper.set_attr_string(max_node_enter_node, - 'frame_name', self.cur_graph.parent_frame_details[node_name].attr['frame_name'].s) - Helper.set_attr_dtype(max_node_enter_node, 'T', dtypes.float32) - Helper.set_attr_bool(max_node_enter_node, 'is_constant', True) - Helper.set_attr_int(max_node_enter_node, 'parallel_iterations', - self.cur_graph.parent_frame_details[node_name].attr['parallel_iterations'].i) + self.cur_graph.replace_const_node( + min_node_enter_node, [Helper.node_name_from_input(output_node_name)], node_name + ":0" + ) + + max_node_enter_node = Helper.create_node("Enter", max_node.name + "_enter", [max_node.name]) + Helper.set_attr_string( + max_node_enter_node, + "frame_name", + 
self.cur_graph.parent_frame_details[node_name].attr["frame_name"].s, + ) + Helper.set_attr_dtype(max_node_enter_node, "T", dtypes.float32) + Helper.set_attr_bool(max_node_enter_node, "is_constant", True) + Helper.set_attr_int( + max_node_enter_node, + "parallel_iterations", + self.cur_graph.parent_frame_details[node_name].attr["parallel_iterations"].i, + ) self.cur_graph.add_node(max_node, None, [max_node_enter_node.name]) - #self.cur_graph.add_node(max_node_enter_node, max_node.name, + # self.cur_graph.add_node(max_node_enter_node, max_node.name, # [Helper.node_name_from_input(output_node_name)]) - self.cur_graph.replace_const_node(max_node_enter_node, - [Helper.node_name_from_input(output_node_name)], - node_name + ':1') + self.cur_graph.replace_const_node( + max_node_enter_node, [Helper.node_name_from_input(output_node_name)], node_name + ":1" + ) self.cur_graph.remove_node(node_name) else: output_node_name = self.graph_info[node_name].outputs[0] self.cur_graph.replace_const_node( - min_node, - [Helper.node_name_from_input(output_node_name)], - node_name + ':0' + min_node, [Helper.node_name_from_input(output_node_name)], node_name + ":0" ) self.cur_graph.replace_const_node( - max_node, - [Helper.node_name_from_input(output_node_name)], - node_name + ':1' + max_node, [Helper.node_name_from_input(output_node_name)], node_name + ":1" ) self.cur_graph.remove_node(node_name) self.requant_min_max[node_name] = [ tensor_util.MakeNdarray(min_node.attr["value"].tensor), - tensor_util.MakeNdarray(max_node.attr["value"].tensor) + tensor_util.MakeNdarray(max_node.attr["value"].tensor), ] self.scale_info[self.postfix] = self.requant_min_max @@ -390,7 +387,7 @@ def generate_output_graph_ranges(self, max_name_value): def do_transformation(self): """Apply the transformation of freeze value.""" - if self.postfix == '__requant_min_max': + if self.postfix == "__requant_min_max": range_data = self._parse_requantization_ranges() return self.generate_output_graph_ranges(range_data) diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_value_without_calib.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_value_without_calib.py index 75af017f5f2..b5ff37f51ce 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_value_without_calib.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_value_without_calib.py @@ -16,18 +16,19 @@ # limitations under the License. """Freeze Value without calibration Graph Rewriter.""" -from tensorflow.core.framework import node_def_pb2 -from tensorflow.core.framework import attr_value_pb2 -from tensorflow.python.framework import tensor_util -from tensorflow.python.framework import dtypes +from tensorflow.core.framework import attr_value_pb2, node_def_pb2 +from tensorflow.python.framework import dtypes, tensor_util -from ..graph_base import GraphRewriterBase from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper +from ..graph_base import GraphRewriterBase + + class FreezeValueWithoutCalibTransformer(GraphRewriterBase): """Freeze value without calibration.""" - def __init__(self, model, max_min_data, postfix, th=0.95, device='gpu'): + + def __init__(self, model, max_min_data, postfix, th=0.95, device="gpu"): """Free Max/Min value into QuantizeV2 op. 
Args: @@ -42,8 +43,7 @@ def __init__(self, model, max_min_data, postfix, th=0.95, device='gpu'): if 0.0 < th <= 1.0: self.threshold = th else: - self.logger.warning("The threshold value for clipping is invalid, " \ - "Reset it to 0.95 by default.") + self.logger.warning("The threshold value for clipping is invalid, " "Reset it to 0.95 by default.") self.threshold = 0.95 self.postfix = postfix self.device = device @@ -64,19 +64,14 @@ def generate_output_graph(self, max_name_value): continue new_node = node_def_pb2.NodeDef() new_node.op = "Const" - new_node_postfix = "/frozen_{}_only".format(''.join( - [x for x in self.postfix if x.isalpha()])) + new_node_postfix = "/frozen_{}_only".format("".join([x for x in self.postfix if x.isalpha()])) new_node.name = node_name + new_node_postfix - new_node.attr["dtype"].CopyFrom( - attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + new_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) new_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto(float(value), - dtypes.float32, []))) + attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(float(value), dtypes.float32, [])) + ) output_node_name = self.graph_info[node_name].outputs[0] - self.cur_graph.replace_const_node(new_node, - [Helper.node_name_from_input(output_node_name)], - node_name) + self.cur_graph.replace_const_node(new_node, [Helper.node_name_from_input(output_node_name)], node_name) self.cur_graph.remove_node(node_name) return GraphAnalyzer().dump_graph() @@ -95,39 +90,34 @@ def generate_output_graph_ranges(self, max_name_value): min_node.op = "Const" min_node_postfix = "/frozen_min" min_node.name = node_name + min_node_postfix - min_node.attr["dtype"].CopyFrom( - attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + min_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) min_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto(float(value[0]), - dtypes.float32, []))) + attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(float(value[0]), dtypes.float32, [])) + ) max_node = node_def_pb2.NodeDef() max_node.op = "Const" max_node_postfix = "/frozen_max" max_node.name = node_name + max_node_postfix - max_node.attr["dtype"].CopyFrom( - attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + max_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) max_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto(float(value[1]), - dtypes.float32, []))) + attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(float(value[1]), dtypes.float32, [])) + ) output_node_name = self.graph_info[node_name].outputs[0] - self.cur_graph.replace_const_node(min_node, - [Helper.node_name_from_input(output_node_name)], - node_name + ':0') - self.cur_graph.replace_const_node(max_node, - [Helper.node_name_from_input(output_node_name)], - node_name + ':1') + self.cur_graph.replace_const_node( + min_node, [Helper.node_name_from_input(output_node_name)], node_name + ":0" + ) + self.cur_graph.replace_const_node( + max_node, [Helper.node_name_from_input(output_node_name)], node_name + ":1" + ) self.cur_graph.remove_node(node_name) return GraphAnalyzer().dump_graph() def do_transformation_without_calib(self): """Apply transformation without calibration.""" - if self.postfix == '__requant_min_max': + if self.postfix == 
"__requant_min_max": range_data = self.data[self.postfix] return self.generate_output_graph_ranges(range_data) max_name_value = self.data[self.postfix] return self.generate_output_graph(max_name_value) - diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_conv_redundant_dequantize.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_conv_redundant_dequantize.py index f782761a306..e60dd638496 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_conv_redundant_dequantize.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_conv_redundant_dequantize.py @@ -16,33 +16,38 @@ # limitations under the License. """Fuse QuantizedConv QuantizedDeConv with redundant Dequantize Graph Rewriter.""" -from tensorflow.core.framework import attr_value_pb2 -from tensorflow.core.framework import node_def_pb2 +from tensorflow.core.framework import attr_value_pb2, node_def_pb2 from tensorflow.python.framework import dtypes -from ..graph_base import GraphRewriterBase from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper +from ..graph_base import GraphRewriterBase + + class FuseConvRedundantDequantizeTransformer(GraphRewriterBase): """Fuse _QuantizedConv/_QuantizedDeConv with the successor Dequantize Op.""" - fuse_patterns = [[ - "_FusedQuantizedConv3D", - "_FusedQuantizedConv2D", - "_FusedQuantizedDepthwiseConv2D", - "_FusedQuantizedDeconv2D", - "_FusedQuantizedDeconv3D" - ], ['Dequantize']] + + fuse_patterns = [ + [ + "_FusedQuantizedConv3D", + "_FusedQuantizedConv2D", + "_FusedQuantizedDepthwiseConv2D", + "_FusedQuantizedDeconv2D", + "_FusedQuantizedDeconv3D", + ], + ["Dequantize"], + ] fuse_sum_op_types_str = ( - str([b'BiasAdd', b'Sum', b'Requantize']), - str([b'BiasAdd', b'Sum', b'Relu', b'Requantize']), - str([b'BiasAdd', b'Sum', b'LeakyRelu', b'Requantize']), - str([b'BiasAdd', b'Relu', b'Sum', b'Requantize']), - str([b'BiasAdd', b'LeakyRelu', b'Sum', b'Requantize']) - ) - - def __init__(self, model, device='cpu'): + str([b"BiasAdd", b"Sum", b"Requantize"]), + str([b"BiasAdd", b"Sum", b"Relu", b"Requantize"]), + str([b"BiasAdd", b"Sum", b"LeakyRelu", b"Requantize"]), + str([b"BiasAdd", b"Relu", b"Sum", b"Requantize"]), + str([b"BiasAdd", b"LeakyRelu", b"Sum", b"Requantize"]), + ) + + def __init__(self, model, device="cpu"): """Initilization.""" super().__init__(model) self.device = device @@ -63,7 +68,7 @@ def do_transformation(self): dtypes.quint8.as_datatype_enum: dtypes.quint8, dtypes.float32.as_datatype_enum: dtypes.float32, dtypes.qint32.as_datatype_enum: dtypes.qint32, - dtypes.bfloat16.as_datatype_enum: dtypes.bfloat16 + dtypes.bfloat16.as_datatype_enum: dtypes.bfloat16, } target_nodes = self.graph_analyzer.query_fusion_pattern_nodes(self.fuse_patterns) @@ -78,62 +83,67 @@ def do_transformation(self): # QuantizedConv doesn't support {"BiasAdd", "Sum", "Activation", "Dequantize"}, # {"BiasAdd", "Activation", "Sum", "Dequantize"} and {"BiasAdd", "Sum", "Dequantize"} - if str(quantized_node.attr['fused_ops'].list.s) in self.fuse_sum_op_types_str: + if str(quantized_node.attr["fused_ops"].list.s) in self.fuse_sum_op_types_str: continue new_node = node_def_pb2.NodeDef() new_node.op = quantized_node.op - fused_ops = str(quantized_node.attr['fused_ops'].list.s).replace("Requantize", "Dequantize") - new_node.name = quantized_node.name + '_dequantize' + fused_ops = str(quantized_node.attr["fused_ops"].list.s).replace("Requantize", 
"Dequantize") + new_node.name = quantized_node.name + "_dequantize" for _, value in enumerate(quantized_node.input): new_node.input.append(value) - if 'Tinput' in quantized_node.attr: - new_node.attr["Tinput"].CopyFrom(quantized_node.attr['Tinput']) - if 'Tfilter' in quantized_node.attr: - new_node.attr["Tfilter"].CopyFrom(quantized_node.attr['Tfilter']) - if 'strides' in quantized_node.attr: - new_node.attr["strides"].CopyFrom(quantized_node.attr['strides']) - if 'padding' in quantized_node.attr: - new_node.attr["padding"].CopyFrom(quantized_node.attr['padding']) - if 'alpha' in quantized_node.attr: - new_node.attr["alpha"].CopyFrom(quantized_node.attr['alpha']) - if 'Tbias' in quantized_node.attr: - new_node.attr["Tbias"].CopyFrom(quantized_node.attr['Tbias']) - if 'data_format' in quantized_node.attr: - new_node.attr["data_format"].CopyFrom(quantized_node.attr['data_format']) - if 'is_filter_const' in quantized_node.attr: - new_node.attr["is_filter_const"].CopyFrom(quantized_node.attr['is_filter_const']) - if 'is_bias_const' in quantized_node.attr: - new_node.attr["is_bias_const"].CopyFrom(quantized_node.attr['is_bias_const']) - if 'dilations' in quantized_node.attr: - new_node.attr["dilations"].CopyFrom(quantized_node.attr['dilations']) - if 'explicit_paddings' in quantized_node.attr: - new_node.attr["explicit_paddings"].CopyFrom(quantized_node.attr['explicit_paddings']) - if 'Tdevice_inputs' in quantized_node.attr: - new_node.attr["Tdevice_inputs"].CopyFrom(quantized_node.attr['Tdevice_inputs']) - if 'Tdevice_outputs' in quantized_node.attr: - new_node.attr["Tdevice_outputs"].CopyFrom(quantized_node.attr['Tdevice_outputs']) - if 'Thost_inputs' in quantized_node.attr: - new_node.attr["Thost_inputs"].CopyFrom(quantized_node.attr['Thost_inputs']) - Helper.set_attr_type_list(new_node, 'Thost_outputs', [dequantize_node.attr['dtype'].type]) - new_node.attr["out_type"].CopyFrom(attr_value_pb2.AttrValue(type=dequantize_node.attr['dtype'].type)) - Helper.set_attr_string_list(new_node, 'fused_ops', eval(fused_ops)) - if 'Tsummand' in quantized_node.attr: - Helper.set_attr_dtype(new_node, "Tsummand", dtype_map_dict[dequantize_node.attr['dtype'].type]) - + if "Tinput" in quantized_node.attr: + new_node.attr["Tinput"].CopyFrom(quantized_node.attr["Tinput"]) + if "Tfilter" in quantized_node.attr: + new_node.attr["Tfilter"].CopyFrom(quantized_node.attr["Tfilter"]) + if "strides" in quantized_node.attr: + new_node.attr["strides"].CopyFrom(quantized_node.attr["strides"]) + if "padding" in quantized_node.attr: + new_node.attr["padding"].CopyFrom(quantized_node.attr["padding"]) + if "alpha" in quantized_node.attr: + new_node.attr["alpha"].CopyFrom(quantized_node.attr["alpha"]) + if "Tbias" in quantized_node.attr: + new_node.attr["Tbias"].CopyFrom(quantized_node.attr["Tbias"]) + if "data_format" in quantized_node.attr: + new_node.attr["data_format"].CopyFrom(quantized_node.attr["data_format"]) + if "is_filter_const" in quantized_node.attr: + new_node.attr["is_filter_const"].CopyFrom(quantized_node.attr["is_filter_const"]) + if "is_bias_const" in quantized_node.attr: + new_node.attr["is_bias_const"].CopyFrom(quantized_node.attr["is_bias_const"]) + if "dilations" in quantized_node.attr: + new_node.attr["dilations"].CopyFrom(quantized_node.attr["dilations"]) + if "explicit_paddings" in quantized_node.attr: + new_node.attr["explicit_paddings"].CopyFrom(quantized_node.attr["explicit_paddings"]) + if "Tdevice_inputs" in quantized_node.attr: + 
new_node.attr["Tdevice_inputs"].CopyFrom(quantized_node.attr["Tdevice_inputs"]) + if "Tdevice_outputs" in quantized_node.attr: + new_node.attr["Tdevice_outputs"].CopyFrom(quantized_node.attr["Tdevice_outputs"]) + if "Thost_inputs" in quantized_node.attr: + new_node.attr["Thost_inputs"].CopyFrom(quantized_node.attr["Thost_inputs"]) + Helper.set_attr_type_list(new_node, "Thost_outputs", [dequantize_node.attr["dtype"].type]) + new_node.attr["out_type"].CopyFrom(attr_value_pb2.AttrValue(type=dequantize_node.attr["dtype"].type)) + Helper.set_attr_string_list(new_node, "fused_ops", eval(fused_ops)) + if "Tsummand" in quantized_node.attr: + Helper.set_attr_dtype(new_node, "Tsummand", dtype_map_dict[dequantize_node.attr["dtype"].type]) + top_node_name = Helper.node_name_from_input(quantized_node.input[0]) if self.graph_info[dequantize_node_name].outputs: self.graph_analyzer.replace_single_node( - new_node, [top_node_name], quantized_node_name, - self.graph_info[dequantize_node_name].outputs, dequantize_node_name) + new_node, + [top_node_name], + quantized_node_name, + self.graph_info[dequantize_node_name].outputs, + dequantize_node_name, + ) self.graph_analyzer.remove_node(dequantize_node_name) else: self.graph_analyzer.remove_node(dequantize_node_name) new_node.name = dequantize_node_name self.graph_analyzer.replace_single_node( - new_node, [top_node_name], quantized_node_name, [], dequantize_node_name) + new_node, [top_node_name], quantized_node_name, [], dequantize_node_name + ) self.graph_analyzer.remove_node(quantized_node_name) diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_conv_requantize.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_conv_requantize.py index ebb59ddcd26..ad0a3a27ca9 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_conv_requantize.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_conv_requantize.py @@ -17,42 +17,54 @@ """Fuse QuantizedConv Requantize/Dequantize Graph Rewriter.""" import tensorflow as tf -from tensorflow.python.framework import tensor_util -from tensorflow.core.framework import attr_value_pb2 -from tensorflow.core.framework import node_def_pb2 -from tensorflow.python.framework import dtypes +from tensorflow.core.framework import attr_value_pb2, node_def_pb2 +from tensorflow.python.framework import dtypes, tensor_util -from ..graph_base import GraphRewriterBase from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper +from ..graph_base import GraphRewriterBase + + class FuseConvRequantizeTransformer(GraphRewriterBase): """Fuse Quantized Conv Op with the successor Requantize Op.""" - fuse_patterns = [[ - "QuantizedConv2DWithBiasAndRelu", - "QuantizedDepthwiseConv2DWithBiasAndRelu", - "QuantizedConv2DWithBias", - "QuantizedDepthwiseConv2DWithBias", - "_FusedQuantizedConv2D", - "_FusedQuantizedDepthwiseConv2D", - "_FusedQuantizedConv3D", - "_FusedQuantizedDeconv2D", - "_FusedQuantizedDeconv3D" - ], ['RequantizePerChannel', 'Requantize'], ('Dequantize',)] - + + fuse_patterns = [ + [ + "QuantizedConv2DWithBiasAndRelu", + "QuantizedDepthwiseConv2DWithBiasAndRelu", + "QuantizedConv2DWithBias", + "QuantizedDepthwiseConv2DWithBias", + "_FusedQuantizedConv2D", + "_FusedQuantizedDepthwiseConv2D", + "_FusedQuantizedConv3D", + "_FusedQuantizedDeconv2D", + "_FusedQuantizedDeconv3D", + ], + ["RequantizePerChannel", "Requantize"], + ("Dequantize",), + ] + fuse_sum_op_types = ( - 
[b'BiasAdd', b'Sum'], - [b'BiasAdd', b'Sum', b'Relu'], - [b'BiasAdd', b'Sum', b'LeakyRelu'], - [b'BiasAdd', b'Relu', b'Sum'], - [b'BiasAdd', b'LeakyRelu', b'Sum'] - ) - - sum_pattern = [["QuantizedConv2DWithBiasSumAndRelu", "QuantizedConv2DWithBiasReluAndSum", \ - "_FusedQuantizedDepthwiseConv2D", "_FusedQuantizedConv2D", "_FusedQuantizedConv3D"], - ['RequantizePerChannel', 'Requantize']] - - def __init__(self, model, device='cpu', new_api=False): + [b"BiasAdd", b"Sum"], + [b"BiasAdd", b"Sum", b"Relu"], + [b"BiasAdd", b"Sum", b"LeakyRelu"], + [b"BiasAdd", b"Relu", b"Sum"], + [b"BiasAdd", b"LeakyRelu", b"Sum"], + ) + + sum_pattern = [ + [ + "QuantizedConv2DWithBiasSumAndRelu", + "QuantizedConv2DWithBiasReluAndSum", + "_FusedQuantizedDepthwiseConv2D", + "_FusedQuantizedConv2D", + "_FusedQuantizedConv3D", + ], + ["RequantizePerChannel", "Requantize"], + ] + + def __init__(self, model, device="cpu", new_api=False): """Initilization.""" super().__init__(model) self.device = device @@ -80,7 +92,7 @@ def do_transformation(self): dtypes.qint8.as_datatype_enum: dtypes.qint8, dtypes.quint8.as_datatype_enum: dtypes.quint8, dtypes.float32.as_datatype_enum: dtypes.float32, - dtypes.qint32.as_datatype_enum: dtypes.qint32 + dtypes.qint32.as_datatype_enum: dtypes.qint32, } target_nodes = self.graph_analyzer.query_fusion_pattern_nodes(self.fuse_patterns) @@ -89,17 +101,31 @@ def do_transformation(self): quantized_node = self.graph_info[quantized_node_name].node if not self.new_api and quantized_node.op == "QuantizedDepthwiseConv2DWithBias": continue - if i[-1][0] in ('_FusedQuantizedDepthwiseConv2D', '_FusedQuantizedConv2D', '_FusedQuantizedConv3D', \ - "_FusedQuantizedDeconv2D", "_FusedQuantizedDeconv3D"): - if str(quantized_node.attr['fused_ops'].list.s).find('Sum') != -1: + if i[-1][0] in ( + "_FusedQuantizedDepthwiseConv2D", + "_FusedQuantizedConv2D", + "_FusedQuantizedConv3D", + "_FusedQuantizedDeconv2D", + "_FusedQuantizedDeconv3D", + ): + if str(quantized_node.attr["fused_ops"].list.s).find("Sum") != -1: continue - #else: + # else: # print(quantized_node.attr['fused_ops'].list.s) requantize_node_name = i[1] requantize_node = self.graph_info[requantize_node_name].node - if i[-1][-1] == 'Dequantize' and self.new_api and \ - i[0] in ('_FusedQuantizedDepthwiseConv2D', '_FusedQuantizedConv2D', '_FusedQuantizedConv3D', \ - "_FusedQuantizedDeconv2D", "_FusedQuantizedDeconv3D"): + if ( + i[-1][-1] == "Dequantize" + and self.new_api + and i[0] + in ( + "_FusedQuantizedDepthwiseConv2D", + "_FusedQuantizedConv2D", + "_FusedQuantizedConv3D", + "_FusedQuantizedDeconv2D", + "_FusedQuantizedDeconv3D", + ) + ): dequantize_node_name = i[2] else: dequantize_node_name = None @@ -110,43 +136,47 @@ def do_transformation(self): new_node = node_def_pb2.NodeDef() if self.new_api: - if i[-1][0] == 'QuantizedConv2DWithBiasAndRelu': - new_node.op = '_FusedQuantizedConv2D' - self.fused_ops= [b"BiasAdd", b"Relu", b"Requantize"] - self.output_types= [ - requantize_node.attr['out_type'].type, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum] - elif i[-1][0] == 'QuantizedConv2DWithBias': - new_node.op = '_FusedQuantizedConv2D' + if i[-1][0] == "QuantizedConv2DWithBiasAndRelu": + new_node.op = "_FusedQuantizedConv2D" + self.fused_ops = [b"BiasAdd", b"Relu", b"Requantize"] + self.output_types = [ + requantize_node.attr["out_type"].type, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ] + elif i[-1][0] == "QuantizedConv2DWithBias": + new_node.op = "_FusedQuantizedConv2D" self.fused_ops = 
[b"BiasAdd", b"Requantize"] self.output_types = [ - requantize_node.attr['out_type'].type, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum] - elif i[-1][0] == 'QuantizedDepthwiseConv2DWithBias': - new_node.op = '_FusedQuantizedDepthwiseConv2D' + requantize_node.attr["out_type"].type, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ] + elif i[-1][0] == "QuantizedDepthwiseConv2DWithBias": + new_node.op = "_FusedQuantizedDepthwiseConv2D" self.fused_ops = [b"BiasAdd", b"Requantize"] self.output_types = [ - requantize_node.attr['out_type'].type, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum] - elif i[-1][0] == 'QuantizedDepthwiseConv2DWithBiasAndRelu': - new_node.op = '_FusedQuantizedDepthwiseConv2D' - self.fused_ops= [b"BiasAdd", b"Relu", b"Requantize"] - self.output_types= [ - requantize_node.attr['out_type'].type, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum] - elif quantized_node_op == '_FusedQuantizedConv2D': - new_node.op = '_FusedQuantizedConv2D' - elif quantized_node_op == '_FusedQuantizedDepthwiseConv2D': - new_node.op = '_FusedQuantizedDepthwiseConv2D' - elif quantized_node_op == '_FusedQuantizedConv3D': - new_node.op = '_FusedQuantizedConv3D' - elif quantized_node_op == '_FusedQuantizedDeconv2D': + requantize_node.attr["out_type"].type, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ] + elif i[-1][0] == "QuantizedDepthwiseConv2DWithBiasAndRelu": + new_node.op = "_FusedQuantizedDepthwiseConv2D" + self.fused_ops = [b"BiasAdd", b"Relu", b"Requantize"] + self.output_types = [ + requantize_node.attr["out_type"].type, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ] + elif quantized_node_op == "_FusedQuantizedConv2D": + new_node.op = "_FusedQuantizedConv2D" + elif quantized_node_op == "_FusedQuantizedDepthwiseConv2D": + new_node.op = "_FusedQuantizedDepthwiseConv2D" + elif quantized_node_op == "_FusedQuantizedConv3D": + new_node.op = "_FusedQuantizedConv3D" + elif quantized_node_op == "_FusedQuantizedDeconv2D": new_node.op = "_FusedQuantizedDeconv2D" - elif quantized_node_op == '_FusedQuantizedDeconv3D': + elif quantized_node_op == "_FusedQuantizedDeconv3D": new_node.op = "_FusedQuantizedDeconv3D" else: new_node.op = quantized_node_op + "AndRequantize" @@ -154,23 +184,23 @@ def do_transformation(self): for _, value in enumerate(quantized_node.input): new_node.input.append(value) - if 'Tinput' in quantized_node.attr: - new_node.attr["Tinput"].CopyFrom(quantized_node.attr['Tinput']) - if 'Tfilter' in quantized_node.attr: - new_node.attr["Tfilter"].CopyFrom(quantized_node.attr['Tfilter']) - if 'strides' in quantized_node.attr: - new_node.attr["strides"].CopyFrom(quantized_node.attr['strides']) - if 'padding' in quantized_node.attr: - new_node.attr["padding"].CopyFrom(quantized_node.attr['padding']) - if 'alpha' in quantized_node.attr: - new_node.attr["alpha"].CopyFrom(quantized_node.attr['alpha']) - if 'Tsummand' in quantized_node.attr: - new_node.attr["Tsummand"].CopyFrom(quantized_node.attr['Tsummand']) - if 'data_format' in quantized_node.attr: - new_node.attr["data_format"].CopyFrom(quantized_node.attr['data_format']) + if "Tinput" in quantized_node.attr: + new_node.attr["Tinput"].CopyFrom(quantized_node.attr["Tinput"]) + if "Tfilter" in quantized_node.attr: + new_node.attr["Tfilter"].CopyFrom(quantized_node.attr["Tfilter"]) + if "strides" in quantized_node.attr: + new_node.attr["strides"].CopyFrom(quantized_node.attr["strides"]) + if "padding" in 
quantized_node.attr: + new_node.attr["padding"].CopyFrom(quantized_node.attr["padding"]) + if "alpha" in quantized_node.attr: + new_node.attr["alpha"].CopyFrom(quantized_node.attr["alpha"]) + if "Tsummand" in quantized_node.attr: + new_node.attr["Tsummand"].CopyFrom(quantized_node.attr["Tsummand"]) + if "data_format" in quantized_node.attr: + new_node.attr["data_format"].CopyFrom(quantized_node.attr["data_format"]) parent_node_name = Helper.node_name_from_input(quantized_node.input[0]) - if new_node.op in ('_FusedQuantizedDeconv2D', '_FusedQuantizedDeconv3D'): + if new_node.op in ("_FusedQuantizedDeconv2D", "_FusedQuantizedDeconv3D"): max_filter_node = self.graph_info[new_node.input[-3]].node min_filter_node = self.graph_info[new_node.input[-4]].node else: @@ -180,318 +210,448 @@ def do_transformation(self): new_node.input.append(requested_output_min_name) new_node.input.append(requested_output_max_name) - if (last_node.op.find('Requantize') != -1 or \ - ((last_node.op.find('QuantizeV2') != -1 or \ - last_node.op.find('QuantizedConv2D') != -1))) and \ - len(quantized_node.attr['fused_ops'].list.s) > 0: + if ( + last_node.op.find("Requantize") != -1 + or ((last_node.op.find("QuantizeV2") != -1 or last_node.op.find("QuantizedConv2D") != -1)) + ) and len(quantized_node.attr["fused_ops"].list.s) > 0: bias_node = self.graph_info[new_node.input[2]].node max_input_node = self.graph_info[last_node.input[-1]].node min_input_node = self.graph_info[last_node.input[-2]].node - min_input = (min_input_node.attr['value'].tensor.float_val)[0] - max_input = (max_input_node.attr['value'].tensor.float_val)[0] - if 'Depthwise' in quantized_node_op or requantize_node.op.find('PerChannel') != -1: - channel_size = max_filter_node.attr['value'].tensor.tensor_shape.dim[0].size - max_filter_tensor = tensor_util.MakeNdarray( - min_filter_node.attr['value'].tensor) - min_filter_tensor = tensor_util.MakeNdarray( - min_filter_node.attr['value'].tensor) + min_input = (min_input_node.attr["value"].tensor.float_val)[0] + max_input = (max_input_node.attr["value"].tensor.float_val)[0] + if "Depthwise" in quantized_node_op or requantize_node.op.find("PerChannel") != -1: + channel_size = max_filter_node.attr["value"].tensor.tensor_shape.dim[0].size + max_filter_tensor = tensor_util.MakeNdarray(min_filter_node.attr["value"].tensor) + min_filter_tensor = tensor_util.MakeNdarray(min_filter_node.attr["value"].tensor) else: channel_size = 1 max_filter_tensor = [] min_filter_tensor = [] - max_filter_tensor.append((max_filter_node.attr['value'].tensor.float_val)[0]) - min_filter_tensor.append((min_filter_node.attr['value'].tensor.float_val)[0]) - bias_tensor = tensor_util.MakeNdarray( - self.graph_info[new_node.input[2]].node.attr['value'].tensor) + max_filter_tensor.append((max_filter_node.attr["value"].tensor.float_val)[0]) + min_filter_tensor.append((min_filter_node.attr["value"].tensor.float_val)[0]) + bias_tensor = tensor_util.MakeNdarray(self.graph_info[new_node.input[2]].node.attr["value"].tensor) activation_range = 127.0 if new_node.attr["Tinput"].type == dtypes.qint8 else 255.0 int32_bias = Helper.generate_int32_bias_for_conv( - bias_tensor, channel_size, max_input, min_input, - max_filter_tensor, min_filter_tensor, activation_range) - - bias_node.attr['dtype'].CopyFrom( + bias_tensor, + channel_size, + max_input, + min_input, + max_filter_tensor, + min_filter_tensor, + activation_range, + ) + + bias_node.attr["dtype"].CopyFrom( + attr_value_pb2.AttrValue(type=float32_type if self.device == "gpu" else qint32_type) + ) + + 
bias_node.attr["value"].CopyFrom( attr_value_pb2.AttrValue( - type=float32_type if self.device == 'gpu' else qint32_type)) - - bias_node.attr['value'].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - bias_tensor if self.device == 'gpu' else int32_bias, dtypes. - float32 if self.device == 'gpu' else dtypes.int32, bias_tensor.shape))) - - bias_node.attr['value'].tensor.dtype = float32_type \ - if self.device == 'gpu' else qint32_type - new_node.attr["Tbias"].CopyFrom(attr_value_pb2.AttrValue(type=float32_type \ - if self.device == 'gpu' else qint32_type)) + tensor=tensor_util.make_tensor_proto( + bias_tensor if self.device == "gpu" else int32_bias, + dtypes.float32 if self.device == "gpu" else dtypes.int32, + bias_tensor.shape, + ) + ) + ) + + bias_node.attr["value"].tensor.dtype = float32_type if self.device == "gpu" else qint32_type + new_node.attr["Tbias"].CopyFrom( + attr_value_pb2.AttrValue(type=float32_type if self.device == "gpu" else qint32_type) + ) else: new_node.attr["Tbias"].CopyFrom(attr_value_pb2.AttrValue(type=float32_type)) # in tf 2.10, the "padding_list" attr name changes to explicit_paddings if "padding_list" in quantized_node.attr: if not self.new_api: - new_node.attr["padding_list"].CopyFrom(quantized_node.attr['padding_list']) + new_node.attr["padding_list"].CopyFrom(quantized_node.attr["padding_list"]) elif quantized_node.attr["padding"].s == b"EXPLICIT": - new_node.attr["explicit_paddings"].CopyFrom(quantized_node.attr['padding_list']) + new_node.attr["explicit_paddings"].CopyFrom(quantized_node.attr["padding_list"]) elif "explicit_paddings" in quantized_node.attr: - new_node.attr["explicit_paddings"].CopyFrom(quantized_node.attr['explicit_paddings']) + new_node.attr["explicit_paddings"].CopyFrom(quantized_node.attr["explicit_paddings"]) if "dilations" in quantized_node.attr: - new_node.attr["dilations"].CopyFrom(quantized_node.attr['dilations']) - - if self.new_api and new_node.op in ('_FusedQuantizedConv2D', '_FusedQuantizedDepthwiseConv2D', \ - '_FusedQuantizedDeconv2D', '_FusedQuantizedDeconv3D'): + new_node.attr["dilations"].CopyFrom(quantized_node.attr["dilations"]) + + if self.new_api and new_node.op in ( + "_FusedQuantizedConv2D", + "_FusedQuantizedDepthwiseConv2D", + "_FusedQuantizedDeconv2D", + "_FusedQuantizedDeconv3D", + ): input_data_type = dtypes.qint8 if new_node.attr["Tinput"].type == dtypes.qint8 else dtypes.quint8 - if new_node.op in ('_FusedQuantizedDeconv2D', '_FusedQuantizedDeconv3D'): - Helper.set_attr_type_list(new_node, 'Thost_inputs', [ - dtypes.int32.as_datatype_enum, - dtypes.qint8.as_datatype_enum, - input_data_type.as_datatype_enum, - dtypes.float32.as_datatype_enum if new_node.attr["Tbias"].type == dtypes.float32 \ - else dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum - ]) + if new_node.op in ("_FusedQuantizedDeconv2D", "_FusedQuantizedDeconv3D"): + Helper.set_attr_type_list( + new_node, + "Thost_inputs", + [ + dtypes.int32.as_datatype_enum, + dtypes.qint8.as_datatype_enum, + input_data_type.as_datatype_enum, + dtypes.float32.as_datatype_enum + if new_node.attr["Tbias"].type == dtypes.float32 + else dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + 
dtypes.float32.as_datatype_enum, + ], + ) else: - Helper.set_attr_type_list(new_node, 'Thost_inputs', [ - input_data_type.as_datatype_enum, - dtypes.qint8.as_datatype_enum, - dtypes.float32.as_datatype_enum if new_node.attr["Tbias"].type == dtypes.float32 \ - else dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum - ]) - - if quantized_node_op not in ('_FusedQuantizedConv2D', '_FusedQuantizedDepthwiseConv2D', \ - '_FusedQuantizedDeconv2D','_FusedQuantizedDeconv3D'): - Helper.set_attr_type_list(new_node, 'Thost_outputs', self.output_types) + Helper.set_attr_type_list( + new_node, + "Thost_inputs", + [ + input_data_type.as_datatype_enum, + dtypes.qint8.as_datatype_enum, + dtypes.float32.as_datatype_enum + if new_node.attr["Tbias"].type == dtypes.float32 + else dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + + if quantized_node_op not in ( + "_FusedQuantizedConv2D", + "_FusedQuantizedDepthwiseConv2D", + "_FusedQuantizedDeconv2D", + "_FusedQuantizedDeconv3D", + ): + Helper.set_attr_type_list(new_node, "Thost_outputs", self.output_types) new_node.attr["Tsummand"].CopyFrom(attr_value_pb2.AttrValue(type=self.output_types[0])) else: - if str(quantized_node.attr['fused_ops'].list.s) == str([b"BiasAdd", b"_FusedHardSwish"]): - self.fused_ops= [b"BiasAdd", b"_FusedHardSwish", b"Requantize"] - Helper.set_attr_type_list(new_node, 'Thost_outputs', [ - requantize_node.attr['out_type'].type, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum ]) - Helper.set_attr_dtype(new_node, "out_type", \ - dtype_map_dict[requantize_node.attr['out_type'].type]) - Helper.set_attr_dtype(new_node, "Tsummand", \ - dtype_map_dict[requantize_node.attr['out_type'].type]) - elif str(quantized_node.attr['fused_ops'].list.s) == str([b"BiasAdd", b"_FusedSwish"]): - self.fused_ops= [b"BiasAdd", b"_FusedSwish", b"Requantize"] - Helper.set_attr_type_list(new_node, 'Thost_outputs', [ - requantize_node.attr['out_type'].type, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum ]) - Helper.set_attr_dtype(new_node, "out_type", \ - dtype_map_dict[requantize_node.attr['out_type'].type]) - Helper.set_attr_dtype(new_node, "Tsummand", \ - dtype_map_dict[requantize_node.attr['out_type'].type]) - elif str(quantized_node.attr['fused_ops'].list.s) == str([b"BiasAdd", b"Relu"]): - self.fused_ops= [b"BiasAdd", b"Relu", b"Requantize"] - Helper.set_attr_type_list(new_node, 'Thost_outputs', [ - requantize_node.attr['out_type'].type, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum ]) - Helper.set_attr_dtype(new_node, "out_type", \ - dtype_map_dict[requantize_node.attr['out_type'].type]) - Helper.set_attr_dtype(new_node, "Tsummand", \ - dtype_map_dict[requantize_node.attr['out_type'].type]) - elif str(quantized_node.attr['fused_ops'].list.s) == str([b"BiasAdd", b"LeakyRelu"]): - self.fused_ops= [b"BiasAdd", b"LeakyRelu", b"Requantize"] - Helper.set_attr_type_list(new_node, 'Thost_outputs', [ - requantize_node.attr['out_type'].type, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum ]) - Helper.set_attr_dtype(new_node, "out_type", \ - dtype_map_dict[requantize_node.attr['out_type'].type]) - 
Helper.set_attr_dtype(new_node, "Tsummand", \ - dtype_map_dict[requantize_node.attr['out_type'].type]) - elif str(quantized_node.attr['fused_ops'].list.s) == str([b"BiasAdd", b"Elu"]): - self.fused_ops= [b"BiasAdd", b"Elu", b"Requantize"] - Helper.set_attr_type_list(new_node, 'Thost_outputs', [ - requantize_node.attr['out_type'].type, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum ]) - Helper.set_attr_dtype(new_node, "out_type", \ - dtype_map_dict[requantize_node.attr['out_type'].type]) - Helper.set_attr_dtype(new_node, "Tsummand", \ - dtype_map_dict[requantize_node.attr['out_type'].type]) - elif str(quantized_node.attr['fused_ops'].list.s) == str([b"BiasAdd", b"Sigmoid"]): - self.fused_ops= [b"BiasAdd", b"Sigmoid", b"Requantize"] - Helper.set_attr_type_list(new_node, 'Thost_outputs', [ - requantize_node.attr['out_type'].type, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum ]) - Helper.set_attr_dtype(new_node, "out_type", \ - dtype_map_dict[requantize_node.attr['out_type'].type]) - Helper.set_attr_dtype(new_node, "Tsummand", \ - dtype_map_dict[requantize_node.attr['out_type'].type]) - elif str(quantized_node.attr['fused_ops'].list.s) == str([b"BiasAdd"]): - self.fused_ops= [b"BiasAdd", b"Requantize"] - Helper.set_attr_type_list(new_node, 'Thost_outputs', [ - requantize_node.attr['out_type'].type, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum ]) - Helper.set_attr_dtype(new_node, "out_type", \ - dtype_map_dict[requantize_node.attr['out_type'].type]) - if new_node.op not in ('_FusedQuantizedDeconv2D', '_FusedQuantizedDeconv3D'): - Helper.set_attr_dtype(new_node, "Tsummand", \ - dtype_map_dict[requantize_node.attr['out_type'].type]) - elif len(quantized_node.attr['fused_ops'].list.s) == 0: - if new_node.op in ('_FusedQuantizedDeconv2D', '_FusedQuantizedDeconv3D'): - Helper.set_attr_type_list(new_node, 'Thost_inputs', [ - dtypes.int32.as_datatype_enum, - dtypes.qint8.as_datatype_enum, - input_data_type.as_datatype_enum, + if str(quantized_node.attr["fused_ops"].list.s) == str([b"BiasAdd", b"_FusedHardSwish"]): + self.fused_ops = [b"BiasAdd", b"_FusedHardSwish", b"Requantize"] + Helper.set_attr_type_list( + new_node, + "Thost_outputs", + [ + requantize_node.attr["out_type"].type, dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum, + ], + ) + Helper.set_attr_dtype( + new_node, "out_type", dtype_map_dict[requantize_node.attr["out_type"].type] + ) + Helper.set_attr_dtype( + new_node, "Tsummand", dtype_map_dict[requantize_node.attr["out_type"].type] + ) + elif str(quantized_node.attr["fused_ops"].list.s) == str([b"BiasAdd", b"_FusedSwish"]): + self.fused_ops = [b"BiasAdd", b"_FusedSwish", b"Requantize"] + Helper.set_attr_type_list( + new_node, + "Thost_outputs", + [ + requantize_node.attr["out_type"].type, dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum, + ], + ) + Helper.set_attr_dtype( + new_node, "out_type", dtype_map_dict[requantize_node.attr["out_type"].type] + ) + Helper.set_attr_dtype( + new_node, "Tsummand", dtype_map_dict[requantize_node.attr["out_type"].type] + ) + elif str(quantized_node.attr["fused_ops"].list.s) == str([b"BiasAdd", b"Relu"]): + self.fused_ops = [b"BiasAdd", b"Relu", b"Requantize"] + Helper.set_attr_type_list( + new_node, + "Thost_outputs", + [ + requantize_node.attr["out_type"].type, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + Helper.set_attr_dtype( + new_node, "out_type", dtype_map_dict[requantize_node.attr["out_type"].type] + ) + 
Helper.set_attr_dtype( + new_node, "Tsummand", dtype_map_dict[requantize_node.attr["out_type"].type] + ) + elif str(quantized_node.attr["fused_ops"].list.s) == str([b"BiasAdd", b"LeakyRelu"]): + self.fused_ops = [b"BiasAdd", b"LeakyRelu", b"Requantize"] + Helper.set_attr_type_list( + new_node, + "Thost_outputs", + [ + requantize_node.attr["out_type"].type, dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum - ]) - else: - Helper.set_attr_type_list(new_node, 'Thost_inputs', [ - input_data_type.as_datatype_enum, - dtypes.qint8.as_datatype_enum, - #dtypes.float32.as_datatype_enum if new_node.attr["Tbias"].type == dtypes.float32 \ - # else dtypes.qint32.as_datatype_enum, dtypes.float32.as_datatype_enum, + ], + ) + Helper.set_attr_dtype( + new_node, "out_type", dtype_map_dict[requantize_node.attr["out_type"].type] + ) + Helper.set_attr_dtype( + new_node, "Tsummand", dtype_map_dict[requantize_node.attr["out_type"].type] + ) + elif str(quantized_node.attr["fused_ops"].list.s) == str([b"BiasAdd", b"Elu"]): + self.fused_ops = [b"BiasAdd", b"Elu", b"Requantize"] + Helper.set_attr_type_list( + new_node, + "Thost_outputs", + [ + requantize_node.attr["out_type"].type, dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum, + ], + ) + Helper.set_attr_dtype( + new_node, "out_type", dtype_map_dict[requantize_node.attr["out_type"].type] + ) + Helper.set_attr_dtype( + new_node, "Tsummand", dtype_map_dict[requantize_node.attr["out_type"].type] + ) + elif str(quantized_node.attr["fused_ops"].list.s) == str([b"BiasAdd", b"Sigmoid"]): + self.fused_ops = [b"BiasAdd", b"Sigmoid", b"Requantize"] + Helper.set_attr_type_list( + new_node, + "Thost_outputs", + [ + requantize_node.attr["out_type"].type, dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum - ]) - self.fused_ops= [b"Requantize"] - Helper.set_attr_type_list(new_node, 'Thost_outputs', [ - requantize_node.attr['out_type'].type, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum ]) - Helper.set_attr_dtype(new_node, "out_type", \ - dtype_map_dict[requantize_node.attr['out_type'].type]) - if new_node.op not in ('_FusedQuantizedDeconv2D', '_FusedQuantizedDeconv3D'): - Helper.set_attr_dtype(new_node, "Tsummand", \ - dtype_map_dict[requantize_node.attr['out_type'].type]) - Helper.set_attr_string_list(new_node, 'fused_ops', self.fused_ops) + ], + ) + Helper.set_attr_dtype( + new_node, "out_type", dtype_map_dict[requantize_node.attr["out_type"].type] + ) + Helper.set_attr_dtype( + new_node, "Tsummand", dtype_map_dict[requantize_node.attr["out_type"].type] + ) + elif str(quantized_node.attr["fused_ops"].list.s) == str([b"BiasAdd"]): + self.fused_ops = [b"BiasAdd", b"Requantize"] + Helper.set_attr_type_list( + new_node, + "Thost_outputs", + [ + requantize_node.attr["out_type"].type, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + Helper.set_attr_dtype( + new_node, "out_type", dtype_map_dict[requantize_node.attr["out_type"].type] + ) + if new_node.op not in ("_FusedQuantizedDeconv2D", "_FusedQuantizedDeconv3D"): + Helper.set_attr_dtype( + new_node, "Tsummand", dtype_map_dict[requantize_node.attr["out_type"].type] + ) + elif len(quantized_node.attr["fused_ops"].list.s) == 0: + if new_node.op in ("_FusedQuantizedDeconv2D", "_FusedQuantizedDeconv3D"): + Helper.set_attr_type_list( + new_node, + "Thost_inputs", + [ + dtypes.int32.as_datatype_enum, + dtypes.qint8.as_datatype_enum, + input_data_type.as_datatype_enum, + dtypes.float32.as_datatype_enum, + 
dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + else: + Helper.set_attr_type_list( + new_node, + "Thost_inputs", + [ + input_data_type.as_datatype_enum, + dtypes.qint8.as_datatype_enum, + # dtypes.float32.as_datatype_enum if new_node.attr["Tbias"].type == dtypes.float32 \ + # else dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + self.fused_ops = [b"Requantize"] + Helper.set_attr_type_list( + new_node, + "Thost_outputs", + [ + requantize_node.attr["out_type"].type, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + Helper.set_attr_dtype( + new_node, "out_type", dtype_map_dict[requantize_node.attr["out_type"].type] + ) + if new_node.op not in ("_FusedQuantizedDeconv2D", "_FusedQuantizedDeconv3D"): + Helper.set_attr_dtype( + new_node, "Tsummand", dtype_map_dict[requantize_node.attr["out_type"].type] + ) + Helper.set_attr_string_list(new_node, "fused_ops", self.fused_ops) if "_kernel" in quantized_node.attr: - new_node.attr["_kernel"].CopyFrom(quantized_node.attr['_kernel']) + new_node.attr["_kernel"].CopyFrom(quantized_node.attr["_kernel"]) - if new_node.op in ('_FusedQuantizedConv3D'): + if new_node.op in ("_FusedQuantizedConv3D"): input_data_type = dtypes.qint8 if new_node.attr["Tinput"].type == dtypes.qint8 else dtypes.quint8 - if len(quantized_node.attr['fused_ops'].list.s) == 0: - Helper.set_attr_string_list(new_node, 'fused_ops', [ b'Requantize']) - Helper.set_attr_type_list(new_node, 'Thost_inputs', [ - input_data_type.as_datatype_enum, - dtypes.qint8.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) - elif str(quantized_node.attr['fused_ops'].list.s) == str([b"BiasAdd"]): - Helper.set_attr_string_list(new_node, 'fused_ops', [b'BiasAdd', b'Requantize']) - Helper.set_attr_type_list(new_node, 'Thost_inputs', [ - input_data_type.as_datatype_enum, - dtypes.qint8.as_datatype_enum, - dtypes.float32.as_datatype_enum if new_node.attr["Tbias"].type == dtypes.float32 else \ - dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) - elif str(quantized_node.attr['fused_ops'].list.s) == str([b"BiasAdd", b"Relu"]): - Helper.set_attr_string_list(new_node, 'fused_ops', [b'BiasAdd', b'Relu', b'Requantize']) - Helper.set_attr_type_list(new_node, 'Thost_inputs', [ - input_data_type.as_datatype_enum, - dtypes.qint8.as_datatype_enum, - dtypes.float32.as_datatype_enum if new_node.attr["Tbias"].type == dtypes.float32 else \ - dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) - elif str(quantized_node.attr['fused_ops'].list.s) == str([b"BiasAdd", b"LeakyRelu"]): - Helper.set_attr_string_list(new_node, 'fused_ops', [b'BiasAdd', b'LeakyRelu', b'Requantize']) - 
Helper.set_attr_type_list(new_node, 'Thost_inputs', [ - input_data_type.as_datatype_enum, - dtypes.qint8.as_datatype_enum, - dtypes.float32.as_datatype_enum if new_node.attr["Tbias"].type == dtypes.float32 else \ - dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) - elif str(quantized_node.attr['fused_ops'].list.s) == str([b"BiasAdd", b"Elu"]): - Helper.set_attr_string_list(new_node, 'fused_ops', [b'BiasAdd', b'Elu', b'Requantize']) - Helper.set_attr_type_list(new_node, 'Thost_inputs', [ - input_data_type.as_datatype_enum, - dtypes.qint8.as_datatype_enum, - dtypes.float32.as_datatype_enum if new_node.attr["Tbias"].type == dtypes.float32 else \ - dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) - - Helper.set_attr_type_list(new_node, 'Thost_outputs', [ - requantize_node.attr['out_type'].type, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum ]) - Helper.set_attr_dtype(new_node, "out_type", \ - dtype_map_dict[requantize_node.attr['out_type'].type]) - Helper.set_attr_dtype(new_node, "Tsummand", \ - dtype_map_dict[requantize_node.attr['out_type'].type]) - - if quantized_node.op == "QuantizedConv2D" or \ - quantized_node.op == "QuantizedConv2DWithBias" or \ - quantized_node.op == "QuantizedDepthwiseConv2D" or \ - quantized_node.op == "QuantizedDepthwiseConv2DWithBias" or \ - ('alpha' in quantized_node.attr and quantized_node.attr['alpha'].f > 0): + if len(quantized_node.attr["fused_ops"].list.s) == 0: + Helper.set_attr_string_list(new_node, "fused_ops", [b"Requantize"]) + Helper.set_attr_type_list( + new_node, + "Thost_inputs", + [ + input_data_type.as_datatype_enum, + dtypes.qint8.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + elif str(quantized_node.attr["fused_ops"].list.s) == str([b"BiasAdd"]): + Helper.set_attr_string_list(new_node, "fused_ops", [b"BiasAdd", b"Requantize"]) + Helper.set_attr_type_list( + new_node, + "Thost_inputs", + [ + input_data_type.as_datatype_enum, + dtypes.qint8.as_datatype_enum, + dtypes.float32.as_datatype_enum + if new_node.attr["Tbias"].type == dtypes.float32 + else dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + elif str(quantized_node.attr["fused_ops"].list.s) == str([b"BiasAdd", b"Relu"]): + Helper.set_attr_string_list(new_node, "fused_ops", [b"BiasAdd", b"Relu", b"Requantize"]) + Helper.set_attr_type_list( + new_node, + "Thost_inputs", + [ + input_data_type.as_datatype_enum, + dtypes.qint8.as_datatype_enum, + dtypes.float32.as_datatype_enum + if new_node.attr["Tbias"].type == dtypes.float32 + else dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + elif 
str(quantized_node.attr["fused_ops"].list.s) == str([b"BiasAdd", b"LeakyRelu"]): + Helper.set_attr_string_list(new_node, "fused_ops", [b"BiasAdd", b"LeakyRelu", b"Requantize"]) + Helper.set_attr_type_list( + new_node, + "Thost_inputs", + [ + input_data_type.as_datatype_enum, + dtypes.qint8.as_datatype_enum, + dtypes.float32.as_datatype_enum + if new_node.attr["Tbias"].type == dtypes.float32 + else dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + elif str(quantized_node.attr["fused_ops"].list.s) == str([b"BiasAdd", b"Elu"]): + Helper.set_attr_string_list(new_node, "fused_ops", [b"BiasAdd", b"Elu", b"Requantize"]) + Helper.set_attr_type_list( + new_node, + "Thost_inputs", + [ + input_data_type.as_datatype_enum, + dtypes.qint8.as_datatype_enum, + dtypes.float32.as_datatype_enum + if new_node.attr["Tbias"].type == dtypes.float32 + else dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + + Helper.set_attr_type_list( + new_node, + "Thost_outputs", + [ + requantize_node.attr["out_type"].type, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + Helper.set_attr_dtype(new_node, "out_type", dtype_map_dict[requantize_node.attr["out_type"].type]) + Helper.set_attr_dtype(new_node, "Tsummand", dtype_map_dict[requantize_node.attr["out_type"].type]) + + if ( + quantized_node.op == "QuantizedConv2D" + or quantized_node.op == "QuantizedConv2DWithBias" + or quantized_node.op == "QuantizedDepthwiseConv2D" + or quantized_node.op == "QuantizedDepthwiseConv2DWithBias" + or ("alpha" in quantized_node.attr and quantized_node.attr["alpha"].f > 0) + ): new_node.attr["out_type"].CopyFrom(attr_value_pb2.AttrValue(type=int8_type)) - elif quantized_node.op == "QuantizedConv2DWithBiasAndRelu" or \ - quantized_node.op == "QuantizedDepthwiseConv2DWithBiasAndRelu": + elif ( + quantized_node.op == "QuantizedConv2DWithBiasAndRelu" + or quantized_node.op == "QuantizedDepthwiseConv2DWithBiasAndRelu" + ): new_node.attr["out_type"].CopyFrom(attr_value_pb2.AttrValue(type=uint8_type)) - elif new_node.op not in ('_FusedQuantizedConv2D', '_FusedQuantizedDepthwiseConv2D', \ - '_FusedQuantizedConv3D', '_FusedQuantizedDeconv2D', '_FusedQuantizedDeconv3D'): + elif new_node.op not in ( + "_FusedQuantizedConv2D", + "_FusedQuantizedDepthwiseConv2D", + "_FusedQuantizedConv3D", + "_FusedQuantizedDeconv2D", + "_FusedQuantizedDeconv3D", + ): new_node.attr["out_type"].CopyFrom(attr_value_pb2.AttrValue(type=uint8_type)) elif new_node.op in ("_FusedQuantizedDeconv2D", "_FusedQuantizedDeconv3D"): new_node.attr["out_type"].CopyFrom(attr_value_pb2.AttrValue(type=int8_type)) old_input_name = dequantize_node_name if dequantize_node_name else requantize_node_name self.graph_analyzer.replace_single_node( - new_node, [parent_node_name], quantized_node_name, - self.graph_info[old_input_name].outputs, old_input_name) + new_node, + [parent_node_name], + quantized_node_name, + self.graph_info[old_input_name].outputs, + old_input_name, + ) self.graph_analyzer.remove_node(quantized_node_name) target_nodes = self.graph_analyzer.query_fusion_pattern_nodes(self.sum_pattern) for i in target_nodes: quantized_node_name = i[0] quantized_node = 
self.graph_info[quantized_node_name].node - if i[-1][0] in ('_FusedQuantizedDepthwiseConv2D', '_FusedQuantizedConv2D', '_FusedQuantizedConv3D'): - if quantized_node.attr['fused_ops'].list.s not in self.fuse_sum_op_types: - continue - #else: + if i[-1][0] in ("_FusedQuantizedDepthwiseConv2D", "_FusedQuantizedConv2D", "_FusedQuantizedConv3D"): + if quantized_node.attr["fused_ops"].list.s not in self.fuse_sum_op_types: + continue + # else: # print(quantized_node.attr['fused_ops'].list.s) requantize_node_name = i[1] @@ -504,15 +664,15 @@ def do_transformation(self): new_node = node_def_pb2.NodeDef() if self.new_api: - if i[-1][0] in ('QuantizedConv2DWithBiasSumAndRelu',) : - new_node.op = '_FusedQuantizedConv2D' - self.fused_ops= [b"BiasAdd", b"Sum", b"Relu", b"Requantize"] - elif i[-1][0] == '_FusedQuantizedConv2D': - new_node.op = '_FusedQuantizedConv2D' - elif quantized_node_op == '_FusedQuantizedDepthwiseConv2D': - new_node.op = '_FusedQuantizedDepthwiseConv2D' - elif i[-1][0] == '_FusedQuantizedConv3D': - new_node.op = '_FusedQuantizedConv3D' + if i[-1][0] in ("QuantizedConv2DWithBiasSumAndRelu",): + new_node.op = "_FusedQuantizedConv2D" + self.fused_ops = [b"BiasAdd", b"Sum", b"Relu", b"Requantize"] + elif i[-1][0] == "_FusedQuantizedConv2D": + new_node.op = "_FusedQuantizedConv2D" + elif quantized_node_op == "_FusedQuantizedDepthwiseConv2D": + new_node.op = "_FusedQuantizedDepthwiseConv2D" + elif i[-1][0] == "_FusedQuantizedConv3D": + new_node.op = "_FusedQuantizedConv3D" else: new_node.op = quantized_node_op + "AndRequantize" @@ -521,41 +681,39 @@ def do_transformation(self): for _, value in enumerate(quantized_node.input[:-1]): new_node.input.append(value) - new_node.attr["Tinput"].CopyFrom(quantized_node.attr['Tinput']) - new_node.attr["Tfilter"].CopyFrom(quantized_node.attr['Tfilter']) - new_node.attr["strides"].CopyFrom(quantized_node.attr['strides']) - new_node.attr["padding"].CopyFrom(quantized_node.attr['padding']) - #new_node.attr["Tsummand"].CopyFrom(quantized_node.attr['Tsummand']) + new_node.attr["Tinput"].CopyFrom(quantized_node.attr["Tinput"]) + new_node.attr["Tfilter"].CopyFrom(quantized_node.attr["Tfilter"]) + new_node.attr["strides"].CopyFrom(quantized_node.attr["strides"]) + new_node.attr["padding"].CopyFrom(quantized_node.attr["padding"]) + # new_node.attr["Tsummand"].CopyFrom(quantized_node.attr['Tsummand']) new_node.input.append(requested_output_min_name) new_node.input.append(requested_output_max_name) deq_node = self.graph_info[Helper.node_name_from_input(quantized_node.input[-1])].node - if deq_node.op != 'Dequantize' or deq_node.op.find("Quantize") != -1: + if deq_node.op != "Dequantize" or deq_node.op.find("Quantize") != -1: continue - - if deq_node.op == 'Dequantize': - original_summand_node = self.graph_info[Helper.node_name_from_input( - deq_node.input[0])].node + + if deq_node.op == "Dequantize": + original_summand_node = self.graph_info[Helper.node_name_from_input(deq_node.input[0])].node else: original_summand_node = deq_node - summand_op_type = uint8_type if dtypes.as_dtype( - deq_node.attr["T"].type) == uint8_type else int8_type + summand_op_type = uint8_type if dtypes.as_dtype(deq_node.attr["T"].type) == uint8_type else int8_type for j in range(3): - new_node.input.append(original_summand_node.name + ':{}'.format(j)) + new_node.input.append(original_summand_node.name + ":{}".format(j)) # in tf 2.10, the "padding_list" attr name changes to explicit_paddings if "padding_list" in quantized_node.attr: if not self.new_api: - 
new_node.attr["padding_list"].CopyFrom(quantized_node.attr['padding_list']) + new_node.attr["padding_list"].CopyFrom(quantized_node.attr["padding_list"]) elif quantized_node.attr["padding"].s == b"EXPLICIT": - new_node.attr["explicit_paddings"].CopyFrom(quantized_node.attr['padding_list']) + new_node.attr["explicit_paddings"].CopyFrom(quantized_node.attr["padding_list"]) elif "explicit_paddings" in quantized_node.attr: - new_node.attr["explicit_paddings"].CopyFrom(quantized_node.attr['explicit_paddings']) + new_node.attr["explicit_paddings"].CopyFrom(quantized_node.attr["explicit_paddings"]) if "dilations" in quantized_node.attr: - new_node.attr["dilations"].CopyFrom(quantized_node.attr['dilations']) + new_node.attr["dilations"].CopyFrom(quantized_node.attr["dilations"]) - if "alpha" in quantized_node.attr and quantized_node.attr['alpha'].f > 0: + if "alpha" in quantized_node.attr and quantized_node.attr["alpha"].f > 0: new_node.attr["out_type"].CopyFrom(attr_value_pb2.AttrValue(type=int8_type)) else: new_node.attr["out_type"].CopyFrom(attr_value_pb2.AttrValue(type=uint8_type)) @@ -563,7 +721,7 @@ def do_transformation(self): new_node.attr["Tbias"].CopyFrom(attr_value_pb2.AttrValue(type=float32_type)) new_node.attr["Tsummand"].CopyFrom(attr_value_pb2.AttrValue(type=summand_op_type)) - if new_node.op in ('_FusedQuantizedConv2D', '_FusedQuantizedDepthwiseConv2D', '_FusedQuantizedConv3D'): + if new_node.op in ("_FusedQuantizedConv2D", "_FusedQuantizedDepthwiseConv2D", "_FusedQuantizedConv3D"): original_input = list(new_node.input) new_input = [] new_input.extend(original_input[:3]) @@ -571,65 +729,77 @@ def do_transformation(self): new_input.extend(original_input[3:7]) new_input.extend(original_input[-2:]) new_input.extend(original_input[-5:-3]) - new_node.ClearField('input') + new_node.ClearField("input") new_node.input.extend(new_input) input_data_type = dtypes.qint8 if new_node.attr["Tinput"].type == dtypes.qint8 else dtypes.quint8 - Helper.set_attr_type_list(new_node, 'Thost_inputs', [ - input_data_type.as_datatype_enum, - dtypes.qint8.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.quint8.as_datatype_enum if summand_op_type != int8_type else dtypes.qint8.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum - ]) - Helper.set_attr_dtype(new_node, "Tsummand", dtypes.quint8 if summand_op_type != int8_type \ - else dtypes.qint8) - if str(quantized_node.attr['fused_ops'].list.s) == str([b'BiasAdd', b'Sum', b'Relu']): - self.fused_ops = [b'BiasAdd', b'Sum', b'Relu', b'Requantize'] - elif str(quantized_node.attr['fused_ops'].list.s) == str([b'BiasAdd', b'Sum', b'LeakyRelu']): - self.fused_ops = [b'BiasAdd', b'Sum', b'LeakyRelu', b'Requantize'] - elif str(quantized_node.attr['fused_ops'].list.s) == str([b'BiasAdd', b'LeakyRelu', b'Sum']): - self.fused_ops = [b'BiasAdd', b'LeakyRelu', b'Sum', b'Requantize'] - elif str(quantized_node.attr['fused_ops'].list.s) == str([b'BiasAdd', b'Relu', b'Sum']): - self.fused_ops = [b'BiasAdd', b'Relu', b'Sum', b'Requantize'] - #Current fusion requires summand has same dtype as output if output is qint8 - Helper.set_attr_dtype(new_node, "Tsummand", \ - dtype_map_dict[requantize_node.attr['out_type'].type]) - elif str(quantized_node.attr['fused_ops'].list.s) == str([b'BiasAdd', b'Sum']): - self.fused_ops = 
[b'BiasAdd', b'Sum', b'Requantize'] - #Current fusion requires summand has same dtype as output if output is qint8 - Helper.set_attr_dtype(new_node, "Tsummand", \ - dtype_map_dict[requantize_node.attr['out_type'].type]) - Helper.set_attr_type_list(new_node, 'Thost_outputs', [ - requantize_node.attr['out_type'].type, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum ]) - Helper.set_attr_dtype(new_node, "out_type", \ - dtype_map_dict[requantize_node.attr['out_type'].type]) - Helper.set_attr_string_list(new_node, 'fused_ops', self.fused_ops) + Helper.set_attr_type_list( + new_node, + "Thost_inputs", + [ + input_data_type.as_datatype_enum, + dtypes.qint8.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.quint8.as_datatype_enum + if summand_op_type != int8_type + else dtypes.qint8.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + Helper.set_attr_dtype( + new_node, "Tsummand", dtypes.quint8 if summand_op_type != int8_type else dtypes.qint8 + ) + if str(quantized_node.attr["fused_ops"].list.s) == str([b"BiasAdd", b"Sum", b"Relu"]): + self.fused_ops = [b"BiasAdd", b"Sum", b"Relu", b"Requantize"] + elif str(quantized_node.attr["fused_ops"].list.s) == str([b"BiasAdd", b"Sum", b"LeakyRelu"]): + self.fused_ops = [b"BiasAdd", b"Sum", b"LeakyRelu", b"Requantize"] + elif str(quantized_node.attr["fused_ops"].list.s) == str([b"BiasAdd", b"LeakyRelu", b"Sum"]): + self.fused_ops = [b"BiasAdd", b"LeakyRelu", b"Sum", b"Requantize"] + elif str(quantized_node.attr["fused_ops"].list.s) == str([b"BiasAdd", b"Relu", b"Sum"]): + self.fused_ops = [b"BiasAdd", b"Relu", b"Sum", b"Requantize"] + # Current fusion requires summand has same dtype as output if output is qint8 + Helper.set_attr_dtype(new_node, "Tsummand", dtype_map_dict[requantize_node.attr["out_type"].type]) + elif str(quantized_node.attr["fused_ops"].list.s) == str([b"BiasAdd", b"Sum"]): + self.fused_ops = [b"BiasAdd", b"Sum", b"Requantize"] + # Current fusion requires summand has same dtype as output if output is qint8 + Helper.set_attr_dtype(new_node, "Tsummand", dtype_map_dict[requantize_node.attr["out_type"].type]) + Helper.set_attr_type_list( + new_node, + "Thost_outputs", + [ + requantize_node.attr["out_type"].type, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + Helper.set_attr_dtype(new_node, "out_type", dtype_map_dict[requantize_node.attr["out_type"].type]) + Helper.set_attr_string_list(new_node, "fused_ops", self.fused_ops) if not self.new_api: - if quantized_node_op == 'QuantizedConv2DWithBiasReluAndSum': - new_node.op = 'QuantizedConv2DWithBiasReluAndSumAndRequantize' + if quantized_node_op == "QuantizedConv2DWithBiasReluAndSum": + new_node.op = "QuantizedConv2DWithBiasReluAndSumAndRequantize" if "alpha" in quantized_node.attr: - new_node.attr["alpha"].CopyFrom(quantized_node.attr['alpha']) + new_node.attr["alpha"].CopyFrom(quantized_node.attr["alpha"]) elif summand_op_type == int8_type: new_node.op = "QuantizedConv2DWithBiasSignedSumAndReluAndRequantize" self.graph_analyzer.replace_single_node( - new_node, [quantized_node.input[0], original_summand_node.name], - quantized_node.name, self.graph_info[requantize_node_name].outputs, - requantize_node_name) + new_node, + [quantized_node.input[0], original_summand_node.name], + 
quantized_node.name, + self.graph_info[requantize_node_name].outputs, + requantize_node_name, + ) self.graph_analyzer.remove_node(quantized_node_name) - if deq_node.op == 'Dequantize': + if deq_node.op == "Dequantize": self.graph_analyzer.remove_node_with_single_input_output(deq_node.name) return self.graph_analyzer.dump_graph() diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_redundant_dequantize.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_redundant_dequantize.py index 6d4c6e31e24..56882f01885 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_redundant_dequantize.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_redundant_dequantize.py @@ -16,21 +16,21 @@ # limitations under the License. """Fuse QuantizedMatMul with redundant Dequantize Graph Rewriter.""" -from tensorflow.core.framework import attr_value_pb2 -from tensorflow.core.framework import node_def_pb2 +from tensorflow.core.framework import attr_value_pb2, node_def_pb2 from tensorflow.python.framework import dtypes -from ..graph_base import GraphRewriterBase from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper +from ..graph_base import GraphRewriterBase + + class FuseMatMulRedundantDequantizeTransformer(GraphRewriterBase): """Fuse _QuantizedMatMul with the successor Dequantize Op.""" - fuse_patterns = [[ - "_QuantizedMatMul", "_QuantizedBatchMatMul" - ], ['Dequantize', 'Cast']] - def __init__(self, model, device='cpu'): + fuse_patterns = [["_QuantizedMatMul", "_QuantizedBatchMatMul"], ["Dequantize", "Cast"]] + + def __init__(self, model, device="cpu"): """Initilization.""" super().__init__(model) self.device = device @@ -57,95 +57,101 @@ def do_transformation(self): if len(self.graph_info[quantized_node_name].outputs) > 3: # pragma: no cover need_drop = False for output in self.graph_info[quantized_node_name].outputs: - if self.graph_info[output].node.op != 'Dequantize': + if self.graph_info[output].node.op != "Dequantize": need_drop = True break if need_drop: continue # ignore shared output case for license-plate-recognition-barrier-0007 model - if len(self.graph_info[dequantize_node_name].outputs) == 2 and \ - self.graph_info[self.graph_info[dequantize_node_name].outputs[0]].node.op == 'Reshape' and \ - self.graph_info[self.graph_info[dequantize_node_name].outputs[1]].node.op == 'Shape': + if ( + len(self.graph_info[dequantize_node_name].outputs) == 2 + and self.graph_info[self.graph_info[dequantize_node_name].outputs[0]].node.op == "Reshape" + and self.graph_info[self.graph_info[dequantize_node_name].outputs[1]].node.op == "Shape" + ): continue new_node = node_def_pb2.NodeDef() new_node.op = quantized_node.op if dequantize_node.op == "Dequantize": - fused_ops = str(quantized_node.attr['fused_ops'].list.s).replace("Requantize", "Dequantize") - new_node.name = quantized_node.name + '_dequantize' + fused_ops = str(quantized_node.attr["fused_ops"].list.s).replace("Requantize", "Dequantize") + new_node.name = quantized_node.name + "_dequantize" for _, value in enumerate(quantized_node.input): new_node.input.append(value) - if 'input_quant_mode' in quantized_node.attr: - new_node.attr["input_quant_mode"].CopyFrom(quantized_node.attr['input_quant_mode']) - if 'output_quant_mode' in quantized_node.attr: - new_node.attr["output_quant_mode"].CopyFrom(quantized_node.attr['output_quant_mode']) - if 'leakyrelu_alpha' in 
quantized_node.attr: - new_node.attr["leakyrelu_alpha"].CopyFrom(quantized_node.attr['leakyrelu_alpha']) - if 'T1' in quantized_node.attr: - new_node.attr["T1"].CopyFrom(quantized_node.attr['T1']) - if 'T2' in quantized_node.attr: - new_node.attr["T2"].CopyFrom(quantized_node.attr['T2']) - if 'U' in quantized_node.attr: - new_node.attr["U"].CopyFrom(quantized_node.attr['U']) - if 'is_weight_const' in quantized_node.attr: - new_node.attr["is_weight_const"].CopyFrom(quantized_node.attr['is_weight_const']) - if 'is_bias_const' in quantized_node.attr: - new_node.attr["is_bias_const"].CopyFrom(quantized_node.attr['is_bias_const']) - if 'transpose_a' in quantized_node.attr: - new_node.attr["transpose_a"].CopyFrom(quantized_node.attr['transpose_a']) - if 'transpose_b' in quantized_node.attr: - new_node.attr["transpose_b"].CopyFrom(quantized_node.attr['transpose_b']) - if 'Tdevice_inputs' in quantized_node.attr: - new_node.attr["Tdevice_inputs"].CopyFrom(quantized_node.attr['Tdevice_inputs']) - if 'Tdevice_outputs' in quantized_node.attr: - new_node.attr["Tdevice_outputs"].CopyFrom(quantized_node.attr['Tdevice_outputs']) - if 'Thost_inputs' in quantized_node.attr: - new_node.attr["Thost_inputs"].CopyFrom(quantized_node.attr['Thost_inputs']) - if 'Tbias' in quantized_node.attr: - new_node.attr["Tbias"].CopyFrom(quantized_node.attr['Tbias']) - if 'adj_x' in quantized_node.attr: - new_node.attr["adj_x"].CopyFrom(quantized_node.attr['adj_x']) - if 'adj_y' in quantized_node.attr: - new_node.attr["adj_y"].CopyFrom(quantized_node.attr['adj_y']) - if 'input_quant_mode' in quantized_node.attr: - new_node.attr["input_quant_mode"].CopyFrom(quantized_node.attr['input_quant_mode']) - if 'output_quant_mode' in quantized_node.attr: - new_node.attr["output_quant_mode"].CopyFrom(quantized_node.attr['output_quant_mode']) - if 'fused_ops' in quantized_node.attr: - new_node.attr["fused_ops"].CopyFrom(quantized_node.attr['fused_ops']) + if "input_quant_mode" in quantized_node.attr: + new_node.attr["input_quant_mode"].CopyFrom(quantized_node.attr["input_quant_mode"]) + if "output_quant_mode" in quantized_node.attr: + new_node.attr["output_quant_mode"].CopyFrom(quantized_node.attr["output_quant_mode"]) + if "leakyrelu_alpha" in quantized_node.attr: + new_node.attr["leakyrelu_alpha"].CopyFrom(quantized_node.attr["leakyrelu_alpha"]) + if "T1" in quantized_node.attr: + new_node.attr["T1"].CopyFrom(quantized_node.attr["T1"]) + if "T2" in quantized_node.attr: + new_node.attr["T2"].CopyFrom(quantized_node.attr["T2"]) + if "U" in quantized_node.attr: + new_node.attr["U"].CopyFrom(quantized_node.attr["U"]) + if "is_weight_const" in quantized_node.attr: + new_node.attr["is_weight_const"].CopyFrom(quantized_node.attr["is_weight_const"]) + if "is_bias_const" in quantized_node.attr: + new_node.attr["is_bias_const"].CopyFrom(quantized_node.attr["is_bias_const"]) + if "transpose_a" in quantized_node.attr: + new_node.attr["transpose_a"].CopyFrom(quantized_node.attr["transpose_a"]) + if "transpose_b" in quantized_node.attr: + new_node.attr["transpose_b"].CopyFrom(quantized_node.attr["transpose_b"]) + if "Tdevice_inputs" in quantized_node.attr: + new_node.attr["Tdevice_inputs"].CopyFrom(quantized_node.attr["Tdevice_inputs"]) + if "Tdevice_outputs" in quantized_node.attr: + new_node.attr["Tdevice_outputs"].CopyFrom(quantized_node.attr["Tdevice_outputs"]) + if "Thost_inputs" in quantized_node.attr: + new_node.attr["Thost_inputs"].CopyFrom(quantized_node.attr["Thost_inputs"]) + if "Tbias" in quantized_node.attr: + 
new_node.attr["Tbias"].CopyFrom(quantized_node.attr["Tbias"]) + if "adj_x" in quantized_node.attr: + new_node.attr["adj_x"].CopyFrom(quantized_node.attr["adj_x"]) + if "adj_y" in quantized_node.attr: + new_node.attr["adj_y"].CopyFrom(quantized_node.attr["adj_y"]) + if "input_quant_mode" in quantized_node.attr: + new_node.attr["input_quant_mode"].CopyFrom(quantized_node.attr["input_quant_mode"]) + if "output_quant_mode" in quantized_node.attr: + new_node.attr["output_quant_mode"].CopyFrom(quantized_node.attr["output_quant_mode"]) + if "fused_ops" in quantized_node.attr: + new_node.attr["fused_ops"].CopyFrom(quantized_node.attr["fused_ops"]) # update Tbias for single MatMul withou bias case, same as Tout. if dequantize_node.op == "Dequantize": - Helper.set_attr_type_list(new_node, 'Thost_outputs', [dequantize_node.attr['dtype'].type]) - new_node.attr["Tout"].CopyFrom(attr_value_pb2.AttrValue(type=dequantize_node.attr['dtype'].type)) - if new_node.op == '_QuantizedBatchMatMul': - new_node.attr["U"].CopyFrom(attr_value_pb2.AttrValue(type=dequantize_node.attr['DstT'].type)) - if str(quantized_node.attr['fused_ops'].list.s) == str([b"Requantize"]): - new_node.attr["Tbias"].CopyFrom( \ - attr_value_pb2.AttrValue(type=dequantize_node.attr['dtype'].type)) - Helper.set_attr_string_list(new_node, 'fused_ops', eval(fused_ops)) + Helper.set_attr_type_list(new_node, "Thost_outputs", [dequantize_node.attr["dtype"].type]) + new_node.attr["Tout"].CopyFrom(attr_value_pb2.AttrValue(type=dequantize_node.attr["dtype"].type)) + if new_node.op == "_QuantizedBatchMatMul": + new_node.attr["U"].CopyFrom(attr_value_pb2.AttrValue(type=dequantize_node.attr["DstT"].type)) + if str(quantized_node.attr["fused_ops"].list.s) == str([b"Requantize"]): + new_node.attr["Tbias"].CopyFrom(attr_value_pb2.AttrValue(type=dequantize_node.attr["dtype"].type)) + Helper.set_attr_string_list(new_node, "fused_ops", eval(fused_ops)) else: - Helper.set_attr_type_list(new_node, 'Thost_outputs', [dequantize_node.attr['DstT'].type]) - new_node.attr["Tout"].CopyFrom(attr_value_pb2.AttrValue(type=dequantize_node.attr['DstT'].type)) - if new_node.op == '_QuantizedBatchMatMul': - new_node.attr["U"].CopyFrom(attr_value_pb2.AttrValue(type=dequantize_node.attr['DstT'].type)) + Helper.set_attr_type_list(new_node, "Thost_outputs", [dequantize_node.attr["DstT"].type]) + new_node.attr["Tout"].CopyFrom(attr_value_pb2.AttrValue(type=dequantize_node.attr["DstT"].type)) + if new_node.op == "_QuantizedBatchMatMul": + new_node.attr["U"].CopyFrom(attr_value_pb2.AttrValue(type=dequantize_node.attr["DstT"].type)) top_node_name = Helper.node_name_from_input(quantized_node.input[0]) if self.graph_info[dequantize_node_name].outputs: self.graph_analyzer.replace_single_node( - new_node, [top_node_name], quantized_node_name, - self.graph_info[dequantize_node_name].outputs, dequantize_node_name) + new_node, + [top_node_name], + quantized_node_name, + self.graph_info[dequantize_node_name].outputs, + dequantize_node_name, + ) self.graph_analyzer.remove_node(dequantize_node_name) else: self.graph_analyzer.remove_node(dequantize_node_name) new_node.name = dequantize_node_name self.graph_analyzer.replace_single_node( - new_node, [top_node_name], quantized_node_name, [], dequantize_node_name) + new_node, [top_node_name], quantized_node_name, [], dequantize_node_name + ) self.graph_analyzer.remove_node(quantized_node_name) diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_requantize.py 
b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_requantize.py index cf5eb5592ea..e8bfbc71216 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_requantize.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_requantize.py @@ -18,22 +18,20 @@ import numpy as np import tensorflow as tf +from tensorflow.core.framework import attr_value_pb2, node_def_pb2 +from tensorflow.python.framework import dtypes, tensor_util -from tensorflow.python.framework import tensor_util -from tensorflow.core.framework import attr_value_pb2 -from tensorflow.core.framework import node_def_pb2 -from tensorflow.python.framework import dtypes - -from ..graph_base import GraphRewriterBase from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper from neural_compressor.adaptor.tf_utils.util import version1_gt_version2, version1_lt_version2 +from ..graph_base import GraphRewriterBase + class FuseMatMulRequantizeDequantizeTransformer(GraphRewriterBase): """Fuse QuantizedMatMul + Requantize + Dequantize into QuantizedMatMulWithBiasAndDequantize.""" - def __init__(self, model, device='cpu'): + def __init__(self, model, device="cpu"): """Initilization.""" super().__init__(model) self.device = device @@ -47,14 +45,14 @@ def __init__(self, model, device='cpu'): def do_transformation(self): """Apply the fusion of QuantizedMatMul + Requantize + Dequantize.""" fuse_pattern = [] - if tf.version.VERSION in ("1.15.0-up2", "1.15.0-up3") or version1_gt_version2(tf.version.VERSION, '2.1.0'): - fuse_pattern = [["QuantizedMatMulWithBias"], ['Requantize'], ['Dequantize'], ('Softmax',)] + if tf.version.VERSION in ("1.15.0-up2", "1.15.0-up3") or version1_gt_version2(tf.version.VERSION, "2.1.0"): + fuse_pattern = [["QuantizedMatMulWithBias"], ["Requantize"], ["Dequantize"], ("Softmax",)] float32_type = dtypes.float32.as_datatype_enum qint32_type = dtypes.qint32.as_datatype_enum target_nodes = self.graph_analyzer.query_fusion_pattern_nodes(fuse_pattern) for i in target_nodes: # TODO Remove below checker once the TF's limitation removed. 
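
The fuse_pattern lists used here (e.g. [["QuantizedMatMulWithBias"], ["Requantize"], ["Dequantize"], ("Softmax",)]) are matched by GraphAnalyzer.query_fusion_pattern_nodes against chains of consecutive op types in the graph. A minimal, self-contained sketch of that matching idea follows; the toy_graph and match_chain names are hypothetical illustrations only, not the GraphAnalyzer implementation.

def match_chain(graph, pattern):
    """Return every chain of node names whose op types match `pattern` in order."""
    matches = []
    for name, (op, _inputs) in graph.items():
        if op not in pattern[0]:
            continue
        chain, current = [name], name
        for allowed in pattern[1:]:
            # follow the consumer whose first input is the current node
            nxt = [n for n, (o, ins) in graph.items() if ins and ins[0] == current and o in allowed]
            if not nxt:
                chain = None
                break
            current = nxt[0]
            chain.append(current)
        if chain:
            matches.append(chain)
    return matches

# toy graph: node name -> (op type, input names)
toy_graph = {
    "matmul": ("QuantizedMatMulWithBias", ["x", "w", "b"]),
    "requant": ("Requantize", ["matmul"]),
    "dequant": ("Dequantize", ["requant"]),
}
print(match_chain(toy_graph, [["QuantizedMatMulWithBias"], ["Requantize"], ["Dequantize"]]))
# -> [['matmul', 'requant', 'dequant']]
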
- if len(i) == 5 and version1_lt_version2(tf.__version__, '2.6.0'): + if len(i) == 5 and version1_lt_version2(tf.__version__, "2.6.0"): continue quantized_node_name = i[0] @@ -76,10 +74,10 @@ def do_transformation(self): new_node.input.append(requested_output_min_name) new_node.input.append(requested_output_max_name) - if 'T1' in quantized_node.attr: - new_node.attr["T1"].CopyFrom(quantized_node.attr['T1']) - if 'T2' in quantized_node.attr: - new_node.attr["T2"].CopyFrom(quantized_node.attr['T2']) + if "T1" in quantized_node.attr: + new_node.attr["T1"].CopyFrom(quantized_node.attr["T1"]) + if "T2" in quantized_node.attr: + new_node.attr["T2"].CopyFrom(quantized_node.attr["T2"]) top_node_name = Helper.node_name_from_input(quantized_node.input[0]) max_filter_node = self.graph_info[new_node.input[6]].node @@ -91,79 +89,91 @@ def do_transformation(self): max_input_node = self.graph_info[last_node.input[-1]].node min_input_node = self.graph_info[last_node.input[-2]].node - if max_input_node.op == 'Enter': # pragma: no cover + if max_input_node.op == "Enter": # pragma: no cover min_input_parent_name = Helper.node_name_from_input(min_input_node.input[0]) max_input_parent_name = Helper.node_name_from_input(max_input_node.input[0]) min_input_parent_node = self.graph_info[min_input_parent_name].node max_input_parent_node = self.graph_info[max_input_parent_name].node - if min_input_parent_node.op != 'Const' or max_input_parent_node.op != 'Const': + if min_input_parent_node.op != "Const" or max_input_parent_node.op != "Const": continue min_input_node = min_input_parent_node max_input_node = max_input_parent_node - if max_filter_node.op == 'Enter': # pragma: no cover + if max_filter_node.op == "Enter": # pragma: no cover min_filter_parent_name = Helper.node_name_from_input(min_filter_node.input[0]) max_filter_parent_name = Helper.node_name_from_input(max_filter_node.input[0]) min_filter_parent_node = self.graph_info[min_filter_parent_name].node max_filter_parent_node = self.graph_info[max_filter_parent_name].node - if min_filter_parent_node.op != 'Const' or max_filter_parent_node.op != 'Const': + if min_filter_parent_node.op != "Const" or max_filter_parent_node.op != "Const": continue min_filter_node = min_filter_parent_node max_filter_node = max_filter_parent_node - if weight_node.op == 'Enter': # pragma: no cover + if weight_node.op == "Enter": # pragma: no cover weight_parent_name = Helper.node_name_from_input(weight_node.input[0]) weight_parent_node = self.graph_info[weight_parent_name].node - if weight_parent_node.op != 'Const': + if weight_parent_node.op != "Const": continue weight_node = weight_parent_node bias_enter_node = None - if bias_node.op == 'Enter': # pragma: no cover + if bias_node.op == "Enter": # pragma: no cover bias_enter_node = bias_node bias_parent_name = Helper.node_name_from_input(bias_node.input[0]) bias_parent_node = self.graph_info[bias_parent_name].node - if bias_parent_node.op != 'Const': + if bias_parent_node.op != "Const": continue bias_node = bias_parent_node - if max_filter_node.op == 'Const': - min_input_value = (min_input_node.attr['value'].tensor.float_val)[0] - max_input_value = (max_input_node.attr['value'].tensor.float_val)[0] + if max_filter_node.op == "Const": + min_input_value = (min_input_node.attr["value"].tensor.float_val)[0] + max_input_value = (max_input_node.attr["value"].tensor.float_val)[0] - max_filter_value = (max_filter_node.attr['value'].tensor.float_val)[0] - min_filter_value = (min_filter_node.attr['value'].tensor.float_val)[0] + max_filter_value 
= (max_filter_node.attr["value"].tensor.float_val)[0] + min_filter_value = (min_filter_node.attr["value"].tensor.float_val)[0] - weights_tensor = tensor_util.MakeNdarray( - weight_node.attr['value'].tensor) - bias_tensor = tensor_util.MakeNdarray( - bias_node.attr['value'].tensor) - is_min_first = bool(quantized_node.attr['input_quant_mode'].s == b'MIN_FIRST') - input_range = max_input_value - min_input_value if is_min_first else max( - abs(max_input_value), abs(min_input_value)) + weights_tensor = tensor_util.MakeNdarray(weight_node.attr["value"].tensor) + bias_tensor = tensor_util.MakeNdarray(bias_node.attr["value"].tensor) + is_min_first = bool(quantized_node.attr["input_quant_mode"].s == b"MIN_FIRST") + input_range = ( + max_input_value - min_input_value + if is_min_first + else max(abs(max_input_value), abs(min_input_value)) + ) - if -self.eps <= input_range <= self.eps: + if -self.eps <= input_range <= self.eps: input_range += self.eps if -self.eps <= max_input_value - min_input_value <= self.eps: max_input_value += self.eps - int32_bias = Helper.generate_int32_bias_for_matmul(bias_tensor, weights_tensor, - input_range, max_input_value, - min_input_value, - max_filter_value, min_filter_value) - - bias_node.attr['dtype'].CopyFrom( + int32_bias = Helper.generate_int32_bias_for_matmul( + bias_tensor, + weights_tensor, + input_range, + max_input_value, + min_input_value, + max_filter_value, + min_filter_value, + ) + + bias_node.attr["dtype"].CopyFrom( + attr_value_pb2.AttrValue(type=float32_type if self.device == "gpu" else qint32_type) + ) + bias_node.attr["value"].CopyFrom( attr_value_pb2.AttrValue( - type=float32_type if self.device == 'gpu' else qint32_type)) - bias_node.attr['value'].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - bias_tensor if self.device == 'gpu' else int32_bias, dtypes. 
- float32 if self.device == 'gpu' else dtypes.int32, bias_tensor.shape))) - - bias_node.attr['value'].tensor.dtype = float32_type \ - if self.device == 'gpu' else qint32_type - new_node.attr["Tbias"].CopyFrom(attr_value_pb2.AttrValue(type=float32_type \ - if self.device == 'gpu' else qint32_type)) + tensor=tensor_util.make_tensor_proto( + bias_tensor if self.device == "gpu" else int32_bias, + dtypes.float32 if self.device == "gpu" else dtypes.int32, + bias_tensor.shape, + ) + ) + ) + + bias_node.attr["value"].tensor.dtype = float32_type if self.device == "gpu" else qint32_type + new_node.attr["Tbias"].CopyFrom( + attr_value_pb2.AttrValue(type=float32_type if self.device == "gpu" else qint32_type) + ) if bias_enter_node: - bias_enter_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=float32_type \ - if self.device == 'gpu' else qint32_type)) + bias_enter_node.attr["T"].CopyFrom( + attr_value_pb2.AttrValue(type=float32_type if self.device == "gpu" else qint32_type) + ) else: new_node.attr["Tbias"].CopyFrom(attr_value_pb2.AttrValue(type=float32_type)) new_node.attr["Toutput"].CopyFrom(attr_value_pb2.AttrValue(type=float32_type)) @@ -172,23 +182,30 @@ def do_transformation(self): if self.graph_info[deq_node_name].outputs: self.graph_analyzer.replace_single_node( - new_node, [top_node_name], quantized_node_name, - self.graph_info[deq_node_name].outputs, deq_node_name) + new_node, + [top_node_name], + quantized_node_name, + self.graph_info[deq_node_name].outputs, + deq_node_name, + ) self.graph_analyzer.remove_node(deq_node_name) else: self.graph_analyzer.remove_node(deq_node_name) new_node.name = deq_node_name self.graph_analyzer.replace_single_node( - new_node, [top_node_name], quantized_node_name, [], deq_node_name) + new_node, [top_node_name], quantized_node_name, [], deq_node_name + ) self.graph_analyzer.remove_node(quantized_node_name) return self.graph_analyzer.dump_graph() + class FuseMatMulRequantizeTransformer(GraphRewriterBase): """Fuse Quantized MatMul Op with the successor Requantize Op.""" - def __init__(self, model, device='cpu'): + + def __init__(self, model, device="cpu"): """Initilization.""" super().__init__(model) self.device = device @@ -209,7 +226,8 @@ def do_transformation(self): while True: target_nodes = self.graph_analyzer.query_fusion_pattern_nodes( - [["QuantizedMatMulWithBiasAndRelu"], ['Requantize']]) + [["QuantizedMatMulWithBiasAndRelu"], ["Requantize"]] + ) if len(target_nodes) == 0: break @@ -231,119 +249,133 @@ def do_transformation(self): new_node.input.append(value) new_node.input.append(requested_output_min_name) new_node.input.append(requested_output_max_name) - if 'T1' in quantized_node.attr: - new_node.attr["T1"].CopyFrom(quantized_node.attr['T1']) - if 'T2' in quantized_node.attr: - new_node.attr["T2"].CopyFrom(quantized_node.attr['T2']) + if "T1" in quantized_node.attr: + new_node.attr["T1"].CopyFrom(quantized_node.attr["T1"]) + if "T2" in quantized_node.attr: + new_node.attr["T2"].CopyFrom(quantized_node.attr["T2"]) parent_node_name = Helper.node_name_from_input(quantized_node.input[0]) max_filter_node = self.graph_info[new_node.input[6]].node min_filter_node = self.graph_info[new_node.input[5]].node last_node = self.graph_info[new_node.input[0]].node - is_min_first = bool(quantized_node.attr['input_quant_mode'].s == b'MIN_FIRST') + is_min_first = bool(quantized_node.attr["input_quant_mode"].s == b"MIN_FIRST") weight_node = self.graph_info[new_node.input[1]].node bias_node = self.graph_info[new_node.input[2]].node max_input_node = 
self.graph_info[last_node.input[-1]].node min_input_node = self.graph_info[last_node.input[-2]].node - if max_input_node.op == 'Enter': # pragma: no cover + if max_input_node.op == "Enter": # pragma: no cover min_input_parent_name = Helper.node_name_from_input(min_input_node.input[0]) max_input_parent_name = Helper.node_name_from_input(max_input_node.input[0]) min_input_parent_node = self.graph_info[min_input_parent_name].node max_input_parent_node = self.graph_info[max_input_parent_name].node - if min_input_parent_node.op != 'Const' or max_input_parent_node.op != 'Const': + if min_input_parent_node.op != "Const" or max_input_parent_node.op != "Const": continue min_input_node = min_input_parent_node max_input_node = max_input_parent_node - if max_filter_node.op == 'Enter': # pragma: no cover + if max_filter_node.op == "Enter": # pragma: no cover min_filter_parent_name = Helper.node_name_from_input(min_filter_node.input[0]) max_filter_parent_name = Helper.node_name_from_input(max_filter_node.input[0]) min_filter_parent_node = self.graph_info[min_filter_parent_name].node max_filter_parent_node = self.graph_info[max_filter_parent_name].node - if min_filter_parent_node.op != 'Const' or max_filter_parent_node.op != 'Const': + if min_filter_parent_node.op != "Const" or max_filter_parent_node.op != "Const": continue min_filter_node = min_filter_parent_node max_filter_node = max_filter_parent_node - if weight_node.op == 'Enter': # pragma: no cover + if weight_node.op == "Enter": # pragma: no cover weight_parent_name = Helper.node_name_from_input(weight_node.input[0]) weight_parent_node = self.graph_info[weight_parent_name].node - if weight_parent_node.op != 'Const': + if weight_parent_node.op != "Const": continue weight_node = weight_parent_node bias_enter_node = None - if bias_node.op == 'Enter': # pragma: no cover + if bias_node.op == "Enter": # pragma: no cover bias_enter_node = bias_node bias_parent_name = Helper.node_name_from_input(bias_node.input[0]) bias_parent_node = self.graph_info[bias_parent_name].node - if bias_parent_node.op != 'Const': + if bias_parent_node.op != "Const": continue bias_node = bias_parent_node - if last_node.op.find('Requantize') != -1 or last_node.op.find('QuantizeV2') != -1: - min_input_value = (min_input_node.attr['value'].tensor.float_val)[0] - max_input_value = (max_input_node.attr['value'].tensor.float_val)[0] + if last_node.op.find("Requantize") != -1 or last_node.op.find("QuantizeV2") != -1: + min_input_value = (min_input_node.attr["value"].tensor.float_val)[0] + max_input_value = (max_input_node.attr["value"].tensor.float_val)[0] - max_filter_value = (max_filter_node.attr['value'].tensor.float_val)[0] - min_filter_value = (min_filter_node.attr['value'].tensor.float_val)[0] + max_filter_value = (max_filter_node.attr["value"].tensor.float_val)[0] + min_filter_value = (min_filter_node.attr["value"].tensor.float_val)[0] - weights_tensor = tensor_util.MakeNdarray( - weight_node.attr['value'].tensor) - bias_tensor = tensor_util.MakeNdarray( - bias_node.attr['value'].tensor) + weights_tensor = tensor_util.MakeNdarray(weight_node.attr["value"].tensor) + bias_tensor = tensor_util.MakeNdarray(bias_node.attr["value"].tensor) - input_range = max_input_value - min_input_value if is_min_first else max( - abs(max_input_value), abs(min_input_value)) + input_range = ( + max_input_value - min_input_value + if is_min_first + else max(abs(max_input_value), abs(min_input_value)) + ) if -self.eps <= input_range <= self.eps: input_range += self.eps if -self.eps <= max_input_value 
- min_input_value <= self.eps: max_input_value += self.eps - int32_bias = Helper.generate_int32_bias_for_matmul(bias_tensor, weights_tensor, - input_range, - max_input_value, - min_input_value, - max_filter_value, - min_filter_value) - bias_node.attr['dtype'].CopyFrom( + int32_bias = Helper.generate_int32_bias_for_matmul( + bias_tensor, + weights_tensor, + input_range, + max_input_value, + min_input_value, + max_filter_value, + min_filter_value, + ) + bias_node.attr["dtype"].CopyFrom( + attr_value_pb2.AttrValue(type=float32_type if self.device == "gpu" else qint32_type) + ) + bias_node.attr["value"].CopyFrom( attr_value_pb2.AttrValue( - type=float32_type if self.device == 'gpu' else qint32_type)) - bias_node.attr['value'].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - bias_tensor if self.device == 'gpu' else int32_bias, dtypes. - float32 if self.device == 'gpu' else dtypes.int32, bias_tensor.shape))) - - bias_node.attr['value'].tensor.dtype = float32_type \ - if self.device == 'gpu' else qint32_type - new_node.attr["Tbias"].CopyFrom(attr_value_pb2.AttrValue(type=float32_type \ - if self.device == 'gpu' else qint32_type)) - - new_node.attr["Toutput"].CopyFrom( - attr_value_pb2.AttrValue(type=uint8_type)) - #TODO enabled below commit once the graph refactor pre_optimize commmitted. - if quantized_node_op.find('Relu') == -1: + tensor=tensor_util.make_tensor_proto( + bias_tensor if self.device == "gpu" else int32_bias, + dtypes.float32 if self.device == "gpu" else dtypes.int32, + bias_tensor.shape, + ) + ) + ) + + bias_node.attr["value"].tensor.dtype = float32_type if self.device == "gpu" else qint32_type + new_node.attr["Tbias"].CopyFrom( + attr_value_pb2.AttrValue(type=float32_type if self.device == "gpu" else qint32_type) + ) + + new_node.attr["Toutput"].CopyFrom(attr_value_pb2.AttrValue(type=uint8_type)) + # TODO enabled below commit once the graph refactor pre_optimize commmitted. 
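
The int32 bias written back into bias_node here is the float bias rescaled by the activation and weight quantization scales. A rough standalone sketch of that rescaling, assuming quint8 MIN_FIRST activations and symmetric qint8 weights (int32_bias_sketch is a hypothetical name; the exact logic lives in Helper.generate_int32_bias_for_matmul):

import numpy as np

def int32_bias_sketch(bias_fp32, input_range, min_filter, max_filter):
    act_scale = 255.0 / input_range                            # quint8 activation scale over the calibrated range
    wt_scale = 127.0 / max(abs(min_filter), abs(max_filter))   # symmetric qint8 weight scale
    return np.round(np.asarray(bias_fp32) * act_scale * wt_scale).astype(np.int32)

print(int32_bias_sketch([0.5, -1.25], input_range=6.0, min_filter=-0.4, max_filter=0.8))
# -> [ 3373 -8434]
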
+ if quantized_node_op.find("Relu") == -1: deq_node_name = self.graph_info[requantize_node_name].outputs[0] deq_node = self.graph_info[deq_node_name].node - deq_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=uint8_type)) + deq_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=uint8_type)) if bias_enter_node: - bias_enter_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=float32_type \ - if self.device == 'gpu' else qint32_type)) + bias_enter_node.attr["T"].CopyFrom( + attr_value_pb2.AttrValue(type=float32_type if self.device == "gpu" else qint32_type) + ) else: new_node.attr["Tbias"].CopyFrom(attr_value_pb2.AttrValue(type=float32_type)) self.graph_analyzer.replace_single_node( - new_node, [parent_node_name], quantized_node_name, - [self.graph_info[requantize_node_name].outputs[0]], requantize_node_name) + new_node, + [parent_node_name], + quantized_node_name, + [self.graph_info[requantize_node_name].outputs[0]], + requantize_node_name, + ) self.graph_analyzer.remove_node(quantized_node_name) return self.graph_analyzer.dump_graph() -class FuseMatMulRequantizeDequantizeNewAPITransformer(GraphRewriterBase): # pragma: no cover + +class FuseMatMulRequantizeDequantizeNewAPITransformer(GraphRewriterBase): # pragma: no cover """Fuse _QuantizedMatMul + Requantize + Dequantize into _QuantizedMatMul.""" - def __init__(self, model, device='cpu'): + def __init__(self, model, device="cpu"): """Initilization.""" super().__init__(model) self.device = device @@ -356,7 +388,7 @@ def __init__(self, model, device='cpu'): def do_transformation(self): """Apply the fusion of QuantizedMatMul + Requantize + Dequantize.""" - fuse_pattern = [["_QuantizedMatMul"], ['Requantize', 'RequantizePerChannel'], ['Dequantize'], ('Softmax',)] + fuse_pattern = [["_QuantizedMatMul"], ["Requantize", "RequantizePerChannel"], ["Dequantize"], ("Softmax",)] uint8_type = dtypes.quint8.as_datatype_enum int8_type = dtypes.qint8.as_datatype_enum @@ -373,11 +405,14 @@ def do_transformation(self): deq_node_name = i[2] quantized_node_op = i[-1][0] - + # "BiasAdd" + "Add" only supports "Dequantize" - attr_fused_ops = ''.join(x for x in quantized_node.attr["fused_ops"].SerializeToString()\ - .decode('UTF-8', 'ignore').strip() if x.isprintable()) - if not "BiasAddAdd" in attr_fused_ops: + attr_fused_ops = "".join( + x + for x in quantized_node.attr["fused_ops"].SerializeToString().decode("UTF-8", "ignore").strip() + if x.isprintable() + ) + if "BiasAddAdd" not in attr_fused_ops: continue new_node = node_def_pb2.NodeDef() @@ -389,19 +424,19 @@ def do_transformation(self): new_node.input.append(requested_output_min_name) new_node.input.append(requested_output_max_name) - if 'T1' in quantized_node.attr: - new_node.attr["T1"].CopyFrom(quantized_node.attr['T1']) - if 'T2' in quantized_node.attr: - new_node.attr["T2"].CopyFrom(quantized_node.attr['T2']) - if 'U' in quantized_node.attr: + if "T1" in quantized_node.attr: + new_node.attr["T1"].CopyFrom(quantized_node.attr["T1"]) + if "T2" in quantized_node.attr: + new_node.attr["T2"].CopyFrom(quantized_node.attr["T2"]) + if "U" in quantized_node.attr: new_node.attr["U"].CopyFrom(quantized_node.attr["U"]) - if 'transpose_b' in quantized_node.attr: - new_node.attr["transpose_b"].CopyFrom(quantized_node.attr['transpose_b']) - if 'transpose_a' in quantized_node.attr: - new_node.attr["transpose_a"].CopyFrom(quantized_node.attr['transpose_a']) - if 'input_quant_mode' in quantized_node.attr: + if "transpose_b" in quantized_node.attr: + 
new_node.attr["transpose_b"].CopyFrom(quantized_node.attr["transpose_b"]) + if "transpose_a" in quantized_node.attr: + new_node.attr["transpose_a"].CopyFrom(quantized_node.attr["transpose_a"]) + if "input_quant_mode" in quantized_node.attr: new_node.attr["input_quant_mode"].CopyFrom(quantized_node.attr["input_quant_mode"]) - if 'output_quant_mode' in quantized_node.attr: + if "output_quant_mode" in quantized_node.attr: new_node.attr["output_quant_mode"].CopyFrom(quantized_node.attr["output_quant_mode"]) top_node_name = Helper.node_name_from_input(quantized_node.input[0]) @@ -418,127 +453,167 @@ def do_transformation(self): last_node = self.graph_info[new_node.input[0]].node weight_node = self.graph_info[Helper.node_name_from_input(new_node.input[1])].node bias_node = self.graph_info[Helper.node_name_from_input(new_node.input[2])].node - if not last_node.op == 'QuantizedConcatV2': + if not last_node.op == "QuantizedConcatV2": max_input_node = self.graph_info[last_node.input[-1]].node min_input_node = self.graph_info[last_node.input[-2]].node - + type_bias = float32_type - if not last_node.op == 'QuantizedConcatV2' and max_input_node.op == 'Enter': # pragma: no cover + if not last_node.op == "QuantizedConcatV2" and max_input_node.op == "Enter": # pragma: no cover min_input_parent_name = Helper.node_name_from_input(min_input_node.input[0]) max_input_parent_name = Helper.node_name_from_input(max_input_node.input[0]) min_input_parent_node = self.graph_info[min_input_parent_name].node max_input_parent_node = self.graph_info[max_input_parent_name].node - if min_input_parent_node.op != 'Const' or max_input_parent_node.op != 'Const': + if min_input_parent_node.op != "Const" or max_input_parent_node.op != "Const": continue min_input_node = min_input_parent_node max_input_node = max_input_parent_node - if max_filter_node and min_filter_node and max_filter_node.op == 'Enter': # pragma: no cover + if max_filter_node and min_filter_node and max_filter_node.op == "Enter": # pragma: no cover min_filter_parent_name = Helper.node_name_from_input(min_filter_node.input[0]) max_filter_parent_name = Helper.node_name_from_input(max_filter_node.input[0]) min_filter_parent_node = self.graph_info[min_filter_parent_name].node max_filter_parent_node = self.graph_info[max_filter_parent_name].node - if min_filter_parent_node.op != 'Const' or max_filter_parent_node.op != 'Const': + if min_filter_parent_node.op != "Const" or max_filter_parent_node.op != "Const": continue min_filter_node = min_filter_parent_node max_filter_node = max_filter_parent_node - if weight_node.op == 'Enter': # pragma: no cover + if weight_node.op == "Enter": # pragma: no cover weight_parent_name = Helper.node_name_from_input(weight_node.input[0]) weight_parent_node = self.graph_info[weight_parent_name].node - if weight_parent_node.op != 'Const': + if weight_parent_node.op != "Const": continue weight_node = weight_parent_node bias_enter_node = None - if bias_node.op == 'Enter': # pragma: no cover + if bias_node.op == "Enter": # pragma: no cover bias_enter_node = bias_node bias_parent_name = Helper.node_name_from_input(bias_node.input[0]) bias_parent_node = self.graph_info[bias_parent_name].node - if bias_parent_node.op != 'Const': + if bias_parent_node.op != "Const": continue bias_node = bias_parent_node - if max_filter_node and min_filter_node and max_filter_node.op == 'Const' \ - and weight_node.op == 'Const' and not last_node.op == 'QuantizedConcatV2': - min_input_value = (min_input_node.attr['value'].tensor.float_val)[0] - max_input_value = 
(max_input_node.attr['value'].tensor.float_val)[0] - if requantize_node.op.find('PerChannel') != -1: # pragma: no cover - max_filter_tensor = tensor_util.MakeNdarray( # get tensor - max_filter_node.attr['value'].tensor) - min_filter_tensor = tensor_util.MakeNdarray( # get tensor - min_filter_node.attr['value'].tensor) + if ( + max_filter_node + and min_filter_node + and max_filter_node.op == "Const" + and weight_node.op == "Const" + and not last_node.op == "QuantizedConcatV2" + ): + min_input_value = (min_input_node.attr["value"].tensor.float_val)[0] + max_input_value = (max_input_node.attr["value"].tensor.float_val)[0] + if requantize_node.op.find("PerChannel") != -1: # pragma: no cover + max_filter_tensor = tensor_util.MakeNdarray(max_filter_node.attr["value"].tensor) # get tensor + min_filter_tensor = tensor_util.MakeNdarray(min_filter_node.attr["value"].tensor) # get tensor else: - max_filter_value = (max_filter_node.attr['value'].tensor.float_val)[0] - min_filter_value = (min_filter_node.attr['value'].tensor.float_val)[0] - - weights_tensor = tensor_util.MakeNdarray(weight_node.attr['value'].tensor) - bias_tensor = tensor_util.MakeNdarray(bias_node.attr['value'].tensor) - is_min_first = bool(quantized_node.attr['input_quant_mode'].s == b'MIN_FIRST') - input_range = max_input_value - min_input_value if is_min_first else max( - abs(max_input_value), abs(min_input_value)) + max_filter_value = (max_filter_node.attr["value"].tensor.float_val)[0] + min_filter_value = (min_filter_node.attr["value"].tensor.float_val)[0] + + weights_tensor = tensor_util.MakeNdarray(weight_node.attr["value"].tensor) + bias_tensor = tensor_util.MakeNdarray(bias_node.attr["value"].tensor) + is_min_first = bool(quantized_node.attr["input_quant_mode"].s == b"MIN_FIRST") + input_range = ( + max_input_value - min_input_value + if is_min_first + else max(abs(max_input_value), abs(min_input_value)) + ) if -self.eps <= input_range <= self.eps: input_range += self.eps if -self.eps <= max_input_value - min_input_value <= self.eps: max_input_value += self.eps - if requantize_node.op.find('PerChannel') != -1: # pragma: no cover + if requantize_node.op.find("PerChannel") != -1: # pragma: no cover int32_bias = Helper.generate_int32_bias_for_matmul_per_channel( - bias_tensor, weights_tensor, max_input_value, min_input_value, - max_filter_tensor, min_filter_tensor) + bias_tensor, + weights_tensor, + max_input_value, + min_input_value, + max_filter_tensor, + min_filter_tensor, + ) else: int32_bias = Helper.generate_int32_bias_for_matmul( - bias_tensor, weights_tensor, input_range, max_input_value, min_input_value, - max_filter_value, min_filter_value) - - bias_node.attr['dtype'].CopyFrom( + bias_tensor, + weights_tensor, + input_range, + max_input_value, + min_input_value, + max_filter_value, + min_filter_value, + ) + + bias_node.attr["dtype"].CopyFrom( + attr_value_pb2.AttrValue(type=float32_type if self.device == "gpu" else qint32_type) + ) + bias_node.attr["value"].CopyFrom( attr_value_pb2.AttrValue( - type=float32_type if self.device == 'gpu' else qint32_type)) - bias_node.attr['value'].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - bias_tensor if self.device == 'gpu' else int32_bias, dtypes. 
- float32 if self.device == 'gpu' else dtypes.int32, bias_tensor.shape))) - - bias_node.attr['value'].tensor.dtype = float32_type \ - if self.device == 'gpu' else qint32_type - type_bias = float32_type if self.device == 'gpu' else qint32_type + tensor=tensor_util.make_tensor_proto( + bias_tensor if self.device == "gpu" else int32_bias, + dtypes.float32 if self.device == "gpu" else dtypes.int32, + bias_tensor.shape, + ) + ) + ) + + bias_node.attr["value"].tensor.dtype = float32_type if self.device == "gpu" else qint32_type + type_bias = float32_type if self.device == "gpu" else qint32_type if bias_enter_node: - bias_enter_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=float32_type \ - if self.device == 'gpu' else qint32_type)) + bias_enter_node.attr["T"].CopyFrom( + attr_value_pb2.AttrValue(type=float32_type if self.device == "gpu" else qint32_type) + ) else: type_bias = float32_type new_node.attr["Tbias"].CopyFrom(attr_value_pb2.AttrValue(type=type_bias)) - Helper.set_attr_string_list(new_node, 'fused_ops', [b'BiasAdd', b'Add', b'Dequantize']) - Helper.set_attr_type_list(new_node, 'Thost_inputs', [ - uint8_type, int8_type, type_bias, float32_type, float32_type, float32_type, - float32_type, float32_type, float32_type, float32_type - ]) - - Helper.set_attr_type_list(new_node, 'Thost_outputs', [float32_type]) + Helper.set_attr_string_list(new_node, "fused_ops", [b"BiasAdd", b"Add", b"Dequantize"]) + Helper.set_attr_type_list( + new_node, + "Thost_inputs", + [ + uint8_type, + int8_type, + type_bias, + float32_type, + float32_type, + float32_type, + float32_type, + float32_type, + float32_type, + float32_type, + ], + ) + + Helper.set_attr_type_list(new_node, "Thost_outputs", [float32_type]) new_node.attr["Tout"].CopyFrom(attr_value_pb2.AttrValue(type=float32_type)) self.graph_analyzer.remove_node(requantize_node_name) if self.graph_info[deq_node_name].outputs: self.graph_analyzer.replace_single_node( - new_node, [top_node_name], quantized_node_name, - self.graph_info[deq_node_name].outputs, deq_node_name) + new_node, + [top_node_name], + quantized_node_name, + self.graph_info[deq_node_name].outputs, + deq_node_name, + ) self.graph_analyzer.remove_node(deq_node_name) else: self.graph_analyzer.remove_node(deq_node_name) new_node.name = deq_node_name self.graph_analyzer.replace_single_node( - new_node, [top_node_name], quantized_node_name, [], deq_node_name) + new_node, [top_node_name], quantized_node_name, [], deq_node_name + ) self.graph_analyzer.remove_node(quantized_node_name) return self.graph_analyzer.dump_graph() + class FuseMatMulRequantizeNewAPITransformer(GraphRewriterBase): """Fuse newAPI Quantized MatMul Op with the successor Requantize Op.""" - def __init__(self, model, device='cpu'): + def __init__(self, model, device="cpu"): """Initilization.""" super().__init__(model) self.device = device @@ -559,7 +634,8 @@ def do_transformation(self): qint32_type = dtypes.qint32.as_datatype_enum target_nodes = self.graph_analyzer.query_fusion_pattern_nodes( - [["_QuantizedMatMul"], ['Requantize', 'RequantizePerChannel']]) + [["_QuantizedMatMul"], ["Requantize", "RequantizePerChannel"]] + ) for i in target_nodes: quantized_node_name = i[0] quantized_node = self.graph_info[quantized_node_name].node @@ -569,8 +645,11 @@ def do_transformation(self): requested_output_max_name = requantize_node.input[4] quantized_node_op = i[-1][0] - attr_fused_ops = ''.join(x for x in quantized_node.attr["fused_ops"].SerializeToString()\ - .decode('UTF-8', 'ignore').strip() if x.isprintable()) + attr_fused_ops 
= "".join( + x + for x in quantized_node.attr["fused_ops"].SerializeToString().decode("UTF-8", "ignore").strip() + if x.isprintable() + ) # "Requantize" # "BiasAdd", "Requantize" # "BiasAdd", "Activation", "Requantize" @@ -585,19 +664,19 @@ def do_transformation(self): new_node.input.append(requested_output_min_name) new_node.input.append(requested_output_max_name) - if 'transpose_b' in quantized_node.attr: - new_node.attr["transpose_b"].CopyFrom(quantized_node.attr['transpose_b']) - if 'transpose_a' in quantized_node.attr: - new_node.attr["transpose_a"].CopyFrom(quantized_node.attr['transpose_a']) - if 'T1' in quantized_node.attr: - new_node.attr["T1"].CopyFrom(quantized_node.attr['T1']) - if 'T2' in quantized_node.attr: - new_node.attr["T2"].CopyFrom(quantized_node.attr['T2']) - if 'U' in quantized_node.attr: + if "transpose_b" in quantized_node.attr: + new_node.attr["transpose_b"].CopyFrom(quantized_node.attr["transpose_b"]) + if "transpose_a" in quantized_node.attr: + new_node.attr["transpose_a"].CopyFrom(quantized_node.attr["transpose_a"]) + if "T1" in quantized_node.attr: + new_node.attr["T1"].CopyFrom(quantized_node.attr["T1"]) + if "T2" in quantized_node.attr: + new_node.attr["T2"].CopyFrom(quantized_node.attr["T2"]) + if "U" in quantized_node.attr: new_node.attr["U"].CopyFrom(quantized_node.attr["U"]) - if 'input_quant_mode' in quantized_node.attr: + if "input_quant_mode" in quantized_node.attr: new_node.attr["input_quant_mode"].CopyFrom(quantized_node.attr["input_quant_mode"]) - if 'output_quant_mode' in quantized_node.attr: + if "output_quant_mode" in quantized_node.attr: new_node.attr["output_quant_mode"].CopyFrom(quantized_node.attr["output_quant_mode"]) parent_node_name = Helper.node_name_from_input(quantized_node.input[0]) @@ -605,7 +684,7 @@ def do_transformation(self): min_filter_node = None # The Min and Max of non-const weight node are from QuantizeV2's output, not valid nodes. # Add check here for excluding this case. 
- if len(attr_fused_ops) == 0: # single matmul case + if len(attr_fused_ops) == 0: # single matmul case if ":2" not in new_node.input[5]: max_filter_node = self.graph_info[new_node.input[5]].node if ":1" not in new_node.input[4]: @@ -616,150 +695,210 @@ def do_transformation(self): if ":1" not in new_node.input[5]: min_filter_node = self.graph_info[new_node.input[5]].node last_node = self.graph_info[new_node.input[0]].node - is_min_first = bool(quantized_node.attr['input_quant_mode'].s == b'MIN_FIRST') + is_min_first = bool(quantized_node.attr["input_quant_mode"].s == b"MIN_FIRST") weight_node = self.graph_info[new_node.input[1]].node bias_node = None if "BiasAdd" in attr_fused_ops: bias_node = self.graph_info[new_node.input[2]].node - if not last_node.op == 'QuantizedConcatV2': + if not last_node.op == "QuantizedConcatV2": max_input_node = self.graph_info[last_node.input[-1]].node min_input_node = self.graph_info[last_node.input[-2]].node - if not last_node.op == 'QuantizedConcatV2' and max_input_node.op == 'Enter': # pragma: no cover + if not last_node.op == "QuantizedConcatV2" and max_input_node.op == "Enter": # pragma: no cover min_input_parent_name = Helper.node_name_from_input(min_input_node.input[0]) max_input_parent_name = Helper.node_name_from_input(max_input_node.input[0]) min_input_parent_node = self.graph_info[min_input_parent_name].node max_input_parent_node = self.graph_info[max_input_parent_name].node - if min_input_parent_node.op != 'Const' or max_input_parent_node.op != 'Const': + if min_input_parent_node.op != "Const" or max_input_parent_node.op != "Const": continue min_input_node = min_input_parent_node max_input_node = max_input_parent_node - if max_filter_node and min_filter_node and min_filter_node.input and \ - max_filter_node.input and max_filter_node.op == 'Enter': # pragma: no cover + if ( + max_filter_node + and min_filter_node + and min_filter_node.input + and max_filter_node.input + and max_filter_node.op == "Enter" + ): # pragma: no cover min_filter_parent_name = Helper.node_name_from_input(min_filter_node.input[0]) max_filter_parent_name = Helper.node_name_from_input(max_filter_node.input[0]) min_filter_parent_node = self.graph_info[min_filter_parent_name].node max_filter_parent_node = self.graph_info[max_filter_parent_name].node - if min_filter_parent_node.op != 'Const' or max_filter_parent_node.op != 'Const': + if min_filter_parent_node.op != "Const" or max_filter_parent_node.op != "Const": continue min_filter_node = min_filter_parent_node max_filter_node = max_filter_parent_node - if weight_node.op == 'Enter': # pragma: no cover + if weight_node.op == "Enter": # pragma: no cover weight_parent_name = Helper.node_name_from_input(weight_node.input[0]) weight_parent_node = self.graph_info[weight_parent_name].node - if weight_parent_node.op != 'Const': + if weight_parent_node.op != "Const": continue weight_node = weight_parent_node bias_enter_node = None - if bias_node and bias_node.op == 'Enter': # pragma: no cover + if bias_node and bias_node.op == "Enter": # pragma: no cover bias_enter_node = bias_node bias_parent_name = Helper.node_name_from_input(bias_node.input[0]) bias_parent_node = self.graph_info[bias_parent_name].node - if bias_parent_node.op != 'Const': + if bias_parent_node.op != "Const": continue bias_node = bias_parent_node - if bias_node and (last_node.op.find('_QuantizedMatMul') != -1 or last_node.op.find('QuantizeV2') != -1 \ - and max_filter_node and min_filter_node): - min_input_value = (min_input_node.attr['value'].tensor.float_val)[0] - 
max_input_value = (max_input_node.attr['value'].tensor.float_val)[0] - if requantize_node.op.find('PerChannel') != -1: # pragma: no cover - max_filter_tensor = tensor_util.MakeNdarray( # get tensor - max_filter_node.attr['value'].tensor) - min_filter_tensor = tensor_util.MakeNdarray( # get tensor - min_filter_node.attr['value'].tensor) + if bias_node and ( + last_node.op.find("_QuantizedMatMul") != -1 + or last_node.op.find("QuantizeV2") != -1 + and max_filter_node + and min_filter_node + ): + min_input_value = (min_input_node.attr["value"].tensor.float_val)[0] + max_input_value = (max_input_node.attr["value"].tensor.float_val)[0] + if requantize_node.op.find("PerChannel") != -1: # pragma: no cover + max_filter_tensor = tensor_util.MakeNdarray(max_filter_node.attr["value"].tensor) # get tensor + min_filter_tensor = tensor_util.MakeNdarray(min_filter_node.attr["value"].tensor) # get tensor else: - max_filter_value = (max_filter_node.attr['value'].tensor.float_val)[0] - min_filter_value = (min_filter_node.attr['value'].tensor.float_val)[0] + max_filter_value = (max_filter_node.attr["value"].tensor.float_val)[0] + min_filter_value = (min_filter_node.attr["value"].tensor.float_val)[0] - weights_tensor = tensor_util.MakeNdarray(weight_node.attr['value'].tensor) - bias_tensor = tensor_util.MakeNdarray(bias_node.attr['value'].tensor) + weights_tensor = tensor_util.MakeNdarray(weight_node.attr["value"].tensor) + bias_tensor = tensor_util.MakeNdarray(bias_node.attr["value"].tensor) - input_range = max_input_value - min_input_value if is_min_first else max( - abs(max_input_value), abs(min_input_value)) + input_range = ( + max_input_value - min_input_value + if is_min_first + else max(abs(max_input_value), abs(min_input_value)) + ) if -self.eps <= input_range <= self.eps: input_range += self.eps if -self.eps <= max_input_value - min_input_value <= self.eps: max_input_value += self.eps - if requantize_node.op.find('PerChannel') != -1: # pragma: no cover + if requantize_node.op.find("PerChannel") != -1: # pragma: no cover int32_bias = Helper.generate_int32_bias_for_matmul_per_channel( - bias_tensor, weights_tensor, max_input_value, min_input_value, - max_filter_tensor, min_filter_tensor) + bias_tensor, + weights_tensor, + max_input_value, + min_input_value, + max_filter_tensor, + min_filter_tensor, + ) else: int32_bias = Helper.generate_int32_bias_for_matmul( - bias_tensor, weights_tensor, input_range, max_input_value, min_input_value, - max_filter_value, min_filter_value) - bias_node.attr['dtype'].CopyFrom( + bias_tensor, + weights_tensor, + input_range, + max_input_value, + min_input_value, + max_filter_value, + min_filter_value, + ) + bias_node.attr["dtype"].CopyFrom( + attr_value_pb2.AttrValue(type=float32_type if self.device == "gpu" else qint32_type) + ) + bias_node.attr["value"].CopyFrom( attr_value_pb2.AttrValue( - type=float32_type if self.device == 'gpu' else qint32_type)) - bias_node.attr['value'].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - bias_tensor if self.device == 'gpu' else int32_bias, dtypes. 
- float32 if self.device == 'gpu' else dtypes.int32, bias_tensor.shape))) - - bias_node.attr['value'].tensor.dtype = float32_type \ - if self.device == 'gpu' else qint32_type + tensor=tensor_util.make_tensor_proto( + bias_tensor if self.device == "gpu" else int32_bias, + dtypes.float32 if self.device == "gpu" else dtypes.int32, + bias_tensor.shape, + ) + ) + ) + + bias_node.attr["value"].tensor.dtype = float32_type if self.device == "gpu" else qint32_type if bias_enter_node: - bias_enter_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=float32_type \ - if self.device == 'gpu' else qint32_type)) - new_node.attr["Tbias"].CopyFrom(attr_value_pb2.AttrValue(type=float32_type \ - if self.device == 'gpu' else qint32_type)) + bias_enter_node.attr["T"].CopyFrom( + attr_value_pb2.AttrValue(type=float32_type if self.device == "gpu" else qint32_type) + ) + new_node.attr["Tbias"].CopyFrom( + attr_value_pb2.AttrValue(type=float32_type if self.device == "gpu" else qint32_type) + ) deq_node_name = self.graph_info[requantize_node_name].outputs[0] deq_node = self.graph_info[deq_node_name].node - deq_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=uint8_type)) - - Helper.set_attr_type_list(new_node, "Thost_inputs", [ - uint8_type, int8_type, - float32_type if self.device == 'gpu' else qint32_type, - float32_type, float32_type, float32_type, - float32_type, float32_type, float32_type]) + deq_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=uint8_type)) + + Helper.set_attr_type_list( + new_node, + "Thost_inputs", + [ + uint8_type, + int8_type, + float32_type if self.device == "gpu" else qint32_type, + float32_type, + float32_type, + float32_type, + float32_type, + float32_type, + float32_type, + ], + ) else: new_node.attr["Tbias"].CopyFrom(attr_value_pb2.AttrValue(type=float32_type)) deq_node_name = self.graph_info[requantize_node_name].outputs[0] deq_node = self.graph_info[deq_node_name].node - deq_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=uint8_type)) + deq_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=uint8_type)) if bias_node: - Helper.set_attr_type_list(new_node, "Thost_inputs", [ - uint8_type, int8_type, float32_type, - float32_type, float32_type, float32_type, - float32_type, float32_type, float32_type]) + Helper.set_attr_type_list( + new_node, + "Thost_inputs", + [ + uint8_type, + int8_type, + float32_type, + float32_type, + float32_type, + float32_type, + float32_type, + float32_type, + float32_type, + ], + ) else: - Helper.set_attr_type_list(new_node, "Thost_inputs", [ - uint8_type, int8_type, - float32_type, float32_type, float32_type, - float32_type, float32_type, float32_type]) - - Helper.set_attr_type_list(new_node, 'Thost_outputs', [ - uint8_type, float32_type, float32_type]) - - if "GeluApproximate" in attr_fused_ops: - Helper.set_attr_string_list(new_node, 'fused_ops', \ - [b'BiasAdd', b'GeluApproximate', b'Requantize']) + Helper.set_attr_type_list( + new_node, + "Thost_inputs", + [ + uint8_type, + int8_type, + float32_type, + float32_type, + float32_type, + float32_type, + float32_type, + float32_type, + ], + ) + + Helper.set_attr_type_list(new_node, "Thost_outputs", [uint8_type, float32_type, float32_type]) + + if "GeluApproximate" in attr_fused_ops: + Helper.set_attr_string_list(new_node, "fused_ops", [b"BiasAdd", b"GeluApproximate", b"Requantize"]) elif "GeluExact" in attr_fused_ops: - Helper.set_attr_string_list(new_node, 'fused_ops', [b'BiasAdd', b'GeluExact', b'Requantize']) + Helper.set_attr_string_list(new_node, "fused_ops", [b"BiasAdd", 
b"GeluExact", b"Requantize"]) elif "Elu" in attr_fused_ops: - Helper.set_attr_string_list(new_node, 'fused_ops', [b'BiasAdd', b'Elu', b'Requantize']) + Helper.set_attr_string_list(new_node, "fused_ops", [b"BiasAdd", b"Elu", b"Requantize"]) elif "LeakyRelu" in attr_fused_ops: - Helper.set_attr_string_list(new_node, 'fused_ops', [b'BiasAdd', b'LeakyRelu', b'Requantize']) + Helper.set_attr_string_list(new_node, "fused_ops", [b"BiasAdd", b"LeakyRelu", b"Requantize"]) elif "Relu6" in attr_fused_ops: - Helper.set_attr_string_list(new_node, 'fused_ops', [b'BiasAdd', b'Relu6', b'Requantize']) + Helper.set_attr_string_list(new_node, "fused_ops", [b"BiasAdd", b"Relu6", b"Requantize"]) elif "Tanh" in attr_fused_ops: - Helper.set_attr_string_list(new_node, 'fused_ops', [b'BiasAdd', b'Tanh', b'Requantize']) + Helper.set_attr_string_list(new_node, "fused_ops", [b"BiasAdd", b"Tanh", b"Requantize"]) elif "Sigmoid" in attr_fused_ops: - Helper.set_attr_string_list(new_node, 'fused_ops', [b'BiasAdd', b'Sigmoid', b'Requantize']) + Helper.set_attr_string_list(new_node, "fused_ops", [b"BiasAdd", b"Sigmoid", b"Requantize"]) elif "Relu" in attr_fused_ops: - Helper.set_attr_string_list(new_node, 'fused_ops', [b'BiasAdd', b'Relu', b'Requantize']) + Helper.set_attr_string_list(new_node, "fused_ops", [b"BiasAdd", b"Relu", b"Requantize"]) elif "BiasAdd" in attr_fused_ops: - Helper.set_attr_string_list(new_node, 'fused_ops', [b'BiasAdd', b'Requantize']) + Helper.set_attr_string_list(new_node, "fused_ops", [b"BiasAdd", b"Requantize"]) else: - Helper.set_attr_string_list(new_node, 'fused_ops', [b'Requantize']) + Helper.set_attr_string_list(new_node, "fused_ops", [b"Requantize"]) new_node.attr["Tout"].CopyFrom(attr_value_pb2.AttrValue(type=uint8_type)) self.graph_analyzer.replace_single_node( - new_node, [parent_node_name], quantized_node_name, - [self.graph_info[requantize_node_name].outputs[0]], requantize_node_name) + new_node, + [parent_node_name], + quantized_node_name, + [self.graph_info[requantize_node_name].outputs[0]], + requantize_node_name, + ) self.graph_analyzer.remove_node(quantized_node_name) return self.graph_analyzer.dump_graph() diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/meta_op_optimizer.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/meta_op_optimizer.py index dcb3998c868..a426abbd65c 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/meta_op_optimizer.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/meta_op_optimizer.py @@ -17,11 +17,12 @@ """Meta OP Graph Rewriter.""" +from tensorflow.python.framework import dtypes + +from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.utils.utility import dump_elapsed_time from ..graph_base import GraphRewriterBase -from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer -from tensorflow.python.framework import dtypes class MetaInfoChangingMemOpOptimizer(GraphRewriterBase): @@ -29,6 +30,7 @@ class MetaInfoChangingMemOpOptimizer(GraphRewriterBase): With such changes, the Quantize and Dequantize OP will removed for better performance. 
""" + def __init__(self, model): """Initilization.""" super().__init__(model) @@ -41,8 +43,9 @@ def __init__(self, model): @dump_elapsed_time("Pass MetaOpOptimizer") def do_transformation(self): """Apply the fusion of Dequantize + MetaOp + QuantizeV2.""" - target_nodes = self.graph_analyzer.query_fusion_pattern_nodes( \ - [['Dequantize'], ('Squeeze', 'Reshape'), ('Squeeze','Reshape'), ['QuantizeV2']]) + target_nodes = self.graph_analyzer.query_fusion_pattern_nodes( + [["Dequantize"], ("Squeeze", "Reshape"), ("Squeeze", "Reshape"), ["QuantizeV2"]] + ) for i in target_nodes: if len(i[-1]) == 2: continue @@ -60,11 +63,11 @@ def do_transformation(self): if len(self.graph_info[dequantize_node_name].outputs) != 1: continue - if quant_node.attr['mode'].s.decode() == deq_node.attr['mode'].s.decode(): + if quant_node.attr["mode"].s.decode() == deq_node.attr["mode"].s.decode(): deq_min_range = self.graph_info[dequantize_node_name].node.input[1] deq_max_range = self.graph_info[dequantize_node_name].node.input[2] - quant_output_min = quantize_node_name + ':1' - quant_output_max = quantize_node_name + ':2' + quant_output_min = quantize_node_name + ":1" + quant_output_max = quantize_node_name + ":2" if len(i[-1]) == 3: quantize_input_name = i[1] else: @@ -73,9 +76,10 @@ def do_transformation(self): quantized_node_name = self.graph_info[quantize_node_name].outputs[0] # _QuantizedBatchMatMul requires T1 and T2 with qint8 type # _QuantizedFusedBatchNorm requires T with qint8 type - if (self.graph_info[quantized_node_name].node.op == '_QuantizedBatchMatMul' or \ - self.graph_info[quantized_node_name].node.op == '_QuantizedFusedBatchNorm') and \ - self.graph_info[dequantize_node_name].node.attr['T'].type != dtypes.qint8.as_datatype_enum: + if ( + self.graph_info[quantized_node_name].node.op == "_QuantizedBatchMatMul" + or self.graph_info[quantized_node_name].node.op == "_QuantizedFusedBatchNorm" + ) and self.graph_info[dequantize_node_name].node.attr["T"].type != dtypes.qint8.as_datatype_enum: continue for index, value in enumerate(self.graph_info[quantized_node_name].node.input): @@ -88,24 +92,23 @@ def do_transformation(self): if index == 0: self.graph_info[quantized_node_name].node.input[index] = quantize_input_name - new_dtype = self.graph_info[dequantize_node_name].node.attr['T'].type - for node_name in i[1: -1]: - self.graph_info[node_name].node.attr['T'].type = new_dtype + new_dtype = self.graph_info[dequantize_node_name].node.attr["T"].type + for node_name in i[1:-1]: + self.graph_info[node_name].node.attr["T"].type = new_dtype - if 'T1' in self.graph_info[quantized_node_name].node.attr: - self.graph_info[quantized_node_name].node.attr['T1'].type = new_dtype + if "T1" in self.graph_info[quantized_node_name].node.attr: + self.graph_info[quantized_node_name].node.attr["T1"].type = new_dtype - if 'Tinput' in self.graph_info[quantized_node_name].node.attr: - self.graph_info[quantized_node_name].node.attr['Tinput'].type = new_dtype + if "Tinput" in self.graph_info[quantized_node_name].node.attr: + self.graph_info[quantized_node_name].node.attr["Tinput"].type = new_dtype - if 'Thost_inputs' in self.graph_info[quantized_node_name].node.attr: - self.graph_info[quantized_node_name].node.attr['Thost_inputs'].list.type[0] = new_dtype + if "Thost_inputs" in self.graph_info[quantized_node_name].node.attr: + self.graph_info[quantized_node_name].node.attr["Thost_inputs"].list.type[0] = new_dtype - if 'T' in self.graph_info[quantized_node_name].node.attr: - self.graph_info[quantized_node_name].node.attr['T'].type = 
new_dtype + if "T" in self.graph_info[quantized_node_name].node.attr: + self.graph_info[quantized_node_name].node.attr["T"].type = new_dtype - self.graph_info[i[1]].node.input[0] = \ - self.graph_info[dequantize_node_name].node.input[0] + self.graph_info[i[1]].node.input[0] = self.graph_info[dequantize_node_name].node.input[0] self.graph_analyzer.remove_node(dequantize_node_name) self.graph_analyzer.remove_node(self.graph_info[quantize_node_name].node.input[1]) self.graph_analyzer.remove_node(self.graph_info[quantize_node_name].node.input[2]) diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/post_hostconst_converter.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/post_hostconst_converter.py index d69c411cad4..64f5e21e983 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/post_hostconst_converter.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/post_hostconst_converter.py @@ -17,26 +17,37 @@ """Post HostConst Graph Rewriter.""" import os + from tensorflow.core.framework import graph_pb2, node_def_pb2 + from neural_compressor.utils.utility import dump_elapsed_time + from ..graph_base import GraphRewriterBase + class PostHostConstConverter(GraphRewriterBase): """Support HostConst as default for all devices, not just for GPU.""" @dump_elapsed_time("Pass PostHostConstConverter") def do_transformation(self): """Convert Const to HostConst as default.""" - if os.environ.get("DISABLE_HOSTCONST") == '1': + if os.environ.get("DISABLE_HOSTCONST") == "1": return self.model output_graph_def = graph_pb2.GraphDef() for node in self.model.node: new_node = node_def_pb2.NodeDef() new_node.CopyFrom(node) - new_node.device = '' - if node.op == "Const" and node.attr['dtype'].type in [1, 3] \ - and (node.name.endswith('_min') or node.name.endswith('_max') \ - or node.name.endswith('_max_only') or node.name.endswith('_min_only')): + new_node.device = "" + if ( + node.op == "Const" + and node.attr["dtype"].type in [1, 3] + and ( + node.name.endswith("_min") + or node.name.endswith("_max") + or node.name.endswith("_max_only") + or node.name.endswith("_min_only") + ) + ): new_node.op = "HostConst" output_graph_def.node.extend([new_node]) return output_graph_def diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/post_quantized_op_cse.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/post_quantized_op_cse.py index f0238bcd60c..6ffe280d168 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/post_quantized_op_cse.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/post_quantized_op_cse.py @@ -17,17 +17,21 @@ """Post CSE Graph Rewriter.""" import hashlib + from tensorflow.core.framework import graph_pb2 from tensorflow.python.framework import tensor_util -from neural_compressor.utils.utility import dump_elapsed_time -from ..graph_base import GraphRewriterBase from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper +from neural_compressor.utils.utility import dump_elapsed_time + +from ..graph_base import GraphRewriterBase + class PostCseOptimizer(GraphRewriterBase): """Remove duplicated nodes like shared quantizev2 and const to decrease the output model size.""" - control_op_types = ('Switch', 'Enter', 'Merge', 'NextIteration', 'Exit') + + control_op_types = ("Switch", "Enter", "Merge", "NextIteration", "Exit") def _gen_node_hash(self, graph_info, node): """Generate nodes hash md5 data.""" @@ -35,8 +39,8 
@@ def _gen_node_hash(self, graph_info, node): hash_str += str(len(node.input)) for i in node.input: input_node = graph_info[Helper.node_name_from_input(i)].node - if input_node.op == 'Const': - float_tensor = (tensor_util.MakeNdarray(input_node.attr["value"].tensor)) + if input_node.op == "Const": + float_tensor = tensor_util.MakeNdarray(input_node.attr["value"].tensor) hash_str += str(float_tensor.flatten()) else: hash_str += i @@ -45,7 +49,7 @@ def _gen_node_hash(self, graph_info, node): for i in attr_keys: hash_str += str(node.attr[i]) - return hashlib.md5(hash_str.encode('utf-8')).hexdigest() + return hashlib.md5(hash_str.encode("utf-8")).hexdigest() @dump_elapsed_time("Pass PostCseOptimizer") def do_transformation(self): @@ -57,10 +61,10 @@ def do_transformation(self): need_to_keep_const_node_name = [] for _, v in graph_info.items(): - if '_class' in v.node.attr: - loc_attr_node.append(v.node.attr['_class'].list.s[0].decode().split(':@')[-1]) + if "_class" in v.node.attr: + loc_attr_node.append(v.node.attr["_class"].list.s[0].decode().split(":@")[-1]) for node_name, i in graph_info.items(): - if node_name in loc_attr_node or i.node.op not in ('QuantizeV2', "Const"): + if node_name in loc_attr_node or i.node.op not in ("QuantizeV2", "Const"): continue hash_value = self._gen_node_hash(graph_info, i.node) @@ -78,10 +82,10 @@ def do_transformation(self): node_type = graph_info[v[0]].node.op for j in v[1:]: - if node_type == 'Const' and j in graph_info: + if node_type == "Const" and j in graph_info: output_op_types = [ - graph_info[out_name].node.op in self.control_op_types for out_name - in graph_info[j].outputs] + graph_info[out_name].node.op in self.control_op_types for out_name in graph_info[j].outputs + ] if any(output_op_types): continue @@ -100,17 +104,16 @@ def do_transformation(self): if j in graph_info: graph_info.pop(j) - elif node_type == 'QuantizeV2': # pragma: no cover + elif node_type == "QuantizeV2": # pragma: no cover next_node = graph_info[j].outputs[0] - quantize_v2_output_names = (j, j + ':1', j + ':2') + quantize_v2_output_names = (j, j + ":1", j + ":2") - replace_index = [list(graph_info[next_node].node.input).index(i) - for i in quantize_v2_output_names] + replace_index = [list(graph_info[next_node].node.input).index(i) for i in quantize_v2_output_names] graph_info[next_node].node.input[replace_index[0]] = v[0] - graph_info[next_node].node.input[replace_index[1]] = v[0] + ':1' - graph_info[next_node].node.input[replace_index[2]] = v[0] + ':2' + graph_info[next_node].node.input[replace_index[1]] = v[0] + ":1" + graph_info[next_node].node.input[replace_index[2]] = v[0] + ":2" graph_info[v[0]].outputs.append(next_node) @@ -118,13 +121,12 @@ def do_transformation(self): graph_info.pop(graph_info[j].node.input[1]) if graph_info[j].node.input[2] not in need_to_keep_const_node_name: - graph_info.pop(graph_info[j].node.input[2]) graph_info.pop(j) else: - self.logger.warning('Unknown Op type {}.'.format(node_type)) + self.logger.warning("Unknown Op type {}.".format(node_type)) output_graph_def = graph_pb2.GraphDef() diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/rnn_convert.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/rnn_convert.py index 2199de7d325..0f99f491dfa 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/rnn_convert.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/rnn_convert.py @@ -16,21 +16,21 @@ # limitations under the License. 
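Before the rewritten rnn_convert.py hunk below, a standalone sketch (with hypothetical dump lines) of how QuantizedRNNConverter pulls the input and requantize ranges out of the calibration debug strings, a simplified version of the string surgery in do_transformation:

calibration_data = [
    "lstm/MatMul_eightbit_min_input__print__;__min:[-2.5]",
    "lstm/MatMul_eightbit_max_input__print__;__max:[3.1]",
    "lstm/MatMul_eightbit_requant_range__print__;__requant_min_max:[-1.2][4.7]",
]
# Brackets delimit the recorded values in each dump line.
input_min_values = [float(s.split("[")[-1].split("]")[0]) for s in calibration_data if "__min:" in s]
input_max_values = [float(s.split("[")[-1].split("]")[0]) for s in calibration_data if "__max:" in s]
requant_line = [s for s in calibration_data if "__requant_min_max:" in s][0]
min_output = float(requant_line.split(":")[-1][1:].split("]")[0])
max_output = float(requant_line.split("][")[-1][:-1])
print(min(input_min_values), max(input_max_values), min_output, max_output)  # -2.5 3.1 -1.2 4.7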
"""Quantized RNN Graph Rewriter.""" -from neural_compressor.utils.utility import dump_elapsed_time +import numpy as np +import tensorflow as tf +from tensorflow.python.framework import dtypes, tensor_util +from tensorflow.python.ops import array_ops -from ..graph_base import GraphRewriterBase from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import tensor_util -from tensorflow.python.ops import array_ops +from neural_compressor.utils.utility import dump_elapsed_time -import numpy as np -import tensorflow as tf +from ..graph_base import GraphRewriterBase class QuantizedRNNConverter(GraphRewriterBase): """Quantized RNN converter.""" + def __init__(self, model, calibration_data, rnn_details, new_api=False): """Initilization.""" super().__init__(model) @@ -45,69 +45,55 @@ def do_transformation(self): g.graph = self.model graph_info = g.parse_graph() - for i in self.rnn_details.keys(): # pragma: no cover + for i in self.rnn_details.keys(): # pragma: no cover start_node_name = graph_info[i[0]].node.input[0] matmul_b_node_name = graph_info[i[0]].node.input[1] matmul_b_node = graph_info[Helper.node_name_from_input(matmul_b_node_name)].node - if matmul_b_node.op == 'Split': + if matmul_b_node.op == "Split": enter_node_name = matmul_b_node.input[1] - elif matmul_b_node.op == 'Enter': + elif matmul_b_node.op == "Enter": enter_node_name = graph_info[i[0]].node.input[1] else: continue - min_str = i[0] + '_eightbit_min_' + \ - start_node_name + '__print__;__min:' + min_str = i[0] + "_eightbit_min_" + start_node_name + "__print__;__min:" input_min_values = [] input_max_values = [] output_min_values = [] output_max_values = [] - max_str = i[0] + '_eightbit_max_' + \ - start_node_name + '__print__;__max:' - output_str = i[0] + \ - '_eightbit_requant_range__print__;__requant_min_max:' + max_str = i[0] + "_eightbit_max_" + start_node_name + "__print__;__max:" + output_str = i[0] + "_eightbit_requant_range__print__;__requant_min_max:" for j in self.calibration_data: if j.find(min_str) != -1: - input_min_values.append( - float(j.split('[')[-1].split(']')[0])) + input_min_values.append(float(j.split("[")[-1].split("]")[0])) if j.find(max_str) != -1: - input_max_values.append( - float(j.split('[')[-1].split(']')[0])) + input_max_values.append(float(j.split("[")[-1].split("]")[0])) if j.find(output_str) != -1: - output_min_values.append( - float(j.split(':')[-1][1:].split(']')[0])) - output_max_values.append(float(j.split('][')[-1][:-1])) + output_min_values.append(float(j.split(":")[-1][1:].split("]")[0])) + output_max_values.append(float(j.split("][")[-1][:-1])) min_input = min(input_min_values) max_input = max(input_max_values) min_output = min(output_min_values) max_output = max(output_max_values) - q_max_in_node = Helper.create_constant_node( - i[0] + '_quant_max', max_input, dtypes.float32) - - q_min_in_node = Helper.create_constant_node( - i[0] + '_quant_min', min_input, dtypes.float32) - q_enter_min_node = Helper.create_node( - 'Enter', q_min_in_node.name+'_enter', [q_min_in_node.name]) - Helper.set_attr_string( - q_enter_min_node, 'frame_name', self.rnn_details[i].encode()) - Helper.set_attr_dtype(q_enter_min_node, 'T', dtypes.float32) - Helper.set_attr_bool(q_enter_min_node, 'is_constant', True) - Helper.set_attr_int(q_enter_min_node, 'parallel_iterations', 32) - q_enter_max_node = Helper.create_node( - 'Enter', 
q_max_in_node.name+'_enter', [q_max_in_node.name]) - Helper.set_attr_dtype(q_enter_max_node, 'T', dtypes.float32) - Helper.set_attr_string( - q_enter_max_node, 'frame_name', self.rnn_details[i].encode()) - Helper.set_attr_bool(q_enter_max_node, 'is_constant', True) - Helper.set_attr_int(q_enter_max_node, 'parallel_iterations', 32) - - weight_node_name = graph_info[Helper.node_name_from_input( - enter_node_name)].node.input[0] - weight_node = graph_info[Helper.node_name_from_input( - weight_node_name)].node - if weight_node.attr['dtype'].type == dtypes.qint8: + q_max_in_node = Helper.create_constant_node(i[0] + "_quant_max", max_input, dtypes.float32) + + q_min_in_node = Helper.create_constant_node(i[0] + "_quant_min", min_input, dtypes.float32) + q_enter_min_node = Helper.create_node("Enter", q_min_in_node.name + "_enter", [q_min_in_node.name]) + Helper.set_attr_string(q_enter_min_node, "frame_name", self.rnn_details[i].encode()) + Helper.set_attr_dtype(q_enter_min_node, "T", dtypes.float32) + Helper.set_attr_bool(q_enter_min_node, "is_constant", True) + Helper.set_attr_int(q_enter_min_node, "parallel_iterations", 32) + q_enter_max_node = Helper.create_node("Enter", q_max_in_node.name + "_enter", [q_max_in_node.name]) + Helper.set_attr_dtype(q_enter_max_node, "T", dtypes.float32) + Helper.set_attr_string(q_enter_max_node, "frame_name", self.rnn_details[i].encode()) + Helper.set_attr_bool(q_enter_max_node, "is_constant", True) + Helper.set_attr_int(q_enter_max_node, "parallel_iterations", 32) + + weight_node_name = graph_info[Helper.node_name_from_input(enter_node_name)].node.input[0] + weight_node = graph_info[Helper.node_name_from_input(weight_node_name)].node + if weight_node.attr["dtype"].type == dtypes.qint8: qint8_const_name = weight_node_name else: base_name = weight_node_name + "_" @@ -115,11 +101,9 @@ def do_transformation(self): min_name = base_name + "min" max_name = base_name + "max" - need_to_create_const_node = bool( - qint8_const_name not in graph_info) + need_to_create_const_node = bool(qint8_const_name not in graph_info) if need_to_create_const_node: - float_tensor = tensor_util.MakeNdarray( - weight_node.attr["value"].tensor) + float_tensor = tensor_util.MakeNdarray(weight_node.attr["value"].tensor) min_value = np.min(float_tensor.flatten()) max_value = np.max(float_tensor.flatten()) @@ -138,56 +122,39 @@ def do_transformation(self): sess = tf.compat.v1.Session() with sess.as_default(): quantize_op = array_ops.quantize_v2( - float_tensor, - min_value, - max_value, - dtypes.qint8, - mode='SCALED', - round_mode="HALF_TO_EVEN") - qint8_tensor = quantize_op[0].numpy( - ) if tf.executing_eagerly() else quantize_op[0].eval() + float_tensor, min_value, max_value, dtypes.qint8, mode="SCALED", round_mode="HALF_TO_EVEN" + ) + qint8_tensor = quantize_op[0].numpy() if tf.executing_eagerly() else quantize_op[0].eval() # Updated min-max values should be passed to the next # feeding node. 
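A standalone sketch of the QuantizeV2 op that array_ops.quantize_v2 above dispatches to (assuming TensorFlow 2.x in eager mode): the op returns the quantized tensor plus a possibly adjusted min/max, which is why the pass re-reads quantize_op[1] and quantize_op[2] instead of reusing the original range.

import numpy as np
import tensorflow as tf

float_tensor = np.array([-1.0, 0.25, 0.8], dtype=np.float32)
q = tf.raw_ops.QuantizeV2(
    input=float_tensor,
    min_range=-1.0,
    max_range=1.0,
    T=tf.qint8,
    mode="SCALED",
    round_mode="HALF_TO_EVEN",
)
# q.output is the qint8 tensor; q.output_min / q.output_max are the ranges that must be
# fed forward, mirroring min_value / max_value in the pass above.
print(q.output.numpy(), q.output_min.numpy(), q.output_max.numpy())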
- min_value = quantize_op[1].numpy( - ) if tf.executing_eagerly() else quantize_op[1].eval() - max_value = quantize_op[2].numpy( - ) if tf.executing_eagerly() else quantize_op[2].eval() + min_value = quantize_op[1].numpy() if tf.executing_eagerly() else quantize_op[1].eval() + max_value = quantize_op[2].numpy() if tf.executing_eagerly() else quantize_op[2].eval() sess.close() - shape = tensor_util.TensorShapeProtoToList( - weight_node.attr["value"].tensor.tensor_shape) - qint8_const_node = Helper.create_constant_node(qint8_const_name, - qint8_tensor, - dtypes.qint8, - shape=shape) - - min_node = Helper.create_constant_node( - min_name, min_value, dtypes.float32) - - max_node = Helper.create_constant_node( - max_name, max_value, dtypes.float32) - enter_min_node = Helper.create_node( - 'Enter', min_name+'_enter', [min_name]) - Helper.set_attr_string( - enter_min_node, 'frame_name', self.rnn_details[i].encode()) - Helper.set_attr_dtype(enter_min_node, 'T', dtypes.float32) - Helper.set_attr_bool(enter_min_node, 'is_constant', True) - Helper.set_attr_int(enter_min_node, 'parallel_iterations', 32) - enter_max_node = Helper.create_node( - 'Enter', max_name+'_enter', [max_name]) - Helper.set_attr_dtype(enter_max_node, 'T', dtypes.float32) - Helper.set_attr_string( - enter_max_node, 'frame_name', self.rnn_details[i].encode()) - Helper.set_attr_bool(enter_max_node, 'is_constant', True) - Helper.set_attr_int(enter_max_node, 'parallel_iterations', 32) + shape = tensor_util.TensorShapeProtoToList(weight_node.attr["value"].tensor.tensor_shape) + qint8_const_node = Helper.create_constant_node( + qint8_const_name, qint8_tensor, dtypes.qint8, shape=shape + ) + + min_node = Helper.create_constant_node(min_name, min_value, dtypes.float32) + + max_node = Helper.create_constant_node(max_name, max_value, dtypes.float32) + enter_min_node = Helper.create_node("Enter", min_name + "_enter", [min_name]) + Helper.set_attr_string(enter_min_node, "frame_name", self.rnn_details[i].encode()) + Helper.set_attr_dtype(enter_min_node, "T", dtypes.float32) + Helper.set_attr_bool(enter_min_node, "is_constant", True) + Helper.set_attr_int(enter_min_node, "parallel_iterations", 32) + enter_max_node = Helper.create_node("Enter", max_name + "_enter", [max_name]) + Helper.set_attr_dtype(enter_max_node, "T", dtypes.float32) + Helper.set_attr_string(enter_max_node, "frame_name", self.rnn_details[i].encode()) + Helper.set_attr_bool(enter_max_node, "is_constant", True) + Helper.set_attr_int(enter_max_node, "parallel_iterations", 32) else: qint8_const_node = graph_info[qint8_const_name].node min_node = graph_info[min_name].node max_node = graph_info[max_name].node - quant_input = [start_node_name, - q_enter_min_node.name, q_enter_max_node.name] - quantize_node = Helper.create_node( - 'QuantizeV2', i[0] + '_quantize', quant_input) + quant_input = [start_node_name, q_enter_min_node.name, q_enter_max_node.name] + quantize_node = Helper.create_node("QuantizeV2", i[0] + "_quantize", quant_input) Helper.set_attr_dtype(quantize_node, "T", dtypes.quint8) Helper.set_attr_string(quantize_node, "mode", b"MIN_FIRST") g.add_node(quantize_node, start_node_name, [i[0]]) @@ -198,124 +165,114 @@ def do_transformation(self): bias_node = graph_info[graph_info[i[0]].outputs[0]].node if graph_info[bias_node.name].outputs: - last_node_name = [ - graph_info[graph_info[bias_node.name].outputs[0]].node.name] + last_node_name = [graph_info[graph_info[bias_node.name].outputs[0]].node.name] else: last_node_name = [] - quantized_matmul_input = [quantize_node.name, 
- Helper.node_name_from_input( - graph_info[i[0]].node.input[1]), - bias_node.input[1]] - quantized_matmul_input.append(quantize_node.name + ':1') - quantized_matmul_input.append(quantize_node.name + ':2') + quantized_matmul_input = [ + quantize_node.name, + Helper.node_name_from_input(graph_info[i[0]].node.input[1]), + bias_node.input[1], + ] + quantized_matmul_input.append(quantize_node.name + ":1") + quantized_matmul_input.append(quantize_node.name + ":2") quantized_matmul_input.append(enter_min_node.name) quantized_matmul_input.append(enter_max_node.name) if self.new_api: quantized_matmul_with_bias_node = Helper.create_node( - '_QuantizedMatMul', i[0] + '_quantized_mat_mul', quantized_matmul_input) + "_QuantizedMatMul", i[0] + "_quantized_mat_mul", quantized_matmul_input + ) else: quantized_matmul_with_bias_node = Helper.create_node( - 'QuantizedMatMulWithBias', i[0] + '_quantized_mat_mul', quantized_matmul_input) - Helper.set_attr_dtype( - quantized_matmul_with_bias_node, 'T1', dtypes.quint8) - Helper.set_attr_dtype( - quantized_matmul_with_bias_node, 'T2', dtypes.qint8) - Helper.set_attr_dtype( - quantized_matmul_with_bias_node, 'Tbias', dtypes.float32) - if self.new_api: - Helper.set_attr_dtype( - quantized_matmul_with_bias_node, 'Tout', dtypes.qint32) + "QuantizedMatMulWithBias", i[0] + "_quantized_mat_mul", quantized_matmul_input + ) + Helper.set_attr_dtype(quantized_matmul_with_bias_node, "T1", dtypes.quint8) + Helper.set_attr_dtype(quantized_matmul_with_bias_node, "T2", dtypes.qint8) + Helper.set_attr_dtype(quantized_matmul_with_bias_node, "Tbias", dtypes.float32) + if self.new_api: + Helper.set_attr_dtype(quantized_matmul_with_bias_node, "Tout", dtypes.qint32) else: - Helper.set_attr_dtype( - quantized_matmul_with_bias_node, 'Toutput', dtypes.qint32) - Helper.set_attr_bool( - quantized_matmul_with_bias_node, 'transpose_a', False) - Helper.set_attr_bool( - quantized_matmul_with_bias_node, 'transpose_b', False) + Helper.set_attr_dtype(quantized_matmul_with_bias_node, "Toutput", dtypes.qint32) + Helper.set_attr_bool(quantized_matmul_with_bias_node, "transpose_a", False) + Helper.set_attr_bool(quantized_matmul_with_bias_node, "transpose_b", False) if self.new_api: - Helper.set_attr_string( - quantized_matmul_with_bias_node, 'input_quant_mode', b"SCALED") - Helper.set_attr_string( - quantized_matmul_with_bias_node, 'output_quant_mode', b"SCALED") - Helper.set_attr_string_list(quantized_matmul_with_bias_node, 'fused_ops', [b'BiasAdd']) - Helper.set_attr_type_list(quantized_matmul_with_bias_node, 'Thost_inputs', [ + Helper.set_attr_string(quantized_matmul_with_bias_node, "input_quant_mode", b"SCALED") + Helper.set_attr_string(quantized_matmul_with_bias_node, "output_quant_mode", b"SCALED") + Helper.set_attr_string_list(quantized_matmul_with_bias_node, "fused_ops", [b"BiasAdd"]) + Helper.set_attr_type_list( + quantized_matmul_with_bias_node, + "Thost_inputs", + [ dtypes.quint8.as_datatype_enum, dtypes.qint8.as_datatype_enum, dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum - ]) - Helper.set_attr_type_list(quantized_matmul_with_bias_node, 'Thost_outputs', [ - dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum]) + dtypes.float32.as_datatype_enum, + ], + ) + Helper.set_attr_type_list( + quantized_matmul_with_bias_node, + "Thost_outputs", + [dtypes.qint32.as_datatype_enum, dtypes.float32.as_datatype_enum, 
dtypes.float32.as_datatype_enum], + ) else: - Helper.set_attr_string( - quantized_matmul_with_bias_node, 'input_quant_mode', b"MIN_FIRST") + Helper.set_attr_string(quantized_matmul_with_bias_node, "input_quant_mode", b"MIN_FIRST") - g.add_node(quantized_matmul_with_bias_node, - quantize_node.name, [bias_node.name]) + g.add_node(quantized_matmul_with_bias_node, quantize_node.name, [bias_node.name]) if qint8_const_node.name not in graph_info: g.add_node(qint8_const_node, None, [enter_node_name]) enter_node = graph_info[enter_node_name].node - if matmul_b_node.op == 'Split': - Helper.set_attr_dtype(matmul_b_node, 'T', dtypes.qint8) - Helper.set_attr_dtype(enter_node, 'T', dtypes.qint8) + if matmul_b_node.op == "Split": + Helper.set_attr_dtype(matmul_b_node, "T", dtypes.qint8) + Helper.set_attr_dtype(enter_node, "T", dtypes.qint8) graph_info[enter_node.name].node.input[0] = qint8_const_node.name elif qint8_const_node.name in graph_info: pass else: - g.add_node(qint8_const_node, None, [ - quantized_matmul_with_bias_node.name]) + g.add_node(qint8_const_node, None, [quantized_matmul_with_bias_node.name]) if need_to_create_const_node: - g.add_node(enter_min_node, None, [ - quantized_matmul_with_bias_node.name]) - g.add_node(enter_max_node, None, [ - quantized_matmul_with_bias_node.name]) - g.add_node(min_node, None, [enter_min_node.name]) - g.add_node(max_node, None, [enter_max_node.name]) + g.add_node(enter_min_node, None, [quantized_matmul_with_bias_node.name]) + g.add_node(enter_max_node, None, [quantized_matmul_with_bias_node.name]) + g.add_node(min_node, None, [enter_min_node.name]) + g.add_node(max_node, None, [enter_max_node.name]) # create requantize node - requantize_min_node = Helper.create_constant_node( - i[0] + 'requant_w_min', min_output, dtypes.float32) - requantize_max_node = Helper.create_constant_node( - i[0] + 'requant_w_max', max_output, dtypes.float32) + requantize_min_node = Helper.create_constant_node(i[0] + "requant_w_min", min_output, dtypes.float32) + requantize_max_node = Helper.create_constant_node(i[0] + "requant_w_max", max_output, dtypes.float32) enter_req_min_node = Helper.create_node( - 'Enter', requantize_min_node.name+'_enter', [requantize_min_node.name]) - Helper.set_attr_string( - enter_req_min_node, 'frame_name', self.rnn_details[i].encode()) - Helper.set_attr_dtype(enter_req_min_node, 'T', dtypes.float32) - Helper.set_attr_bool(enter_req_min_node, 'is_constant', True) - Helper.set_attr_int(enter_req_min_node, 'parallel_iterations', 32) + "Enter", requantize_min_node.name + "_enter", [requantize_min_node.name] + ) + Helper.set_attr_string(enter_req_min_node, "frame_name", self.rnn_details[i].encode()) + Helper.set_attr_dtype(enter_req_min_node, "T", dtypes.float32) + Helper.set_attr_bool(enter_req_min_node, "is_constant", True) + Helper.set_attr_int(enter_req_min_node, "parallel_iterations", 32) enter_req_max_node = Helper.create_node( - 'Enter', requantize_max_node.name+'_enter', [requantize_max_node.name]) - Helper.set_attr_dtype(enter_req_max_node, 'T', dtypes.float32) - Helper.set_attr_string( - enter_req_max_node, 'frame_name', self.rnn_details[i].encode()) - Helper.set_attr_bool(enter_req_max_node, 'is_constant', True) - Helper.set_attr_int(enter_req_max_node, 'parallel_iterations', 32) - requantize_input = [quantized_matmul_with_bias_node.name, - quantized_matmul_with_bias_node.name + - ':1', quantized_matmul_with_bias_node.name + ':2', - enter_req_min_node.name, enter_req_max_node.name] - requantize_node = Helper.create_node( - 'Requantize', i[0] + 
'_requantize', requantize_input) - Helper.set_attr_dtype(requantize_node, 'out_type', dtypes.qint8) - Helper.set_attr_dtype(requantize_node, 'Tinput', dtypes.qint32) - - g.add_node(requantize_node, - quantized_matmul_with_bias_node.name, [bias_node.name]) - dequantize_input = [ - requantize_node.name, requantize_node.name + ':1', requantize_node.name + ':2'] - dequantize_node = Helper.create_node( - 'Dequantize', i[0] + '_dequantize', dequantize_input) + "Enter", requantize_max_node.name + "_enter", [requantize_max_node.name] + ) + Helper.set_attr_dtype(enter_req_max_node, "T", dtypes.float32) + Helper.set_attr_string(enter_req_max_node, "frame_name", self.rnn_details[i].encode()) + Helper.set_attr_bool(enter_req_max_node, "is_constant", True) + Helper.set_attr_int(enter_req_max_node, "parallel_iterations", 32) + requantize_input = [ + quantized_matmul_with_bias_node.name, + quantized_matmul_with_bias_node.name + ":1", + quantized_matmul_with_bias_node.name + ":2", + enter_req_min_node.name, + enter_req_max_node.name, + ] + requantize_node = Helper.create_node("Requantize", i[0] + "_requantize", requantize_input) + Helper.set_attr_dtype(requantize_node, "out_type", dtypes.qint8) + Helper.set_attr_dtype(requantize_node, "Tinput", dtypes.qint32) + + g.add_node(requantize_node, quantized_matmul_with_bias_node.name, [bias_node.name]) + dequantize_input = [requantize_node.name, requantize_node.name + ":1", requantize_node.name + ":2"] + dequantize_node = Helper.create_node("Dequantize", i[0] + "_dequantize", dequantize_input) Helper.set_attr_dtype(dequantize_node, "T", dtypes.qint8) Helper.set_attr_dtype(dequantize_node, "dtype", dtypes.float32) Helper.set_attr_string(dequantize_node, "mode", b"MIN_FIRST") @@ -326,8 +283,9 @@ def do_transformation(self): g.add_node(requantize_max_node, None, [enter_req_max_node.name]) g.add_node(dequantize_node, requantize_node.name, last_node_name) if last_node_name: - replace_index = [Helper.node_name_from_input( - i) for i in graph_info[last_node_name[0]].node.input].index(bias_node.name) + replace_index = [ + Helper.node_name_from_input(i) for i in graph_info[last_node_name[0]].node.input + ].index(bias_node.name) graph_info[last_node_name[0]].node.input[replace_index] = dequantize_node.name g.remove_node(bias_node.name) diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/scale_propagation.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/scale_propagation.py index f9dc4202598..5c3d5241043 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/scale_propagation.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/scale_propagation.py @@ -16,29 +16,35 @@ # limitations under the License. 
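The scale-propagation pass below swaps a freshly built scalar Const in for the old requantize range input; a standalone sketch (assuming TensorFlow is installed) of how such a node is assembled from the protobuf helpers, mirroring _create_new_const_node:

from tensorflow.core.framework import attr_value_pb2, node_def_pb2
from tensorflow.python.framework import dtypes, tensor_util

def make_float_const(name, value):
    # Build a scalar float32 Const NodeDef, the same kind of node the pass swaps in.
    node = node_def_pb2.NodeDef()
    node.op = "Const"
    node.name = name
    node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum))
    node.attr["value"].CopyFrom(
        attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(float(value), dtypes.float32, []))
    )
    return node

print(make_float_const("pool_cac_requantize_min_value", -6.0))  # hypothetical node name and value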
"""Scale propagation Graph Rewriter.""" -from tensorflow.core.framework import node_def_pb2 -from tensorflow.core.framework import attr_value_pb2 -from tensorflow.python.framework import tensor_util -from tensorflow.python.framework import dtypes +from tensorflow.core.framework import attr_value_pb2, node_def_pb2 +from tensorflow.python.framework import dtypes, tensor_util -from ..graph_base import GraphRewriterBase from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper +from ..graph_base import GraphRewriterBase + class ScaleProPagationTransformer(GraphRewriterBase): """Scale propagation converter.""" - cac_pattern = [['QuantizeV2', 'Requantize', 'RequantizePerChannel'], ['QuantizedAvgPool'], - [ - 'QuantizedConv2DWithBias', 'QuantizedConv2DWithBiasAndRelu', - 'QuantizedConv2DPerChannel', 'QuantizedConv2D', - 'QuantizedConv2DWithBiasSumAndRelu' - ], ['Requantize', 'RequantizePerChannel']] - - def __init__(self, model, direction='Up'): + + cac_pattern = [ + ["QuantizeV2", "Requantize", "RequantizePerChannel"], + ["QuantizedAvgPool"], + [ + "QuantizedConv2DWithBias", + "QuantizedConv2DWithBiasAndRelu", + "QuantizedConv2DPerChannel", + "QuantizedConv2D", + "QuantizedConv2DWithBiasSumAndRelu", + ], + ["Requantize", "RequantizePerChannel"], + ] + + def __init__(self, model, direction="Up"): """Initilization.""" super().__init__(model) - self.direction = direction if direction not in ('Up', 'Down') else 'Up' + self.direction = direction if direction not in ("Up", "Down") else "Up" self.cur_graph = GraphAnalyzer() self.cur_graph.graph = self.model @@ -49,15 +55,14 @@ def _create_new_const_node(self, new_const_node_name, value, old_const_node_name new_node = node_def_pb2.NodeDef() new_node.op = "Const" new_node.name = new_const_node_name - new_node.attr["dtype"].CopyFrom( - attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + new_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) new_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto(float(value), dtypes.float32, []))) + attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(float(value), dtypes.float32, [])) + ) output_node_name = self.graph_info[old_const_node_name].outputs[0] - self.cur_graph.replace_const_node(new_node, - [Helper.node_name_from_input(output_node_name)], - old_const_node_name) + self.cur_graph.replace_const_node( + new_node, [Helper.node_name_from_input(output_node_name)], old_const_node_name + ) self.cur_graph.remove_node(old_const_node_name) def _cac_transformation(self): @@ -82,7 +87,7 @@ def _cac_transformation(self): if pooling_nodes_count > 1: continue - if pre_node.op == 'QuantizeV2': + if pre_node.op == "QuantizeV2": pre_min_index, pre_max_index = quantize_v2_min_index, quantize_v2_max_index else: pre_min_index, pre_max_index = requntize_min_index, requntize_max_index @@ -90,17 +95,21 @@ def _cac_transformation(self): requantize_node_name = match[3] requantize_node = self.graph_info[requantize_node_name].node - requantize_min = self.graph_info[Helper.node_name_from_input( - requantize_node.input[requntize_min_index])].node - requantize_max = self.graph_info[Helper.node_name_from_input( - requantize_node.input[requntize_max_index])].node - - requantize_min_value = (requantize_min.attr['value'].tensor.float_val)[0] - requantize_max_value = (requantize_max.attr['value'].tensor.float_val)[0] - 
self._create_new_const_node(pre_node_name + '_cac_requantize_min_value', - requantize_min_value, pre_node.input[pre_min_index]) - self._create_new_const_node(pre_node_name + '_cac_requantize_max_value', - requantize_max_value, pre_node.input[pre_max_index]) + requantize_min = self.graph_info[ + Helper.node_name_from_input(requantize_node.input[requntize_min_index]) + ].node + requantize_max = self.graph_info[ + Helper.node_name_from_input(requantize_node.input[requntize_max_index]) + ].node + + requantize_min_value = (requantize_min.attr["value"].tensor.float_val)[0] + requantize_max_value = (requantize_max.attr["value"].tensor.float_val)[0] + self._create_new_const_node( + pre_node_name + "_cac_requantize_min_value", requantize_min_value, pre_node.input[pre_min_index] + ) + self._create_new_const_node( + pre_node_name + "_cac_requantize_max_value", requantize_max_value, pre_node.input[pre_max_index] + ) def do_transformation(self): """Apply the scale propagation algrothim.""" diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py index 24412080afb..3386bd4335b 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py @@ -19,21 +19,34 @@ import collections import logging -import six import re + import numpy as np +import six +from onnx import AttributeProto, TensorProto, helper, numpy_helper -from onnx import helper, numpy_helper, AttributeProto, TensorProto from . import tf2onnx_utils as utils from .onnx_node import OnnxNode logger = logging.getLogger("neural_compressor") + class OnnxGraph: """Class that provides graph manipulation and matching.""" - def __init__(self, nodes, output_shapes=None, dtypes=None, target=None, opset=None, extra_opset=None, - input_names=None, output_names=None, is_subgraph=False, graph_name=None): + def __init__( + self, + nodes, + output_shapes=None, + dtypes=None, + target=None, + opset=None, + extra_opset=None, + input_names=None, + output_names=None, + is_subgraph=False, + graph_name=None, + ): """Create ONNX Graph. 
Args: @@ -120,17 +133,31 @@ def __init__(self, nodes, output_shapes=None, dtypes=None, target=None, opset=No body_graph.parent_graph = self branches[attr_name] = body_graph - _ = self.make_node(n.type, n.input, outputs=new_outputs, attr=n.attr, name=n.name, - skip_conversion=n._skip_conversion, dtypes=n_dtypes, shapes=n_shapes, - domain=n.domain, branches=branches) + _ = self.make_node( + n.type, + n.input, + outputs=new_outputs, + attr=n.attr, + name=n.name, + skip_conversion=n._skip_conversion, + dtypes=n_dtypes, + shapes=n_shapes, + domain=n.domain, + branches=branches, + ) self.replace_all_inputs(o, new_output_name, ops=self.get_nodes()) - self.make_node("Identity", [new_output_name], outputs=[o], op_name_scope=n.name + "_" + "graph_outputs", - dtypes=[o_dtype], shapes=[o_shape]) + self.make_node( + "Identity", + [new_output_name], + outputs=[o], + op_name_scope=n.name + "_" + "graph_outputs", + dtypes=[o_dtype], + shapes=[o_shape], + ) self.copy_shape(new_output_name, o) self.copy_dtype(new_output_name, o) - def set_config(self, target=None, opset=None, extra_opset=None): """Set graph fields containing conversion options.""" if target is None: @@ -176,18 +203,40 @@ def make_const(self, name, np_val, skip_conversion=False, raw=True): if raw and not is_bytes: onnx_tensor = numpy_helper.from_array(np_val, name) else: - onnx_tensor = helper.make_tensor(name, utils.map_numpy_to_onnx_dtype(np_val.dtype), - np_val.shape, np_val_flat, raw=False) + onnx_tensor = helper.make_tensor( + name, utils.map_numpy_to_onnx_dtype(np_val.dtype), np_val.shape, np_val_flat, raw=False + ) dtype = onnx_tensor.data_type - node = self.make_node("Const", [], outputs=[name], name=name, attr={"value": onnx_tensor}, - skip_conversion=skip_conversion, dtypes=[dtype], infer_shape_dtype=False) + node = self.make_node( + "Const", + [], + outputs=[name], + name=name, + attr={"value": onnx_tensor}, + skip_conversion=skip_conversion, + dtypes=[dtype], + infer_shape_dtype=False, + ) self.set_shape(name, np_val.shape) self.set_dtype(name, utils.map_numpy_to_onnx_dtype(np_val.dtype)) return node - def make_node(self, op_type, inputs, attr=None, output_count=1, outputs=None, skip_conversion=True, - op_name_scope=None, name=None, shapes=None, dtypes=None, domain=utils.ONNX_DOMAIN, - infer_shape_dtype=True, branches=None): + def make_node( + self, + op_type, + inputs, + attr=None, + output_count=1, + outputs=None, + skip_conversion=True, + op_name_scope=None, + name=None, + shapes=None, + dtypes=None, + domain=utils.ONNX_DOMAIN, + infer_shape_dtype=True, + branches=None, + ): """Make a new onnx node in the graph.""" if attr is None: attr = {} @@ -240,14 +289,22 @@ def make_node(self, op_type, inputs, attr=None, output_count=1, outputs=None, sk node.set_body_graph_as_attr(branch, body) if shapes: - utils.assert_error(len(shapes) == output_count, - "output shape count %s not equal to output count %s", len(shapes), output_count) + utils.assert_error( + len(shapes) == output_count, + "output shape count %s not equal to output count %s", + len(shapes), + output_count, + ) for i in range(output_count): self.set_shape(node.output[i], shapes[i]) if dtypes: - utils.assert_error(len(dtypes) == output_count, - "output dtypes count %s not equal to output count %s", len(dtypes), output_count) + utils.assert_error( + len(dtypes) == output_count, + "output dtypes count %s not equal to output count %s", + len(dtypes), + output_count, + ) for i in range(output_count): self.set_dtype(node.output[i], dtypes[i]) @@ -297,8 +354,8 @@ def 
remove_node(self, node_name): if op_input == "": continue utils.assert_error( - op_input in self._output_to_consumers, - "Input %r of node %r not found.", op_input, node_name) + op_input in self._output_to_consumers, "Input %r of node %r not found.", op_input, node_name + ) self._unregister_input_name(op_input, node) self._nodes.remove(node) @@ -355,8 +412,15 @@ def reset_nodes(self, ops): def create_new_graph_with_same_config(self): """Create a clean graph inheriting current graph's configuration.""" - return OnnxGraph([], output_shapes={}, dtypes={}, target=self._target, opset=self._opset, - extra_opset=self.extra_opset, output_names=[]) + return OnnxGraph( + [], + output_shapes={}, + dtypes={}, + target=self._target, + opset=self._opset, + extra_opset=self.extra_opset, + output_names=[], + ) def is_empty_input(self, name): """Check if the input is empty. @@ -383,8 +447,7 @@ def update_node_shape_dtype(self, node, override=False): if not self.is_empty_input(node.input[i]): if logger.isEnabledFor(logging.INFO): logger.warning( - "[%s] infer a inexistent node: [%s], please check the code", - node.name, node.input[i] + "[%s] infer a inexistent node: [%s], please check the code", node.name, node.input[i] ) continue if inp.is_const(): @@ -560,7 +623,7 @@ def topological_sort(self, ops): def _push_stack(stack, node, in_stack): stack.append(node) if node in in_stack: - raise ValueError('Graph has cycles, node.name=%r.' % ops[node].name) + raise ValueError("Graph has cycles, node.name=%r." % ops[node].name) in_stack[node] = True def _get_unvisited_child(g, node, not_visited): @@ -580,7 +643,7 @@ def _get_unvisited_child(g, node, not_visited): implicit_inputs = op.get_implicit_inputs() all_input |= set(implicit_inputs) # remove those empty inputs - all_input = list(filter(lambda a: a != '', all_input)) + all_input = list(filter(lambda a: a != "", all_input)) for inp in sorted(all_input): j = self.get_node_by_output(inp) utils.assert_error(j is not None, "Cannot find node with output %r in graph %r", inp, self.graph_name) @@ -644,9 +707,12 @@ def make_graph(self, doc, graph_name=None): for op in graph_inputs: if op.type == "PlaceholderWithDefault": utils.assert_error(op.inputs[0] is not None, "Cannot find node with output {}".format(op.input[0])) - utils.assert_error(op.inputs[0].is_const(), - "non-const default value for PlaceholderWithDefault node '%s' is not supported. " - "Use the --use_default or --ignore_default flags to convert this node.", op.name) + utils.assert_error( + op.inputs[0].is_const(), + "non-const default value for PlaceholderWithDefault node '%s' is not supported. " + "Use the --use_default or --ignore_default flags to convert this node.", + op.name, + ) # copy the tensor value, set its name to current node's output, add as initializer value = op.inputs[0].get_tensor_value(as_list=False) tensor = numpy_helper.from_array(value, op.output[0]) @@ -686,7 +752,7 @@ def make_graph(self, doc, graph_name=None): # We still don't 100% trust the accuracy of all the shapes in graph.py, but for custom ops they are # almost certainly accurate and onnx has no other way of knowing them. 
for out in op.output: - if out == '' or out in self.outputs: + if out == "" or out in self.outputs: continue dtype = self.get_dtype(out) shape = self.get_shape(out) @@ -694,13 +760,15 @@ def make_graph(self, doc, graph_name=None): tensor_value_info.append(v) # create graph proto - graph = helper.make_graph([op.op for op in ops], - graph_name, - input_tensor_values, - output_tensor_values, - initializer=initializers, - doc_string=doc, - value_info=tensor_value_info) + graph = helper.make_graph( + [op.op for op in ops], + graph_name, + input_tensor_values, + output_tensor_values, + initializer=initializers, + doc_string=doc, + value_info=tensor_value_info, + ) return graph @@ -714,10 +782,7 @@ def make_model(self, graph_doc, graph_name="tfqdq_to_onnxqdq", **kwargs): graph = self.make_graph(graph_doc, graph_name) if "producer_name" not in kwargs: - kwargs = { - "producer_name": "neural compressor", - "producer_version": "1.0.0" - } + kwargs = {"producer_name": "neural compressor", "producer_version": "1.0.0"} if "opset_imports" not in kwargs: opsets = [helper.make_opsetid(utils.ONNX_DOMAIN, self._opset)] opsets.append(utils.AI_ONNX_ML_OPSET) @@ -726,13 +791,15 @@ def make_model(self, graph_doc, graph_name="tfqdq_to_onnxqdq", **kwargs): kwargs["opset_imports"] = opsets model_proto = helper.make_model(graph, **kwargs) - utils.assert_error(self.opset in utils.OPSET_TO_IR_VERSION, - "Opset %s is not supported yet. Please use a lower opset" % self.opset) + utils.assert_error( + self.opset in utils.OPSET_TO_IR_VERSION, + "Opset %s is not supported yet. Please use a lower opset" % self.opset, + ) # set the IR version based on opset try: model_proto.ir_version = utils.OPSET_TO_IR_VERSION.get(self.opset, model_proto.ir_version) - except: # pylint: disable=bare-except + except: # pylint: disable=bare-except logger.error("ir_version override failed - install the latest onnx version") return model_proto @@ -746,8 +813,9 @@ def make_onnx_graph_io(self, ids): utils.assert_error(dtype is not None, "missing output dtype for " + name) # TODO: allow None output shape or not? e.g. shape=(?,) - #utils.assert_error(shape is not None, "missing output shape for " + name) - if shape is None: logger.warning("missing output shape for %s", name) + # utils.assert_error(shape is not None, "missing output shape for " + name) + if shape is None: + logger.warning("missing output shape for %s", name) v = utils.make_onnx_inputs_outputs(name, dtype, shape) tensor_value_infos.append(v) @@ -757,11 +825,7 @@ def dump_graph(self): """Dump graph with shapes (helpful for debugging).""" for node in self.get_nodes(): input_names = ["{}{}".format(n, self.get_shape(n)) for n in node.input] - logger.debug("%s %s %s %s", - node.type, - self.get_shape(node.output[0]), - node.name, - ", ".join(input_names)) + logger.debug("%s %s %s %s", node.type, self.get_shape(node.output[0]), node.name, ", ".join(input_names)) def dump_node_statistics(self, include_attrs=False, include_subgraphs=True): """Return a counter of op types (and optionally attribute names) within the graph.""" @@ -807,7 +871,9 @@ def remove_input(self, node, to_be_removed, input_index=None): utils.assert_error( node.input.count(node.input[i]) <= 1, "Node %r takes multiple times the same input %r. 
This case is not handled.", - node.name, node.input[i]) + node.name, + node.input[i], + ) self._unregister_input_name(node.input[i], node) del node.input[i] break @@ -885,10 +951,10 @@ def insert_new_node_on_output(self, op_type, output_name=None, name=None, inputs Returns: node that was inserted """ - utils.assert_error(isinstance(output_name, six.text_type), "output_name's type is not expected: %s", - type(output_name)) - utils.assert_error(isinstance(op_type, six.text_type), "op_type's type is not expected: %s", - type(op_type)) + utils.assert_error( + isinstance(output_name, six.text_type), "output_name's type is not expected: %s", type(output_name) + ) + utils.assert_error(isinstance(op_type, six.text_type), "op_type's type is not expected: %s", type(op_type)) utils.assert_error(output_name is not None, "output_name cannot be None for op_type=%r.", op_type) if inputs is None: @@ -939,9 +1005,11 @@ def _unregister_input_name(self, input_name, node, only_graph=False): if input_name in self._output_to_consumers[input_name]: if node_name in self._output_to_consumers[input_name]: self._output_to_consumers[input_name].remove(node_name) - if (self.parent_graph is not None and - input_name in self.parent_graph._input_to_graph and - id(self) in self.parent_graph._input_to_graph[input_name]): + if ( + self.parent_graph is not None + and input_name in self.parent_graph._input_to_graph + and id(self) in self.parent_graph._input_to_graph[input_name] + ): del self.parent_graph._input_to_graph[input_name][id(self)] self.parent_graph._unregister_input_name(input_name, node, only_graph=True) @@ -959,8 +1027,8 @@ def replace_all_inputs(self, old_input, new_input, ops=None): keep_ops = True elif old_input in self._output_to_consumers: ops = list( - filter(lambda a: a is not None, - map(self.get_node_by_name, self._output_to_consumers[old_input]))) + filter(lambda a: a is not None, map(self.get_node_by_name, self._output_to_consumers[old_input])) + ) keep_ops = False else: ops = [] @@ -979,8 +1047,7 @@ def replace_all_inputs(self, old_input, new_input, ops=None): # modify references in sub graphs if old_input in self._input_to_graph: for g in self._input_to_graph[old_input].values(): - g.replace_all_inputs(old_input, new_input, - ops=g.get_nodes() if keep_ops else None) + g.replace_all_inputs(old_input, new_input, ops=g.get_nodes() if keep_ops else None) def replace_input(self, node, old_input, new_input, input_index=None): """Replace one input in a node. @@ -988,8 +1055,9 @@ def replace_input(self, node, old_input, new_input, input_index=None): The method is more efficient if *input_index* is specified. Otherwise, it renames every output named *old_input*. """ - assert isinstance(node, OnnxNode) and isinstance(old_input, six.text_type) \ - and isinstance(new_input, six.text_type) + assert ( + isinstance(node, OnnxNode) and isinstance(old_input, six.text_type) and isinstance(new_input, six.text_type) + ) is_replaced = False if input_index is None: for i, input_name in enumerate(node.input): @@ -1122,7 +1190,7 @@ def convert_qdq_nodes(self, q_node, dq_node): qdq_node_output_shape = self.get_shape(dq_node.output[0]) # Get the attributes of qdq node - signed_input = bool(q_node.get_attr_value('T', TensorProto.INT8) == TensorProto.INT8) + signed_input = bool(q_node.get_attr_value("T", TensorProto.INT8) == TensorProto.INT8) max_quantized = 127 @@ -1130,14 +1198,14 @@ def convert_qdq_nodes(self, q_node, dq_node): max_quantized = 255 # Get axis attribute for per channel implementation. 
- axis = q_node.get_attr_value('axis', -1) + axis = q_node.get_attr_value("axis", -1) q_attrs = {} quantized_dtype = TensorProto.INT8 if signed_input else TensorProto.UINT8 if axis != -1: utils.assert_error(self.opset >= 13, "Opset >= 13 is required for per channel quantization") - q_attrs['axis'] = axis + q_attrs["axis"] = axis inp_rank = self.get_rank(q_node.input[0]) utils.assert_error(inp_rank is not None, "Input rank cannot be unknown for qdq op %s", q_node.name) @@ -1193,25 +1261,29 @@ def convert_qdq_nodes(self, q_node, dq_node): scale = self.make_const(name=utils.set_name("quant_scale"), np_val=cast_scale).output[0] zero_point = self.make_const(utils.set_name("zero_point"), zero_point_np).output[0] - quant_node = self.make_node(op_type="QuantizeLinear", - inputs=[q_node.input[0], scale, zero_point], - shapes=[qdq_node_output_shape], - attr=q_attrs, - dtypes=[quantized_dtype], - name=utils.set_name("QuantLinearNode")) + quant_node = self.make_node( + op_type="QuantizeLinear", + inputs=[q_node.input[0], scale, zero_point], + shapes=[qdq_node_output_shape], + attr=q_attrs, + dtypes=[quantized_dtype], + name=utils.set_name("QuantLinearNode"), + ) self.set_shape(quant_node.output[0], qdq_node_output_shape) self.remove_node(q_node.name) self.remove_node(dq_node.name) - dequant_node = self.make_node(op_type="DequantizeLinear", - inputs=[quant_node.output[0], scale, zero_point], - outputs=[dq_node.output[0]], - shapes=[qdq_node_output_shape], - attr=q_attrs, - dtypes=[qdq_node_output_dtype], - name=utils.set_name("DequantLinearNode")) + dequant_node = self.make_node( + op_type="DequantizeLinear", + inputs=[quant_node.output[0], scale, zero_point], + outputs=[dq_node.output[0]], + shapes=[qdq_node_output_shape], + attr=q_attrs, + dtypes=[qdq_node_output_dtype], + name=utils.set_name("DequantLinearNode"), + ) self.set_shape(dequant_node.output[0], qdq_node_output_shape) def delete_qdq_nodes(self, q_node, dq_node): @@ -1228,11 +1300,11 @@ def delete_qdq_nodes(self, q_node, dq_node): def optimize_conv_add_fusion(self, node): """Fuse conv and add.""" - if node.type != 'Add': + if node.type != "Add": return [] conv_node = self.get_node_by_output(node.input[0]) - if conv_node.type != 'Conv': + if conv_node.type != "Conv": return [] if len(self.find_output_consumers(conv_node.output[0])) > 1: @@ -1240,7 +1312,7 @@ def optimize_conv_add_fusion(self, node): next_nodes = self.find_output_consumers(node.output[0]) for next_node in next_nodes: - if next_node.type == 'Add': + if next_node.type == "Add": return [] if self.is_const(node.input[1]): @@ -1254,30 +1326,27 @@ def optimize_conv_add_fusion(self, node): input_dequantize_node = self.get_node_by_output(conv_node.input[0]) weight_dequantize_node = self.get_node_by_output(conv_node.input[1]) if re.search(r"\w+:\d+", input_dequantize_node.input[1]): - input_dequantize_node.input[1] = input_dequantize_node.input[1].rsplit(':', 1)[0] + input_dequantize_node.input[1] = input_dequantize_node.input[1].rsplit(":", 1)[0] if re.search(r"\w+:\d+", weight_dequantize_node.input[1]): - weight_dequantize_node.input[1] = weight_dequantize_node.input[1].rsplit(':', 1)[0] - input_scale = self.get_node_by_name( - input_dequantize_node.input[1]).get_tensor_value(as_list=False) - weight_scale = self.get_node_by_name( - weight_dequantize_node.input[1]).get_tensor_value(as_list=False) + weight_dequantize_node.input[1] = weight_dequantize_node.input[1].rsplit(":", 1)[0] + input_scale = self.get_node_by_name(input_dequantize_node.input[1]).get_tensor_value(as_list=False) + 
weight_scale = self.get_node_by_name(weight_dequantize_node.input[1]).get_tensor_value(as_list=False) bias_scale_val = input_scale * weight_scale bias_zp_val = np.zeros(bias_scale_val.shape, dtype=np.int32).reshape(-1) quantized_bias = (bias_tensor / bias_scale_val).round().astype(np.int32) - bias_scale = self.make_const(name=utils.set_name(node.name+"_scale"), - np_val=bias_scale_val).output[0] - bias_zero_point = self.make_const(utils.set_name(node.name+"_zero_point"), - bias_zp_val).output[0] - bias_input = self.make_const(name=utils.set_name(node.name+"_x"), - np_val=quantized_bias).output[0] - - dequant_bias_node = self.make_node(op_type="DequantizeLinear", - inputs=[bias_input, bias_scale, bias_zero_point], - outputs=[conv_node.name], - shapes=[bias_scale_val.shape], - attr=weight_dequantize_node.attr, - dtypes=[TensorProto.INT32], - name=utils.set_name("DequantLinearNode")) + bias_scale = self.make_const(name=utils.set_name(node.name + "_scale"), np_val=bias_scale_val).output[0] + bias_zero_point = self.make_const(utils.set_name(node.name + "_zero_point"), bias_zp_val).output[0] + bias_input = self.make_const(name=utils.set_name(node.name + "_x"), np_val=quantized_bias).output[0] + + dequant_bias_node = self.make_node( + op_type="DequantizeLinear", + inputs=[bias_input, bias_scale, bias_zero_point], + outputs=[conv_node.name], + shapes=[bias_scale_val.shape], + attr=weight_dequantize_node.attr, + dtypes=[TensorProto.INT32], + name=utils.set_name("DequantLinearNode"), + ) # Backup the conv and biasadd values conv_type = conv_node.type @@ -1296,8 +1365,16 @@ def optimize_conv_add_fusion(self, node): self.remove_node(conv_node.name) self.remove_node(node.name) - self.make_node(conv_type, conv_inputs, attr=conv_attr, name=conv_name, outputs=conv_output, - shapes=[shape], dtypes=[dtype], skip_conversion=False) + self.make_node( + conv_type, + conv_inputs, + attr=conv_attr, + name=conv_name, + outputs=conv_output, + shapes=[shape], + dtypes=[dtype], + skip_conversion=False, + ) return [] def apply_onnx_fusion(self): diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_node.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_node.py index 876729886c6..1f5965d7f5d 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_node.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_node.py @@ -19,14 +19,16 @@ import copy import logging + import numpy as np +from onnx import AttributeProto, TensorProto, helper, numpy_helper -from onnx import helper, numpy_helper, AttributeProto, TensorProto -from .onnx_schema import get_schema from . 
import tf2onnx_utils as utils +from .onnx_schema import get_schema logger = logging.getLogger("neural_compressor") + class OnnxNode: """An ONNX Node Wrapper used for graph manipulations.""" @@ -105,8 +107,9 @@ def get_onnx_attrs(self): """Return onnx valid attributes.""" schema = get_schema(self.type, self.graph.opset, self.domain) if schema is None and not (self.is_const() or self.is_graph_input()): - logger.debug("Node %s uses non-stardard onnx op <%s, %s>, skip attribute check", - self.name, self.domain, self.type) + logger.debug( + "Node %s uses non-standard onnx op <%s, %s>, skip attribute check", self.name, self.domain, self.type + ) onnx_attrs = {} for a in self._attr.values(): if a.name == "value": @@ -162,8 +165,12 @@ def data_format(self, val): def is_nhwc(self): """Return True if node is in NHWC format.""" - utils.assert_error('D' not in self.data_format, "is_nhwc called on %s with spatial=2 but data_format=%s", - self.name, self.data_format) + utils.assert_error( + "D" not in self.data_format, + "is_nhwc called on %s with spatial=2 but data_format=%s", + self.name, + self.data_format, + ) return self.data_format == "NHWC" def is_const(self): @@ -186,9 +193,7 @@ def is_graph_input(self): def is_graph_input_default_const(self): """Check if the node is the input of the graph and const.""" - return self.is_const() and any( - out.is_graph_input() for out in self.graph.find_output_consumers(self.output[0]) - ) + return self.is_const() and any(out.is_graph_input() for out in self.graph.find_output_consumers(self.output[0])) def is_while(self): """Check if the node is while op.""" @@ -222,7 +227,7 @@ def summary(self): lines.append("Outputs:") lines.append("\t{}={}, {}".format(name, g.get_shape(name), g.get_dtype(name))) - return '\n'.join(lines) + return "\n".join(lines) def get_attr(self, name, default=None): """Get raw attribute value.""" @@ -239,19 +244,13 @@ def get_attr_value(self, name, default=None): def get_attr_int(self, name): """Get attribute value as int.""" attr_int = self.get_attr_value(name) - utils.assert_error( - attr_int is not None and isinstance(attr_int, int), - "attribute %s is None", name - ) + utils.assert_error(attr_int is not None and isinstance(attr_int, int), "attribute %s is None", name) return attr_int def get_attr_str(self, name, encoding="utf-8"): """Get attribute value as string.""" attr_str = self.get_attr_value(name) - utils.assert_error( - attr_str is not None and isinstance(attr_str, bytes), - "attribute %s is None", name - ) + utils.assert_error(attr_str is not None and isinstance(attr_str, bytes), "attribute %s is None", name) return attr_str.decode(encoding) def set_attr(self, name, value): diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_schema.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_schema.py index 2538f6457cc..b05a9cdfe0c 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_schema.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_schema.py @@ -18,13 +18,15 @@ """ONNX Operator Schemas for Tensorflow model converting to ONNX model.""" import logging -from collections import defaultdict, OrderedDict +from collections import OrderedDict, defaultdict + from onnx import defs from .
import tf2onnx_utils as utils logger = logging.getLogger("neural_compressor") + class OnnxOpSchema(object): """Wrapper for Onnx schema.""" @@ -87,9 +89,7 @@ def _register_all_schemas_with_history(): ordered_map = defaultdict(lambda: defaultdict(OrderedDict)) for name, domain_version_schema_map in name_domain_version_schema_map.items(): for domain, version_schema_map in domain_version_schema_map.items(): - ordered_map[name][domain] = OrderedDict( - sorted(version_schema_map.items(), key=lambda x: -x[0]) - ) + ordered_map[name][domain] = OrderedDict(sorted(version_schema_map.items(), key=lambda x: -x[0])) return ordered_map @@ -113,6 +113,7 @@ def _parse_domain_opset_versions(schemas): _domain_opset_versions = _parse_domain_opset_versions(_schemas) + def get_schema(name, max_inclusive_opset_version, domain=None): """Get schema by name within specific version.""" domain = domain or utils.ONNX_DOMAIN @@ -123,6 +124,7 @@ def get_schema(name, max_inclusive_opset_version, domain=None): return schema return None + def get_max_supported_opset_version(domain=None): """Get max supported opset version by current onnx package given a domain.""" domain = domain or utils.ONNX_DOMAIN diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py index 89b01e60bea..f428037f8c8 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py @@ -17,18 +17,21 @@ # """Utils for Tensorflow model converting to ONNX model.""" -import os import copy import logging +import os import re + +import numpy as np import tensorflow as tf from google.protobuf import text_format -import numpy as np -from tensorflow.core.framework import types_pb2, tensor_pb2 +from onnx import OperatorSetIdProto, TensorProto, defs, helper, numpy_helper, onnx_pb, shape_inference +from tensorflow.core.framework import tensor_pb2, types_pb2 from tensorflow.python.framework import tensor_util -from onnx import helper, onnx_pb, numpy_helper, defs, TensorProto, OperatorSetIdProto, shape_inference + from neural_compressor.utils.utility import LazyImport -t2o = LazyImport('tf2onnx') + +t2o = LazyImport("tf2onnx") logger = logging.getLogger("neural_compressor") @@ -53,19 +56,56 @@ # ignore the following attributes TF2ONNX_IGNORED_NODE_ATTRS = { - "T", "unknown_rank", "_class", "Tshape", "use_cudnn_on_gpu", "Index", "Tpaddings", - "TI", "Tparams", "Tindices", "Tlen", "Tdim", "Tin", "dynamic_size", "Tmultiples", - "Tblock_shape", "Tcrops", "index_type", "Taxis", "U", "maxval", - "Tout", "Tlabels", "Tindex", "element_shape", "Targmax", "Tperm", "Tcond", - "T_threshold", "shape_type", "_lower_using_switch_merge", - "parallel_iterations", "_num_original_outputs", "output_types", "output_shapes", - "key_dtype", "value_dtype" "capacity", "component_types", "shapes", "SrcT", "Treal", - "Toutput_types", "dense_shapes", "Tdense", "Tsegmentids", "Tshift", "Tnumsegments" + "T", + "unknown_rank", + "_class", + "Tshape", + "use_cudnn_on_gpu", + "Index", + "Tpaddings", + "TI", + "Tparams", + "Tindices", + "Tlen", + "Tdim", + "Tin", + "dynamic_size", + "Tmultiples", + "Tblock_shape", + "Tcrops", + "index_type", + "Taxis", + "U", + "maxval", + "Tout", + "Tlabels", + "Tindex", + "element_shape", + "Targmax", + "Tperm", + "Tcond", + "T_threshold", + "shape_type", + "_lower_using_switch_merge", + "parallel_iterations", + "_num_original_outputs", + "output_types", + 
"output_shapes", + "key_dtype", + "value_dtype" "capacity", + "component_types", + "shapes", + "SrcT", + "Treal", + "Toutput_types", + "dense_shapes", + "Tdense", + "Tsegmentids", + "Tshift", + "Tnumsegments", } -TF2ONNX_SUBGRAPH_ATTRS = { - "body", "cond", "then_branch", "else_branch", "f" -} +TF2ONNX_SUBGRAPH_ATTRS = {"body", "cond", "then_branch", "else_branch", "f"} TF2ONNX_DTYPE_MAP = { types_pb2.DT_FLOAT: onnx_pb.TensorProto.FLOAT, @@ -87,7 +127,7 @@ types_pb2.DT_COMPLEX128: onnx_pb.TensorProto.COMPLEX128, types_pb2.DT_BOOL: onnx_pb.TensorProto.BOOL, types_pb2.DT_RESOURCE: onnx_pb.TensorProto.INT64, - types_pb2.DT_VARIANT: onnx_pb.TensorProto.UNDEFINED + types_pb2.DT_VARIANT: onnx_pb.TensorProto.UNDEFINED, } @@ -116,19 +156,38 @@ # Note: opset 7 and opset 8 came out with IR3 but we need IR4 because of PlaceholderWithDefault # Refer from https://github.com/onnx/onnx/blob/main/docs/Versioning.md#released-versions OPSET_TO_IR_VERSION = { - 1: 3, 2: 3, 3: 3, 4: 3, 5: 3, 6: 3, 7: 4, 8: 4, 9: 4, 10: 5, 11: 6, 12: 7, 13: 7, 14: 7, 15: 8, 16: 8, 17: 8 + 1: 3, + 2: 3, + 3: 3, + 4: 3, + 5: 3, + 6: 3, + 7: 4, + 8: 4, + 9: 4, + 10: 5, + 11: 6, + 12: 7, + 13: 7, + 14: 7, + 15: 8, + 16: 8, + 17: 8, } DEFAULT_TARGET = [] INSERTED_OP_NAME = 1 + + def set_name(name): """Set op name for inserted ops.""" global INSERTED_OP_NAME INSERTED_OP_NAME += 1 return "{}__{}".format(name, INSERTED_OP_NAME) + def find_opset(opset): """Find opset.""" if opset is None or opset == 0: @@ -138,11 +197,13 @@ def find_opset(opset): opset = PREFERRED_OPSET return opset + def assert_error(bool_val, error_msg, *args): """Raise error message.""" if not bool_val: raise ValueError("Assert failure: " + error_msg % args) + def map_numpy_to_onnx_dtype(np_dtype): """Map numpy dtype to ONNX dtype.""" for onnx_dtype, numpy_dtype in ONNX_TO_NUMPY_DTYPE.items(): @@ -150,18 +211,22 @@ def map_numpy_to_onnx_dtype(np_dtype): return onnx_dtype raise ValueError("unsupported numpy dtype '%s' for mapping to onnx" % np_dtype) + def map_onnx_to_numpy_type(onnx_type): """Map ONNX dtype to numpy dtype.""" return ONNX_TO_NUMPY_DTYPE[onnx_type] + def add_port_to_name(name, nr=0): """Map node output number to name.""" return name + ":" + str(nr) + def get_tensorflow_node_attr(node, name): """Parse tensorflow node attribute.""" return node.get_attr(name) + def get_tensorflow_tensor_shape(tensor): """Get shape from tensorflow tensor.""" shape = [] @@ -171,6 +236,7 @@ def get_tensorflow_tensor_shape(tensor): shape = None return shape + def get_tensorflow_node_shape_attr(node): """Get shape from tensorflow attr "shape".""" dims = None @@ -182,12 +248,14 @@ def get_tensorflow_node_shape_attr(node): pass return dims + def map_tensorflow_dtype(dtype): """Convert tensorflow dtype to ONNX.""" if dtype: dtype = TF2ONNX_DTYPE_MAP[dtype] return dtype + def get_tensorflow_tensor_data(tensor): """Get data from tensorflow tensor.""" if not isinstance(tensor, tensor_pb2.TensorProto): @@ -197,6 +265,7 @@ def get_tensorflow_tensor_data(tensor): raise ValueError("np_data=", np_data, " isn't ndarray") return np_data + def convert_tensorflow_tensor_to_onnx(tensor, name=""): """Convert tensorflow tensor to onnx tensor.""" np_data = get_tensorflow_tensor_data(tensor) @@ -207,23 +276,28 @@ def convert_tensorflow_tensor_to_onnx(tensor, name=""): # Faster but fails on Unicode np_data = np_data.astype(np.str).astype(object) except UnicodeDecodeError: - decode = np.vectorize(lambda x: x.decode('UTF-8')) + decode = np.vectorize(lambda x: x.decode("UTF-8")) np_data = 
decode(np_data).astype(object) except: # pylint: disable=bare-except raise RuntimeError("Unsupported type: {}".format(type(np_data.flat[0]))) return numpy_helper.from_array(np_data, name=name) + def read_tensorflow_node_attrs(node): """Read tensorflow node attribute names.""" attr = {} for attr_name in node.node_def.attr: value = get_tensorflow_node_attr(node, attr_name) - if attr_name == 'T' and node.type in ('QuantizeV2', 'Dequantize'): - attr[attr_name] = TensorProto.INT8 if get_tensorflow_node_attr(node, attr_name) == 'qint8' \ - else TensorProto.UINT8 - elif attr_name in TF2ONNX_IGNORED_NODE_ATTRS or attr_name in TF2ONNX_SUBGRAPH_ATTRS or \ - isinstance(value, tensor_pb2.TensorProto): + if attr_name == "T" and node.type in ("QuantizeV2", "Dequantize"): + attr[attr_name] = ( + TensorProto.INT8 if get_tensorflow_node_attr(node, attr_name) == "qint8" else TensorProto.UINT8 + ) + elif ( + attr_name in TF2ONNX_IGNORED_NODE_ATTRS + or attr_name in TF2ONNX_SUBGRAPH_ATTRS + or isinstance(value, tensor_pb2.TensorProto) + ): pass elif attr_name == "shape": shape = get_tensorflow_node_shape_attr(node) @@ -240,6 +314,7 @@ def read_tensorflow_node_attrs(node): return attr + def infer_onnx_shape_dtype(node, opset_version, input_shapes, input_dtypes, initializers=None): """Infer shapes and dtypes for outputs of the node. @@ -281,10 +356,7 @@ def build_onnx_op(node): # strict_mode arg doesn't exist in old onnx packages inferred_model = shape_inference.infer_shapes(model_proto) except Exception: # pylint: disable=broad-except - logger.warning( - "ONNX Failed to infer shapes and dtypes for [%s, type: %s]", - node.name, node.type, exc_info=1 - ) + logger.warning("ONNX Failed to infer shapes and dtypes for [%s, type: %s]", node.name, node.type, exc_info=1) return None, None shapes = {} @@ -297,9 +369,7 @@ def build_onnx_op(node): dtypes[output.name] = TensorProto.UNDEFINED # Missing dim_value in shapes of onnx means unknown which is -1 in our converter if tensor_type.HasField("shape"): - shapes[output.name] = [ - dim.dim_value if dim.HasField("dim_value") else -1 for dim in tensor_type.shape.dim - ] + shapes[output.name] = [dim.dim_value if dim.HasField("dim_value") else -1 for dim in tensor_type.shape.dim] else: shapes[output.name] = None output_shapes = [] @@ -315,15 +385,21 @@ def build_onnx_op(node): output_dtypes.append(TensorProto.UNDEFINED) return output_shapes, output_dtypes + def make_onnx_shape(shape): - """Shape with -1 is not valid in onnx ... make it a name.""" + """Shape with -1 is not valid in onnx ... + + make it a name. + """ if shape: # don't do this if input is a scalar return [set_name("unk") if i == -1 else i for i in shape] return shape + class SeqType: """Wrap around TensorProto.* to signify a tensor sequence of a given type.""" + def __init__(self, tensor_dtype): """Initialization.""" self.dtype = tensor_dtype @@ -338,6 +414,7 @@ def __repr__(self): """Return string of SeqType's dtype.""" return "SeqType(%r)" % self.dtype + def make_onnx_inputs_outputs(name, elem_type, shape, **kwargs): """Wrapper for creating onnx graph inputs or outputs.
@@ -350,12 +427,8 @@ def make_onnx_inputs_outputs(name, elem_type, shape, **kwargs): elem_type = onnx_pb.TensorProto.UNDEFINED elif isinstance(elem_type, SeqType): return helper.make_tensor_sequence_value_info(name, elem_type.dtype, make_onnx_shape(shape), **kwargs) - return helper.make_tensor_value_info( - name, - elem_type, - make_onnx_shape(shape), - **kwargs - ) + return helper.make_tensor_value_info(name, elem_type, make_onnx_shape(shape), **kwargs) + def save_protobuf(path, message, as_text=False): """Save ONNX protobuf file.""" @@ -369,16 +442,19 @@ def save_protobuf(path, message, as_text=False): with open(path, "wb") as f: f.write(message.SerializeToString()) + def is_onnx_domain(domain): """Check if it's onnx domain.""" if domain is None or domain == "": return True return False + def is_list_or_tuple(obj): """Check the object is list or tuple.""" return isinstance(obj, (list, tuple)) + def are_shapes_equal(src, dest): """Check whether 2 shapes are equal.""" if src is None: @@ -393,6 +469,7 @@ def are_shapes_equal(src, dest): return False return all(i == j for i, j in zip(src, dest)) + def get_subgraphs_from_onnx(model_proto): """Returns an iterator over the graphs/subgraphs of a model (using dfs).""" stack = [model_proto.graph] @@ -406,29 +483,27 @@ def get_subgraphs_from_onnx(model_proto): if hasattr(attr, "graphs"): stack.extend(attr.graphs) + def initialize_name_counter(model_proto): """Avoid name conflicts by initializing the counter used by make_name based on the provided model.""" suffix_regex = re.compile(r"__(\d+)(:\d+)?$") + def avoid_name(name): global INSERTED_OP_NAME suffix = suffix_regex.search(name) if suffix: INSERTED_OP_NAME = max(INSERTED_OP_NAME, int(suffix.group(1)) + 1) + for g in get_subgraphs_from_onnx(model_proto): for n in g.node: avoid_name(n.name) for out in n.output: avoid_name(out) + def get_index_from_strided_slice_of_shape(node, outputs_to_values): """Returns the index of the dimension that the strided slice is reading from the shape node or None.""" - attr_vals = { - 'shrink_axis_mask': 1, - 'ellipsis_mask': 0, - 'begin_mask': 0, - 'new_axis_mask': 0, - 'end_mask': 0 - } + attr_vals = {"shrink_axis_mask": 1, "ellipsis_mask": 0, "begin_mask": 0, "new_axis_mask": 0, "end_mask": 0} for a in node.node_def.attr: if a in attr_vals: i = get_tensorflow_node_attr(node, a) @@ -446,6 +521,7 @@ def get_index_from_strided_slice_of_shape(node, outputs_to_values): return None return i1 + def compute_const_folding_using_tf(g, const_node_values, graph_outputs): """Find nodes with constant inputs and compute their values using TF.""" if const_node_values is None: @@ -489,8 +565,11 @@ def is_huge_shape(x): # Find ops with constant inputs and compute their values input_names = [i.name for i in node.inputs] output_names = [i.name for i in node.outputs] - if node.type == 'StridedSlice' and input_names[0] in shape_node_outputs \ - and output_names[0] not in outputs_to_values: + if ( + node.type == "StridedSlice" + and input_names[0] in shape_node_outputs + and output_names[0] not in outputs_to_values + ): shape = shape_node_outputs[input_names[0]] i = get_index_from_strided_slice_of_shape(node, outputs_to_values) if i is not None and 0 <= i < len(shape) and shape[i] is not None: @@ -498,10 +577,19 @@ def is_huge_shape(x): outputs_to_values[output_names[0]] = np.array(shape[i], dtype=np_dtype) outputs_to_dtypes[node.outputs[0].name] = node.outputs[0].dtype progress = True - can_fold = node.type not in ['Enter', 'Placeholder', 'PlaceholderWithDefault', 'Switch', 'Merge', - 
'NextIteration', 'Exit', 'QuantizeAndDequantizeV2', 'QuantizeAndDequantizeV3', - 'QuantizeAndDequantizeV4'] - can_fold = can_fold and not node.type.startswith('Random') + can_fold = node.type not in [ + "Enter", + "Placeholder", + "PlaceholderWithDefault", + "Switch", + "Merge", + "NextIteration", + "Exit", + "QuantizeAndDequantizeV2", + "QuantizeAndDequantizeV3", + "QuantizeAndDequantizeV4", + ] + can_fold = can_fold and not node.type.startswith("Random") can_fold = can_fold and len(input_names) > 0 and all(inp in outputs_to_values for inp in input_names) # We can only fold nodes with a single output can_fold = can_fold and len(output_names) == 1 and output_names[0] not in outputs_to_values @@ -512,7 +600,7 @@ def is_huge_shape(x): g2 = tf.Graph() with g2.as_default(): for inp in input_names: - t2o.tf_loader.tf_placeholder(outputs_to_dtypes[inp], name=inp.split(':')[0]) + t2o.tf_loader.tf_placeholder(outputs_to_dtypes[inp], name=inp.split(":")[0]) mini_graph_def = g2.as_graph_def() mini_graph_def.node.append(node.node_def) g3 = tf.Graph() @@ -525,11 +613,16 @@ def is_huge_shape(x): inp_shapes.append(inp_np.shape) try: with t2o.tf_loader.tf_session() as sess: - tf.import_graph_def(mini_graph_def, name='') + tf.import_graph_def(mini_graph_def, name="") results = sess.run(output_names, feed_dict=feed_dict) if is_huge_shape(results[0].shape) and all(is_small_shape(inp) for inp in inp_shapes): - logger.debug("Skipping folding of node %s since result shape %s is much larger " - "than input shapes %s", node.name, results[0].shape, inp_shapes) + logger.debug( + "Skipping folding of node %s since result shape %s is much larger " + "than input shapes %s", + node.name, + results[0].shape, + inp_shapes, + ) else: outputs_to_values[output_names[0]] = results[0] outputs_to_dtypes[output_names[0]] = node.outputs[0].dtype diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/insert_qdq_pattern.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/insert_qdq_pattern.py index 01884a63595..b56563f9b13 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/insert_qdq_pattern.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/insert_qdq_pattern.py @@ -17,21 +17,36 @@ """Insert QDQ pattern Graph Rewriter.""" import copy -import numpy as np +import re from collections import namedtuple + +import numpy as np from tensorflow.core.framework import attr_value_pb2 -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import tensor_util -from neural_compressor.utils.utility import dump_elapsed_time +from tensorflow.python.framework import dtypes, tensor_util + from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer -from ..graph_base import GraphRewriterBase from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper -import re +from neural_compressor.utils.utility import dump_elapsed_time + +from ..graph_base import GraphRewriterBase + class GenerateGraphWithQDQPattern(GraphRewriterBase): """Insert Q/DQ pairs before quantizable ops.""" - def __init__(self, model, calibration_data, op_wise_config, fake_quant, fp32_ops, - bf16_ops, quantized_nodes, device, performance_only, itex_mode): + + def __init__( + self, + model, + calibration_data, + op_wise_config, + fake_quant, + fp32_ops, + bf16_ops, + quantized_nodes, + device, + performance_only, + itex_mode, + ): """Initialization.""" super().__init__(model) self.data = calibration_data @@ -43,40 +58,59 @@ def __init__(self, model, calibration_data, 
op_wise_config, fake_quant, fp32_ops self.device = device self.performance_only = performance_only self.itex_mode = itex_mode - self.node_details = namedtuple('node_details', ['node', 'output']) + self.node_details = namedtuple("node_details", ["node", "output"]) self.node_name_mapping = {} - self.check_op_list = {"ConcatV2", "Conv2D", "Conv3D", "DepthwiseConv2D", "QuantizeV2", "DepthwiseConv2dNative", - "MaxPool", "MaxPool3D", "FusedBatchNormV3", "Requantize", "RequantizePerChannel", "AvgPool", "Pad", - "CropAndResize", "Dequantize", "Mean", "MatMul", "BatchMatMul", "BatchMatMulV2", - "FakeQuantWithMinMaxVars", "_MklFusedInstanceNorm", - "Conv2DBackpropInput", "Conv3DBackpropInputV2", "Sigmoid"} + self.check_op_list = { + "ConcatV2", + "Conv2D", + "Conv3D", + "DepthwiseConv2D", + "QuantizeV2", + "DepthwiseConv2dNative", + "MaxPool", + "MaxPool3D", + "FusedBatchNormV3", + "Requantize", + "RequantizePerChannel", + "AvgPool", + "Pad", + "CropAndResize", + "Dequantize", + "Mean", + "MatMul", + "BatchMatMul", + "BatchMatMulV2", + "FakeQuantWithMinMaxVars", + "_MklFusedInstanceNorm", + "Conv2DBackpropInput", + "Conv3DBackpropInputV2", + "Sigmoid", + } for node in self.model.node: if node.name in self.node_name_mapping: - raise ValueError("Duplicate Node Found when _parse_graph, the node name is {}" \ - .format(node.name)) + raise ValueError("Duplicate Node Found when _parse_graph, the node name is {}".format(node.name)) self.node_name_mapping[node.name] = self.node_details(node=node, output=[]) for node_name in self.node_name_mapping: for each_input in self.node_name_mapping[node_name].node.input: - self.node_name_mapping \ - [Helper.node_name_from_input(each_input)].output.append(node_name) + self.node_name_mapping[Helper.node_name_from_input(each_input)].output.append(node_name) @dump_elapsed_time("Pass GenerateGraphWithQDQPattern") def do_transformation(self): """Generate the graph with QDQ patterns, this is the first step to do new api quantizaiton.""" min_max_values = {} for i in self.data: - if i.find('_requant') == -1: - key, value = i.rsplit(':', 1)[0], i.rsplit(':', 1)[1] - key = key.split('_eightbit_')[0][1:] + key[-5:] + if i.find("_requant") == -1: + key, value = i.rsplit(":", 1)[0], i.rsplit(":", 1)[1] + key = key.split("_eightbit_")[0][1:] + key[-5:] if key not in min_max_values: min_max_values[key] = [float(value[1:-1])] else: min_max_values[key].append(float(value[1:-1])) quantizable_op_names = [] for i in min_max_values: - if i.split('__')[0] not in quantizable_op_names: - quantizable_op_names.append(i.split('__')[0]) + if i.split("__")[0] not in quantizable_op_names: + quantizable_op_names.append(i.split("__")[0]) self.g = GraphAnalyzer() self.g.graph = copy.deepcopy(self.model) @@ -96,19 +130,28 @@ def do_transformation(self): is_asymmetric = op_wise_cfg[2] if self.graph_info[op_name].node.op == "ConcatV2": if not self.itex_mode: - self._insert_qdq_pattern_for_concatv2(self.graph_info[op_name].node, - is_asymmetric) + self._insert_qdq_pattern_for_concatv2(self.graph_info[op_name].node, is_asymmetric) else: - self._insert_qdq_pattern_for_common_ops(self.graph_info[op_name].node, - is_asymmetric) + self._insert_qdq_pattern_for_common_ops(self.graph_info[op_name].node, is_asymmetric) # insert QDQ pattern for op's weight self.g_weight = GraphAnalyzer() self.g_weight.graph = self.g.dump_graph() self.graph_info = self.g_weight.parse_graph() target_nodes = self.g_weight.query_fusion_pattern_nodes( - [["Conv2D", "Conv3D", "DepthwiseConv2dNative", "MatMul", \ - "BatchMatMul", 
"BatchMatMulV2", "Conv2DBackpropInput", "Conv3DBackpropInputV2"]]) + [ + [ + "Conv2D", + "Conv3D", + "DepthwiseConv2dNative", + "MatMul", + "BatchMatMul", + "BatchMatMulV2", + "Conv2DBackpropInput", + "Conv3DBackpropInputV2", + ] + ] + ) for i in target_nodes: if i[0] not in quantizable_op_names: continue @@ -119,13 +162,13 @@ def do_transformation(self): computational_node = self.graph_info[computational_node_name].node weight_name = computational_node.input[1] if re.search(r"\w+:\d+", weight_name): - weight_node = self.graph_info[weight_name.rsplit(':', 1)[0]].node + weight_node = self.graph_info[weight_name.rsplit(":", 1)[0]].node else: weight_node = self.graph_info[weight_name].node - if weight_node.op == 'Enter': # pragma: no cover + if weight_node.op == "Enter": # pragma: no cover if self.itex_mode: parent_node = self.graph_info[Helper.node_name_from_input(weight_node.input[0])].node - if not parent_node.op == 'Const': + if not parent_node.op == "Const": continue weight_node = parent_node else: @@ -138,19 +181,15 @@ def do_transformation(self): per_channel = False weight_bit = 7 - self._insert_qdq_pattern_for_weight_node(computational_node, - weight_node, - weight_name, - min_max_values, - per_channel, - weight_bit, - self.device) + self._insert_qdq_pattern_for_weight_node( + computational_node, weight_node, weight_name, min_max_values, per_channel, weight_bit, self.device + ) # Adaption for strip equivalent nodes feature # Replicate shared Dequantize for next step fusion self.g_qdq = GraphAnalyzer() self.g_qdq.graph = self.g_weight.dump_graph() self.graph_info = self.g_qdq.parse_graph() - patterns = [['QuantizeV2'], ['Dequantize']] + patterns = [["QuantizeV2"], ["Dequantize"]] matched_nodes = self.g_qdq.query_fusion_pattern_nodes(patterns) for i in matched_nodes: quantize_node_name = self.graph_info[i[0]].node.name @@ -162,18 +201,19 @@ def do_transformation(self): for index in range(len_deq_outputs - 1): rep_dequantize_node = Helper.create_node( - "Dequantize", deq_node_name + '_' + str(index + 1), - [quantize_node_name, quantize_node_name + ':1', quantize_node_name + ':2']) - rep_dequantize_node.attr["T"].CopyFrom(deq_node.attr['T']) - rep_dequantize_node.attr["mode"].CopyFrom(deq_node.attr['mode']) - if 'axis' in deq_node.attr: - rep_dequantize_node.attr["axis"].CopyFrom(deq_node.attr['axis']) - next_node_name = self.g_qdq.node_name_details[deq_node_name].outputs[index+1] + "Dequantize", + deq_node_name + "_" + str(index + 1), + [quantize_node_name, quantize_node_name + ":1", quantize_node_name + ":2"], + ) + rep_dequantize_node.attr["T"].CopyFrom(deq_node.attr["T"]) + rep_dequantize_node.attr["mode"].CopyFrom(deq_node.attr["mode"]) + if "axis" in deq_node.attr: + rep_dequantize_node.attr["axis"].CopyFrom(deq_node.attr["axis"]) + next_node_name = self.g_qdq.node_name_details[deq_node_name].outputs[index + 1] self.g_qdq.add_node(rep_dequantize_node, quantize_node_name, [next_node_name]) for input_index, each_input in enumerate(self.g_qdq.node_name_details[next_node_name].node.input): if each_input == deq_node_name: - self.g_qdq.node_name_details[next_node_name].node.input[input_index] = \ - rep_dequantize_node.name + self.g_qdq.node_name_details[next_node_name].node.input[input_index] = rep_dequantize_node.name return self.g_qdq.dump_graph() @@ -183,54 +223,63 @@ def _check_op_list(self, node_type): def _find_relu_node(self, node): """Find Relu node algorithm to identify the positive input.""" - if node.op == 'MaxPool': + if node.op == "MaxPool": self.check_op_list.add("BiasAdd") 
- if (node.op in ("Relu", "Relu6", "Elu") or \ - (node.op.find("AndRelu") != -1 and \ - ('alpha' not in node.attr or ('alpha' in node.attr and node.attr['alpha'].f == 0)))) \ - and (node.op != "Relu" - or not self.performance_only - or self.node_name_mapping \ - [Helper.node_name_from_input(node.input[0])].node.op.find("FusedBatchNorm") == -1 - or self.node_name_mapping \ - [Helper.node_name_from_input(node.input[0])].node.attr['is_training'].b - or len(self.node_name_mapping \ - [Helper.node_name_from_input(node.input[0])].output) > 1): - return True - elif 'T' in node.attr and dtypes.DType(node.attr['T'].type) in (dtypes.quint8, dtypes.uint8): + if ( + node.op in ("Relu", "Relu6", "Elu") + or ( + node.op.find("AndRelu") != -1 + and ("alpha" not in node.attr or ("alpha" in node.attr and node.attr["alpha"].f == 0)) + ) + ) and ( + node.op != "Relu" + or not self.performance_only + or self.node_name_mapping[Helper.node_name_from_input(node.input[0])].node.op.find("FusedBatchNorm") == -1 + or self.node_name_mapping[Helper.node_name_from_input(node.input[0])].node.attr["is_training"].b + or len(self.node_name_mapping[Helper.node_name_from_input(node.input[0])].output) > 1 + ): + return True + elif "T" in node.attr and dtypes.DType(node.attr["T"].type) in (dtypes.quint8, dtypes.uint8): return True - elif (node.op.find("QuantizedConv") != -1 - or node.op.find("QuantizedDepthwiseConv") != -1 or - node.op.find("QuantizedMatMul") != -1 - ) and ((node.op.find("Relu") == -1 and node.op.find("Elu") == -1) or \ - ('alpha' in node.attr and node.attr['alpha'].f > 0)): + elif ( + node.op.find("QuantizedConv") != -1 + or node.op.find("QuantizedDepthwiseConv") != -1 + or node.op.find("QuantizedMatMul") != -1 + ) and ( + (node.op.find("Relu") == -1 and node.op.find("Elu") == -1) + or ("alpha" in node.attr and node.attr["alpha"].f > 0) + ): return False - elif self.itex_mode and node.op in ('Add', 'AddV2', 'AddN'): + elif self.itex_mode and node.op in ("Add", "AddV2", "AddN"): if re.search(r"\w+:\d+", node.input[0]): - input0_node = self.node_name_mapping[node.input[0].rsplit(':', 1)[0]].node + input0_node = self.node_name_mapping[node.input[0].rsplit(":", 1)[0]].node else: input0_node = self.node_name_mapping[node.input[0]].node if re.search(r"\w+:\d+", node.input[1]): - input1_node = self.node_name_mapping[node.input[1].rsplit(':', 1)[0]].node + input1_node = self.node_name_mapping[node.input[1].rsplit(":", 1)[0]].node else: input1_node = self.node_name_mapping[node.input[1]].node - if input0_node.op in ('BiasAdd', 'Add', 'AddV2', 'AddN') or \ - input1_node.op in ('BiasAdd', 'Add', 'AddV2', 'AddN'): + if input0_node.op in ("BiasAdd", "Add", "AddV2", "AddN") or input1_node.op in ( + "BiasAdd", + "Add", + "AddV2", + "AddN", + ): return False return self._find_relu_node(input0_node) and self._find_relu_node(input1_node) - elif self._check_op_list(node.op) or (self.itex_mode and node.op in ('Add', 'AddV2')): - if node.op == 'ConcatV2': + elif self._check_op_list(node.op) or (self.itex_mode and node.op in ("Add", "AddV2")): + if node.op == "ConcatV2": find_relu = False - for i in range(0,node.attr['N'].i): + for i in range(0, node.attr["N"].i): if re.search(r"\w+:\d+", node.input[i]): - input_node = self.node_name_mapping[node.input[i].rsplit(':', 1)[0]].node + input_node = self.node_name_mapping[node.input[i].rsplit(":", 1)[0]].node else: input_node = self.node_name_mapping[node.input[i]].node find_relu |= self._find_relu_node(input_node) return find_relu if re.search(r"\w+:\d+", node.input[0]): - input_node = 
self.node_name_mapping[node.input[0].rsplit(':', 1)[0]].node + input_node = self.node_name_mapping[node.input[0].rsplit(":", 1)[0]].node else: input_node = self.node_name_mapping[node.input[0]].node return self._find_relu_node(input_node) @@ -245,20 +294,25 @@ def _insert_qdq_pattern_for_common_ops(self, original_node, is_asymmetric): else: all_inputs = self.node_name_mapping[original_node.name].node.input[:1] for each_input_name in all_inputs: - if each_input_name[0] == '^': + if each_input_name[0] == "^": continue # if dq+maxpool is detected as input of this node # the qdq in pattern dq+maxpool+q should be with the same dtype in the itex mode - if self.itex_mode and each_input_name in self.node_name_mapping \ - and self.node_name_mapping[each_input_name].node.op == "MaxPool" \ - and self.graph_info[self.graph_info[each_input_name].node.input[0]].node.op == "Dequantize": + if ( + self.itex_mode + and each_input_name in self.node_name_mapping + and self.node_name_mapping[each_input_name].node.op == "MaxPool" + and self.graph_info[self.graph_info[each_input_name].node.input[0]].node.op == "Dequantize" + ): maxpool_node = self.graph_info[each_input_name].node dtype = dtypes.DType(self.graph_info[maxpool_node.input[0]].node.attr["T"].type) elif self.node_name_mapping[original_node.name].node.op == "MatMul": dtype = dtypes.quint8 - elif self.node_name_mapping[original_node.name].node.op == "BatchMatMulV2" \ - or self.node_name_mapping[original_node.name].node.op == "BatchMatMul": + elif ( + self.node_name_mapping[original_node.name].node.op == "BatchMatMulV2" + or self.node_name_mapping[original_node.name].node.op == "BatchMatMul" + ): dtype = dtypes.qint8 # the qdq in pattern dq+bn+relu+q and dq+bn+q should be s8 in itex mode elif self.node_name_mapping[original_node.name].node.op == "FusedBatchNormV3": @@ -267,8 +321,7 @@ def _insert_qdq_pattern_for_common_ops(self, original_node, is_asymmetric): input_node_name = Helper.node_name_from_input(each_input_name) if input_node_name in self.graph_info: if self.graph_info[input_node_name].node.op == "Dequantize": - dtype = dtypes.DType( - self.graph_info[input_node_name].node.attr["T"].type) + dtype = dtypes.DType(self.graph_info[input_node_name].node.attr["T"].type) elif self.graph_info[input_node_name].node.op == "FusedBatchNormV3": dtype = dtypes.qint8 elif self._find_relu_node(self.node_name_mapping[original_node.name].node): @@ -276,39 +329,37 @@ def _insert_qdq_pattern_for_common_ops(self, original_node, is_asymmetric): else: dtype = dtypes.qint8 else: - dtype = dtypes.quint8 if self._find_relu_node( - self.node_name_mapping[original_node.name].node - ) else dtypes.qint8 - self._insert_qdq_pattern_for_each_input(original_node.name, - namespace_prefix, - each_input_name, - is_asymmetric, - dtype, - device=self.device) - + dtype = ( + dtypes.quint8 + if self._find_relu_node(self.node_name_mapping[original_node.name].node) + else dtypes.qint8 + ) + self._insert_qdq_pattern_for_each_input( + original_node.name, namespace_prefix, each_input_name, is_asymmetric, dtype, device=self.device + ) def _insert_qdq_pattern_for_concatv2(self, original_node, is_asymmetric): """Insert QDQ patterns for each input of ConcatV2.""" namespace_prefix = original_node.name + "_eightbit" - normal_inputs = [i for i in original_node.input if i[0] != '^'] + normal_inputs = [i for i in original_node.input if i[0] != "^"] num_input = len(normal_inputs) - original_inputs = normal_inputs[0:num_input - 1] + original_inputs = normal_inputs[0 : num_input - 1] input_idx = 0 for 
original_input_name in original_inputs: - self._insert_qdq_pattern_for_each_input(original_node.name, - namespace_prefix, - original_input_name, - is_asymmetric, - dtypes.quint8, - input_idx, - device=self.device) + self._insert_qdq_pattern_for_each_input( + original_node.name, + namespace_prefix, + original_input_name, + is_asymmetric, + dtypes.quint8, + input_idx, + device=self.device, + ) input_idx += 1 - - def _insert_qdq_pattern_for_each_input(self, op_name, namespace_prefix, - input_name, is_asymmetric, - dtype=dtypes.quint8, input_index=0, - device='cpu'): + def _insert_qdq_pattern_for_each_input( + self, op_name, namespace_prefix, input_name, is_asymmetric, dtype=dtypes.quint8, input_index=0, device="cpu" + ): """Takes one float input to an op, and converts it to quantized form.""" unique_input_name = input_name.replace(":", "__port__").replace("^", "__hat__") min_input_name = namespace_prefix + "_min_" + unique_input_name @@ -318,55 +369,49 @@ def _insert_qdq_pattern_for_each_input(self, op_name, namespace_prefix, reshape_dims_name = namespace_prefix + "_reshape_dims" + unique_input_name reduction_dims_name = namespace_prefix + "_reduction_dims" + unique_input_name - if self.fake_quant: # pragma: no cover - min_node = Helper.create_constant_node( - min_input_name, -1., dtypes.float32, device="cpu") - max_node = Helper.create_constant_node( - max_input_name, 1., dtypes.float32, device="cpu") + if self.fake_quant: # pragma: no cover + min_node = Helper.create_constant_node(min_input_name, -1.0, dtypes.float32, device="cpu") + max_node = Helper.create_constant_node(max_input_name, 1.0, dtypes.float32, device="cpu") quant_v2_node = Helper.create_node( - "QuantizeV2", quantize_input_name, - [input_name, min_input_name, max_input_name]) + "QuantizeV2", quantize_input_name, [input_name, min_input_name, max_input_name] + ) Helper.set_attr_dtype(quant_v2_node, "T", dtype) if not is_asymmetric: Helper.set_attr_string(quant_v2_node, "round_mode", b"HALF_TO_EVEN") - #Helper.set_attr_bool(quant_v2_node, "narrow_range", False if is_asymmetric else True) + # Helper.set_attr_bool(quant_v2_node, "narrow_range", False if is_asymmetric else True) if "BatchMatMul" in self.graph_info[op_name].node.op: - Helper.set_attr_string( - quant_v2_node, "mode", b"SCALED") + Helper.set_attr_string(quant_v2_node, "mode", b"SCALED") else: - Helper.set_attr_string( - quant_v2_node, "mode", b"MIN_FIRST" if is_asymmetric else b"SCALED") + Helper.set_attr_string(quant_v2_node, "mode", b"MIN_FIRST" if is_asymmetric else b"SCALED") if "Concat" in self.graph_info[op_name].node.op: dequantize_node = Helper.create_node( - "Dequantize", op_name + '_dequantize_' + str(input_index), - [quant_v2_node.name, quant_v2_node.name + ':1', quant_v2_node.name + ':2']) + "Dequantize", + op_name + "_dequantize_" + str(input_index), + [quant_v2_node.name, quant_v2_node.name + ":1", quant_v2_node.name + ":2"], + ) else: dequantize_node = Helper.create_node( - "Dequantize", op_name + '_dequantize', - [quant_v2_node.name, quant_v2_node.name + ':1', quant_v2_node.name + ':2']) + "Dequantize", + op_name + "_dequantize", + [quant_v2_node.name, quant_v2_node.name + ":1", quant_v2_node.name + ":2"], + ) Helper.set_attr_dtype(dequantize_node, "T", dtype) if "BatchMatMul" in self.graph_info[op_name].node.op: - Helper.set_attr_string( - dequantize_node, "mode", b"SCALED") + Helper.set_attr_string(dequantize_node, "mode", b"SCALED") else: - Helper.set_attr_string( - dequantize_node, "mode", b"MIN_FIRST" if is_asymmetric else b"SCALED") + 
Helper.set_attr_string(dequantize_node, "mode", b"MIN_FIRST" if is_asymmetric else b"SCALED") - self.g.add_node(quant_v2_node, - self.graph_info[op_name].node.input[0], - [dequantize_node.name]) + self.g.add_node(quant_v2_node, self.graph_info[op_name].node.input[0], [dequantize_node.name]) self.g.add_node(dequantize_node, quant_v2_node.name, [op_name]) self.g.add_node(min_node, None, [quant_v2_node.name]) self.g.add_node(max_node, None, [quant_v2_node.name]) self.graph_info[op_name].node.input[input_index] = dequantize_node.name else: - reshape_dims_node = Helper.create_constant_node( - reshape_dims_name, -1, dtypes.int32, [1]) - reduction_dims_node = Helper.create_constant_node( - reduction_dims_name, 0, dtypes.int32, [1]) + reshape_dims_node = Helper.create_constant_node(reshape_dims_name, -1, dtypes.int32, [1]) + reduction_dims_node = Helper.create_constant_node(reduction_dims_name, 0, dtypes.int32, [1]) reshape_input_name = namespace_prefix + "_reshape_" + unique_input_name - if self.itex_mode and self.graph_info[op_name].node.op == 'FusedBatchNormV3': + if self.itex_mode and self.graph_info[op_name].node.op == "FusedBatchNormV3": min_input_name = namespace_prefix + "_input7_output_min" max_input_name = namespace_prefix + "_input8_output_max" quantize_input_name = namespace_prefix + "_quantize_bn" @@ -375,23 +420,19 @@ def _insert_qdq_pattern_for_each_input(self, op_name, namespace_prefix, max_input_name = namespace_prefix + "_max_" + unique_input_name quantize_input_name = namespace_prefix + "_quantize_" + unique_input_name - reshape_input_node = Helper.create_node( - "Reshape", reshape_input_name, - [input_name, reshape_dims_name]) + reshape_input_node = Helper.create_node("Reshape", reshape_input_name, [input_name, reshape_dims_name]) Helper.set_attr_dtype(reshape_input_node, "T", dtypes.float32) - min_input_node = Helper.create_node( - "Min", min_input_name, [reshape_input_name, reduction_dims_name]) + min_input_node = Helper.create_node("Min", min_input_name, [reshape_input_name, reduction_dims_name]) Helper.set_attr_dtype(min_input_node, "T", dtypes.float32) Helper.set_attr_dtype(min_input_node, "Tidx", dtypes.int32) Helper.set_attr_bool(min_input_node, "keep_dims", False) - max_input_node = Helper.create_node( - "Max", max_input_name, [reshape_input_name, reduction_dims_name]) + max_input_node = Helper.create_node("Max", max_input_name, [reshape_input_name, reduction_dims_name]) Helper.set_attr_dtype(max_input_node, "T", dtypes.float32) Helper.set_attr_dtype(max_input_node, "Tidx", dtypes.int32) Helper.set_attr_bool(max_input_node, "keep_dims", False) - + if "BatchMatMul" in self.graph_info[op_name].node.op: min_input_node.input.append("^" + input_name) max_input_node.input.append("^" + input_name) @@ -399,40 +440,39 @@ def _insert_qdq_pattern_for_each_input(self, op_name, namespace_prefix, if self.itex_mode: min_input_node.input.append("^" + input_name) max_input_node.input.append("^" + input_name) - quant_v2_node = Helper.create_node("QuantizeV2", quantize_input_name, - [input_name, min_input_name, max_input_name]) + quant_v2_node = Helper.create_node( + "QuantizeV2", quantize_input_name, [input_name, min_input_name, max_input_name] + ) Helper.set_attr_dtype(quant_v2_node, "T", dtype) if not is_asymmetric: Helper.set_attr_string(quant_v2_node, "round_mode", b"HALF_TO_EVEN") - #Helper.set_attr_bool(quant_v2_node, "narrow_range", False if is_asymmetric else True) + # Helper.set_attr_bool(quant_v2_node, "narrow_range", False if is_asymmetric else True) if self.performance_only 
or "BatchMatMul" in self.graph_info[op_name].node.op: - Helper.set_attr_string( - quant_v2_node, "mode", b"SCALED") + Helper.set_attr_string(quant_v2_node, "mode", b"SCALED") else: - Helper.set_attr_string( - quant_v2_node, "mode", b"MIN_FIRST" if is_asymmetric else b"SCALED") + Helper.set_attr_string(quant_v2_node, "mode", b"MIN_FIRST" if is_asymmetric else b"SCALED") if "Concat" in self.graph_info[op_name].node.op: dequantize_node = Helper.create_node( - "Dequantize", op_name + '_dequantize_' + str(input_index), - [quant_v2_node.name, quant_v2_node.name + ':1', quant_v2_node.name + ':2']) + "Dequantize", + op_name + "_dequantize_" + str(input_index), + [quant_v2_node.name, quant_v2_node.name + ":1", quant_v2_node.name + ":2"], + ) else: dequantize_node = Helper.create_node( - "Dequantize", op_name + '_dequantize', - [quant_v2_node.name, quant_v2_node.name + ':1', quant_v2_node.name + ':2']) + "Dequantize", + op_name + "_dequantize", + [quant_v2_node.name, quant_v2_node.name + ":1", quant_v2_node.name + ":2"], + ) Helper.set_attr_dtype(dequantize_node, "T", dtype) if self.performance_only or "BatchMatMul" in self.graph_info[op_name].node.op: - Helper.set_attr_string( - dequantize_node, "mode", b"SCALED") + Helper.set_attr_string(dequantize_node, "mode", b"SCALED") else: - Helper.set_attr_string( - dequantize_node, "mode", b"MIN_FIRST" if is_asymmetric else b"SCALED") + Helper.set_attr_string(dequantize_node, "mode", b"MIN_FIRST" if is_asymmetric else b"SCALED") if self.graph_info[op_name].node.op in ("Conv2DBackpropInput", "Conv3DBackpropInputV2"): input_index = 2 - self.g.add_node(quant_v2_node, - self.graph_info[op_name].node.input[input_index], - [dequantize_node.name]) + self.g.add_node(quant_v2_node, self.graph_info[op_name].node.input[input_index], [dequantize_node.name]) self.g.add_node(dequantize_node, quant_v2_node.name, [op_name]) self.g.add_node(reshape_dims_node, None, [reshape_input_name]) self.g.add_node(reduction_dims_node, None, [min_input_name, max_input_name]) @@ -441,14 +481,9 @@ def _insert_qdq_pattern_for_each_input(self, op_name, namespace_prefix, self.g.add_node(max_input_node, reshape_input_name, [quant_v2_node.name]) self.graph_info[op_name].node.input[input_index] = dequantize_node.name - def _insert_qdq_pattern_for_weight_node(self, - computational_node, - weight_node, - weight_name, - min_max_values, - per_channel, - weight_bit=7.0, - device='cpu'): + def _insert_qdq_pattern_for_weight_node( + self, computational_node, weight_node, weight_name, min_max_values, per_channel, weight_bit=7.0, device="cpu" + ): """Insert QDQ pattern for weight node.""" host_op_type = computational_node.op base_name = weight_node.name + "_" @@ -456,24 +491,30 @@ def _insert_qdq_pattern_for_weight_node(self, min_name = base_name + "min" max_name = base_name + "max" epsilon = 1e-4 # Needs to be set empirically if accuracy is not satisfactory - range_coefficent = 127 / (2 ** weight_bit - 1) + range_coefficent = 127 / (2**weight_bit - 1) min_value = 0 max_value = 0 insert_reshape = False shape_convert = None shape_revert = None # The weight node of BatchMatMul may have no value - if 'value' in weight_node.attr and \ - host_op_type in ("Conv2D", "MatMul", "BatchMatMul", "BatchMatMulV2", "Conv3D", \ - "Conv2DBackpropInput", "Conv3DBackpropInputV2"): + if "value" in weight_node.attr and host_op_type in ( + "Conv2D", + "MatMul", + "BatchMatMul", + "BatchMatMulV2", + "Conv3D", + "Conv2DBackpropInput", + "Conv3DBackpropInputV2", + ): float_tensor = 
tensor_util.MakeNdarray(weight_node.attr["value"].tensor) if per_channel: - if host_op_type in ('Conv3D', 'Conv3DBackpropInputV2'): + if host_op_type in ("Conv3D", "Conv3DBackpropInputV2"): ranges = np.abs(float_tensor).max(axis=(0, 1, 2, 3)) - elif host_op_type in ('Conv2D', 'Conv2DBackpropInput'): + elif host_op_type in ("Conv2D", "Conv2DBackpropInput"): ranges = np.abs(float_tensor).max(axis=(0, 1, 2)) - elif host_op_type in ('MatMul'): # pragma: no cover - if 'transpose_b' in weight_node.attr and weight_node.attr["transpose_b"].b: # pragma: no cover + elif host_op_type in ("MatMul"): # pragma: no cover + if "transpose_b" in weight_node.attr and weight_node.attr["transpose_b"].b: # pragma: no cover ranges = np.abs(float_tensor).max(axis=(1)) else: # itex qdq needs to transpose this range @@ -532,13 +573,11 @@ def _insert_qdq_pattern_for_weight_node(self, shape_convert = [a, b, c * d] shape_revert = [a, b, c, d] else: - min_value = np.min(min_max_values[computational_node.name+'__min']) - max_value = np.max(min_max_values[computational_node.name+'__max']) + min_value = np.min(min_max_values[computational_node.name + "__min"]) + max_value = np.max(min_max_values[computational_node.name + "__max"]) - min_node = Helper.create_constant_node(min_name, min_value, - dtypes.float32, device="cpu") - max_node = Helper.create_constant_node(max_name, max_value, - dtypes.float32, device="cpu") + min_node = Helper.create_constant_node(min_name, min_value, dtypes.float32, device="cpu") + max_node = Helper.create_constant_node(max_name, max_value, dtypes.float32, device="cpu") if "BatchMatMul" in host_op_type and "BatchMatMul" not in weight_node.op: min_node.input.append("^" + weight_name) max_node.input.append("^" + weight_name) @@ -547,55 +586,53 @@ def _insert_qdq_pattern_for_weight_node(self, max_enter_node = None if insert_reshape: reshape_dims_4to3_name = qint8_const_name + "_reshape_dims_4to3_" - reshape_dims_4to3_node = Helper.create_constant_node( - reshape_dims_4to3_name, shape_convert, dtypes.int32) + reshape_dims_4to3_node = Helper.create_constant_node(reshape_dims_4to3_name, shape_convert, dtypes.int32) reshape_4to3_name = qint8_const_name + "_reshape_4to3_" - reshape_4to3_node = Helper.create_node("Reshape", reshape_4to3_name, - [weight_node.name, reshape_dims_4to3_name]) - reshape_4to3_node.attr["T"].CopyFrom( - attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + reshape_4to3_node = Helper.create_node( + "Reshape", reshape_4to3_name, [weight_node.name, reshape_dims_4to3_name] + ) + reshape_4to3_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) quant_node = Helper.create_node( - "QuantizeV2", qint8_const_name + '_quant', - [reshape_4to3_name, min_name, max_name]) + "QuantizeV2", qint8_const_name + "_quant", [reshape_4to3_name, min_name, max_name] + ) else: quant_node = Helper.create_node( - "QuantizeV2", qint8_const_name + '_quant', - [weight_node.name, min_name, max_name]) + "QuantizeV2", qint8_const_name + "_quant", [weight_node.name, min_name, max_name] + ) dequant_node = Helper.create_node( - "Dequantize", base_name + '_dequant', - [quant_node.name, quant_node.name + ':1', quant_node.name + ':2']) + "Dequantize", base_name + "_dequant", [quant_node.name, quant_node.name + ":1", quant_node.name + ":2"] + ) Helper.set_attr_dtype(quant_node, "T", dtypes.qint8) Helper.set_attr_string(quant_node, "mode", b"SCALED") Helper.set_attr_string(quant_node, "round_mode", b"HALF_TO_EVEN") Helper.set_attr_dtype(dequant_node, "T", dtypes.qint8) 
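# Illustrative sketch (not from the patched sources): the per-channel range math used
# above for a Conv2D weight in HWIO layout. The tensor, the bit width, and the symmetric
# min/max pair at the end are assumptions for demonstration only.
import numpy as np

weights = np.random.randn(3, 3, 16, 32).astype(np.float32)      # kernel_h, kernel_w, in_ch, out_ch
weight_bit = 7.0
range_coefficent = 127 / (2**weight_bit - 1)                     # 1.0 for 7-bit weights, as in the source
ranges = np.abs(weights).max(axis=(0, 1, 2)) * range_coefficent  # one range per output channel
min_value, max_value = -ranges, ranges                           # assumed symmetric ("SCALED") bounds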
Helper.set_attr_string(dequant_node, "mode", b"SCALED") if per_channel: - if host_op_type in ('Conv2D', 'Conv2DBackpropInput'): - Helper.set_attr_int(quant_node, 'axis', 3) - Helper.set_attr_int(dequant_node, 'axis', 3) - elif host_op_type in ('Conv3D', 'Conv3DBackpropInputV2'): - Helper.set_attr_int(quant_node, 'axis', 4) - Helper.set_attr_int(dequant_node, 'axis', 4) - elif host_op_type == 'MatMul': - Helper.set_attr_int(quant_node, 'axis', 1) - Helper.set_attr_int(dequant_node, 'axis', 1) + if host_op_type in ("Conv2D", "Conv2DBackpropInput"): + Helper.set_attr_int(quant_node, "axis", 3) + Helper.set_attr_int(dequant_node, "axis", 3) + elif host_op_type in ("Conv3D", "Conv3DBackpropInputV2"): + Helper.set_attr_int(quant_node, "axis", 4) + Helper.set_attr_int(dequant_node, "axis", 4) + elif host_op_type == "MatMul": + Helper.set_attr_int(quant_node, "axis", 1) + Helper.set_attr_int(dequant_node, "axis", 1) else: - Helper.set_attr_int(quant_node, 'axis', -1) - Helper.set_attr_int(dequant_node, 'axis', -1) - if host_op_type == 'DepthwiseConv2dNative': - Helper.set_attr_int(quant_node, 'axis', 2) - Helper.set_attr_int(dequant_node, 'axis', 2) + Helper.set_attr_int(quant_node, "axis", -1) + Helper.set_attr_int(dequant_node, "axis", -1) + if host_op_type == "DepthwiseConv2dNative": + Helper.set_attr_int(quant_node, "axis", 2) + Helper.set_attr_int(dequant_node, "axis", 2) if insert_reshape: reshape_dims_3to4_name = qint8_const_name + "_reshape_dims_3to4_" - reshape_dims_3to4_node = Helper.create_constant_node( - reshape_dims_3to4_name, shape_revert, dtypes.int32) + reshape_dims_3to4_node = Helper.create_constant_node(reshape_dims_3to4_name, shape_revert, dtypes.int32) reshape_3to4_name = qint8_const_name + "_reshape_3to4_" - reshape_3to4_node = Helper.create_node("Reshape", reshape_3to4_name, - [dequant_node.name, reshape_dims_3to4_name]) - reshape_3to4_node.attr["T"].CopyFrom( - attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + reshape_3to4_node = Helper.create_node( + "Reshape", reshape_3to4_name, [dequant_node.name, reshape_dims_3to4_name] + ) + reshape_3to4_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) self.g_weight.add_node(reshape_dims_4to3_node, None, [reshape_4to3_name]) self.g_weight.add_node(reshape_dims_3to4_node, None, [reshape_3to4_name]) self.g_weight.add_node(reshape_4to3_node, weight_node.name, [quant_node.name]) @@ -606,32 +643,51 @@ def _insert_qdq_pattern_for_weight_node(self, self.g_weight.add_node(reshape_3to4_node, dequant_node.name, [computational_node.name]) computational_node.input[1] = reshape_3to4_node.name else: - if computational_node.name in self.g.parent_frame_details and \ - self.g.parent_frame_details[computational_node.name]: # pragma: no cover - weight_enter_node = Helper.create_node('Enter', \ - weight_node.name + '_enter', [weight_node.name]) - Helper.set_attr_string(weight_enter_node, 'frame_name', - self.g.parent_frame_details[computational_node.name].attr['frame_name'].s) - Helper.set_attr_dtype(weight_enter_node, 'T', dtypes.float32) - Helper.set_attr_bool(weight_enter_node, 'is_constant', True) - Helper.set_attr_int(weight_enter_node, 'parallel_iterations', \ - self.g.parent_frame_details[computational_node.name].attr['parallel_iterations'].i) - - min_enter_node = Helper.create_node('Enter', min_name + '_enter', [min_name]) - Helper.set_attr_string(min_enter_node, 'frame_name', - self.g.parent_frame_details[computational_node.name].attr['frame_name'].s) - Helper.set_attr_dtype(min_enter_node, 
'T', dtypes.float32) - Helper.set_attr_bool(min_enter_node, 'is_constant', True) - Helper.set_attr_int(min_enter_node, 'parallel_iterations', \ - self.g.parent_frame_details[computational_node.name].attr['parallel_iterations'].i) - - max_enter_node = Helper.create_node('Enter', max_name + '_enter', [max_name]) - Helper.set_attr_string(max_enter_node, 'frame_name', - self.g.parent_frame_details[computational_node.name].attr['frame_name'].s) - Helper.set_attr_dtype(max_enter_node, 'T', dtypes.float32) - Helper.set_attr_bool(max_enter_node, 'is_constant', True) - Helper.set_attr_int(max_enter_node, 'parallel_iterations',\ - self.g.parent_frame_details[computational_node.name].attr['parallel_iterations'].i) + if ( + computational_node.name in self.g.parent_frame_details + and self.g.parent_frame_details[computational_node.name] + ): # pragma: no cover + weight_enter_node = Helper.create_node("Enter", weight_node.name + "_enter", [weight_node.name]) + Helper.set_attr_string( + weight_enter_node, + "frame_name", + self.g.parent_frame_details[computational_node.name].attr["frame_name"].s, + ) + Helper.set_attr_dtype(weight_enter_node, "T", dtypes.float32) + Helper.set_attr_bool(weight_enter_node, "is_constant", True) + Helper.set_attr_int( + weight_enter_node, + "parallel_iterations", + self.g.parent_frame_details[computational_node.name].attr["parallel_iterations"].i, + ) + + min_enter_node = Helper.create_node("Enter", min_name + "_enter", [min_name]) + Helper.set_attr_string( + min_enter_node, + "frame_name", + self.g.parent_frame_details[computational_node.name].attr["frame_name"].s, + ) + Helper.set_attr_dtype(min_enter_node, "T", dtypes.float32) + Helper.set_attr_bool(min_enter_node, "is_constant", True) + Helper.set_attr_int( + min_enter_node, + "parallel_iterations", + self.g.parent_frame_details[computational_node.name].attr["parallel_iterations"].i, + ) + + max_enter_node = Helper.create_node("Enter", max_name + "_enter", [max_name]) + Helper.set_attr_string( + max_enter_node, + "frame_name", + self.g.parent_frame_details[computational_node.name].attr["frame_name"].s, + ) + Helper.set_attr_dtype(max_enter_node, "T", dtypes.float32) + Helper.set_attr_bool(max_enter_node, "is_constant", True) + Helper.set_attr_int( + max_enter_node, + "parallel_iterations", + self.g.parent_frame_details[computational_node.name].attr["parallel_iterations"].i, + ) self.g_weight.add_node(quant_node, weight_name, []) self.g_weight.add_node(min_node, None, [min_enter_node.name]) @@ -654,23 +710,21 @@ def _insert_qdq_pattern_for_weight_node(self, def _ignore_insert_qdq_pattern(self, matched_node_name): """For some cases we don't need to insert QDQ patterns.""" - if (matched_node_name in self.fp32_ops or matched_node_name in self.bf16_ops) and \ - ((matched_node_name,) not in self.quantized_nodes): + if (matched_node_name in self.fp32_ops or matched_node_name in self.bf16_ops) and ( + (matched_node_name,) not in self.quantized_nodes + ): return True - if matched_node_name not in self.op_wise_config and \ - (matched_node_name, ) not in self.quantized_nodes: + if matched_node_name not in self.op_wise_config and (matched_node_name,) not in self.quantized_nodes: return True - #TODO Remove below two lines once the TF enabled the QuantizedMatMul while + # TODO Remove below two lines once the TF enabled the QuantizedMatMul while # transpose_a could be set to True. 
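# Illustrative sketch (not from the patched sources): the weight QDQ wiring produced by
# the pass above, namely Const(min/max) -> QuantizeV2(SCALED, qint8) -> Dequantize.
# Node names and min/max values are assumptions; Helper is the GraphRewriterHelper
# class from graph_util.py, which appears later in this patch.
from tensorflow.python.framework import dtypes
from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper

min_node = Helper.create_constant_node("weight_min", -0.5, dtypes.float32)
max_node = Helper.create_constant_node("weight_max", 0.5, dtypes.float32)
quant_node = Helper.create_node("QuantizeV2", "weight_quant", ["weight", "weight_min", "weight_max"])
Helper.set_attr_dtype(quant_node, "T", dtypes.qint8)
Helper.set_attr_string(quant_node, "mode", b"SCALED")
Helper.set_attr_string(quant_node, "round_mode", b"HALF_TO_EVEN")
dequant_node = Helper.create_node(
    "Dequantize", "weight_dequant", ["weight_quant", "weight_quant:1", "weight_quant:2"]
)
Helper.set_attr_dtype(dequant_node, "T", dtypes.qint8)
Helper.set_attr_string(dequant_node, "mode", b"SCALED")
# Per-channel variants additionally set an "axis" attribute, e.g. axis=3 for Conv2D weights.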
- if not self.itex_mode and self.graph_info[matched_node_name].node.op == "MatMul": - if self.graph_info[matched_node_name].node.attr["transpose_a"].b == True: + if not self.itex_mode and self.graph_info[matched_node_name].node.op == "MatMul": + if self.graph_info[matched_node_name].node.attr["transpose_a"].b is True: return True if "FusedBatchNorm" in self.graph_info[matched_node_name].node.op and not self.itex_mode: return True if "_MklFusedInstanceNorm" == self.graph_info[matched_node_name].node.op and not self.itex_mode: return True return False - - diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/merge_duplicated_qdq.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/merge_duplicated_qdq.py index 6eb9bc7439b..cb3e0d12180 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/merge_duplicated_qdq.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/merge_duplicated_qdq.py @@ -17,11 +17,12 @@ """Merge duplicated QDQ patterns Graph Rewriter.""" from tensorflow.core.framework import node_def_pb2 -from neural_compressor.utils.utility import dump_elapsed_time -from ..graph_base import GraphRewriterBase from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper +from neural_compressor.utils.utility import dump_elapsed_time + +from ..graph_base import GraphRewriterBase class MergeDuplicatedQDQOptimizer(GraphRewriterBase): @@ -34,7 +35,7 @@ def do_transformation(self): cur_graph.graph = self.model graph_info = cur_graph.parse_graph() - patterns = [['QuantizeV2'], ['Dequantize']] + patterns = [["QuantizeV2"], ["Dequantize"]] matched_nodes = cur_graph.query_fusion_pattern_nodes(patterns) quantizev2_input_map = {} @@ -60,8 +61,7 @@ def do_transformation(self): do_merge = True for i in quantizev2_nodes: - if i.name != new_quantize_node.name and \ - i.attr['T'].type != new_quantize_node.attr['T'].type: + if i.name != new_quantize_node.name and i.attr["T"].type != new_quantize_node.attr["T"].type: do_merge = False break @@ -76,10 +76,10 @@ def do_transformation(self): # set the new QuantizeV2 node as all the other input of the Dequantize nodes for i in dequantize_map[input_map_node_name]: if i.name != new_dequantize_node.name: - cur_graph.node_name_details[i.name].node.ClearField('input') - cur_graph.node_name_details[i.name].node.input.extend([ - new_quantize_node.name, new_quantize_node.name + ':1', - new_quantize_node.name + ':2']) + cur_graph.node_name_details[i.name].node.ClearField("input") + cur_graph.node_name_details[i.name].node.input.extend( + [new_quantize_node.name, new_quantize_node.name + ":1", new_quantize_node.name + ":2"] + ) # remove the duplicated quantized nodes for i in quantizev2_nodes: diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/share_qdq_y_pattern.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/share_qdq_y_pattern.py index 88f69f09190..26f8ac38f6b 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/share_qdq_y_pattern.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/share_qdq_y_pattern.py @@ -16,9 +16,10 @@ # limitations under the License. 
"""Share QDQ for ITEX Y pattern Graph Rewriter.""" +from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.utils.utility import dump_elapsed_time + from ..graph_base import GraphRewriterBase -from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer class ShareQDQForItexYPatternOptimizer(GraphRewriterBase): @@ -26,6 +27,7 @@ class ShareQDQForItexYPatternOptimizer(GraphRewriterBase): Only 1 Q/DQ before Add op need to be inserted. Insert 2 Q/DQ breaks the ITEX fusion pattern. """ + @dump_elapsed_time("Pass ShareQDQForItexYPatternOptimizer") def do_transformation(self): """Share the QDQ of one output of Relu node with the another output which is Add node.""" @@ -33,7 +35,7 @@ def do_transformation(self): g.graph = self.model graph_info = g.parse_graph() - patterns = [['Relu', 'MaxPool'], ['QuantizeV2'], ['Dequantize']] + patterns = [["Relu", "MaxPool"], ["QuantizeV2"], ["Dequantize"]] matched_nodes = g.query_fusion_pattern_nodes(patterns) for i in matched_nodes: @@ -43,8 +45,10 @@ def do_transformation(self): add_node_name = g.node_name_details[relu_node_name].outputs[0] quantize_node_name = g.node_name_details[relu_node_name].outputs[1] - if 'Add' not in g.node_name_details[add_node_name].node.op or \ - g.node_name_details[quantize_node_name].node.op != 'QuantizeV2': + if ( + "Add" not in g.node_name_details[add_node_name].node.op + or g.node_name_details[quantize_node_name].node.op != "QuantizeV2" + ): continue dequantize_node_name = graph_info[i[2]].node.name diff --git a/neural_compressor/adaptor/tf_utils/graph_util.py b/neural_compressor/adaptor/tf_utils/graph_util.py index 2a9c1f63e24..874819c925f 100644 --- a/neural_compressor/adaptor/tf_utils/graph_util.py +++ b/neural_compressor/adaptor/tf_utils/graph_util.py @@ -17,30 +17,30 @@ """Tensorflow Graph Utils Helper Classes.""" import copy - -import re import logging +import re from collections import namedtuple -import numpy as np -from tensorflow.core.framework import graph_pb2 -from tensorflow.core.framework import attr_value_pb2 -from tensorflow.core.framework import node_def_pb2 +import numpy as np +from tensorflow.core.framework import attr_value_pb2, graph_pb2, node_def_pb2 from tensorflow.python.framework import tensor_util + from neural_compressor.utils.utility import singleton logger = logging.getLogger("neural_compressor") + @singleton -class GraphAnalyzer(): +class GraphAnalyzer: """Tensorflow Graph Analyzer class which implemented under singleton mode. This class provides the following API: * Analyze the graph * Analyze the input/output node names of the specified graph """ + # TODO add the positive input flag - node_details = namedtuple('node_details', ['node', 'outputs']) + node_details = namedtuple("node_details", ["node", "outputs"]) def __init__(self, extend_engine=None): """Intialization. 
@@ -79,16 +79,32 @@ def _has_positive_input(self, start_node): elif op_type in ("Concat", "Add", "AddV2", "AddN"): for each_input in start_node.input: has_relu = self._has_positive_input( - self.node_name_details[GraphRewriterHelper.node_name_from_input(each_input)].node) + self.node_name_details[GraphRewriterHelper.node_name_from_input(each_input)].node + ) if not has_relu: return False return True - elif op_type in ("Conv3D", "Conv2D", "DepthwiseConv2D", "QuantizeV2", "DepthwiseConv2dNative", - "MaxPool", "MaxPool3D", "Requantize", "AvgPool", "Pad", "CropAndResize", "Dequantize", - "Mean", "MatMul", "FusedBatchNormV3", "_MklFusedInstanceNorm"): + elif op_type in ( + "Conv3D", + "Conv2D", + "DepthwiseConv2D", + "QuantizeV2", + "DepthwiseConv2dNative", + "MaxPool", + "MaxPool3D", + "Requantize", + "AvgPool", + "Pad", + "CropAndResize", + "Dequantize", + "Mean", + "MatMul", + "FusedBatchNormV3", + "_MklFusedInstanceNorm", + ): return self._has_positive_input( - self.node_name_details[GraphRewriterHelper.node_name_from_input( - start_node.input[0])].node) + self.node_name_details[GraphRewriterHelper.node_name_from_input(start_node.input[0])].node + ) else: return False @@ -116,11 +132,30 @@ def get_graph_input_output(self): """ input_node_names = [] output_node_names = [] - unlikely_output_types = ['Const', 'HostConst', 'Assign', 'NoOp', 'Parameter', 'Assert', 'save', - 'global_step', 'read', 'switch', 'cond', 'train', - 'init_ops', '[A-Za-z]+Dataset'] - unlikely_input_types = ['FIFOQueueV2', 'QueueDequeueV2', 'QueueDequeueUpToV2', - 'OneShotIterator', 'IteratorGetNext', 'IteratorV2'] + unlikely_output_types = [ + "Const", + "HostConst", + "Assign", + "NoOp", + "Parameter", + "Assert", + "save", + "global_step", + "read", + "switch", + "cond", + "train", + "init_ops", + "[A-Za-z]+Dataset", + ] + unlikely_input_types = [ + "FIFOQueueV2", + "QueueDequeueV2", + "QueueDequeueUpToV2", + "OneShotIterator", + "IteratorGetNext", + "IteratorV2", + ] exclude_input_names = [] extra_input_names = [] @@ -131,19 +166,22 @@ def get_graph_input_output(self): exclude_input_names += i.outputs else: extra_input_names.append(i.node.name) - if i.node.op in ['Const', 'HostConst', 'Variable', 'VariableV2']: + if i.node.op in ["Const", "HostConst", "Variable", "VariableV2"]: continue if not i.node.input and not i.outputs: logger.debug("Skip isolated node {}.".format(i.node.name)) - elif i.node.op == 'Placeholder': + elif i.node.op == "Placeholder": input_node_names.append(i.node.name) elif not i.node.input: if i.node.op not in unlikely_input_types: input_node_names.append(i.node.name) else: exclude_input_names += i.outputs - elif not i.outputs and i.node.op not in unlikely_output_types \ - and not re.match(unlikely_output_types[-1], i.node.op): + elif ( + not i.outputs + and i.node.op not in unlikely_output_types + and not re.match(unlikely_output_types[-1], i.node.op) + ): output_node_names.append(i.node.name) else: pass @@ -152,8 +190,9 @@ def get_graph_input_output(self): for extra_input_name in extra_input_names: input_node_names.append(extra_input_name) - logger.warning("Found possible input node names: {}, output node names: {}.".format( - input_node_names, output_node_names)) + logger.warning( + "Found possible input node names: {}, output node names: {}.".format(input_node_names, output_node_names) + ) return (input_node_names, output_node_names) @@ -167,7 +206,7 @@ def query_fusion_pattern_nodes(self, patterns=None): [string list]: The matched node names which saved as the string list. 
""" if self.extend_engine: - #Todo keep this for future extension API + # Todo keep this for future extension API pass else: return self._search_patterns(patterns) @@ -203,6 +242,7 @@ def _search_patterns(self, input_pattern): ['Conv2D', 'BiasAdd', 'AddN', 'Relu6']] ] """ + def _validate_input(data, creteria): if isinstance(creteria, str) and data == creteria: return True @@ -302,13 +342,11 @@ def _dfs(op_names, op_types, graph_info, node, pattern): useless_match_list = [] for index, value in enumerate(sorted_output): - if index == len(sorted_output) - 1: break next_matched_op_names = sorted_output[index + 1][:-1] - if len(value[:-1]) < len(next_matched_op_names) and \ - _compare_list(value[:-1], next_matched_op_names): + if len(value[:-1]) < len(next_matched_op_names) and _compare_list(value[:-1], next_matched_op_names): useless_match_list.append(value) for i in useless_match_list: @@ -345,25 +383,24 @@ def remove_node_with_single_input_output(self, node_name): logger.debug("The {} is not a valid node name.".format(node_name)) return False - non_const_node_count = len([ - GraphRewriterHelper.node_name_from_input(i) - for i in self.node_name_details[node_name].node.input if self.node_name_details[ - GraphRewriterHelper.node_name_from_input(i)].node.op != "Const" - ]) + non_const_node_count = len( + [ + GraphRewriterHelper.node_name_from_input(i) + for i in self.node_name_details[node_name].node.input + if self.node_name_details[GraphRewriterHelper.node_name_from_input(i)].node.op != "Const" + ] + ) if non_const_node_count > 1: logger.debug("The target node {} has more than one input.".format(node_name)) return False try: - - top_node_name = GraphRewriterHelper.node_name_from_input( - self.node_name_details[node_name].node.input[0]) + top_node_name = GraphRewriterHelper.node_name_from_input(self.node_name_details[node_name].node.input[0]) for bottom_node_name in self.node_name_details[node_name].outputs: update_output_name = [ - bottom_node_name if i == node_name else i - for i in self.node_name_details[top_node_name].outputs + bottom_node_name if i == node_name else i for i in self.node_name_details[top_node_name].outputs ] self.node_name_details[top_node_name]._replace(outputs=update_output_name) @@ -373,7 +410,7 @@ def remove_node_with_single_input_output(self, node_name): ] if self.node_name_details[bottom_node_name].node.input: - self.node_name_details[bottom_node_name].node.ClearField('input') + self.node_name_details[bottom_node_name].node.ClearField("input") self.node_name_details[bottom_node_name].node.input.extend(update_input_name) except Exception as e: @@ -404,11 +441,7 @@ def remove_node(self, node_name): logger.debug("{} has been removed.".format(node_name)) return True - def replace_const_node(self, - new_const_node, - target_node, - old_constant_node_name, - replace_all=True): + def replace_const_node(self, new_const_node, target_node, old_constant_node_name, replace_all=True): """Replace the specified const node with another one. Args: @@ -417,23 +450,22 @@ def replace_const_node(self, need to be replaced const node. old_constant_node_name (string): the outdated const node name. replace_all (bool): replace the specified node name once or not. 
- """ new_const_node_name = new_const_node.name - self.node_name_details[new_const_node_name] = self.node_details(node=new_const_node, - outputs=target_node) + self.node_name_details[new_const_node_name] = self.node_details(node=new_const_node, outputs=target_node) for sub_node in target_node: - if not sub_node in self.node_name_details: + if sub_node not in self.node_name_details: continue for index, each_node_name in enumerate(self.node_name_details[sub_node].node.input): - if each_node_name + ':0' == old_constant_node_name \ - or each_node_name == old_constant_node_name: - new_input_name = self.node_name_details[sub_node].node.input[:index] + [ - new_const_node_name - ] + self.node_name_details[sub_node].node.input[index + 1:] - self.node_name_details[sub_node].node.ClearField('input') + if each_node_name + ":0" == old_constant_node_name or each_node_name == old_constant_node_name: + new_input_name = ( + self.node_name_details[sub_node].node.input[:index] + + [new_const_node_name] + + self.node_name_details[sub_node].node.input[index + 1 :] + ) + self.node_name_details[sub_node].node.ClearField("input") self.node_name_details[sub_node].node.input.extend(new_input_name) if old_constant_node_name in self.node_name_details: self.node_name_details[old_constant_node_name].outputs.remove(sub_node) @@ -469,15 +501,16 @@ def replace_constant_graph_with_constant_node(self, new_node, old_end_node_name) self.node_name_details.pop(input_name) output_node_name = self.node_name_details[old_end_node_name].outputs self.replace_node(new_node, old_end_node_name, output_node_name) - self.node_name_details[new_node_name].node.ClearField('input') + self.node_name_details[new_node_name].node.ClearField("input") except Exception as e: logger.info("Fail to replace {} due to {}.".format(old_end_node_name, str(e))) return False else: return True - def replace_single_node(self, new_node, old_output_node_names, old_output_name, - old_input_node_names, old_input_name): + def replace_single_node( + self, new_node, old_output_node_names, old_output_name, old_input_node_names, old_input_name + ): """Insert one node into the graph. 
Args: @@ -495,19 +528,17 @@ def replace_single_node(self, new_node, old_output_node_names, old_output_name, self.node_name_details[i].outputs.remove(old_output_name) self.node_name_details[i].outputs.append(new_node_name) - self.node_name_details[new_node_name] = self.node_details(node=new_node, - outputs=old_input_node_names) + self.node_name_details[new_node_name] = self.node_details(node=new_node, outputs=old_input_node_names) for each_input_node_name in old_input_node_names: - for index, each_node_name in enumerate( - self.node_name_details[each_input_node_name].node.input): - if self.node_name_details[each_input_node_name].node.input and ( - each_node_name) == old_input_name: - new_input_name = self.node_name_details[ - each_input_node_name].node.input[:index] + [ - new_node_name - ] + self.node_name_details[each_input_node_name].node.input[index + 1:] - self.node_name_details[each_input_node_name].node.ClearField('input') + for index, each_node_name in enumerate(self.node_name_details[each_input_node_name].node.input): + if self.node_name_details[each_input_node_name].node.input and (each_node_name) == old_input_name: + new_input_name = ( + self.node_name_details[each_input_node_name].node.input[:index] + + [new_node_name] + + self.node_name_details[each_input_node_name].node.input[index + 1 :] + ) + self.node_name_details[each_input_node_name].node.ClearField("input") self.node_name_details[each_input_node_name].node.input.extend(new_input_name) def replace_node(self, new_node, old_node_name, output_nodes_name): @@ -519,8 +550,7 @@ def replace_node(self, new_node, old_node_name, output_nodes_name): output_nodes_name (string list): output node names list """ new_node_name = new_node.name - self.node_name_details[new_node_name] = self.node_details(node=new_node, - outputs=output_nodes_name) + self.node_name_details[new_node_name] = self.node_details(node=new_node, outputs=output_nodes_name) old_node = self.node_name_details[old_node_name].node for input_node_name in old_node.input: if input_node_name in self.node_name_details: @@ -529,13 +559,16 @@ def replace_node(self, new_node, old_node_name, output_nodes_name): for node_name in output_nodes_name: for index, each_node_name in enumerate(self.node_name_details[node_name].node.input): - if self.node_name_details[ - node_name].node.input and GraphRewriterHelper.node_name_from_input( - each_node_name) == old_node_name: - new_input_name = self.node_name_details[node_name].node.input[:index] + [ - new_node_name - ] + self.node_name_details[node_name].node.input[index + 1:] - self.node_name_details[node_name].node.ClearField('input') + if ( + self.node_name_details[node_name].node.input + and GraphRewriterHelper.node_name_from_input(each_node_name) == old_node_name + ): + new_input_name = ( + self.node_name_details[node_name].node.input[:index] + + [new_node_name] + + self.node_name_details[node_name].node.input[index + 1 :] + ) + self.node_name_details[node_name].node.ClearField("input") self.node_name_details[node_name].node.input.extend(new_input_name) self.remove_node(old_node_name) @@ -550,36 +583,40 @@ def add_node(self, new_node, start_node_name, end_node_names): new_node_name = new_node.name if new_node_name in self.node_name_details: - logger.debug("Remove the existed node {} from internal data structure.".format( - (new_node_name))) + logger.debug("Remove the existed node {} from internal data structure.".format((new_node_name))) self.node_name_details.pop(new_node_name) - self.node_name_details[new_node_name] = 
self.node_details(node=new_node, - outputs=end_node_names) + self.node_name_details[new_node_name] = self.node_details(node=new_node, outputs=end_node_names) for end_node_name in end_node_names: # Update start node's output info if end_node_name not in self.node_name_details: continue - if start_node_name and end_node_name in self.node_name_details[GraphRewriterHelper. \ - node_name_from_input(start_node_name)].outputs: - self.node_name_details[GraphRewriterHelper.node_name_from_input( - start_node_name)].outputs.remove(end_node_name) + if ( + start_node_name + and end_node_name + in self.node_name_details[GraphRewriterHelper.node_name_from_input(start_node_name)].outputs + ): + self.node_name_details[GraphRewriterHelper.node_name_from_input(start_node_name)].outputs.remove( + end_node_name + ) # reset output node's input - for index, each_node_name in enumerate( - self.node_name_details[end_node_name].node.input): + for index, each_node_name in enumerate(self.node_name_details[end_node_name].node.input): if each_node_name == start_node_name: - new_input_name = self.node_name_details[end_node_name].node.input[:index] + [ - new_node_name - ] + self.node_name_details[end_node_name].node.input[index + 1:] - self.node_name_details[end_node_name].node.ClearField('input') + new_input_name = ( + self.node_name_details[end_node_name].node.input[:index] + + [new_node_name] + + self.node_name_details[end_node_name].node.input[index + 1 :] + ) + self.node_name_details[end_node_name].node.ClearField("input") self.node_name_details[end_node_name].node.input.extend(new_input_name) # add the inserted node into the start node's output. if start_node_name: - self.node_name_details[GraphRewriterHelper.node_name_from_input( - start_node_name)].outputs.append(new_node_name) + self.node_name_details[GraphRewriterHelper.node_name_from_input(start_node_name)].outputs.append( + new_node_name + ) def dump_graph(self): """Dump the current model's graphdef. @@ -600,6 +637,7 @@ def get_frame_info(self): [parent_frame_details]: OrderedDict frame info of the graph nodes. 
""" from collections import OrderedDict + self.parent_frame_details = OrderedDict() input_node_names, _ = self.get_graph_input_output() @@ -619,16 +657,20 @@ def get_frame_info(self): inputs = node_details.node.input if not inputs: self.parent_frame_details[node_details.node.name] = None - if self.node_name_details[output].node.op == 'Enter': + if self.node_name_details[output].node.op == "Enter": self.parent_frame_details[output] = self.node_name_details[output].node - elif self.node_name_details[output].node.op == 'Exit': + elif self.node_name_details[output].node.op == "Exit": self.parent_frame_details[output] = None else: if output in self.parent_frame_details and self.parent_frame_details[output]: - if node_details.node.name in self.parent_frame_details and \ - self.parent_frame_details[node_details.node.name]: - assert self.parent_frame_details[output].attr['frame_name'] == \ - self.parent_frame_details[node_details.node.name].attr['frame_name'] + if ( + node_details.node.name in self.parent_frame_details + and self.parent_frame_details[node_details.node.name] + ): + assert ( + self.parent_frame_details[output].attr["frame_name"] + == self.parent_frame_details[node_details.node.name].attr["frame_name"] + ) else: if node_details.node.name in self.parent_frame_details: self.parent_frame_details[output] = self.parent_frame_details[node_details.node.name] @@ -661,14 +703,14 @@ def parse_graph(self, input_graph_def=None): for node_name, node_details in self.node_name_details.items(): # update the upper node's output information. for each_input in node_details.node.input: - self.node_name_details[GraphRewriterHelper.node_name_from_input( - each_input)].outputs.append(node_name) + self.node_name_details[GraphRewriterHelper.node_name_from_input(each_input)].outputs.append(node_name) return self.node_name_details -class GraphRewriterHelper(): +class GraphRewriterHelper: """Encapsulates the graph operation into one class.""" + node_name_cache = {} node_name_port_cache = {} @@ -727,7 +769,7 @@ def create_node(op, name, inputs): return new_node @staticmethod - def create_constant_node(name, value, dtype, shape=None, device='cpu'): + def create_constant_node(name, value, dtype, shape=None, device="cpu"): """Create constant node. Args: @@ -741,8 +783,7 @@ def create_constant_node(name, value, dtype, shape=None, device='cpu'): Returns: [type]: [description] """ - node = GraphRewriterHelper.create_node("Const" if device == 'cpu' else "HostConst", name, - []) + node = GraphRewriterHelper.create_node("Const" if device == "cpu" else "HostConst", name, []) GraphRewriterHelper.set_attr_dtype(node, "dtype", dtype) GraphRewriterHelper.set_attr_tensor(node, "value", value, dtype, shape) return node @@ -764,8 +805,8 @@ def set_attr_tensor(node, key, value, dtype, shape=None): shape (int list, optional): the input tensor's shape. Defaults to None. """ node.attr[key].CopyFrom( - attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto(value, dtype=dtype, shape=shape))) + attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(value, dtype=dtype, shape=shape)) + ) @staticmethod def set_attr_type_list(node, key, value): @@ -827,7 +868,6 @@ def node_name_from_input(node_name): return GraphRewriterHelper.node_name_cache[node_name] - @staticmethod def values_from_const(node_def): """Extracts the values from a const NodeDef as a numpy ndarray. @@ -841,17 +881,23 @@ def values_from_const(node_def): Raises: ValueError: If the node isn't a Const. 
""" - assert node_def.op == 'Const', "Node named '%s' should be a Const op." % node_def.name + assert node_def.op == "Const", "Node named '%s' should be a Const op." % node_def.name input_tensor = node_def.attr["value"].tensor tensor_value = tensor_util.MakeNdarray(input_tensor) return tensor_value @staticmethod - def generate_int32_bias_for_conv(bias_tensor, channel_size, - max_input, min_input, - max_filter_tensor, min_filter_tensor, - activation_range, weights_range=127.0): + def generate_int32_bias_for_conv( + bias_tensor, + channel_size, + max_input, + min_input, + max_filter_tensor, + min_filter_tensor, + activation_range, + weights_range=127.0, + ): """Static method that generate int32 bias for conv op. Args: @@ -871,14 +917,18 @@ def generate_int32_bias_for_conv(bias_tensor, channel_size, scales = [] if len(max_filter_tensor) > 1: for i in range(channel_size): - scales.append(activation_range * weights_range / - (max(abs(max_input), abs(min_input)) * - max(abs(max_filter_tensor[i]), abs(min_filter_tensor[i])))) + scales.append( + activation_range + * weights_range + / (max(abs(max_input), abs(min_input)) * max(abs(max_filter_tensor[i]), abs(min_filter_tensor[i]))) + ) else: for i in range(channel_size): - scales.append(activation_range * weights_range / - (max(abs(max_input), abs(min_input)) * - max(abs(max_filter_tensor[0]), abs(min_filter_tensor[0])))) + scales.append( + activation_range + * weights_range + / (max(abs(max_input), abs(min_input)) * max(abs(max_filter_tensor[0]), abs(min_filter_tensor[0]))) + ) int32_bias = [] if channel_size > 1: for i in range(bias_length): @@ -890,10 +940,15 @@ def generate_int32_bias_for_conv(bias_tensor, channel_size, return int32_bias @staticmethod - def generate_int32_bias_for_matmul(bias_tensor, weights_tensor, - input_range, max_input, min_input, - max_filter_value, min_filter_value, - ): + def generate_int32_bias_for_matmul( + bias_tensor, + weights_tensor, + input_range, + max_input, + min_input, + max_filter_value, + min_filter_value, + ): """Static method that generate int32 bias for matmul op. Args: @@ -908,25 +963,25 @@ def generate_int32_bias_for_matmul(bias_tensor, weights_tensor, Returns: int32_bias: int32 bias """ - bias_scale = 255.0 * 127.0 / ( - input_range * max(abs(max_filter_value), abs(min_filter_value))) + bias_scale = 255.0 * 127.0 / (input_range * max(abs(max_filter_value), abs(min_filter_value))) relative_scale = 255 * min_input / (max_input - min_input) int32_bias = [] - for bias_index, value in enumerate( - np.sum(np.array(weights_tensor, dtype=np.int32), - axis=0, - dtype=np.int32)): + for bias_index, value in enumerate(np.sum(np.array(weights_tensor, dtype=np.int32), axis=0, dtype=np.int32)): if bias_index >= bias_tensor.size: continue - int32_bias.append(int(np.around(bias_tensor[bias_index] * - bias_scale + value * relative_scale))) + int32_bias.append(int(np.around(bias_tensor[bias_index] * bias_scale + value * relative_scale))) return int32_bias @staticmethod - def generate_int32_bias_for_matmul_per_channel(bias_tensor, weights_tensor, max_input, min_input, - max_filter_tensor, min_filter_tensor, - ): # pragma: no cover + def generate_int32_bias_for_matmul_per_channel( + bias_tensor, + weights_tensor, + max_input, + min_input, + max_filter_tensor, + min_filter_tensor, + ): # pragma: no cover """Static method that generate per-channel int32 bias for matmul op. 
Args: @@ -946,12 +1001,14 @@ def generate_int32_bias_for_matmul_per_channel(bias_tensor, weights_tensor, max_ scales = [] relative_scale = 255 * min_input / (max_input - min_input) for i in range(channel_size): - scales.append(activation_range * weights_range / - ((max_input - min_input) * - max(abs(max_filter_tensor[i]), abs(min_filter_tensor[i])))) + scales.append( + activation_range + * weights_range + / ((max_input - min_input) * max(abs(max_filter_tensor[i]), abs(min_filter_tensor[i]))) + ) int32_bias = [] for i in range(channel_size): - value = np.sum(np.array(weights_tensor),axis=0,dtype=np.int32)[i] + value = np.sum(np.array(weights_tensor), axis=0, dtype=np.int32)[i] int32_bias.append((int)(np.around(value * relative_scale + bias_tensor[i] * scales[i]))) return int32_bias @@ -966,6 +1023,7 @@ def gen_valid_sampling_log(log_path): Returns: the sampling min max value. """ + def gen_per_iter(data): res = [] requant_tmp = [] @@ -978,18 +1036,18 @@ def gen_per_iter(data): odd_list = sorted_requant[::2] even_list = sorted_requant[1::2] for index, value in enumerate(even_list): - min_value = min(0, float(value.split(':')[1][1:-1])) - max_value = float(odd_list[index].split(':')[1][1:-1]) + min_value = min(0, float(value.split(":")[1][1:-1])) + max_value = float(odd_list[index].split(":")[1][1:-1]) max_value = max_value if max_value > min_value else min_value + 1e-05 - mixed_str = value.split(':')[0] + '_max:[' + \ - str(min_value) + '][' + str(max_value) + ']' + mixed_str = value.split(":")[0] + "_max:[" + str(min_value) + "][" + str(max_value) + "]" res.append(mixed_str) return res + with open(log_path) as f: - valid_data = [i.strip() for i in f.readlines() if i.startswith(';')] + valid_data = [i.strip() for i in f.readlines() if i.startswith(";")] - first_line = valid_data[0].rsplit(':')[0] + first_line = valid_data[0].rsplit(":")[0] iterations = 0 for i in valid_data: @@ -1004,9 +1062,9 @@ def gen_per_iter(data): final_res = [] for i in range(iterations): - final_res.extend(gen_per_iter(valid_data[int(i*step): int(step*( i+ 1))])) - if i + 1 == iterations and int(step*( i+ 1)) < len(valid_data): - final_res.extend(gen_per_iter(valid_data[int(step*( i+ 1)): len(valid_data)])) + final_res.extend(gen_per_iter(valid_data[int(i * step) : int(step * (i + 1))])) + if i + 1 == iterations and int(step * (i + 1)) < len(valid_data): + final_res.extend(gen_per_iter(valid_data[int(step * (i + 1)) : len(valid_data)])) return final_res @@ -1016,18 +1074,17 @@ def analysis_rnn_model(graph_def, bf16_ops=[], fp32_ops=[]): g = GraphAnalyzer() g.graph = graph_def graph_info = g.parse_graph() - rnn_pattern = [['TensorArrayV3'], ['Enter'], ['TensorArrayReadV3'], \ - ['MatMul'], ['BiasAdd']] + rnn_pattern = [["TensorArrayV3"], ["Enter"], ["TensorArrayReadV3"], ["MatMul"], ["BiasAdd"]] target_nodes = g.query_fusion_pattern_nodes(rnn_pattern) res = {} for i in target_nodes: if i[-3] not in bf16_ops and i[-3] not in fp32_ops: - res[(i[-3], i[-2])] = graph_info[i[1]].node.attr['frame_name'].s.decode() + res[(i[-3], i[-2])] = graph_info[i[1]].node.attr["frame_name"].s.decode() - dynamic_rnn_pattern = [['Enter'], ['MatMul'], ['BiasAdd']] + dynamic_rnn_pattern = [["Enter"], ["MatMul"], ["BiasAdd"]] target_nodes = g.query_fusion_pattern_nodes(dynamic_rnn_pattern) for i in target_nodes: if i[-3] not in bf16_ops and i[-3] not in fp32_ops: - res[(i[1], i[2])] = graph_info[i[0]].node.attr['frame_name'].s.decode() + res[(i[1], i[2])] = graph_info[i[0]].node.attr["frame_name"].s.decode() return res diff --git 
a/neural_compressor/adaptor/tf_utils/quantize_graph/qat/fake_quantize.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/fake_quantize.py index 2c5e979c211..97f3f6ce3d4 100644 --- a/neural_compressor/adaptor/tf_utils/quantize_graph/qat/fake_quantize.py +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/fake_quantize.py @@ -17,9 +17,11 @@ """QAT Fake Quantize Graph Class.""" import abc + import six import tensorflow as tf + @six.add_metaclass(abc.ABCMeta) class FakeQuantizeBase(object): """ABC interface class for applying fake quantization by insert qdq.""" @@ -46,7 +48,7 @@ def __call__(self, inputs, range, training, **kwargs): @abc.abstractmethod def get_config(self): """Returns the config used to serialize the 'FakeQuantize'.""" - raise NotImplementedError('FakeQuantize should implement get_config().') + raise NotImplementedError("FakeQuantize should implement get_config().") @classmethod def from_config(cls, config): @@ -60,17 +62,11 @@ def from_config(cls, config): """ return cls(**config) + class FakeQuantize(FakeQuantizeBase): """The class that applies fake quantization.""" - def __init__( - self, - per_channel=False, - num_bits=8, - channel_axis=-1, - symmetric=True, - narrow_range=True - ): + def __init__(self, per_channel=False, num_bits=8, channel_axis=-1, symmetric=True, narrow_range=True): """Initialize a FakeQuantize class. Args: @@ -89,7 +85,7 @@ def __init__( self.symmetric = symmetric self.narrow_range = narrow_range self.channel_axis = channel_axis - self.name_prefix = 'FakeQuantize' + self.name_prefix = "FakeQuantize" def __call__(self, inputs, ranges, training, **kwargs): """Applying fake quantization by insert qdq. @@ -122,9 +118,7 @@ def __call__(self, inputs, ranges, training, **kwargs): if self.per_channel: if input_dim >= 2: - batch_min = tf.math.reduce_min( - inputs, axis=reduce_dims, name="BatchMin" - ) + batch_min = tf.math.reduce_min(inputs, axis=reduce_dims, name="BatchMin") else: batch_min = inputs else: @@ -132,9 +126,7 @@ def __call__(self, inputs, ranges, training, **kwargs): if self.per_channel: if input_dim >= 2: - batch_max = tf.math.reduce_max( - inputs, axis=reduce_dims, name="BatchMax" - ) + batch_max = tf.math.reduce_max(inputs, axis=reduce_dims, name="BatchMax") else: batch_max = inputs else: @@ -168,12 +160,11 @@ def _insert_qdq(self, inputs, min_var, max_var): inputs (tf.Tensor): A tensor containing values to be quantized. min_var (tf.Variable): A variable containing quantization range lower end(s). max_var (tf.Variable): A variable containing quantization range upper end(s). - + Returns: outputs (tf.Tensor): A tensor containing quantized values. """ if self.per_channel: - return tf.quantization.quantize_and_dequantize_v2( inputs, min_var, @@ -203,10 +194,10 @@ def get_config(self): config (dict): A dict containing required information. 
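# Illustrative, standalone sketch (not from the patched sources): the per-tensor
# fake-quantize step that FakeQuantize._insert_qdq above delegates to TensorFlow.
# The input values and the 8-bit/narrow-range settings are assumptions.
import tensorflow as tf

x = tf.constant([[-1.5, 0.3], [0.7, 2.2]], dtype=tf.float32)
min_var = tf.reduce_min(x)   # plays the role of BatchMin in the wrapper
max_var = tf.reduce_max(x)   # plays the role of BatchMax in the wrapper
y = tf.quantization.quantize_and_dequantize_v2(
    x, min_var, max_var, num_bits=8, narrow_range=True, range_given=True
)
print(y.numpy())             # x snapped onto the 8-bit grid spanned by [min_var, max_var]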
""" return { - 'num_bits': self.num_bits, - 'per_channel': self.per_channel, - 'symmetric': self.symmetric, - 'narrow_range': self.narrow_range + "num_bits": self.num_bits, + "per_channel": self.per_channel, + "symmetric": self.symmetric, + "narrow_range": self.narrow_range, } def __eq__(self, other): @@ -221,10 +212,12 @@ def __eq__(self, other): if not isinstance(other, FakeQuantize): return False - return (self.num_bits == other.num_bits and - self.per_channel == other.per_channel and - self.symmetric == other.symmetric and - self.narrow_range == other.narrow_range) + return ( + self.num_bits == other.num_bits + and self.per_channel == other.per_channel + and self.symmetric == other.symmetric + and self.narrow_range == other.narrow_range + ) def __ne__(self, other): """Check if this instance is not equal to another instance. diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_config.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_config.py index 4af09a9c8a0..1f50f20879e 100644 --- a/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_config.py +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_config.py @@ -21,11 +21,13 @@ global_config = {} logger = logging.getLogger("neural_compressor") -class QuantizeConfig(): + +class QuantizeConfig: """Class for building custom quantize config. There should be only one QuantizeConfig instance for global setting. """ + def __new__(cls): """Created a QuantizeConfig instance and add it to the global_config dict. @@ -33,7 +35,7 @@ def __new__(cls): instance (QuantizeConfig) : The created QuantizeConfig instance. """ instance = super().__new__(cls) - global_config['quantize_config'] = instance + global_config["quantize_config"] = instance return instance def __init__(self): @@ -41,13 +43,13 @@ def __init__(self): self.quantize_recipe = {} self.model_name = None - def add_quantize_recipe(self, quantize_recipe): # pragma: no cover + def add_quantize_recipe(self, quantize_recipe): # pragma: no cover """Add custom recipe for quantization to the QuantizeConfig instance. Args: quantize_recipe (dict): A dict that decide whether given layers should be quantized. A typical quantize_recipe will be a dict of layer_name and - dict as key-value pairs. In each value dict, there should be + dict as key-value pairs. In each value dict, there should be a {'quantize': bool} key-value pair and a {'index': list} pair. The latter one is used to decide which inputs should be quantized in some layers with multiple inputs. @@ -56,7 +58,7 @@ def add_quantize_recipe(self, quantize_recipe): # pragma: no cover 'conv5_block3_3_add' : {'quantize': True, 'index': [1, 3]} } """ - self.quantize_recipe.update(quantize_recipe) + self.quantize_recipe.update(quantize_recipe) def query_layer(self, layer_name): """Query if a specific layer is in the quantize_recipe dict. @@ -71,7 +73,7 @@ def query_layer(self, layer_name): return self.quantize_recipe[layer_name] return {} - def remove_layer(self, layer_name): # pragma: no cover + def remove_layer(self, layer_name): # pragma: no cover """Remove a specific layer from the quantize_recipe dict. Args: @@ -80,7 +82,7 @@ def remove_layer(self, layer_name): # pragma: no cover if layer_name in self.quantize_recipe: del self.quantize_recipe[layer_name] - def remove_layers(self, layer_names): # pragma: no cover + def remove_layers(self, layer_names): # pragma: no cover """Remove a batch of layers from the quantize_recipe dict. 
Args: @@ -89,7 +91,7 @@ def remove_layers(self, layer_names): # pragma: no cover for layer_name in layer_names: self.remove_layer(layer_name) - def get_quantize_recipe(self): # pragma: no cover + def get_quantize_recipe(self): # pragma: no cover """Get the current recipe dict for quantization. Returns: @@ -97,7 +99,7 @@ def get_quantize_recipe(self): # pragma: no cover """ return self.quantize_recipe - def is_empty(self): # pragma: no cover + def is_empty(self): # pragma: no cover """Check if the recipe of quantization is an empty dict. Returns: @@ -107,15 +109,21 @@ def is_empty(self): # pragma: no cover return False return True - def clear_quantize_recipe(self): # pragma: no cover + def clear_quantize_recipe(self): # pragma: no cover """Clear recipe of quantization to be an empty dict.""" self.quantize_recipe.clear() + layer_wise_config = { - 'quantize_layers': {'Conv2D', 'Dense', 'DepthwiseConv2D', 'MaxPooling2D', - 'AveragePooling2D', 'GlobalAveragePooling2D'}, - 'possible_quantize_layers': {'Multiply', 'Concatenate', 'Add', 'BatchNormalization'}, - 'weighted_layers': {'Conv2D', 'Dense', 'DepthwiseConv2D'}, - 'multiple_inputs_layers': {'Multiply', 'Concatenate', 'Add'} + "quantize_layers": { + "Conv2D", + "Dense", + "DepthwiseConv2D", + "MaxPooling2D", + "AveragePooling2D", + "GlobalAveragePooling2D", + }, + "possible_quantize_layers": {"Multiply", "Concatenate", "Add", "BatchNormalization"}, + "weighted_layers": {"Conv2D", "Dense", "DepthwiseConv2D"}, + "multiple_inputs_layers": {"Multiply", "Concatenate", "Add"}, } - diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_helper.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_helper.py index 84026f0c474..d28d9474f2b 100644 --- a/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_helper.py +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_helper.py @@ -16,9 +16,10 @@ # limitations under the License. """QAT Quantize Helper Class.""" -from .quantize_wrapper import QuantizeWrapper +from .quantize_config import QuantizeConfig, global_config, layer_wise_config from .quantize_layers.optimize_layer import config_quantizable_layers -from .quantize_config import layer_wise_config, global_config, QuantizeConfig +from .quantize_wrapper import QuantizeWrapper + def init_quantize_config(model, quantize_recipe=None): """Initialize quantization config at the beginning of QAT process. @@ -28,11 +29,12 @@ def init_quantize_config(model, quantize_recipe=None): quantize_recipe (dict): A dict that decide whether given layers should be quantized. Returns: - config (QuantizeConfig): QuantizeConfig instance used to decide whether a specific layer + config (QuantizeConfig): QuantizeConfig instance used to decide whether a specific layer should be quantized. """ - assert 'quantize_config' not in global_config, ("quantize_config has been unexpectedly " - "created. Please check your QAT workflow") + assert "quantize_config" not in global_config, ( + "quantize_config has been unexpectedly " "created. Please check your QAT workflow" + ) config = QuantizeConfig() config_quantizable_layers(model) @@ -42,6 +44,7 @@ def init_quantize_config(model, quantize_recipe=None): return config + def _is_quantizable_layer(layer): """Query if the input layer should be quantized. 
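# Illustrative end-to-end sketch (not from the patched sources) of how init_quantize_config
# above and qat_clone_function just below are typically combined; the toy Keras model is an
# assumption.
import tensorflow as tf
from neural_compressor.adaptor.tf_utils.quantize_graph.qat.quantize_helper import (
    init_quantize_config,
    qat_clone_function,
)

model = tf.keras.Sequential(
    [
        tf.keras.layers.Conv2D(8, 3, input_shape=(32, 32, 3)),
        tf.keras.layers.Dense(10),
    ]
)
config = init_quantize_config(model)   # builds the global QuantizeConfig for this model
qat_model = tf.keras.models.clone_model(model, clone_function=qat_clone_function)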
@@ -54,22 +57,25 @@ def _is_quantizable_layer(layer): quantizable = True layer_class = layer.__class__.__name__ - quantize_config = global_config['quantize_config'] + quantize_config = global_config["quantize_config"] specific_layer_config = quantize_config.query_layer(layer.name) if specific_layer_config: # the layer is set to be unquantizable by QuantizeConfig - if not specific_layer_config['quantize']: + if not specific_layer_config["quantize"]: return False else: - if layer_class in layer_wise_config['quantize_layers'] or \ - layer_class in layer_wise_config['possible_quantize_layers']: - return True + if ( + layer_class in layer_wise_config["quantize_layers"] + or layer_class in layer_wise_config["possible_quantize_layers"] + ): + return True - if layer_class not in layer_wise_config['quantize_layers']: + if layer_class not in layer_wise_config["quantize_layers"]: quantizable = False return quantizable + def qat_clone_function(layer): """Wrap or leave given layer based on quantize config object parameters. @@ -79,7 +85,7 @@ def qat_clone_function(layer): Returns: wrapped_layer (QuantizeWrapper): layer wrapped by QuantizeWrapper class. """ - wrapped_layer= layer + wrapped_layer = layer if _is_quantizable_layer(layer): wrapped_layer = QuantizeWrapper(layer) diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/optimize_layer.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/optimize_layer.py index 2b6cc64af46..620942261e1 100644 --- a/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/optimize_layer.py +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/optimize_layer.py @@ -19,16 +19,14 @@ from .quantize_layer_add import QuantizeLayerAdd from .quantize_layer_bn import QuantizeLayerBatchNormalization + def config_quantizable_layers(model): """Configure the quantizable layers.""" - quantize_layer_mapping = { - 'Add': QuantizeLayerAdd, - 'BatchNormalization': QuantizeLayerBatchNormalization - } + quantize_layer_mapping = {"Add": QuantizeLayerAdd, "BatchNormalization": QuantizeLayerBatchNormalization} for layer_class, quantize_layer in quantize_layer_mapping.items(): quantize_layer_mapping[layer_class] = quantize_layer() for layer in model.layers: if layer.__class__.__name__ in quantize_layer_mapping: - quantize_layer_mapping[layer.__class__.__name__](layer) \ No newline at end of file + quantize_layer_mapping[layer.__class__.__name__](layer) diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_add.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_add.py index 6a3ad79b945..fcd3f8fad49 100644 --- a/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_add.py +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_add.py @@ -17,19 +17,21 @@ """Quantization Add Layer Class.""" import logging + from .quantize_layer_base import QuantizeLayerBase logger = logging.getLogger("neural_compressor") -class QuantizeLayerAdd(QuantizeLayerBase): # pragma: no cover + +class QuantizeLayerAdd(QuantizeLayerBase): # pragma: no cover """The class for quantization of Add.""" def __init__(self): """Initialize QuantizeLayerAdd class.""" self.quantize_patterns = [ - ['Conv', 'BatchNorm', 'Add'], - ['Conv', 'BatchNorm', 'Activation', 'Add'], - ['Conv', 'BatchNorm', 'Activation', 'Dropout', 'Add'] + ["Conv", "BatchNorm", "Add"], + ["Conv", "BatchNorm", "Activation", 
"Add"], + ["Conv", "BatchNorm", "Activation", "Dropout", "Add"], ] super().__init__() @@ -45,8 +47,10 @@ def _quantizable_add(self): """ input_layer = self._find_input_layers(self.layer) if len(input_layer) == 1: - logger.warning("The layer 'Add' should have more than one input. " - "You input a model with layer {} which has only one input".format(self.layer.name)) + logger.warning( + "The layer 'Add' should have more than one input. " + "You input a model with layer {} which has only one input".format(self.layer.name) + ) return False return True @@ -59,7 +63,7 @@ def __call__(self, layer): as quantizable by QuantizeConfig. Args: - layer (tf.keras.layers.Layer): The keras layer to be estimated. + layer (tf.keras.layers.Layer): The keras layer to be estimated. """ self.layer = layer if self._quantizable_add(): @@ -67,9 +71,8 @@ def __call__(self, layer): fused_conv_index = None for i, input_layer in enumerate(input_layers): # Check that the input is a Conv pattern - if 'Conv' in input_layer.__class__.__name__ or self._find_patterns(input_layer): - if hasattr(input_layer, 'outbound_nodes') and \ - len(getattr(input_layer, 'outbound_nodes')) == 1: + if "Conv" in input_layer.__class__.__name__ or self._find_patterns(input_layer): + if hasattr(input_layer, "outbound_nodes") and len(getattr(input_layer, "outbound_nodes")) == 1: fused_conv_index = i break @@ -77,5 +80,4 @@ def __call__(self, layer): if fused_conv_index: del input_indexes[fused_conv_index] - self.quantize_config.add_quantize_recipe({self.layer.name: {'quantize': True, - 'index': input_indexes}}) + self.quantize_config.add_quantize_recipe({self.layer.name: {"quantize": True, "index": input_indexes}}) diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_base.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_base.py index 5634c163434..6f4c82872c3 100644 --- a/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_base.py +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_base.py @@ -18,20 +18,21 @@ from ..quantize_config import global_config -class QuantizeLayerBase(): # pragma: no cover + +class QuantizeLayerBase: # pragma: no cover """QuantizeLayer Base Class.""" + def __init__(self): """Initialize QuantizeLayerBase class.""" self.quantize_patterns = [] - assert 'quantize_config' in global_config, \ - "QuantizeConfig is not correctly created." - self.quantize_config = global_config['quantize_config'] + assert "quantize_config" in global_config, "QuantizeConfig is not correctly created." + self.quantize_config = global_config["quantize_config"] def _find_input_layers(self, layer): """Find all inputs of a specific layer. Args: - layer (tf.keras.layers.Layer): The target keras layer that this method + layer (tf.keras.layers.Layer): The target keras layer that this method is to find its input layers. Returns: @@ -51,7 +52,7 @@ def _find_patterns(self, layer): """Checks if the input layer can satisfy the patterns. Args: - layer (tf.keras.layers.Layer): The input keras layer that this method + layer (tf.keras.layers.Layer): The input keras layer that this method is to find patterns. 
Returns: @@ -63,7 +64,7 @@ def _find_patterns(self, layer): for quantize_pattern in self.quantize_patterns: index = len(quantize_pattern) - 2 previous_layer = layer - while(index >= 0): + while index >= 0: previous_layer = self._find_input_layers(previous_layer) if quantize_pattern[index] not in previous_layer.__class__.__name__: break @@ -81,6 +82,6 @@ def __call__(self, layer): as quantizable by QuantizeConfig. Args: - layer (tf.keras.layers.Layer): The keras layer to be estimated. + layer (tf.keras.layers.Layer): The keras layer to be estimated. """ raise NotImplementedError() diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_bn.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_bn.py index 5cc0ea003d9..03ecf536395 100644 --- a/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_bn.py +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_bn.py @@ -18,7 +18,8 @@ from .quantize_layer_base import QuantizeLayerBase -class QuantizeLayerBatchNormalization(QuantizeLayerBase): # pragma: no cover + +class QuantizeLayerBatchNormalization(QuantizeLayerBase): # pragma: no cover """The class for quantization of BatchNormalization.""" def __init__(self): @@ -37,7 +38,7 @@ def _quantizable_bn(self): input_layer = self._find_input_layers(self.layer) assert len(input_layer) == 1, "BatchNormalization only has one input." input_layer_class = input_layer.__class__.__name__ - if 'Conv' not in input_layer_class: + if "Conv" not in input_layer_class: return True return False @@ -50,8 +51,8 @@ def __call__(self, layer): as quantizable by QuantizeConfig. Args: - layer (tf.keras.layers.Layer): The keras layer to be estimated. + layer (tf.keras.layers.Layer): The keras layer to be estimated. """ self.layer = layer if self._quantizable_bn(): - self.quantize_config.add_quantize_recipe({self.layer.name: {'quantize': True}}) + self.quantize_config.add_quantize_recipe({self.layer.name: {"quantize": True}}) diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_wrapper.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_wrapper.py index 9b94374893a..2baf26c0c24 100644 --- a/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_wrapper.py +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_wrapper.py @@ -16,11 +16,14 @@ # limitations under the License. """QAT Quantize Wrapper Class.""" -import tensorflow as tf from abc import abstractmethod -from .fake_quantize import FakeQuantize + +import tensorflow as tf from tensorflow.python.util import tf_inspect -from .quantize_config import layer_wise_config, global_config + +from .fake_quantize import FakeQuantize +from .quantize_config import global_config, layer_wise_config + class QuantizeWrapperBase(tf.keras.layers.Wrapper): """Base class for quantize wrapper.""" @@ -36,9 +39,10 @@ def __init__(self, layer, **kwargs): """ assert layer is not None, "'layer' should not be None." - assert isinstance(layer, tf.keras.layers.Layer) or isinstance(layer, - tf.keras.Model),("'layer' can only be a 'tf.keras.layers.Layer' instance." - " You passed an instance of type: {input}.".format(input=layer.__class__.__name__)) + assert isinstance(layer, tf.keras.layers.Layer) or isinstance(layer, tf.keras.Model), ( + "'layer' can only be a 'tf.keras.layers.Layer' instance." 
+ " You passed an instance of type: {input}.".format(input=layer.__class__.__name__) + ) if "name" not in kwargs: kwargs["name"] = self._make_layer_name(layer) @@ -82,25 +86,25 @@ def _init_min_max_variables(self, name, shape): """ min_variable = self.layer.add_weight( name + "_min", - shape = (shape), - trainable = False, - initializer = tf.keras.initializers.Constant(-6.0), + shape=(shape), + trainable=False, + initializer=tf.keras.initializers.Constant(-6.0), ) max_variable = self.layer.add_weight( name + "_max", - shape = (shape), - trainable = False, - initializer = tf.keras.initializers.Constant(6.0), + shape=(shape), + trainable=False, + initializer=tf.keras.initializers.Constant(6.0), ) return min_variable, max_variable def query_input_index(self): """Query QuantizeConfig to check if there is any designated input index for this layer.""" - quantize_config = global_config['quantize_config'] + quantize_config = global_config["quantize_config"] custom_layer_config = quantize_config.query_layer(self.layer) - if custom_layer_config and 'index' in custom_layer_config: - self.index = custom_layer_config['index'] + if custom_layer_config and "index" in custom_layer_config: + self.index = custom_layer_config["index"] @abstractmethod def call(self, inputs, training=None): @@ -108,7 +112,7 @@ def call(self, inputs, training=None): Args: inputs (tf.Tensor or dict/list/tuple): Inputs of the wrapped layer. - + Returns: outputs (tf.Tensor or dict/list/tuple): Outputs of the wrapped layer. """ @@ -122,7 +126,7 @@ def trainable(self): @trainable.setter def trainable(self, value): """Set trainable attribute for the layer and its sublayers. - + Args: value (Boolean): The desired state for the layer's trainable attribute. """ @@ -170,6 +174,7 @@ def losses(self): """ return self.layer.losses + self._losses + class QuantizeWrapper(QuantizeWrapperBase): """General QuantizeWrapper for quantizable layers. 
@@ -187,13 +192,13 @@ def __init__(self, layer, **kwargs): """ super().__init__(layer, **kwargs) - self.kernel = 'kernel' + self.kernel = "kernel" self.kernel_weights = None self.channel_axis = kwargs.get("axis", -1) - if self._layer_class == 'DepthwiseConv2D': - self.kernel = 'depthwise_kernel' + if self._layer_class == "DepthwiseConv2D": + self.kernel = "depthwise_kernel" self.channel_axis = 2 - if self._layer_class in layer_wise_config['multiple_inputs_layers']: + if self._layer_class in layer_wise_config["multiple_inputs_layers"]: self.query_input_index() def build(self, input_shape): @@ -204,13 +209,12 @@ def build(self, input_shape): """ super().build(input_shape) - if self._layer_class in layer_wise_config['weighted_layers']: + if self._layer_class in layer_wise_config["weighted_layers"]: self.kernel_weights = getattr(self.layer, self.kernel) weight_min, weight_max = self._init_min_max_variables( - name = self.kernel_weights.name.split(":")[0], - shape = self.kernel_weights.shape[self.channel_axis] - ) + name=self.kernel_weights.name.split(":")[0], shape=self.kernel_weights.shape[self.channel_axis] + ) self.weight_range = {"min_var": weight_min, "max_var": weight_max} self._trainable_weights.append(self.kernel_weights) @@ -224,9 +228,8 @@ def build(self, input_shape): if num_input == 1: inputs_min, inputs_max = self._init_min_max_variables( - name = self.layer.name + "_input{}".format(0), - shape = None - ) + name=self.layer.name + "_input{}".format(0), shape=None + ) self.inputs_range = {"min_var": inputs_min, "max_var": inputs_max} else: self.inputs_range = [] @@ -234,9 +237,8 @@ def build(self, input_shape): self.inputs_range.append({}) if i in self.index: inputs_min, inputs_max = self._init_min_max_variables( - name = self.layer.name + "_input{}".format(i), - shape = None - ) + name=self.layer.name + "_input{}".format(i), shape=None + ) self.inputs_range[i] = {"min_var": inputs_min, "max_var": inputs_max} def call(self, inputs, training=None): @@ -252,18 +254,18 @@ def call(self, inputs, training=None): training = tf.keras.backend.learning_phase() # Quantize all weights, and replace them in the underlying layer. 
- if self._layer_class in layer_wise_config['weighted_layers']: + if self._layer_class in layer_wise_config["weighted_layers"]: weight_quantizer = FakeQuantize( - per_channel = True, - channel_axis = self.channel_axis, + per_channel=True, + channel_axis=self.channel_axis, ) quantized_weight = weight_quantizer(self.kernel_weights, self.weight_range, training) setattr(self.layer, self.kernel, quantized_weight) quantized_inputs = inputs inputs_quantizer = FakeQuantize( - per_channel = False, - channel_axis = self.channel_axis, + per_channel=False, + channel_axis=self.channel_axis, ) if not isinstance(quantized_inputs, tf.Tensor): @@ -272,11 +274,11 @@ def call(self, inputs, training=None): quantized_inputs[i] = inputs_quantizer(inputs[i], self.inputs_range[i], training) else: quantized_inputs = inputs_quantizer(inputs, self.inputs_range, training) - + args = tf_inspect.getfullargspec(self.layer.call).args if "training" in args: outputs = self.layer.call(quantized_inputs, training=training) else: outputs = self.layer.call(quantized_inputs) - return outputs \ No newline at end of file + return outputs diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_bn.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_bn.py index d71fee23fbc..313c84217fd 100644 --- a/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_bn.py +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_bn.py @@ -16,27 +16,26 @@ # limitations under the License. """Quantize FusedBatchNormV3 to int8 op.""" -from tensorflow.core.framework import graph_pb2 -from tensorflow.core.framework import node_def_pb2 +from tensorflow.core.framework import graph_pb2, node_def_pb2 from tensorflow.python.framework import dtypes from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper as helper + from ..quantize_graph_base import QuantizeNodeBase + class FuseNodeStartWithFusedBatchNormV3(QuantizeNodeBase): """Quantize FusedBatchNormV3 to int8 op _QuantizedFusedBatchNorm.""" def __init__(self, **kwargs): """Initilization.""" super().__init__(**kwargs) - self.sorted_patterns = sorted(self.patterns, - key=lambda i: len(i), - reverse=True) + self.sorted_patterns = sorted(self.patterns, key=lambda i: len(i), reverse=True) if self.new_api: self.fusion_mapping = { - 'FusedBatchNormV3': self.apply_newly_bn_relu_fusion, - 'FusedBatchNormV3Relu': self.apply_newly_bn_relu_fusion, - 'FusedBatchNormV3LeakyRelu': self.apply_newly_bn_leakyrelu_fusion + "FusedBatchNormV3": self.apply_newly_bn_relu_fusion, + "FusedBatchNormV3Relu": self.apply_newly_bn_relu_fusion, + "FusedBatchNormV3LeakyRelu": self.apply_newly_bn_leakyrelu_fusion, } else: self.fusion_mapping = {} @@ -46,22 +45,21 @@ def apply_newly_bn_relu_fusion(self, match_node_name): """Apply the BN + Relu fusion.""" matched_node = self.node_name_mapping[match_node_name[0]] skip_node_name = match_node_name[1:] - control_inputs, normal_inputs = self._get_node_input( - matched_node.node.name) + control_inputs, normal_inputs = self._get_node_input(matched_node.node.name) scale_name = normal_inputs[1] offset_name = normal_inputs[2] mean_name = normal_inputs[3] - variance_name = normal_inputs[4] + variance_name = normal_inputs[4] all_input_names = self._add_eightbit_prologue_nodes(matched_node.node.name) all_input_names = [ - all_input_names[0], + all_input_names[0], scale_name, offset_name, mean_name, variance_name, all_input_names[1], - all_input_names[2] + all_input_names[2], ] for _, node in 
enumerate(self.input_graph.node): @@ -70,19 +68,20 @@ def apply_newly_bn_relu_fusion(self, match_node_name): elif node.name == match_node_name[0]: self.logger.debug("Matched node {} with input {}.".format(node.name, node.input)) - relu_node_name = match_node_name[1] if len(match_node_name)==2 else None + relu_node_name = match_node_name[1] if len(match_node_name) == 2 else None - node_op = '_QuantizedFusedBatchNorm' + node_op = "_QuantizedFusedBatchNorm" quantized_node_name = node.name + "_eightbit_quantized_bn" output_min_node_name = quantized_node_name + "_input7_output_min" output_max_node_name = quantized_node_name + "_input8_output_max" - quantized_node_input_names = all_input_names + \ - [output_min_node_name] + [output_max_node_name] + control_inputs - output_min_node = helper.create_constant_node(output_min_node_name, -1., dtypes.float32) - output_max_node = helper.create_constant_node(output_max_node_name, 1., dtypes.float32) + quantized_node_input_names = ( + all_input_names + [output_min_node_name] + [output_max_node_name] + control_inputs + ) + output_min_node = helper.create_constant_node(output_min_node_name, -1.0, dtypes.float32) + output_max_node = helper.create_constant_node(output_max_node_name, 1.0, dtypes.float32) quantized_bn_node = helper.create_node(node_op, quantized_node_name, quantized_node_input_names) if relu_node_name is not None: - helper.set_attr_string(quantized_bn_node, "activation_mode", b'Relu') + helper.set_attr_string(quantized_bn_node, "activation_mode", b"Relu") if self.node_name_mapping[offset_name].node.op == "Const": helper.set_attr_bool(quantized_bn_node, "is_offset_const", True) else: @@ -94,9 +93,9 @@ def apply_newly_bn_relu_fusion(self, match_node_name): helper.set_attr_dtype(quantized_bn_node, "T", dtypes.qint8) helper.set_attr_dtype(quantized_bn_node, "U", dtypes.float32) helper.set_attr_dtype(quantized_bn_node, "Tout", dtypes.qint8) + """# 0. - """ - # 0. x + x # 1. scale # 2. offset # 3. mean @@ -106,39 +105,46 @@ def apply_newly_bn_relu_fusion(self, match_node_name): # 7. {output_min} # 8. {output_max} """ - helper.set_attr_type_list(quantized_bn_node, 'input_types', [ - dtypes.qint8.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) - + helper.set_attr_type_list( + quantized_bn_node, + "input_types", + [ + dtypes.qint8.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + """# 0. - """ - # 0. output + output # 1. output_min # 2. 
output_max """ - helper.set_attr_type_list(quantized_bn_node, 'out_types', [ - dtypes.qint8.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) + helper.set_attr_type_list( + quantized_bn_node, + "out_types", + [ + dtypes.qint8.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) self.add_output_graph_node(output_min_node) self.add_output_graph_node(output_max_node) self.add_output_graph_node(quantized_bn_node) self._intel_cpu_add_dequantize_result_node( - quantized_output_name = quantized_node_name, - original_node_name = match_node_name[-1], - dtype = dtypes.qint8, - min_tensor_index = 1, - performance_only=self.performance_only - ) + quantized_output_name=quantized_node_name, + original_node_name=match_node_name[-1], + dtype=dtypes.qint8, + min_tensor_index=1, + performance_only=self.performance_only, + ) else: new_node = node_def_pb2.NodeDef() @@ -149,8 +155,7 @@ def apply_newly_bn_leakyrelu_fusion(self, match_node_name): """Apply BN + LeakyRelu fusion.""" matched_node = self.node_name_mapping[match_node_name[0]] skip_node_name = match_node_name[1:] - control_inputs, normal_inputs = self._get_node_input( - matched_node.node.name) + control_inputs, normal_inputs = self._get_node_input(matched_node.node.name) scale_name = normal_inputs[1] offset_name = normal_inputs[2] mean_name = normal_inputs[3] @@ -164,7 +169,7 @@ def apply_newly_bn_leakyrelu_fusion(self, match_node_name): mean_name, variance_name, all_input_names[1], - all_input_names[2] + all_input_names[2], ] for _, node in enumerate(self.input_graph.node): @@ -173,19 +178,21 @@ def apply_newly_bn_leakyrelu_fusion(self, match_node_name): elif node.name == match_node_name[0]: self.logger.debug("Matched node {} with input {}.".format(node.name, node.input)) leakyrelu_node_name = match_node_name[1] - node_op = '_QuantizedFusedBatchNorm' + node_op = "_QuantizedFusedBatchNorm" quantized_node_name = node.name + "_eightbit_quantized_bn" output_min_node_name = quantized_node_name + "_input7_output_min" output_max_node_name = quantized_node_name + "_input8_output_max" - quantized_node_input_names = all_input_names + \ - [output_min_node_name] + [output_max_node_name] + control_inputs - output_min_node = helper.create_constant_node(output_min_node_name, -1., dtypes.float32) - output_max_node = helper.create_constant_node(output_max_node_name, 1., dtypes.float32) + quantized_node_input_names = ( + all_input_names + [output_min_node_name] + [output_max_node_name] + control_inputs + ) + output_min_node = helper.create_constant_node(output_min_node_name, -1.0, dtypes.float32) + output_max_node = helper.create_constant_node(output_max_node_name, 1.0, dtypes.float32) quantized_bn_node = helper.create_node(node_op, quantized_node_name, quantized_node_input_names) - helper.set_attr_string(quantized_bn_node, "activation_mode", b'LeakyRelu') - helper.copy_attr(quantized_bn_node, "alpha", \ - self.node_name_mapping[leakyrelu_node_name].node.attr["alpha"]) + helper.set_attr_string(quantized_bn_node, "activation_mode", b"LeakyRelu") + helper.copy_attr( + quantized_bn_node, "alpha", self.node_name_mapping[leakyrelu_node_name].node.attr["alpha"] + ) if self.node_name_mapping[offset_name].node.op == "Const": helper.set_attr_bool(quantized_bn_node, "is_offset_const", True) else: @@ -197,9 +204,9 @@ def apply_newly_bn_leakyrelu_fusion(self, match_node_name): helper.set_attr_dtype(quantized_bn_node, "T", dtypes.qint8) helper.set_attr_dtype(quantized_bn_node, "U", 
dtypes.float32) helper.set_attr_dtype(quantized_bn_node, "Tout", dtypes.qint8) + """# 0. - """ - # 0. x + x # 1. scale # 2. offset # 3. mean @@ -209,39 +216,46 @@ def apply_newly_bn_leakyrelu_fusion(self, match_node_name): # 7. {output_min} # 8. {output_max} """ - helper.set_attr_type_list(quantized_bn_node, 'input_types', [ - dtypes.qint8.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) - + helper.set_attr_type_list( + quantized_bn_node, + "input_types", + [ + dtypes.qint8.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + """# 0. - """ - # 0. output + output # 1. output_min # 2. output_max """ - helper.set_attr_type_list(quantized_bn_node, 'out_types', [ - dtypes.qint8.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) + helper.set_attr_type_list( + quantized_bn_node, + "out_types", + [ + dtypes.qint8.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) self.add_output_graph_node(output_min_node) self.add_output_graph_node(output_max_node) self.add_output_graph_node(quantized_bn_node) self._intel_cpu_add_dequantize_result_node( - quantized_output_name = quantized_node_name, - original_node_name = match_node_name[-1], - dtype = dtypes.qint8, - min_tensor_index = 1, - performance_only=self.performance_only - ) + quantized_output_name=quantized_node_name, + original_node_name=match_node_name[-1], + dtype=dtypes.qint8, + min_tensor_index=1, + performance_only=self.performance_only, + ) else: new_node = node_def_pb2.NodeDef() @@ -251,7 +265,7 @@ def apply_newly_bn_leakyrelu_fusion(self, match_node_name): def get_longest_fuse(self): """Get the longest fusion pattern.""" self._get_op_list() - real_patterns = [pattern[1 :-1] for pattern in self.sorted_patterns] + real_patterns = [pattern[1:-1] for pattern in self.sorted_patterns] # Cannot match if: self._is_match([['Q','BN','Relu','DQ']],['Q','BN','DQ']]) matched_rule, matched_node_name = self._is_match(real_patterns) return matched_rule, matched_node_name @@ -259,21 +273,21 @@ def get_longest_fuse(self): def apply_the_transform(self): """Apply the BN int8 fusion.""" self._get_op_list() - real_patterns = [pattern[1 :-1] for pattern in self.sorted_patterns] + real_patterns = [pattern[1:-1] for pattern in self.sorted_patterns] # Cannot match if: self._is_match([['Q','BN','Relu','DQ']],['Q','BN','DQ']]) matched_rule, matched_node_name = self._is_match(real_patterns) if matched_node_name: self.output_graph = graph_pb2.GraphDef() - fusion_name = ''.join(matched_rule) + fusion_name = "".join(matched_rule) bn_node = self.node_name_mapping[matched_node_name[0]].node - is_training = bn_node.attr['is_training'].b - if fusion_name in self.fusion_mapping and is_training == False: + is_training = bn_node.attr["is_training"].b + if fusion_name in self.fusion_mapping and is_training is False: self.fusion_mapping[fusion_name](matched_node_name) else: - if is_training == True: - self.logger.info \ - ("Skip quantizing the BN node '{}' due to the attr 'is_training == true'." 
\ - .format(bn_node.name)) + if is_training is True: + self.logger.info( + "Skip quantizing the BN node '{}' due to the attr 'is_training == true'.".format(bn_node.name) + ) self.exclude_bn_nodes.append(bn_node.name) elif self.new_api: self.logger.info("Unknown fusion pattern {} .".format(fusion_name)) diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_concatv2.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_concatv2.py index 7290c9cca61..466de8bbacb 100644 --- a/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_concatv2.py +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_concatv2.py @@ -16,13 +16,16 @@ # limitations under the License. """Quantize ConcatV2 to int8 op.""" -import re import os -from tensorflow.python.framework import dtypes +import re + from tensorflow.core.framework import node_def_pb2 -from ..quantize_graph_base import QuantizeNodeBase +from tensorflow.python.framework import dtypes + from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper as helper +from ..quantize_graph_base import QuantizeNodeBase + class FuseNodeStartWithConcatV2(QuantizeNodeBase): """Quantize ConcatV2 to int8 op QuantizedConcatV2.""" @@ -30,9 +33,7 @@ class FuseNodeStartWithConcatV2(QuantizeNodeBase): def __init__(self, **kwargs): """Initilizaiton.""" super().__init__(**kwargs) - self.sorted_patterns = sorted(self.patterns, - key=lambda i: len(i), - reverse=True) + self.sorted_patterns = sorted(self.patterns, key=lambda i: len(i), reverse=True) self.dtype = dtypes.quint8 self.exclude_concat_nodes = [] @@ -41,7 +42,7 @@ def _get_node_from_name(self, name): if name.startswith("^"): name = name[1:] if re.search(r"\w+:\d+", name): - node = self.node_name_mapping[name.rsplit(':', 1)[0]].node + node = self.node_name_mapping[name.rsplit(":", 1)[0]].node else: node = self.node_name_mapping[name].node return node @@ -52,50 +53,53 @@ def _get_first_input_from_name(self, name): return name node = self._get_node_from_name(name) if len(node.input) == 0: - return '' + return "" return node.input[0] def _quantizable_concat(self, node): """Check if the ConcatV2 is quantizable.""" deq_type = [] is_quantizable = True - if self.performance_only or os.getenv('TF_FORCE_CONCAT_OPTS') == '1': + if self.performance_only or os.getenv("TF_FORCE_CONCAT_OPTS") == "1": _, normal_inputs = self._get_node_input(node.name) - original_inputs = normal_inputs[:node.attr['N'].i] + original_inputs = normal_inputs[: node.attr["N"].i] # the input chain of concatv2 is QuantizedOp -> (req) -> q -> dq -> concat for each_input in original_inputs: dq_input = self._get_first_input_from_name(each_input) q_input = self._get_first_input_from_name(dq_input) pre_input = self._get_first_input_from_name(q_input) - if pre_input == '': + if pre_input == "": continue req_input = self._get_node_from_name(pre_input) - + # the concatv2 with these Ops as inputs can't be reranged - if req_input.op in ['_QuantizedFusedBatchNorm', '_QuantizedFusedInstanceNorm']: + if req_input.op in ["_QuantizedFusedBatchNorm", "_QuantizedFusedInstanceNorm"]: is_quantizable = False break - if req_input.op == 'Requantize' or req_input.op == 'RequantizePerChannel' \ - or req_input.op.startswith('Quantized'): + if ( + req_input.op == "Requantize" + or req_input.op == "RequantizePerChannel" + or req_input.op.startswith("Quantized") + ): is_quantizable = True - if str(self.node_name_mapping[pre_input].node.attr['out_type']) != '': - 
deq_type.append(self.node_name_mapping[pre_input].node.attr['out_type'].type) + if str(self.node_name_mapping[pre_input].node.attr["out_type"]) != "": + deq_type.append(self.node_name_mapping[pre_input].node.attr["out_type"].type) else: - deq_type.append(self.node_name_mapping[pre_input].node.attr['T'].type) + deq_type.append(self.node_name_mapping[pre_input].node.attr["T"].type) else: - for input_node_name in node.input[:node.attr['N'].i]: + for input_node_name in node.input[: node.attr["N"].i]: node_name = helper.node_name_from_input(input_node_name) if self.node_name_mapping[node_name].node.op != "Dequantize": self.exclude_concat_nodes.append(node.name) return False - deq_type.append(self.node_name_mapping[node_name].node.attr['T'].type) + deq_type.append(self.node_name_mapping[node_name].node.attr["T"].type) if len(set(deq_type)) != 1: is_quantizable = False else: - if self.performance_only or os.getenv('TF_FORCE_CONCAT_OPTS') == '1': + if self.performance_only or os.getenv("TF_FORCE_CONCAT_OPTS") == "1": self.dtype = dtypes.DType(deq_type[0]) if not is_quantizable: @@ -111,7 +115,7 @@ def _apply_concatv2_quantization(self, match_node_name): _, normal_inputs = self._get_node_input(matched_node.node.name) num_input = len(normal_inputs) shape_input_name = normal_inputs[num_input - 1] - original_inputs = normal_inputs[0:num_input - 1] + original_inputs = normal_inputs[0 : num_input - 1] input_names = [] min_names = [] @@ -160,27 +164,26 @@ def get_longest_fuse(self, do_transform=False): """Get longest fusion pattern.""" self._get_op_list() matched_node_name = [] - + for k, v in enumerate(self.op_list): if v in set(fusion[1] for fusion in self.sorted_patterns): - cur_node = self.node_name_mapping[list( - self.node_name_mapping.keys())[k]].node + cur_node = self.node_name_mapping[list(self.node_name_mapping.keys())[k]].node if cur_node.name != self.start_node_name: continue - + # possible attributes that decide output data type - output_attr_list = ['out_type', 'T', 'dtype', 'Taxis', 'Tindices', 'Tparams'] + output_attr_list = ["out_type", "T", "dtype", "Taxis", "Tindices", "Tparams"] if not do_transform: _, normal_inputs = self._get_node_input(cur_node.name) - original_inputs = normal_inputs[:cur_node.attr['N'].i] + original_inputs = normal_inputs[: cur_node.attr["N"].i] unsupported_input_type = False for each_input in original_inputs: each_input_name = helper.node_name_from_input(each_input) input_dtype = None for output_attr in output_attr_list: input_dtype_attr = self.node_name_mapping[each_input_name].node.attr[output_attr] - if str(input_dtype_attr) != '': + if str(input_dtype_attr) != "": input_dtype = dtypes.DType(input_dtype_attr.type) break if input_dtype != dtypes.bfloat16 and input_dtype != dtypes.float32: @@ -194,41 +197,41 @@ def get_longest_fuse(self, do_transform=False): continue if v != sub_rule[1]: continue - - if (self.performance_only or os.getenv('TF_FORCE_CONCAT_OPTS') == '1') \ - and not do_transform: + + if (self.performance_only or os.getenv("TF_FORCE_CONCAT_OPTS") == "1") and not do_transform: matched_node_name.clear() matched_node_name.append(cur_node.name) return sub_rule, matched_node_name - + if self._quantizable_concat(cur_node): - if dtypes.as_dtype(cur_node.attr["T"].type) == dtypes.float32 and \ - not re.search(r'map(_\d+)?/while', cur_node.name): + if dtypes.as_dtype(cur_node.attr["T"].type) == dtypes.float32 and not re.search( + r"map(_\d+)?/while", cur_node.name + ): matched_node_name.clear() matched_node_name.append(sub_rule[0]) 
matched_node_name.append(cur_node.name) matched_node_name.append(sub_rule[-1]) return sub_rule, matched_node_name else: - if self.performance_only or os.getenv('TF_FORCE_CONCAT_OPTS') == '1': + if self.performance_only or os.getenv("TF_FORCE_CONCAT_OPTS") == "1": new_inputs = [] control_inputs, normal_inputs = self._get_node_input(cur_node.name) - original_inputs = normal_inputs[:cur_node.attr['N'].i] + original_inputs = normal_inputs[: cur_node.attr["N"].i] for each_input in original_inputs: each_node = self._get_node_from_name(each_input) - if each_node.op == 'Dequantize': + if each_node.op == "Dequantize": q_input = self._get_first_input_from_name(each_input) - if q_input == '': + if q_input == "": continue - if self._get_node_from_name(q_input).op == 'QuantizeV2': + if self._get_node_from_name(q_input).op == "QuantizeV2": pre_input = self._get_first_input_from_name(q_input) new_inputs.append(pre_input) - elif self._get_node_from_name(q_input).op == 'Requantize': + elif self._get_node_from_name(q_input).op == "Requantize": new_inputs.append(each_input) else: new_inputs.append(each_input) new_inputs.append(normal_inputs[-1]) - cur_node.ClearField('input') + cur_node.ClearField("input") cur_node.input.extend(new_inputs + control_inputs) return None, None @@ -238,10 +241,10 @@ def apply_the_transform(self): self._get_op_list() matched_rule, matched_node_name = self.get_longest_fuse(do_transform=True) if matched_node_name: - fusion_name = ''.join(matched_rule) + fusion_name = "".join(matched_rule) if fusion_name == "DequantizeConcatV2QuantizeV2": self._apply_concatv2_quantization(matched_node_name) - else: # pragma: no cover + else: # pragma: no cover self.logger.info("Unknown fusion pattern {}.".format(fusion_name)) if self.remove_redundant_quant_flag: self.input_graph = self.remove_redundant_quantization(self.input_graph) @@ -255,4 +258,4 @@ def apply_the_transform(self): if self.remove_redundant_quant_flag: self.input_graph = self.remove_redundant_quantization(self.input_graph) - return self.input_graph, self.exclude_concat_nodes \ No newline at end of file + return self.input_graph, self.exclude_concat_nodes diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_conv.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_conv.py index ab59a94ace6..baff8c3010b 100644 --- a/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_conv.py +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_conv.py @@ -16,26 +16,25 @@ # limitations under the License. 
"""Quantize Conv2D/Conv3D/DepthwiseConv2dNative.""" +import numpy as np import tensorflow as tf -from tensorflow.core.framework import graph_pb2 -from tensorflow.core.framework import node_def_pb2 -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import tensor_util +from tensorflow.core.framework import graph_pb2, node_def_pb2 +from tensorflow.python.framework import dtypes, tensor_util from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper as helper + from ..quantize_graph_base import QuantizeNodeBase -import numpy as np + class FuseNodeStartWithConv2d(QuantizeNodeBase): """Quantize Conv2D/Conv3D/DepthwiseConv2dNative to int8 op.""" + exclude_conv_nodes = [] def __init__(self, **kwargs): """Initilization.""" super().__init__(**kwargs) - self.sorted_patterns = sorted(self.patterns, - key=lambda i: len(i), - reverse=True) + self.sorted_patterns = sorted(self.patterns, key=lambda i: len(i), reverse=True) if self.new_api: # fmt: off self.fusion_mapping = { @@ -79,13 +78,13 @@ def __init__(self, **kwargs): 'DequantizeConv2DAddswish_f32QuantizeV2': self.apply_newly_conv_biasadd_swishf32_fusion, 'DequantizeConv2DAddV2swish_f32QuantizeV2': self.apply_newly_conv_biasadd_swishf32_fusion, 'DequantizeConv2Dswish_f32QuantizeV2': self.apply_newly_conv_biasadd_swishf32_fusion, - 'DequantizeDepthwiseConv2dNativeBiasAddAddRelu6MulMulQuantizeV2': + 'DequantizeDepthwiseConv2dNativeBiasAddAddRelu6MulMulQuantizeV2': self.apply_conv_biasadd_hardswish_fusion, 'DequantizeDepthwiseConv2dNativeAddRelu6MulMulQuantizeV2': self.apply_conv_biasadd_hardswish_fusion, - 'DequantizeDepthwiseConv2dNativeBiasAddswish_f32QuantizeV2': + 'DequantizeDepthwiseConv2dNativeBiasAddswish_f32QuantizeV2': self.apply_newly_conv_biasadd_swishf32_fusion, 'DequantizeDepthwiseConv2dNativeAddswish_f32QuantizeV2': self.apply_newly_conv_biasadd_swishf32_fusion, - 'DequantizeDepthwiseConv2dNativeAddV2swish_f32QuantizeV2': + 'DequantizeDepthwiseConv2dNativeAddV2swish_f32QuantizeV2': self.apply_newly_conv_biasadd_swishf32_fusion, 'DequantizeDepthwiseConv2dNativeswish_f32QuantizeV2': self.apply_newly_conv_biasadd_swishf32_fusion, 'DequantizeDepthwiseConv2dNativeAddRelu6QuantizeV2': self.apply_newly_conv_biasadd_relu_fusion, @@ -93,7 +92,7 @@ def __init__(self, **kwargs): 'DequantizeDepthwiseConv2dNativeReluQuantizeV2': self.apply_newly_conv_biasadd_relu_fusion, 'DequantizeDepthwiseConv2dNativeRelu6QuantizeV2': self.apply_newly_conv_biasadd_relu_fusion, 'DequantizeDepthwiseConv2dNativeBiasAddQuantizeV2': self.apply_newly_conv_biasadd_fusion, - 'DequantizeDepthwiseConv2dNativeBiasAddLeakyReluQuantizeV2': + 'DequantizeDepthwiseConv2dNativeBiasAddLeakyReluQuantizeV2': self.apply_newly_conv_biasadd_relu_fusion, 'DequantizeDepthwiseConv2dNativeLeakyReluQuantizeV2': self.apply_newly_conv_biasadd_relu_fusion, 'DequantizeDepthwiseConv2dNativeBiasAddRelu6QuantizeV2': self.apply_newly_conv_biasadd_relu_fusion, @@ -144,42 +143,48 @@ def _insert_dummy_biasadd(self, match_node_name, matched_node): op_b_node_name = weights_name[0] op_b_node = self.node_name_mapping[op_b_node_name].node - if op_a_node.op == 'Const' and op_b_node.op != 'Const': + if op_a_node.op == "Const" and op_b_node.op != "Const": pass else: from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer + g = GraphAnalyzer() g.graph = self.input_graph graph_info = g.parse_graph() next_node_names = graph_info[matched_node.node.name].outputs - bias_node_name = target_node_name + '_dummy_biasadd' - bias_const_node_name = 
target_node_name + '_fake_const' + bias_node_name = target_node_name + "_dummy_biasadd" + bias_const_node_name = target_node_name + "_fake_const" - if matched_node.node.op in ('Conv2D' or 'DepthwiseConv2dNative') and \ - matched_node.node.attr['data_format'].s == b'NHWC': + if ( + matched_node.node.op in ("Conv2D" or "DepthwiseConv2dNative") + and matched_node.node.attr["data_format"].s == b"NHWC" + ): t_b_index = 3 - elif matched_node.node.op in ('Conv2D' or 'DepthwiseConv2dNative') and \ - matched_node.node.op.attr['data_format'].s == b'NCHW': + elif ( + matched_node.node.op in ("Conv2D" or "DepthwiseConv2dNative") + and matched_node.node.op.attr["data_format"].s == b"NCHW" + ): t_b_index = 1 - elif matched_node.node.op == 'Conv3D' and matched_node.node.attr['data_format'].s == b'NDHWC': + elif matched_node.node.op == "Conv3D" and matched_node.node.attr["data_format"].s == b"NDHWC": t_b_index = 4 - elif matched_node.node.op == 'Conv3D' and matched_node.node.attr['data_format'].s == b'NCDHW': + elif matched_node.node.op == "Conv3D" and matched_node.node.attr["data_format"].s == b"NCDHW": t_b_index = 1 - bias_add_length = op_b_node.attr['value'].tensor.tensor_shape.dim[t_b_index].size + bias_add_length = op_b_node.attr["value"].tensor.tensor_shape.dim[t_b_index].size - bias_add_content = [0.] * bias_add_length + bias_add_content = [0.0] * bias_add_length bias_const_node = helper.create_constant_node( - bias_const_node_name, bias_add_content, dtypes.float32, shape=[bias_add_length]) - bias_node = helper.create_node('BiasAdd', bias_node_name, [target_node_name, bias_const_node_name]) + bias_const_node_name, bias_add_content, dtypes.float32, shape=[bias_add_length] + ) + bias_node = helper.create_node("BiasAdd", bias_node_name, [target_node_name, bias_const_node_name]) helper.set_attr_dtype(bias_node, "T", dtypes.float32) g.add_node(bias_node, target_node_name, next_node_names) g.add_node(bias_const_node, None, [bias_node_name]) self.input_graph = g.dump_graph() self._parse_graph(self.input_graph) - new_match_node_name=match_node_name[:2]+[bias_node_name]+match_node_name[2:] - new_match_node_name=match_node_name[:2]+[bias_node_name]+match_node_name[2:] + new_match_node_name = match_node_name[:2] + [bias_node_name] + match_node_name[2:] + new_match_node_name = match_node_name[:2] + [bias_node_name] + match_node_name[2:] return new_match_node_name @@ -198,15 +203,14 @@ def apply_conv3d_add_addn_relu_fusion(self, match_node_name): add_b_node_name = helper.node_name_from_input(second_node.input[1]) add_b_node = self.node_name_mapping[add_b_node_name].node - if add_a_node.op != 'Const' and add_b_node.op == 'Const': - need_insert_dummy_biasadd = 0 + if add_a_node.op != "Const" and add_b_node.op == "Const": + need_insert_dummy_biasadd = 0 if need_insert_dummy_biasadd: - new_match_node_name = self._insert_dummy_biasadd(match_node_name, matched_node) - #after insert dummy biasadd, that is Conv3D+dummybiasadd+add*+add*+relu* - return self.apply_conv3d_add_addn_fusion(new_match_node_name[:4]+[new_match_node_name[-1]]) + new_match_node_name = self._insert_dummy_biasadd(match_node_name, matched_node) + # after insert dummy biasadd, that is Conv3D+dummybiasadd+add*+add*+relu* + return self.apply_conv3d_add_addn_fusion(new_match_node_name[:4] + [new_match_node_name[-1]]) - control_inputs, normal_inputs = self._get_node_input( - matched_node.node.name) + control_inputs, normal_inputs = self._get_node_input(matched_node.node.name) _, q_inputs = self._get_node_input(normal_inputs[0]) _, q_weights_inputs = 
self._get_node_input(normal_inputs[1]) quantizev2_weights_name = q_weights_inputs[0] @@ -221,15 +225,15 @@ def apply_conv3d_add_addn_relu_fusion(self, match_node_name): sumadd_b_node_name = helper.node_name_from_input(third_node.input[1]) sumadd_b_node = self.node_name_mapping[sumadd_b_node_name].node - if sumadd_a_node.op != 'Const' and sumadd_b_node.op == 'Const': - return self.apply_conv3d_add_fusion(match_node_name[:3]+[match_node_name[-1]]) + if sumadd_a_node.op != "Const" and sumadd_b_node.op == "Const": + return self.apply_conv3d_add_fusion(match_node_name[:3] + [match_node_name[-1]]) forth_node = self.node_name_mapping[match_node_name[4]].node - if third_node.op != 'LeakyRelu' and not self._find_relu_node(matched_node.node): - return self.apply_conv3d_add_fusion(match_node_name[:3]+[match_node_name[-1]]) + if third_node.op != "LeakyRelu" and not self._find_relu_node(matched_node.node): + return self.apply_conv3d_add_fusion(match_node_name[:3] + [match_node_name[-1]]) - is_leakyrelu_add_fusion = third_node.op == 'LeakyRelu' and forth_node.op.find('Add') != -1 - is_relu_add_fusion = third_node.op == 'Relu' and forth_node.op.find('Add') != -1 + is_leakyrelu_add_fusion = third_node.op == "LeakyRelu" and forth_node.op.find("Add") != -1 + is_relu_add_fusion = third_node.op == "Relu" and forth_node.op.find("Add") != -1 relu_offset = 0 if is_leakyrelu_add_fusion or is_relu_add_fusion: @@ -238,30 +242,34 @@ def apply_conv3d_add_addn_relu_fusion(self, match_node_name): relu_node_name = match_node_name[3] else: relu_node_name = match_node_name[4] - sum_index = 1 if match_node_name[2 + relu_offset] == self.node_name_mapping[ - match_node_name[3 + relu_offset]].node.input[0] else 0 + sum_index = ( + 1 + if match_node_name[2 + relu_offset] + == self.node_name_mapping[match_node_name[3 + relu_offset]].node.input[0] + else 0 + ) sum_node_name = self.node_name_mapping[match_node_name[3 + relu_offset]].node.input[sum_index] deq_node = self.node_name_mapping[sum_node_name].node - if deq_node.op != 'Dequantize' or deq_node.op.find("Quantize") != -1: - return self.apply_conv3d_add_fusion(match_node_name[:3]+[match_node_name[-1]]) + if deq_node.op != "Dequantize" or deq_node.op.find("Quantize") != -1: + return self.apply_conv3d_add_fusion(match_node_name[:3] + [match_node_name[-1]]) add_node = self.node_name_mapping[match_node_name[2]].node original_add_input = self.node_name_mapping[add_node.input[1]].node - if original_add_input.op == 'Const': + if original_add_input.op == "Const": shape = tensor_util.MakeNdarray(original_add_input.attr["value"].tensor) - if shape.ndim > 1 and shape.shape[:-1] == (1,1,1,1): + if shape.ndim > 1 and shape.shape[:-1] == (1, 1, 1, 1): squeezed_value = np.squeeze(shape) - squeezed_node = helper.create_constant_node(match_node_name[1] +'_squeezed', \ - squeezed_value, dtypes.float32) + squeezed_node = helper.create_constant_node( + match_node_name[1] + "_squeezed", squeezed_value, dtypes.float32 + ) skip_node_name.append(add_node.input[1]) add_node.input[1] = squeezed_node.name self.add_output_graph_node(squeezed_node) - q_weights_name, q_weights_min_name, q_weights_max_name = \ - self._intel_cpu_quantize_weight_eightbit(matched_node.node.op, - self.node_name_mapping[weights_name[0]].node, - self.per_channel) + q_weights_name, q_weights_min_name, q_weights_max_name = self._intel_cpu_quantize_weight_eightbit( + matched_node.node.op, self.node_name_mapping[weights_name[0]].node, self.per_channel + ) all_input_names = q_inputs[:1] + [q_weights_name] + q_inputs[1:] 
all_input_names.append(q_weights_min_name) @@ -282,82 +290,91 @@ def apply_conv3d_add_addn_relu_fusion(self, match_node_name): bias_node_name = self.node_name_mapping[match_node_name[2]].node.input[1] is_relu6 = self.node_name_mapping[relu_node_name].node.op == "Relu6" - quantized_node_input_names = all_input_names[:2] + [ - bias_node_name - ] + all_input_names[2:] + [ - sum_node_name - ] + control_inputs - - if sum_node_name.find('mul') != -1: - quantized_node_input_names = all_input_names[:2] + [ - bias_node_name - ] + [ - self.node_name_mapping[ - match_node_name[3 + relu_offset]].node.input[sum_index] - ] + all_input_names[2:] + control_inputs + quantized_node_input_names = ( + all_input_names[:2] + [bias_node_name] + all_input_names[2:] + [sum_node_name] + control_inputs + ) + + if sum_node_name.find("mul") != -1: + quantized_node_input_names = ( + all_input_names[:2] + + [bias_node_name] + + [self.node_name_mapping[match_node_name[3 + relu_offset]].node.input[sum_index]] + + all_input_names[2:] + + control_inputs + ) node_op = "_FusedQuantizedConv3D" - quantized_conv_node = helper.create_node(node_op, quantized_node_name, - quantized_node_input_names) + quantized_conv_node = helper.create_node(node_op, quantized_node_name, quantized_node_input_names) helper.copy_attr(quantized_conv_node, "strides", node.attr["strides"]) helper.copy_attr(quantized_conv_node, "padding", node.attr["padding"]) helper.copy_attr(quantized_conv_node, "data_format", node.attr["data_format"]) if "explicit_paddings" in node.attr: - helper.copy_attr(quantized_conv_node, "explicit_paddings", - node.attr["explicit_paddings"]) + helper.copy_attr(quantized_conv_node, "explicit_paddings", node.attr["explicit_paddings"]) helper.copy_attr(quantized_conv_node, "dilations", node.attr["dilations"]) - input_data_type = dtypes.quint8 if self._find_relu_node( - node) else dtypes.qint8 + input_data_type = dtypes.quint8 if self._find_relu_node(node) else dtypes.qint8 helper.set_attr_dtype(quantized_conv_node, "Tinput", input_data_type) helper.set_attr_dtype(quantized_conv_node, "Tfilter", dtypes.qint8) helper.set_attr_dtype(quantized_conv_node, "out_type", dtypes.qint32) if "alpha" in self.node_name_mapping[relu_node_name].node.attr: - helper.copy_attr(quantized_conv_node, "alpha", - self.node_name_mapping[relu_node_name].node.attr["alpha"]) - helper.set_attr_string_list(quantized_conv_node, 'fused_ops', [b'BiasAdd', b'Sum', b'Relu']) + helper.copy_attr( + quantized_conv_node, "alpha", self.node_name_mapping[relu_node_name].node.attr["alpha"] + ) + helper.set_attr_string_list(quantized_conv_node, "fused_ops", [b"BiasAdd", b"Sum", b"Relu"]) helper.set_attr_dtype(quantized_conv_node, "Tbias", dtypes.float32) - #if self.device == 'gpu' else dtypes.qint32) + # if self.device == 'gpu' else dtypes.qint32) helper.set_attr_dtype(quantized_conv_node, "Tsummand", dtypes.qint32) if is_leakyrelu_add_fusion: - helper.set_attr_string_list(quantized_conv_node, 'fused_ops', [b'BiasAdd', b'LeakyRelu', b'Sum']) + helper.set_attr_string_list(quantized_conv_node, "fused_ops", [b"BiasAdd", b"LeakyRelu", b"Sum"]) elif is_relu_add_fusion: - helper.set_attr_string_list(quantized_conv_node, 'fused_ops', [b'BiasAdd', b'Relu', b'Sum']) - - helper.set_attr_type_list(quantized_conv_node, 'Thost_inputs', [ - input_data_type.as_datatype_enum, - dtypes.qint8.as_datatype_enum, - dtypes.float32.as_datatype_enum,# both cpu and gpu use float32 in New API - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - 
dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) - helper.set_attr_type_list(quantized_conv_node, 'Thost_outputs', [ - dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, ]) + helper.set_attr_string_list(quantized_conv_node, "fused_ops", [b"BiasAdd", b"Relu", b"Sum"]) + + helper.set_attr_type_list( + quantized_conv_node, + "Thost_inputs", + [ + input_data_type.as_datatype_enum, + dtypes.qint8.as_datatype_enum, + dtypes.float32.as_datatype_enum, # both cpu and gpu use float32 in New API + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + helper.set_attr_type_list( + quantized_conv_node, + "Thost_outputs", + [ + dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) self.add_output_graph_node(quantized_conv_node) if is_leakyrelu_add_fusion: - quantize_down_name = self._add_quantize_down_nodes( - node, quantized_node_name, dtypes.qint8, False) + quantize_down_name = self._add_quantize_down_nodes(node, quantized_node_name, dtypes.qint8, False) self._intel_cpu_add_dequantize_result_node( - quantize_down_name, match_node_name[4], dtype=dtypes.qint8, - performance_only=self.performance_only) + quantize_down_name, + match_node_name[4], + dtype=dtypes.qint8, + performance_only=self.performance_only, + ) else: dtype = dtypes.quint8 - if [i for i in self.node_name_mapping[relu_node_name].output \ - if 'FusedBatchNorm' in self.node_name_mapping[i].node.op and \ - i in self.op_wise_config_name_list]: - dtype = dtypes.qint8 - quantize_down_name = self._add_quantize_down_nodes( - node, quantized_node_name, dtype, is_relu6) + if [ + i + for i in self.node_name_mapping[relu_node_name].output + if "FusedBatchNorm" in self.node_name_mapping[i].node.op and i in self.op_wise_config_name_list + ]: + dtype = dtypes.qint8 + quantize_down_name = self._add_quantize_down_nodes(node, quantized_node_name, dtype, is_relu6) self._intel_cpu_add_dequantize_result_node( - quantize_down_name, relu_node_name, dtype, - performance_only=self.performance_only) + quantize_down_name, relu_node_name, dtype, performance_only=self.performance_only + ) else: new_node = node_def_pb2.NodeDef() @@ -381,12 +398,12 @@ def apply_conv3d_add_addn_fusion(self, match_node_name): add_b_node_name = helper.node_name_from_input(second_node.input[1]) add_b_node = self.node_name_mapping[add_b_node_name].node - if add_a_node.op != 'Const' and add_b_node.op == 'Const': - need_insert_dummy_biasadd = 0 + if add_a_node.op != "Const" and add_b_node.op == "Const": + need_insert_dummy_biasadd = 0 if need_insert_dummy_biasadd: - new_match_node_name = self._insert_dummy_biasadd(match_node_name, matched_node) - #after insert dummy biasadd, that is Conv+dummybiasadd+add*+add* - return self.apply_conv3d_add_addn_fusion(new_match_node_name[:4]+[new_match_node_name[-1]]) + new_match_node_name = self._insert_dummy_biasadd(match_node_name, matched_node) + # after insert dummy biasadd, that is Conv+dummybiasadd+add*+add* + return self.apply_conv3d_add_addn_fusion(new_match_node_name[:4] + [new_match_node_name[-1]]) control_inputs, normal_inputs = self._get_node_input(matched_node.node.name) _, q_inputs = self._get_node_input(normal_inputs[0]) @@ -402,32 +419,31 @@ def apply_conv3d_add_addn_fusion(self, match_node_name): sumadd_a_node = self.node_name_mapping[sumadd_a_node_name].node sumadd_b_node_name = 
helper.node_name_from_input(third_node.input[1]) sumadd_b_node = self.node_name_mapping[sumadd_b_node_name].node - if sumadd_a_node.op != 'Const' and sumadd_b_node.op == 'Const': - return self.apply_conv3d_add_fusion(match_node_name[:3]+[match_node_name[-1]]) + if sumadd_a_node.op != "Const" and sumadd_b_node.op == "Const": + return self.apply_conv3d_add_fusion(match_node_name[:3] + [match_node_name[-1]]) - sum_index = 1 if match_node_name[2] == self.node_name_mapping[ - match_node_name[3]].node.input[0] else 0 + sum_index = 1 if match_node_name[2] == self.node_name_mapping[match_node_name[3]].node.input[0] else 0 sum_node_name = self.node_name_mapping[match_node_name[3]].node.input[sum_index] deq_node = self.node_name_mapping[sum_node_name].node - if deq_node.op != 'Dequantize' or deq_node.op.find("Quantize") != -1: - return self.apply_conv3d_add_fusion(match_node_name[:3]+[match_node_name[-1]]) + if deq_node.op != "Dequantize" or deq_node.op.find("Quantize") != -1: + return self.apply_conv3d_add_fusion(match_node_name[:3] + [match_node_name[-1]]) add_node = self.node_name_mapping[match_node_name[2]].node original_add_input = self.node_name_mapping[add_node.input[1]].node - if original_add_input.op == 'Const': + if original_add_input.op == "Const": shape = tensor_util.MakeNdarray(original_add_input.attr["value"].tensor) - if shape.ndim > 1 and shape.shape[:-1] == (1,1,1,1): + if shape.ndim > 1 and shape.shape[:-1] == (1, 1, 1, 1): squeezed_value = np.squeeze(shape) - squeezed_node = helper.create_constant_node(match_node_name[2] +'_squeezed', \ - squeezed_value, dtypes.float32) + squeezed_node = helper.create_constant_node( + match_node_name[2] + "_squeezed", squeezed_value, dtypes.float32 + ) skip_node_name.append(add_node.input[1]) add_node.input[1] = squeezed_node.name self.add_output_graph_node(squeezed_node) - q_weights_name, q_weights_min_name, q_weights_max_name = \ - self._intel_cpu_quantize_weight_eightbit(matched_node.node.op, - self.node_name_mapping[weights_name[0]].node, - self.per_channel) + q_weights_name, q_weights_min_name, q_weights_max_name = self._intel_cpu_quantize_weight_eightbit( + matched_node.node.op, self.node_name_mapping[weights_name[0]].node, self.per_channel + ) all_input_names = q_inputs[:1] + [q_weights_name] + q_inputs[1:] all_input_names.append(q_weights_min_name) @@ -438,7 +454,7 @@ def apply_conv3d_add_addn_fusion(self, match_node_name): skip_node_name.append(weights_min_name) skip_node_name.append(weights_max_name) skip_node_name.append(quantizev2_weights_name) - + for _, node in enumerate(self.input_graph.node): if node.name in skip_node_name: self.logger.debug("skip node {}".format(node.name)) @@ -447,54 +463,56 @@ def apply_conv3d_add_addn_fusion(self, match_node_name): quantized_node_name = node.name + "_eightbit_quantized_conv" bias_node_name = self.node_name_mapping[match_node_name[2]].node.input[1] - - quantized_node_input_names = all_input_names[:2] + [ - bias_node_name - ] + all_input_names[2:] + [ - sum_node_name - ] + control_inputs + quantized_node_input_names = ( + all_input_names[:2] + [bias_node_name] + all_input_names[2:] + [sum_node_name] + control_inputs + ) node_op = "_FusedQuantizedConv3D" - quantized_conv_node = helper.create_node(node_op, quantized_node_name, - quantized_node_input_names) + quantized_conv_node = helper.create_node(node_op, quantized_node_name, quantized_node_input_names) helper.copy_attr(quantized_conv_node, "strides", node.attr["strides"]) helper.copy_attr(quantized_conv_node, "padding", node.attr["padding"]) 
helper.copy_attr(quantized_conv_node, "data_format", node.attr["data_format"]) if "explicit_paddings" in node.attr: - helper.copy_attr(quantized_conv_node, "explicit_paddings", - node.attr["explicit_paddings"]) + helper.copy_attr(quantized_conv_node, "explicit_paddings", node.attr["explicit_paddings"]) helper.copy_attr(quantized_conv_node, "dilations", node.attr["dilations"]) - input_data_type = dtypes.quint8 if self._find_relu_node( - node) else dtypes.qint8 + input_data_type = dtypes.quint8 if self._find_relu_node(node) else dtypes.qint8 helper.set_attr_dtype(quantized_conv_node, "Tinput", input_data_type) helper.set_attr_dtype(quantized_conv_node, "Tfilter", dtypes.qint8) helper.set_attr_dtype(quantized_conv_node, "out_type", dtypes.qint32) - helper.set_attr_string_list(quantized_conv_node, 'fused_ops', [b'BiasAdd', b'Sum']) + helper.set_attr_string_list(quantized_conv_node, "fused_ops", [b"BiasAdd", b"Sum"]) helper.set_attr_dtype(quantized_conv_node, "Tbias", dtypes.float32) - # if self.device == 'gpu' else dtypes.qint32) + # if self.device == 'gpu' else dtypes.qint32) helper.set_attr_dtype(quantized_conv_node, "Tsummand", dtypes.qint32) - helper.set_attr_type_list(quantized_conv_node, 'Thost_inputs', [ - input_data_type.as_datatype_enum, - dtypes.qint8.as_datatype_enum, - dtypes.float32.as_datatype_enum,# if self.device == 'gpu' else dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) - helper.set_attr_type_list(quantized_conv_node, 'Thost_outputs', [ - dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, ]) + helper.set_attr_type_list( + quantized_conv_node, + "Thost_inputs", + [ + input_data_type.as_datatype_enum, + dtypes.qint8.as_datatype_enum, + dtypes.float32.as_datatype_enum, # if self.device == 'gpu' else dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + helper.set_attr_type_list( + quantized_conv_node, + "Thost_outputs", + [ + dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) self.add_output_graph_node(quantized_conv_node) - quantize_down_name = self._add_quantize_down_nodes( - node, quantized_node_name, dtypes.qint8, False) + quantize_down_name = self._add_quantize_down_nodes(node, quantized_node_name, dtypes.qint8, False) self._intel_cpu_add_dequantize_result_node( - quantize_down_name, match_node_name[3], dtype=dtypes.qint8, - performance_only=self.performance_only) + quantize_down_name, match_node_name[3], dtype=dtypes.qint8, performance_only=self.performance_only + ) else: new_node = node_def_pb2.NodeDef() new_node.CopyFrom(node) @@ -510,9 +528,9 @@ def apply_conv3d_add_relu_fusion(self, match_node_name): matched_node = self.node_name_mapping[match_node_name[1]] second_node = self.node_name_mapping[match_node_name[2]].node - if second_node.op in ('Relu', 'Relu6', 'LeakyRelu', 'Elu'): - new_match_node_name = self._insert_dummy_biasadd(match_node_name, matched_node) - return self.apply_conv3d_add_relu_fusion(new_match_node_name) + if second_node.op in ("Relu", "Relu6", "LeakyRelu", "Elu"): + new_match_node_name = self._insert_dummy_biasadd(match_node_name, matched_node) + return self.apply_conv3d_add_relu_fusion(new_match_node_name) 
need_insert_dummy_biasadd = 1 add_a_node_name = helper.node_name_from_input(second_node.input[0]) @@ -520,12 +538,12 @@ def apply_conv3d_add_relu_fusion(self, match_node_name): add_b_node_name = helper.node_name_from_input(second_node.input[1]) add_b_node = self.node_name_mapping[add_b_node_name].node - if add_a_node.op != 'Const' and add_b_node.op == 'Const': - need_insert_dummy_biasadd = 0 + if add_a_node.op != "Const" and add_b_node.op == "Const": + need_insert_dummy_biasadd = 0 if need_insert_dummy_biasadd: - new_match_node_name = self._insert_dummy_biasadd(match_node_name, matched_node) - #after insert dummy biasadd, that is Conv+dummybiasadd+add*+relu* - return self.apply_conv3d_add_addn_relu_fusion(new_match_node_name) + new_match_node_name = self._insert_dummy_biasadd(match_node_name, matched_node) + # after insert dummy biasadd, that is Conv+dummybiasadd+add*+relu* + return self.apply_conv3d_add_addn_relu_fusion(new_match_node_name) control_inputs, normal_inputs = self._get_node_input(matched_node.node.name) _, q_inputs = self._get_node_input(normal_inputs[0]) @@ -542,20 +560,20 @@ def apply_conv3d_add_relu_fusion(self, match_node_name): add_node = self.node_name_mapping[match_node_name[2]].node original_add_input = self.node_name_mapping[add_node.input[1]].node - if original_add_input.op == 'Const': + if original_add_input.op == "Const": shape = tensor_util.MakeNdarray(original_add_input.attr["value"].tensor) - if shape.ndim > 1 and shape.shape[:-1] == (1,1,1,1): + if shape.ndim > 1 and shape.shape[:-1] == (1, 1, 1, 1): squeezed_value = np.squeeze(shape) - squeezed_node = helper.create_constant_node(match_node_name[1] +'_squeezed', \ - squeezed_value, dtypes.float32) + squeezed_node = helper.create_constant_node( + match_node_name[1] + "_squeezed", squeezed_value, dtypes.float32 + ) skip_node_name.append(add_node.input[1]) add_node.input[1] = squeezed_node.name self.add_output_graph_node(squeezed_node) - q_weights_name, q_weights_min_name, q_weights_max_name = \ - self._intel_cpu_quantize_weight_eightbit(matched_node.node.op, - self.node_name_mapping[weights_name[0]].node, - self.per_channel) + q_weights_name, q_weights_min_name, q_weights_max_name = self._intel_cpu_quantize_weight_eightbit( + matched_node.node.op, self.node_name_mapping[weights_name[0]].node, self.per_channel + ) all_input_names = q_inputs[:1] + [q_weights_name] + q_inputs[1:] all_input_names.append(q_weights_min_name) @@ -580,73 +598,82 @@ def apply_conv3d_add_relu_fusion(self, match_node_name): bias_node_name = self.node_name_mapping[match_node_name[2]].node.input[1] relu_node_name = match_node_name[3] is_relu6 = self.node_name_mapping[relu_node_name].node.op == "Relu6" - quantized_node_input_names = all_input_names[:2] + \ - [bias_node_name] + all_input_names[2:] + control_inputs + quantized_node_input_names = ( + all_input_names[:2] + [bias_node_name] + all_input_names[2:] + control_inputs + ) is_leakyrelu = self.node_name_mapping[relu_node_name].node.op == "LeakyRelu" quantized_conv_node = helper.create_node( - "_FusedQuantizedConv3D", - quantized_node_name, - quantized_node_input_names) + "_FusedQuantizedConv3D", quantized_node_name, quantized_node_input_names + ) helper.copy_attr(quantized_conv_node, "strides", node.attr["strides"]) helper.copy_attr(quantized_conv_node, "padding", node.attr["padding"]) helper.copy_attr(quantized_conv_node, "data_format", node.attr["data_format"]) if "alpha" in self.node_name_mapping[relu_node_name].node.attr: - helper.copy_attr(quantized_conv_node, "alpha", - 
self.node_name_mapping[relu_node_name].node.attr["alpha"]) - if node.op != 'DepthwiseConv3dNative' and "explicit_paddings" in node.attr: - helper.copy_attr(quantized_conv_node, "explicit_paddings", - node.attr["explicit_paddings"]) + helper.copy_attr( + quantized_conv_node, "alpha", self.node_name_mapping[relu_node_name].node.attr["alpha"] + ) + if node.op != "DepthwiseConv3dNative" and "explicit_paddings" in node.attr: + helper.copy_attr(quantized_conv_node, "explicit_paddings", node.attr["explicit_paddings"]) helper.copy_attr(quantized_conv_node, "dilations", node.attr["dilations"]) input_data_type = dtypes.quint8 if self._find_relu_node(node) else dtypes.qint8 helper.set_attr_dtype(quantized_conv_node, "Tinput", input_data_type) - helper.set_attr_dtype(quantized_conv_node, "Tfilter",dtypes.qint8) + helper.set_attr_dtype(quantized_conv_node, "Tfilter", dtypes.qint8) helper.set_attr_dtype(quantized_conv_node, "Tsummand", dtypes.qint32) # helper.set_attr_string(quantized_conv_node, '_kernel', b'QuantizedMklOp') helper.set_attr_dtype(quantized_conv_node, "out_type", dtypes.qint32) # helper.set_attr_dtype(quantized_conv_node, "alpha", dtypes.quint8) helper.set_attr_dtype(quantized_conv_node, "Tbias", dtypes.float32) - # if self.device == 'gpu' else dtypes.qint32) + # if self.device == 'gpu' else dtypes.qint32) if self.node_name_mapping[relu_node_name].node.op == "LeakyRelu": - helper.set_attr_string_list(quantized_conv_node, 'fused_ops', [b'BiasAdd', b'LeakyRelu']) + helper.set_attr_string_list(quantized_conv_node, "fused_ops", [b"BiasAdd", b"LeakyRelu"]) elif self.node_name_mapping[relu_node_name].node.op == "Elu": - helper.set_attr_string_list(quantized_conv_node, 'fused_ops', [b'BiasAdd', b'Elu']) + helper.set_attr_string_list(quantized_conv_node, "fused_ops", [b"BiasAdd", b"Elu"]) else: - helper.set_attr_string_list(quantized_conv_node, 'fused_ops', [b'BiasAdd', b'Relu']) - helper.set_attr_type_list(quantized_conv_node, 'Thost_inputs', [ - input_data_type.as_datatype_enum, - dtypes.qint8.as_datatype_enum, - dtypes.float32.as_datatype_enum,# if self.device == 'gpu' else dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) - helper.set_attr_type_list(quantized_conv_node, 'Thost_outputs', [ - dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, ]) + helper.set_attr_string_list(quantized_conv_node, "fused_ops", [b"BiasAdd", b"Relu"]) + helper.set_attr_type_list( + quantized_conv_node, + "Thost_inputs", + [ + input_data_type.as_datatype_enum, + dtypes.qint8.as_datatype_enum, + dtypes.float32.as_datatype_enum, # if self.device == 'gpu' else dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + helper.set_attr_type_list( + quantized_conv_node, + "Thost_outputs", + [ + dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) self.add_output_graph_node(quantized_conv_node) if not is_leakyrelu: dtype = dtypes.quint8 - if [i for i in self.node_name_mapping[relu_node_name].output \ - if 'FusedBatchNorm' in self.node_name_mapping[i].node.op and \ - i in self.op_wise_config_name_list]: - dtype = dtypes.qint8 - quantize_down_name = self._add_quantize_down_nodes( - node, quantized_node_name, dtype, is_relu6) + if [ + i + for i in 
self.node_name_mapping[relu_node_name].output + if "FusedBatchNorm" in self.node_name_mapping[i].node.op and i in self.op_wise_config_name_list + ]: + dtype = dtypes.qint8 + quantize_down_name = self._add_quantize_down_nodes(node, quantized_node_name, dtype, is_relu6) self._intel_cpu_add_dequantize_result_node( - quantize_down_name, relu_node_name, dtype, - performance_only=self.performance_only) + quantize_down_name, relu_node_name, dtype, performance_only=self.performance_only + ) else: - quantize_down_name = self._add_quantize_down_nodes( - node, quantized_node_name, dtypes.qint8, False) + quantize_down_name = self._add_quantize_down_nodes(node, quantized_node_name, dtypes.qint8, False) self._intel_cpu_add_dequantize_result_node( - quantize_down_name, relu_node_name, dtype=dtypes.qint8, - performance_only=self.performance_only) + quantize_down_name, relu_node_name, dtype=dtypes.qint8, performance_only=self.performance_only + ) else: new_node = node_def_pb2.NodeDef() @@ -669,11 +696,11 @@ def apply_conv3d_add_fusion(self, match_node_name): add_a_node = self.node_name_mapping[add_a_node_name].node add_b_node_name = helper.node_name_from_input(second_node.input[1]) add_b_node = self.node_name_mapping[add_b_node_name].node - if add_a_node.op != 'Const' and add_b_node.op == 'Const': - need_insert_dummy_biasadd = 0 + if add_a_node.op != "Const" and add_b_node.op == "Const": + need_insert_dummy_biasadd = 0 if need_insert_dummy_biasadd: - new_match_node_name = self._insert_dummy_biasadd(match_node_name, matched_node) - return self.apply_conv3d_add_addn_fusion(new_match_node_name) + new_match_node_name = self._insert_dummy_biasadd(match_node_name, matched_node) + return self.apply_conv3d_add_addn_fusion(new_match_node_name) _, normal_inputs = self._get_node_input(matched_node.node.name) _, q_inputs = self._get_node_input(normal_inputs[0]) @@ -686,21 +713,20 @@ def apply_conv3d_add_fusion(self, match_node_name): add_node = self.node_name_mapping[match_node_name[2]].node original_add_input = self.node_name_mapping[add_node.input[1]].node - if original_add_input.op == 'Const': + if original_add_input.op == "Const": shape = tensor_util.MakeNdarray(original_add_input.attr["value"].tensor) - if shape.ndim > 1 and shape.shape[:-1] == (1,1,1,1): + if shape.ndim > 1 and shape.shape[:-1] == (1, 1, 1, 1): squeezed_value = np.squeeze(shape) - squeezed_node = helper.create_constant_node(match_node_name[2] +'_squeezed', \ - squeezed_value, dtypes.float32) + squeezed_node = helper.create_constant_node( + match_node_name[2] + "_squeezed", squeezed_value, dtypes.float32 + ) skip_node_name.append(add_node.input[1]) add_node.input[1] = squeezed_node.name self.add_output_graph_node(squeezed_node) - - q_weights_name, q_weights_min_name, q_weights_max_name = \ - self._intel_cpu_quantize_weight_eightbit(matched_node.node.op, - self.node_name_mapping[weights_name[0]].node, - self.per_channel) + q_weights_name, q_weights_min_name, q_weights_max_name = self._intel_cpu_quantize_weight_eightbit( + matched_node.node.op, self.node_name_mapping[weights_name[0]].node, self.per_channel + ) all_input_names = q_inputs[:1] + [q_weights_name] + q_inputs[1:] all_input_names.append(q_weights_min_name) @@ -726,49 +752,55 @@ def apply_conv3d_add_fusion(self, match_node_name): quantized_node_input_names = all_input_names[:2] + [bias_node_name] + all_input_names[2:] if node.op == "Conv3D": quantized_conv_node = helper.create_node( - "_FusedQuantizedConv3D", - quantized_node_name, - quantized_node_input_names) + "_FusedQuantizedConv3D", 
quantized_node_name, quantized_node_input_names + ) helper.copy_attr(quantized_conv_node, "strides", node.attr["strides"]) helper.copy_attr(quantized_conv_node, "padding", node.attr["padding"]) helper.copy_attr(quantized_conv_node, "data_format", node.attr["data_format"]) - if node.op != 'DepthwiseConv3dNative' and "explicit_paddings" in node.attr: - helper.copy_attr(quantized_conv_node, "explicit_paddings",node.attr["explicit_paddings"]) + if node.op != "DepthwiseConv3dNative" and "explicit_paddings" in node.attr: + helper.copy_attr(quantized_conv_node, "explicit_paddings", node.attr["explicit_paddings"]) helper.copy_attr(quantized_conv_node, "dilations", node.attr["dilations"]) input_data_type = dtypes.quint8 if self._find_relu_node(node) else dtypes.qint8 helper.set_attr_dtype(quantized_conv_node, "Tinput", input_data_type) helper.set_attr_dtype(quantized_conv_node, "Tfilter", dtypes.qint8) helper.set_attr_dtype(quantized_conv_node, "Tbias", dtypes.float32) - # if self.device == 'gpu' else dtypes.qint32) + # if self.device == 'gpu' else dtypes.qint32) helper.set_attr_dtype(quantized_conv_node, "Tsummand", dtypes.qint32) # helper.set_attr_string(quantized_conv_node, '_kernel', b'QuantizedMklOp') - helper.set_attr_dtype(quantized_conv_node, "out_type", - dtypes.qint32) - helper.set_attr_string_list(quantized_conv_node, 'fused_ops', [b'BiasAdd']) - - helper.set_attr_type_list(quantized_conv_node, 'Thost_inputs', [ - input_data_type.as_datatype_enum, - dtypes.qint8.as_datatype_enum, - dtypes.float32.as_datatype_enum, # if self.device == 'gpu' else dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) - helper.set_attr_type_list(quantized_conv_node, 'Thost_outputs', [ - dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, ]) + helper.set_attr_dtype(quantized_conv_node, "out_type", dtypes.qint32) + helper.set_attr_string_list(quantized_conv_node, "fused_ops", [b"BiasAdd"]) + + helper.set_attr_type_list( + quantized_conv_node, + "Thost_inputs", + [ + input_data_type.as_datatype_enum, + dtypes.qint8.as_datatype_enum, + dtypes.float32.as_datatype_enum, # if self.device == 'gpu' else dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + helper.set_attr_type_list( + quantized_conv_node, + "Thost_outputs", + [ + dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) self.add_output_graph_node(quantized_conv_node) - quantize_down_name = self._add_quantize_down_nodes( - node, quantized_node_name, dtypes.qint8) + quantize_down_name = self._add_quantize_down_nodes(node, quantized_node_name, dtypes.qint8) self._intel_cpu_add_dequantize_result_node( - quantize_down_name, match_node_name[2], dtypes.qint8, - performance_only=self.performance_only) + quantize_down_name, match_node_name[2], dtypes.qint8, performance_only=self.performance_only + ) else: new_node = node_def_pb2.NodeDef() new_node.CopyFrom(node) @@ -790,10 +822,9 @@ def apply_conv3d_single_fusion(self, match_node_name): weights_min_name = weights_name[1] weights_max_name = weights_name[2] - q_weights_name, q_weights_min_name, q_weights_max_name = \ - self._intel_cpu_quantize_weight_eightbit(matched_node.node.op, - self.node_name_mapping[weights_name[0]].node, - self.per_channel) + 
q_weights_name, q_weights_min_name, q_weights_max_name = self._intel_cpu_quantize_weight_eightbit( + matched_node.node.op, self.node_name_mapping[weights_name[0]].node, self.per_channel + ) all_input_names = q_inputs[:1] + [q_weights_name] + q_inputs[1:] all_input_names.append(q_weights_min_name) @@ -815,16 +846,16 @@ def apply_conv3d_single_fusion(self, match_node_name): postfix = "_eightbit_quantized_conv3d" quantized_node_name = node.name + postfix quantized_conv_node = helper.create_node( - "_FusedQuantizedConv3D" - if self.per_channel else "_FusedQuantizedConv3D", - quantized_node_name, all_input_names) + "_FusedQuantizedConv3D" if self.per_channel else "_FusedQuantizedConv3D", + quantized_node_name, + all_input_names, + ) helper.copy_attr(quantized_conv_node, "strides", node.attr["strides"]) helper.copy_attr(quantized_conv_node, "padding", node.attr["padding"]) helper.copy_attr(quantized_conv_node, "data_format", node.attr["data_format"]) - if node.op != 'DepthwiseConv3dNative' and "explicit_paddings" in node.attr: - helper.copy_attr(quantized_conv_node, "explicit_paddings", - node.attr["explicit_paddings"]) + if node.op != "DepthwiseConv3dNative" and "explicit_paddings" in node.attr: + helper.copy_attr(quantized_conv_node, "explicit_paddings", node.attr["explicit_paddings"]) helper.copy_attr(quantized_conv_node, "dilations", node.attr["dilations"]) input_data_type = dtypes.quint8 if self._find_relu_node(node) else dtypes.qint8 helper.set_attr_dtype(quantized_conv_node, "Tinput", input_data_type) @@ -832,29 +863,37 @@ def apply_conv3d_single_fusion(self, match_node_name): helper.set_attr_dtype(quantized_conv_node, "out_type", dtypes.qint32) helper.set_attr_dtype(quantized_conv_node, "Tsummand", dtypes.qint32) helper.set_attr_dtype(quantized_conv_node, "Tbias", dtypes.float32) - # if self.device == 'gpu' else dtypes.qint32) + # if self.device == 'gpu' else dtypes.qint32) # helper.set_attr_string(quantized_conv_node, '_kernel', b'QuantizedMklOp') - helper.set_attr_string_list(quantized_conv_node, 'fused_ops', []) - - helper.set_attr_type_list(quantized_conv_node, 'Thost_inputs', [ - input_data_type.as_datatype_enum, - dtypes.qint8.as_datatype_enum, - #dtypes.float32.as_datatype_enum if self.device == 'gpu' else dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) - helper.set_attr_type_list(quantized_conv_node, 'Thost_outputs', [ - dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum,]) + helper.set_attr_string_list(quantized_conv_node, "fused_ops", []) + + helper.set_attr_type_list( + quantized_conv_node, + "Thost_inputs", + [ + input_data_type.as_datatype_enum, + dtypes.qint8.as_datatype_enum, + # dtypes.float32.as_datatype_enum if self.device == 'gpu' else dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + helper.set_attr_type_list( + quantized_conv_node, + "Thost_outputs", + [ + dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) self.add_output_graph_node(quantized_conv_node) - quantize_down_name = self._add_quantize_down_nodes( - node, quantized_node_name, dtypes.qint8) + quantize_down_name = self._add_quantize_down_nodes(node, quantized_node_name, dtypes.qint8) self._intel_cpu_add_dequantize_result_node( - 
quantize_down_name, node.name, dtypes.qint8, - performance_only=self.performance_only) + quantize_down_name, node.name, dtypes.qint8, performance_only=self.performance_only + ) else: new_node = node_def_pb2.NodeDef() new_node.CopyFrom(node) @@ -891,9 +930,9 @@ def apply_newly_conv_biasadd_relu_fusion(self, match_node_name): matched_node = self.node_name_mapping[match_node_name[1]] second_node = self.node_name_mapping[match_node_name[2]].node - if second_node.op in ('Relu', 'Relu6', 'LeakyRelu', 'Elu', 'Sigmoid'): - new_match_node_name = self._insert_dummy_biasadd(match_node_name, matched_node) - return self.apply_newly_conv_biasadd_relu_fusion(new_match_node_name) + if second_node.op in ("Relu", "Relu6", "LeakyRelu", "Elu", "Sigmoid"): + new_match_node_name = self._insert_dummy_biasadd(match_node_name, matched_node) + return self.apply_newly_conv_biasadd_relu_fusion(new_match_node_name) need_insert_dummy_biasadd = 1 add_a_node_name = helper.node_name_from_input(second_node.input[0]) @@ -901,12 +940,12 @@ def apply_newly_conv_biasadd_relu_fusion(self, match_node_name): add_b_node_name = helper.node_name_from_input(second_node.input[1]) add_b_node = self.node_name_mapping[add_b_node_name].node - if add_a_node.op != 'Const' and add_b_node.op == 'Const': - need_insert_dummy_biasadd = 0 + if add_a_node.op != "Const" and add_b_node.op == "Const": + need_insert_dummy_biasadd = 0 if need_insert_dummy_biasadd: - new_match_node_name = self._insert_dummy_biasadd(match_node_name, matched_node) - #after insert dummy biasadd, that is Conv+dummybiasadd+add*+relu* - return self.apply_newly_conv_biasadd_addn_relu_fusion(new_match_node_name) + new_match_node_name = self._insert_dummy_biasadd(match_node_name, matched_node) + # after insert dummy biasadd, that is Conv+dummybiasadd+add*+relu* + return self.apply_newly_conv_biasadd_addn_relu_fusion(new_match_node_name) control_inputs, normal_inputs = self._get_node_input(matched_node.node.name) _, q_inputs = self._get_node_input(normal_inputs[0]) @@ -917,9 +956,9 @@ def apply_newly_conv_biasadd_relu_fusion(self, match_node_name): weights_min_name = weights_name[1] weights_max_name = weights_name[2] - q_weights_name, q_weights_min_name, q_weights_max_name = \ - self._intel_cpu_quantize_weight_eightbit( - matched_node.node.op, self.node_name_mapping[weights_name[0]].node, self.per_channel) + q_weights_name, q_weights_min_name, q_weights_max_name = self._intel_cpu_quantize_weight_eightbit( + matched_node.node.op, self.node_name_mapping[weights_name[0]].node, self.per_channel + ) all_input_names = q_inputs[:1] + [q_weights_name] + q_inputs[1:] all_input_names.append(q_weights_min_name) @@ -943,77 +982,86 @@ def apply_newly_conv_biasadd_relu_fusion(self, match_node_name): bias_node_name = self.node_name_mapping[match_node_name[2]].node.input[1] relu_node_name = match_node_name[3] is_relu6 = self.node_name_mapping[relu_node_name].node.op == "Relu6" - quantized_node_input_names = all_input_names[:2] + \ - [bias_node_name] + all_input_names[2:] + control_inputs + quantized_node_input_names = ( + all_input_names[:2] + [bias_node_name] + all_input_names[2:] + control_inputs + ) is_leakyrelu = self.node_name_mapping[relu_node_name].node.op == "LeakyRelu" is_elu = self.node_name_mapping[relu_node_name].node.op == "Elu" is_sigmoid = self.node_name_mapping[relu_node_name].node.op == "Sigmoid" - node_op = '_FusedQuantizedDepthwiseConv2D' - if node.op == 'Conv2D': + node_op = "_FusedQuantizedDepthwiseConv2D" + if node.op == "Conv2D": node_op = "_FusedQuantizedConv2D" - 
quantized_conv_node = helper.create_node(node_op, quantized_node_name, - quantized_node_input_names) + quantized_conv_node = helper.create_node(node_op, quantized_node_name, quantized_node_input_names) helper.copy_attr(quantized_conv_node, "strides", node.attr["strides"]) helper.copy_attr(quantized_conv_node, "padding", node.attr["padding"]) helper.copy_attr(quantized_conv_node, "data_format", node.attr["data_format"]) if "alpha" in self.node_name_mapping[relu_node_name].node.attr: - helper.copy_attr(quantized_conv_node, "alpha", - self.node_name_mapping[relu_node_name].node.attr["alpha"]) + helper.copy_attr( + quantized_conv_node, "alpha", self.node_name_mapping[relu_node_name].node.attr["alpha"] + ) if "explicit_paddings" in node.attr: - helper.copy_attr(quantized_conv_node, "explicit_paddings", - node.attr["explicit_paddings"]) + helper.copy_attr(quantized_conv_node, "explicit_paddings", node.attr["explicit_paddings"]) helper.copy_attr(quantized_conv_node, "dilations", node.attr["dilations"]) input_data_type = dtypes.quint8 if self._find_relu_node(node) else dtypes.qint8 helper.set_attr_dtype(quantized_conv_node, "Tinput", input_data_type) - helper.set_attr_dtype(quantized_conv_node, "Tfilter",dtypes.qint8) + helper.set_attr_dtype(quantized_conv_node, "Tfilter", dtypes.qint8) helper.set_attr_dtype(quantized_conv_node, "Tsummand", dtypes.qint32) # helper.set_attr_string(quantized_conv_node, '_kernel', b'QuantizedMklOp') helper.set_attr_dtype(quantized_conv_node, "out_type", dtypes.qint32) # helper.set_attr_dtype(quantized_conv_node, "alpha", dtypes.quint8) helper.set_attr_dtype(quantized_conv_node, "Tbias", dtypes.float32) - #if self.device == 'gpu' else dtypes.qint32) - fused_ops = [b'BiasAdd', b'Relu'] + # if self.device == 'gpu' else dtypes.qint32) + fused_ops = [b"BiasAdd", b"Relu"] if is_leakyrelu: - fused_ops = [b'BiasAdd', b'LeakyRelu'] + fused_ops = [b"BiasAdd", b"LeakyRelu"] if is_elu: - fused_ops = [b'BiasAdd', b'Elu'] + fused_ops = [b"BiasAdd", b"Elu"] if is_sigmoid: - fused_ops = [b'BiasAdd', b'Sigmoid'] - helper.set_attr_string_list(quantized_conv_node, 'fused_ops', fused_ops) - helper.set_attr_type_list(quantized_conv_node, 'Thost_inputs', [ - input_data_type.as_datatype_enum, - dtypes.qint8.as_datatype_enum, - dtypes.float32.as_datatype_enum,# if self.device == 'gpu' else dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) - helper.set_attr_type_list(quantized_conv_node, 'Thost_outputs', [ - dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, ]) + fused_ops = [b"BiasAdd", b"Sigmoid"] + helper.set_attr_string_list(quantized_conv_node, "fused_ops", fused_ops) + helper.set_attr_type_list( + quantized_conv_node, + "Thost_inputs", + [ + input_data_type.as_datatype_enum, + dtypes.qint8.as_datatype_enum, + dtypes.float32.as_datatype_enum, # if self.device == 'gpu' else dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + helper.set_attr_type_list( + quantized_conv_node, + "Thost_outputs", + [ + dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) self.add_output_graph_node(quantized_conv_node) if not is_leakyrelu: dtype = dtypes.quint8 - if [i for i in self.node_name_mapping[relu_node_name].output \ - if 'FusedBatchNorm' in 
self.node_name_mapping[i].node.op and \ - i in self.op_wise_config_name_list]: - dtype = dtypes.qint8 - quantize_down_name = self._add_quantize_down_nodes( - node, quantized_node_name, dtype, is_relu6) + if [ + i + for i in self.node_name_mapping[relu_node_name].output + if "FusedBatchNorm" in self.node_name_mapping[i].node.op and i in self.op_wise_config_name_list + ]: + dtype = dtypes.qint8 + quantize_down_name = self._add_quantize_down_nodes(node, quantized_node_name, dtype, is_relu6) self._intel_cpu_add_dequantize_result_node( - quantize_down_name, relu_node_name, dtype, - performance_only=self.performance_only) + quantize_down_name, relu_node_name, dtype, performance_only=self.performance_only + ) else: - quantize_down_name = self._add_quantize_down_nodes( - node, quantized_node_name, dtypes.qint8, False) + quantize_down_name = self._add_quantize_down_nodes(node, quantized_node_name, dtypes.qint8, False) self._intel_cpu_add_dequantize_result_node( - quantize_down_name, relu_node_name, dtype=dtypes.qint8, - performance_only=self.performance_only) + quantize_down_name, relu_node_name, dtype=dtypes.qint8, performance_only=self.performance_only + ) else: new_node = node_def_pb2.NodeDef() new_node.CopyFrom(node) @@ -1037,12 +1085,12 @@ def apply_newly_conv_biasadd_fusion(self, match_node_name): add_b_node_name = helper.node_name_from_input(second_node.input[1]) add_b_node = self.node_name_mapping[add_b_node_name].node - if add_a_node.op != 'Const' and add_b_node.op == 'Const': - need_insert_dummy_biasadd = 0 + if add_a_node.op != "Const" and add_b_node.op == "Const": + need_insert_dummy_biasadd = 0 if need_insert_dummy_biasadd: - new_match_node_name = self._insert_dummy_biasadd(match_node_name, matched_node) - # after insert dummy biasadd, that is Conv+dummybiasadd+add - return self.apply_newly_conv_biasadd_addn_fusion(new_match_node_name) + new_match_node_name = self._insert_dummy_biasadd(match_node_name, matched_node) + # after insert dummy biasadd, that is Conv+dummybiasadd+add + return self.apply_newly_conv_biasadd_addn_fusion(new_match_node_name) control_inputs, normal_inputs = self._get_node_input(matched_node.node.name) _, q_inputs = self._get_node_input(normal_inputs[0]) @@ -1053,9 +1101,9 @@ def apply_newly_conv_biasadd_fusion(self, match_node_name): weights_min_name = weights_name[1] weights_max_name = weights_name[2] - q_weights_name, q_weights_min_name, q_weights_max_name = \ - self._intel_cpu_quantize_weight_eightbit( - matched_node.node.op, self.node_name_mapping[weights_name[0]].node, self.per_channel) + q_weights_name, q_weights_min_name, q_weights_max_name = self._intel_cpu_quantize_weight_eightbit( + matched_node.node.op, self.node_name_mapping[weights_name[0]].node, self.per_channel + ) all_input_names = q_inputs[:1] + [q_weights_name] + q_inputs[1:] all_input_names.append(q_weights_min_name) @@ -1077,52 +1125,58 @@ def apply_newly_conv_biasadd_fusion(self, match_node_name): quantized_node_name = node.name + "_eightbit_quantized_conv" bias_node_name = self.node_name_mapping[match_node_name[2]].node.input[1] - quantized_node_input_names = all_input_names[:2] + \ - [bias_node_name] + all_input_names[2:] + control_inputs + quantized_node_input_names = ( + all_input_names[:2] + [bias_node_name] + all_input_names[2:] + control_inputs + ) - node_op = "_FusedQuantizedConv2D" if node.op == 'Conv2D' \ - else '_FusedQuantizedDepthwiseConv2D' - quantized_conv_node = helper.create_node(node_op, quantized_node_name, - quantized_node_input_names) + node_op = "_FusedQuantizedConv2D" if 
node.op == "Conv2D" else "_FusedQuantizedDepthwiseConv2D" + quantized_conv_node = helper.create_node(node_op, quantized_node_name, quantized_node_input_names) helper.copy_attr(quantized_conv_node, "strides", node.attr["strides"]) helper.copy_attr(quantized_conv_node, "padding", node.attr["padding"]) helper.copy_attr(quantized_conv_node, "data_format", node.attr["data_format"]) if "explicit_paddings" in node.attr: - helper.copy_attr(quantized_conv_node, "explicit_paddings", - node.attr["explicit_paddings"]) + helper.copy_attr(quantized_conv_node, "explicit_paddings", node.attr["explicit_paddings"]) helper.copy_attr(quantized_conv_node, "dilations", node.attr["dilations"]) input_data_type = dtypes.quint8 if self._find_relu_node(node) else dtypes.qint8 helper.set_attr_dtype(quantized_conv_node, "Tinput", input_data_type) - helper.set_attr_dtype(quantized_conv_node, "Tfilter",dtypes.qint8) + helper.set_attr_dtype(quantized_conv_node, "Tfilter", dtypes.qint8) helper.set_attr_dtype(quantized_conv_node, "Tsummand", dtypes.qint32) # helper.set_attr_string(quantized_conv_node, '_kernel', b'QuantizedMklOp') helper.set_attr_dtype(quantized_conv_node, "out_type", dtypes.qint32) # helper.set_attr_dtype(quantized_conv_node, "alpha", dtypes.quint8) helper.set_attr_dtype(quantized_conv_node, "Tbias", dtypes.float32) - #if self.device == 'gpu' else dtypes.qint32) - helper.set_attr_string_list(quantized_conv_node, 'fused_ops', [b'BiasAdd']) - - helper.set_attr_type_list(quantized_conv_node, 'Thost_inputs', [ - input_data_type.as_datatype_enum, - dtypes.qint8.as_datatype_enum, - dtypes.float32.as_datatype_enum,# if self.device == 'gpu' else dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) - helper.set_attr_type_list(quantized_conv_node, 'Thost_outputs', [ - dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, ]) + # if self.device == 'gpu' else dtypes.qint32) + helper.set_attr_string_list(quantized_conv_node, "fused_ops", [b"BiasAdd"]) + + helper.set_attr_type_list( + quantized_conv_node, + "Thost_inputs", + [ + input_data_type.as_datatype_enum, + dtypes.qint8.as_datatype_enum, + dtypes.float32.as_datatype_enum, # if self.device == 'gpu' else dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + helper.set_attr_type_list( + quantized_conv_node, + "Thost_outputs", + [ + dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) self.add_output_graph_node(quantized_conv_node) - quantize_down_name = self._add_quantize_down_nodes( - node, quantized_node_name, dtypes.qint8, False) + quantize_down_name = self._add_quantize_down_nodes(node, quantized_node_name, dtypes.qint8, False) self._intel_cpu_add_dequantize_result_node( - quantize_down_name, match_node_name[2], dtypes.qint8, - performance_only=self.performance_only) + quantize_down_name, match_node_name[2], dtypes.qint8, performance_only=self.performance_only + ) else: new_node = node_def_pb2.NodeDef() @@ -1146,10 +1200,9 @@ def apply_newly_conv_single_fusion(self, match_node_name): weights_min_name = weights_name[1] weights_max_name = weights_name[2] - q_weights_name, q_weights_min_name, q_weights_max_name = \ - self._intel_cpu_quantize_weight_eightbit(matched_node.node.op, - 
self.node_name_mapping[weights_name[0]].node, - self.per_channel) + q_weights_name, q_weights_min_name, q_weights_max_name = self._intel_cpu_quantize_weight_eightbit( + matched_node.node.op, self.node_name_mapping[weights_name[0]].node, self.per_channel + ) all_input_names = q_inputs[:1] + [q_weights_name] + q_inputs[1:] all_input_names.append(q_weights_min_name) @@ -1170,50 +1223,55 @@ def apply_newly_conv_single_fusion(self, match_node_name): if node.op == "Conv2D": quantized_node_name = node.name + "_eightbit_quantized_conv" - node_op = "_FusedQuantizedConv2D" if node.op == 'Conv2D' \ - else '_FusedQuantizedDepthwiseConv2D' - quantized_conv_node = helper.create_node(node_op, quantized_node_name, - all_input_names) + node_op = "_FusedQuantizedConv2D" if node.op == "Conv2D" else "_FusedQuantizedDepthwiseConv2D" + quantized_conv_node = helper.create_node(node_op, quantized_node_name, all_input_names) helper.copy_attr(quantized_conv_node, "strides", node.attr["strides"]) helper.copy_attr(quantized_conv_node, "padding", node.attr["padding"]) helper.copy_attr(quantized_conv_node, "data_format", node.attr["data_format"]) if "explicit_paddings" in node.attr: - helper.copy_attr(quantized_conv_node, "explicit_paddings", - node.attr["explicit_paddings"]) + helper.copy_attr(quantized_conv_node, "explicit_paddings", node.attr["explicit_paddings"]) helper.copy_attr(quantized_conv_node, "dilations", node.attr["dilations"]) input_data_type = dtypes.quint8 if self._find_relu_node(node) else dtypes.qint8 helper.set_attr_dtype(quantized_conv_node, "Tinput", input_data_type) - helper.set_attr_dtype(quantized_conv_node, "Tfilter",dtypes.qint8) + helper.set_attr_dtype(quantized_conv_node, "Tfilter", dtypes.qint8) helper.set_attr_dtype(quantized_conv_node, "Tsummand", dtypes.qint32) # helper.set_attr_string(quantized_conv_node, '_kernel', b'QuantizedMklOp') helper.set_attr_dtype(quantized_conv_node, "out_type", dtypes.qint32) # helper.set_attr_dtype(quantized_conv_node, "alpha", dtypes.quint8) helper.set_attr_dtype(quantized_conv_node, "Tbias", dtypes.float32) - #if self.device == 'gpu' else dtypes.qint32) - # - helper.set_attr_string_list(quantized_conv_node, 'fused_ops', []) - - helper.set_attr_type_list(quantized_conv_node, 'Thost_inputs', [ - input_data_type.as_datatype_enum, - dtypes.qint8.as_datatype_enum, - #dtypes.float32.as_datatype_enum if self.device == 'gpu' else dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) - helper.set_attr_type_list(quantized_conv_node, 'Thost_outputs', [ - dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, ]) + # if self.device == 'gpu' else dtypes.qint32) + # + helper.set_attr_string_list(quantized_conv_node, "fused_ops", []) + + helper.set_attr_type_list( + quantized_conv_node, + "Thost_inputs", + [ + input_data_type.as_datatype_enum, + dtypes.qint8.as_datatype_enum, + # dtypes.float32.as_datatype_enum if self.device == 'gpu' else dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + helper.set_attr_type_list( + quantized_conv_node, + "Thost_outputs", + [ + dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) self.add_output_graph_node(quantized_conv_node) - quantize_down_name = self._add_quantize_down_nodes( - 
node, quantized_node_name, dtypes.qint8, False) + quantize_down_name = self._add_quantize_down_nodes(node, quantized_node_name, dtypes.qint8, False) self._intel_cpu_add_dequantize_result_node( - quantize_down_name, match_node_name[1], dtypes.qint8, - performance_only=self.performance_only) + quantize_down_name, match_node_name[1], dtypes.qint8, performance_only=self.performance_only + ) else: new_node = node_def_pb2.NodeDef() @@ -1244,18 +1302,17 @@ def apply_newly_conv_biasadd_addn_relu_fusion(self, match_node_name): add_b_node_name = helper.node_name_from_input(second_node.input[1]) add_b_node = self.node_name_mapping[add_b_node_name].node - if add_a_node.op != 'Const' and add_b_node.op == 'Const': - need_insert_dummy_biasadd = 0 - if len(match_node_name) == 5 and 'Relu' in match_node_name[3]: + if add_a_node.op != "Const" and add_b_node.op == "Const": + need_insert_dummy_biasadd = 0 + if len(match_node_name) == 5 and "Relu" in match_node_name[3]: return self.apply_newly_conv_biasadd_relu_fusion(match_node_name) if need_insert_dummy_biasadd: - new_match_node_name = self._insert_dummy_biasadd(match_node_name, matched_node) - #after insert dummy biasadd, that is Conv+dummybiasadd+add*+add*+relu* - return self.apply_newly_conv_biasadd_addn_fusion(new_match_node_name[:4]+[new_match_node_name[-1]]) - - control_inputs, normal_inputs = self._get_node_input( - matched_node.node.name) + new_match_node_name = self._insert_dummy_biasadd(match_node_name, matched_node) + # after insert dummy biasadd, that is Conv+dummybiasadd+add*+add*+relu* + return self.apply_newly_conv_biasadd_addn_fusion(new_match_node_name[:4] + [new_match_node_name[-1]]) + + control_inputs, normal_inputs = self._get_node_input(matched_node.node.name) _, q_inputs = self._get_node_input(normal_inputs[0]) _, q_weights_inputs = self._get_node_input(normal_inputs[1]) @@ -1265,37 +1322,42 @@ def apply_newly_conv_biasadd_addn_relu_fusion(self, match_node_name): weights_max_name = weights_name[2] third_node = self.node_name_mapping[match_node_name[3]].node - if third_node.op in ('BiasAdd', 'Add', 'AddV2', 'AddN'): + if third_node.op in ("BiasAdd", "Add", "AddV2", "AddN"): sumadd_a_node_name = helper.node_name_from_input(third_node.input[0]) sumadd_a_node = self.node_name_mapping[sumadd_a_node_name].node sumadd_b_node_name = helper.node_name_from_input(third_node.input[1]) sumadd_b_node = self.node_name_mapping[sumadd_b_node_name].node - if sumadd_a_node.op != 'Const' and sumadd_b_node.op == 'Const': + if sumadd_a_node.op != "Const" and sumadd_b_node.op == "Const": return self.apply_newly_conv_biasadd_fusion(match_node_name[:3] + [match_node_name[-1]]) forth_node = self.node_name_mapping[match_node_name[4]].node - if forth_node.op not in ('LeakyRelu', 'Relu'): - if third_node.op not in ('LeakyRelu', 'Relu') and not self._find_relu_node(matched_node.node): + if forth_node.op not in ("LeakyRelu", "Relu"): + if third_node.op not in ("LeakyRelu", "Relu") and not self._find_relu_node(matched_node.node): return self.apply_newly_conv_biasadd_fusion(match_node_name[:3] + [match_node_name[-1]]) - is_leakyrelu_add_fusion = third_node.op == 'LeakyRelu' and forth_node.op.find('Add') != -1 - is_relu_add_fusion = third_node.op == 'Relu' and forth_node.op.find('Add') != -1 + is_leakyrelu_add_fusion = third_node.op == "LeakyRelu" and forth_node.op.find("Add") != -1 + is_relu_add_fusion = third_node.op == "Relu" and forth_node.op.find("Add") != -1 relu_offset = 0 if is_leakyrelu_add_fusion or is_relu_add_fusion: relu_offset = 1 - sum_index = 1 if 
match_node_name[2 + relu_offset] == self.node_name_mapping[ - match_node_name[3 + relu_offset]].node.input[0] else 0 + sum_index = ( + 1 + if match_node_name[2 + relu_offset] + == self.node_name_mapping[match_node_name[3 + relu_offset]].node.input[0] + else 0 + ) sum_node_name = self.node_name_mapping[match_node_name[3 + relu_offset]].node.input[sum_index] deq_node = self.node_name_mapping[sum_node_name].node - if (deq_node.op != 'LeakyRelu' and deq_node.op != 'Dequantize' and deq_node.op != 'BiasAdd') or \ - deq_node.op.find("Quantize") != -1: - return self.apply_newly_conv_biasadd_fusion(match_node_name[:3]+[match_node_name[-1]]) + if ( + deq_node.op != "LeakyRelu" and deq_node.op != "Dequantize" and deq_node.op != "BiasAdd" + ) or deq_node.op.find("Quantize") != -1: + return self.apply_newly_conv_biasadd_fusion(match_node_name[:3] + [match_node_name[-1]]) - q_weights_name, q_weights_min_name, q_weights_max_name = \ - self._intel_cpu_quantize_weight_eightbit( - matched_node.node.op, self.node_name_mapping[weights_name[0]].node, self.per_channel) + q_weights_name, q_weights_min_name, q_weights_max_name = self._intel_cpu_quantize_weight_eightbit( + matched_node.node.op, self.node_name_mapping[weights_name[0]].node, self.per_channel + ) all_input_names = q_inputs[:1] + [q_weights_name] + q_inputs[1:] all_input_names.append(q_weights_min_name) @@ -1325,86 +1387,94 @@ def apply_newly_conv_biasadd_addn_relu_fusion(self, match_node_name): is_relu6 = self.node_name_mapping[relu_node_name].node.op == "Relu6" is_leakyrelu = self.node_name_mapping[relu_node_name].node.op == "LeakyRelu" - quantized_node_input_names = all_input_names[:2] + [ - bias_node_name - ] + all_input_names[2:] + [ - sum_node_name - ] + control_inputs - - if sum_node_name.find('mul') != -1: - quantized_node_input_names = all_input_names[:2] + [ - bias_node_name - ] + [ - self.node_name_mapping[ - match_node_name[3 + relu_offset]].node.input[sum_index] - ] + all_input_names[2:] + control_inputs - - node_op = "_FusedQuantizedConv2D" if node.op == 'Conv2D' \ - else '_FusedQuantizedDepthwiseConv2D' - - quantized_conv_node = helper.create_node(node_op, quantized_node_name, - quantized_node_input_names) + quantized_node_input_names = ( + all_input_names[:2] + [bias_node_name] + all_input_names[2:] + [sum_node_name] + control_inputs + ) + + if sum_node_name.find("mul") != -1: + quantized_node_input_names = ( + all_input_names[:2] + + [bias_node_name] + + [self.node_name_mapping[match_node_name[3 + relu_offset]].node.input[sum_index]] + + all_input_names[2:] + + control_inputs + ) + + node_op = "_FusedQuantizedConv2D" if node.op == "Conv2D" else "_FusedQuantizedDepthwiseConv2D" + + quantized_conv_node = helper.create_node(node_op, quantized_node_name, quantized_node_input_names) helper.copy_attr(quantized_conv_node, "strides", node.attr["strides"]) helper.copy_attr(quantized_conv_node, "padding", node.attr["padding"]) helper.copy_attr(quantized_conv_node, "data_format", node.attr["data_format"]) if "explicit_paddings" in node.attr: - helper.copy_attr(quantized_conv_node, "explicit_paddings", - node.attr["explicit_paddings"]) + helper.copy_attr(quantized_conv_node, "explicit_paddings", node.attr["explicit_paddings"]) helper.copy_attr(quantized_conv_node, "dilations", node.attr["dilations"]) - input_data_type = dtypes.quint8 if self._find_relu_node( - node) else dtypes.qint8 + input_data_type = dtypes.quint8 if self._find_relu_node(node) else dtypes.qint8 helper.set_attr_dtype(quantized_conv_node, "Tinput", input_data_type) 
helper.set_attr_dtype(quantized_conv_node, "Tfilter", dtypes.qint8) helper.set_attr_dtype(quantized_conv_node, "out_type", dtypes.qint32) if "alpha" in self.node_name_mapping[relu_node_name].node.attr: - helper.copy_attr(quantized_conv_node, "alpha", - self.node_name_mapping[relu_node_name].node.attr["alpha"]) + helper.copy_attr( + quantized_conv_node, "alpha", self.node_name_mapping[relu_node_name].node.attr["alpha"] + ) if is_leakyrelu: - helper.set_attr_string_list(quantized_conv_node, 'fused_ops', [b'BiasAdd', b'Sum', b'LeakyRelu']) + helper.set_attr_string_list(quantized_conv_node, "fused_ops", [b"BiasAdd", b"Sum", b"LeakyRelu"]) else: - helper.set_attr_string_list(quantized_conv_node, 'fused_ops', [b'BiasAdd', b'Sum', b'Relu']) + helper.set_attr_string_list(quantized_conv_node, "fused_ops", [b"BiasAdd", b"Sum", b"Relu"]) helper.set_attr_dtype(quantized_conv_node, "Tbias", dtypes.float32) - # if self.device == 'gpu' else dtypes.qint32) + # if self.device == 'gpu' else dtypes.qint32) helper.set_attr_dtype(quantized_conv_node, "Tsummand", dtypes.qint32) if is_leakyrelu_add_fusion: - helper.set_attr_string_list(quantized_conv_node, 'fused_ops', [b'BiasAdd', b'LeakyRelu', b'Sum']) + helper.set_attr_string_list(quantized_conv_node, "fused_ops", [b"BiasAdd", b"LeakyRelu", b"Sum"]) elif is_relu_add_fusion: - helper.set_attr_string_list(quantized_conv_node, 'fused_ops', [b'BiasAdd', b'Relu', b'Sum']) - - helper.set_attr_type_list(quantized_conv_node, 'Thost_inputs', [ - input_data_type.as_datatype_enum, - dtypes.qint8.as_datatype_enum, - dtypes.float32.as_datatype_enum,# if self.device == 'gpu' else dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) - helper.set_attr_type_list(quantized_conv_node, 'Thost_outputs', [ - dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, ]) + helper.set_attr_string_list(quantized_conv_node, "fused_ops", [b"BiasAdd", b"Relu", b"Sum"]) + + helper.set_attr_type_list( + quantized_conv_node, + "Thost_inputs", + [ + input_data_type.as_datatype_enum, + dtypes.qint8.as_datatype_enum, + dtypes.float32.as_datatype_enum, # if self.device == 'gpu' else dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + helper.set_attr_type_list( + quantized_conv_node, + "Thost_outputs", + [ + dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) self.add_output_graph_node(quantized_conv_node) if is_leakyrelu_add_fusion or is_leakyrelu or is_relu_add_fusion: - quantize_down_name = self._add_quantize_down_nodes( - node, quantized_node_name, dtypes.qint8, False) + quantize_down_name = self._add_quantize_down_nodes(node, quantized_node_name, dtypes.qint8, False) self._intel_cpu_add_dequantize_result_node( - quantize_down_name, match_node_name[4], dtype=dtypes.qint8, - performance_only=self.performance_only) + quantize_down_name, + match_node_name[4], + dtype=dtypes.qint8, + performance_only=self.performance_only, + ) else: dtype = dtypes.quint8 - if [i for i in self.node_name_mapping[relu_node_name].output \ - if 'FusedBatchNorm' in self.node_name_mapping[i].node.op and \ - i in self.op_wise_config_name_list]: - dtype = dtypes.qint8 - quantize_down_name = 
self._add_quantize_down_nodes( - node, quantized_node_name, dtype, is_relu6) + if [ + i + for i in self.node_name_mapping[relu_node_name].output + if "FusedBatchNorm" in self.node_name_mapping[i].node.op and i in self.op_wise_config_name_list + ]: + dtype = dtypes.qint8 + quantize_down_name = self._add_quantize_down_nodes(node, quantized_node_name, dtype, is_relu6) self._intel_cpu_add_dequantize_result_node( - quantize_down_name, relu_node_name, dtype, - performance_only=self.performance_only) + quantize_down_name, relu_node_name, dtype, performance_only=self.performance_only + ) else: new_node = node_def_pb2.NodeDef() @@ -1424,34 +1494,33 @@ def apply_conv_biasadd_hardswish_fusion(self, match_node_name): second_node = self.node_name_mapping[match_node_name[2]].node if len(match_node_name) == 7: - new_match_node_name = self._insert_dummy_biasadd(match_node_name, matched_node) - return self.apply_conv_biasadd_hardswish_fusion(new_match_node_name) + new_match_node_name = self._insert_dummy_biasadd(match_node_name, matched_node) + return self.apply_conv_biasadd_hardswish_fusion(new_match_node_name) need_insert_dummy_biasadd = 1 add_a_node_name = helper.node_name_from_input(second_node.input[0]) add_a_node = self.node_name_mapping[add_a_node_name].node add_b_node_name = helper.node_name_from_input(second_node.input[1]) add_b_node = self.node_name_mapping[add_b_node_name].node - if add_a_node.op != 'Const' and add_b_node.op == 'Const': - need_insert_dummy_biasadd = 0 + if add_a_node.op != "Const" and add_b_node.op == "Const": + need_insert_dummy_biasadd = 0 if need_insert_dummy_biasadd: - new_match_node_name = self._insert_dummy_biasadd(match_node_name, matched_node) - #after insert dummy biasadd, that is Conv+dummybiasadd+add+add+relu6+mul+mul - return self.apply_newly_conv_biasadd_addn_fusion(new_match_node_name[:4] + [new_match_node_name[-1]]) + new_match_node_name = self._insert_dummy_biasadd(match_node_name, matched_node) + # after insert dummy biasadd, that is Conv+dummybiasadd+add+add+relu6+mul+mul + return self.apply_newly_conv_biasadd_addn_fusion(new_match_node_name[:4] + [new_match_node_name[-1]]) third_node = self.node_name_mapping[match_node_name[3]].node sumadd_a_node_name = helper.node_name_from_input(third_node.input[0]) sumadd_a_node = self.node_name_mapping[sumadd_a_node_name].node sumadd_b_node_name = helper.node_name_from_input(third_node.input[1]) sumadd_b_node = self.node_name_mapping[sumadd_b_node_name].node - if sumadd_a_node.op != 'Const' and sumadd_b_node.op == 'Const': + if sumadd_a_node.op != "Const" and sumadd_b_node.op == "Const": need_insert_dummy_biasadd = 0 else: - #third node is sumadd + # third node is sumadd return self.apply_newly_conv_biasadd_addn_fusion(match_node_name[:4] + [new_match_node_name[-1]]) - control_inputs, normal_inputs = self._get_node_input( - matched_node.node.name) + control_inputs, normal_inputs = self._get_node_input(matched_node.node.name) _, q_inputs = self._get_node_input(normal_inputs[0]) _, q_weights_inputs = self._get_node_input(normal_inputs[1]) quantizev2_weights_name = q_weights_inputs[0] @@ -1460,9 +1529,9 @@ def apply_conv_biasadd_hardswish_fusion(self, match_node_name): weights_min_name = weights_name[1] weights_max_name = weights_name[2] - q_weights_name, q_weights_min_name, q_weights_max_name = \ - self._intel_cpu_quantize_weight_eightbit( - matched_node.node.op, self.node_name_mapping[weights_name[0]].node, self.per_channel) + q_weights_name, q_weights_min_name, q_weights_max_name = self._intel_cpu_quantize_weight_eightbit( 
+ matched_node.node.op, self.node_name_mapping[weights_name[0]].node, self.per_channel + ) all_input_names = q_inputs[:1] + [q_weights_name] + q_inputs[1:] all_input_names.append(q_weights_min_name) @@ -1486,54 +1555,61 @@ def apply_conv_biasadd_hardswish_fusion(self, match_node_name): bias_node_name = self.node_name_mapping[match_node_name[2]].node.input[1] relu_node_name = match_node_name[4] is_relu6 = self.node_name_mapping[relu_node_name].node.op == "Relu6" - quantized_node_input_names = all_input_names[:2] + \ - [bias_node_name] + all_input_names[2:] + control_inputs + quantized_node_input_names = ( + all_input_names[:2] + [bias_node_name] + all_input_names[2:] + control_inputs + ) - node_op = "_FusedQuantizedConv2D" if node.op == 'Conv2D' \ - else '_FusedQuantizedDepthwiseConv2D' - quantized_conv_node = helper.create_node(node_op, quantized_node_name, - quantized_node_input_names) + node_op = "_FusedQuantizedConv2D" if node.op == "Conv2D" else "_FusedQuantizedDepthwiseConv2D" + quantized_conv_node = helper.create_node(node_op, quantized_node_name, quantized_node_input_names) helper.copy_attr(quantized_conv_node, "strides", node.attr["strides"]) helper.copy_attr(quantized_conv_node, "padding", node.attr["padding"]) helper.copy_attr(quantized_conv_node, "data_format", node.attr["data_format"]) if "alpha" in self.node_name_mapping[relu_node_name].node.attr: - helper.copy_attr(quantized_conv_node, "alpha", - self.node_name_mapping[relu_node_name].node.attr["alpha"]) + helper.copy_attr( + quantized_conv_node, "alpha", self.node_name_mapping[relu_node_name].node.attr["alpha"] + ) if "explicit_paddings" in node.attr: - helper.copy_attr(quantized_conv_node, "explicit_paddings", - node.attr["explicit_paddings"]) + helper.copy_attr(quantized_conv_node, "explicit_paddings", node.attr["explicit_paddings"]) helper.copy_attr(quantized_conv_node, "dilations", node.attr["dilations"]) input_data_type = dtypes.quint8 if self._find_relu_node(node) else dtypes.qint8 helper.set_attr_dtype(quantized_conv_node, "Tinput", input_data_type) - helper.set_attr_dtype(quantized_conv_node, "Tfilter",dtypes.qint8) + helper.set_attr_dtype(quantized_conv_node, "Tfilter", dtypes.qint8) helper.set_attr_dtype(quantized_conv_node, "Tsummand", dtypes.qint32) # helper.set_attr_string(quantized_conv_node, '_kernel', b'QuantizedMklOp') helper.set_attr_dtype(quantized_conv_node, "out_type", dtypes.qint32) # helper.set_attr_dtype(quantized_conv_node, "alpha", dtypes.quint8) helper.set_attr_dtype(quantized_conv_node, "Tbias", dtypes.float32) - # if self.device == 'gpu' else dtypes.qint32) - helper.set_attr_string_list(quantized_conv_node, 'fused_ops', [b'BiasAdd', b'_FusedHardSwish']) - helper.set_attr_type_list(quantized_conv_node, 'Thost_inputs', [ - input_data_type.as_datatype_enum, - dtypes.qint8.as_datatype_enum, - dtypes.float32.as_datatype_enum,# if self.device == 'gpu' else dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) - helper.set_attr_type_list(quantized_conv_node, 'Thost_outputs', [ - dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, ]) + # if self.device == 'gpu' else dtypes.qint32) + helper.set_attr_string_list(quantized_conv_node, "fused_ops", [b"BiasAdd", b"_FusedHardSwish"]) + helper.set_attr_type_list( + quantized_conv_node, + "Thost_inputs", + [ + input_data_type.as_datatype_enum, + dtypes.qint8.as_datatype_enum, + 
dtypes.float32.as_datatype_enum, # if self.device == 'gpu' else dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + helper.set_attr_type_list( + quantized_conv_node, + "Thost_outputs", + [ + dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) self.add_output_graph_node(quantized_conv_node) - quantize_down_name = self._add_quantize_down_nodes( - node, quantized_node_name, dtypes.quint8, is_relu6) + quantize_down_name = self._add_quantize_down_nodes(node, quantized_node_name, dtypes.quint8, is_relu6) self._intel_cpu_add_dequantize_result_node( - quantize_down_name, match_node_name[6], dtypes.quint8, - performance_only=self.performance_only) + quantize_down_name, match_node_name[6], dtypes.quint8, performance_only=self.performance_only + ) else: new_node = node_def_pb2.NodeDef() @@ -1556,7 +1632,7 @@ def apply_newly_conv_biasadd_swishf32_fusion(self, match_node_name): matched_node = self.node_name_mapping[match_node_name[1]] second_node = self.node_name_mapping[match_node_name[2]].node - if second_node.op == 'swish_f32': + if second_node.op == "swish_f32": new_match_node_name = self._insert_dummy_biasadd(match_node_name, matched_node) return self.apply_newly_conv_biasadd_swishf32_fusion(new_match_node_name) @@ -1566,15 +1642,14 @@ def apply_newly_conv_biasadd_swishf32_fusion(self, match_node_name): add_b_node_name = helper.node_name_from_input(second_node.input[1]) add_b_node = self.node_name_mapping[add_b_node_name].node - if add_a_node.op != 'Const' and add_b_node.op == 'Const': - need_insert_dummy_biasadd = 0 + if add_a_node.op != "Const" and add_b_node.op == "Const": + need_insert_dummy_biasadd = 0 if need_insert_dummy_biasadd: - new_match_node_name = self._insert_dummy_biasadd(match_node_name, matched_node) - #TF not support ['BiasAdd', 'Sum', '_FusedSwish'] pattern yet - return self.apply_newly_conv_biasadd_addn_fusion(new_match_node_name[:4]+[new_match_node_name[-1]]) + new_match_node_name = self._insert_dummy_biasadd(match_node_name, matched_node) + # TF not support ['BiasAdd', 'Sum', '_FusedSwish'] pattern yet + return self.apply_newly_conv_biasadd_addn_fusion(new_match_node_name[:4] + [new_match_node_name[-1]]) - control_inputs, normal_inputs = self._get_node_input( - matched_node.node.name) + control_inputs, normal_inputs = self._get_node_input(matched_node.node.name) _, q_inputs = self._get_node_input(normal_inputs[0]) _, q_weights_inputs = self._get_node_input(normal_inputs[1]) quantizev2_weights_name = q_weights_inputs[0] @@ -1583,9 +1658,9 @@ def apply_newly_conv_biasadd_swishf32_fusion(self, match_node_name): weights_min_name = weights_name[1] weights_max_name = weights_name[2] - q_weights_name, q_weights_min_name, q_weights_max_name = \ - self._intel_cpu_quantize_weight_eightbit( - matched_node.node.op, self.node_name_mapping[weights_name[0]].node, self.per_channel) + q_weights_name, q_weights_min_name, q_weights_max_name = self._intel_cpu_quantize_weight_eightbit( + matched_node.node.op, self.node_name_mapping[weights_name[0]].node, self.per_channel + ) all_input_names = q_inputs[:1] + [q_weights_name] + q_inputs[1:] all_input_names.append(q_weights_min_name) @@ -1608,53 +1683,62 @@ def apply_newly_conv_biasadd_swishf32_fusion(self, match_node_name): bias_node_name = self.node_name_mapping[match_node_name[2]].node.input[1] swish_node_name = match_node_name[3] - quantized_node_input_names 
= all_input_names[:2] + \ - [bias_node_name] + all_input_names[2:] + control_inputs + quantized_node_input_names = ( + all_input_names[:2] + [bias_node_name] + all_input_names[2:] + control_inputs + ) - node_op = '_FusedQuantizedDepthwiseConv2D' - if node.op == 'Conv2D': + node_op = "_FusedQuantizedDepthwiseConv2D" + if node.op == "Conv2D": node_op = "_FusedQuantizedConv2D" - quantized_conv_node = helper.create_node(node_op, quantized_node_name, - quantized_node_input_names) + quantized_conv_node = helper.create_node(node_op, quantized_node_name, quantized_node_input_names) helper.copy_attr(quantized_conv_node, "strides", node.attr["strides"]) helper.copy_attr(quantized_conv_node, "padding", node.attr["padding"]) helper.copy_attr(quantized_conv_node, "data_format", node.attr["data_format"]) if "alpha" in self.node_name_mapping[swish_node_name].node.attr: - helper.copy_attr(quantized_conv_node, "alpha", - self.node_name_mapping[swish_node_name].node.attr["alpha"]) + helper.copy_attr( + quantized_conv_node, "alpha", self.node_name_mapping[swish_node_name].node.attr["alpha"] + ) if "explicit_paddings" in node.attr: - helper.copy_attr(quantized_conv_node, "explicit_paddings", - node.attr["explicit_paddings"]) + helper.copy_attr(quantized_conv_node, "explicit_paddings", node.attr["explicit_paddings"]) helper.copy_attr(quantized_conv_node, "dilations", node.attr["dilations"]) input_data_type = dtypes.quint8 if self._find_relu_node(node) else dtypes.qint8 helper.set_attr_dtype(quantized_conv_node, "Tinput", input_data_type) - helper.set_attr_dtype(quantized_conv_node, "Tfilter",dtypes.qint8) + helper.set_attr_dtype(quantized_conv_node, "Tfilter", dtypes.qint8) helper.set_attr_dtype(quantized_conv_node, "Tsummand", dtypes.qint32) helper.set_attr_dtype(quantized_conv_node, "out_type", dtypes.qint32) - helper.set_attr_dtype(quantized_conv_node, "Tbias", dtypes.float32 - if self.device == 'gpu' else dtypes.qint32) - fused_ops = [b'BiasAdd', b'_FusedSwish'] - helper.set_attr_string_list(quantized_conv_node, 'fused_ops', fused_ops) - helper.set_attr_type_list(quantized_conv_node, 'Thost_inputs', [ - input_data_type.as_datatype_enum, - dtypes.qint8.as_datatype_enum, - dtypes.float32.as_datatype_enum if self.device == 'gpu' else dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) - helper.set_attr_type_list(quantized_conv_node, 'Thost_outputs', [ - dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, ]) + helper.set_attr_dtype( + quantized_conv_node, "Tbias", dtypes.float32 if self.device == "gpu" else dtypes.qint32 + ) + fused_ops = [b"BiasAdd", b"_FusedSwish"] + helper.set_attr_string_list(quantized_conv_node, "fused_ops", fused_ops) + helper.set_attr_type_list( + quantized_conv_node, + "Thost_inputs", + [ + input_data_type.as_datatype_enum, + dtypes.qint8.as_datatype_enum, + dtypes.float32.as_datatype_enum if self.device == "gpu" else dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + helper.set_attr_type_list( + quantized_conv_node, + "Thost_outputs", + [ + dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) self.add_output_graph_node(quantized_conv_node) - quantize_down_name = self._add_quantize_down_nodes( - node, quantized_node_name, 
dtypes.qint8, False) + quantize_down_name = self._add_quantize_down_nodes(node, quantized_node_name, dtypes.qint8, False) self._intel_cpu_add_dequantize_result_node( - quantize_down_name, swish_node_name, dtype=dtypes.qint8, - performance_only=self.performance_only) + quantize_down_name, swish_node_name, dtype=dtypes.qint8, performance_only=self.performance_only + ) else: new_node = node_def_pb2.NodeDef() new_node.CopyFrom(node) @@ -1677,27 +1761,26 @@ def apply_newly_conv_biasadd_addn_fusion(self, match_node_name): add_b_node_name = helper.node_name_from_input(second_node.input[1]) add_b_node = self.node_name_mapping[add_b_node_name].node - if add_a_node.op != 'Const' and add_b_node.op == 'Const': - need_insert_dummy_biasadd = 0 + if add_a_node.op != "Const" and add_b_node.op == "Const": + need_insert_dummy_biasadd = 0 if need_insert_dummy_biasadd: - new_match_node_name = self._insert_dummy_biasadd(match_node_name, matched_node) - #after insert dummy biasadd, that is Conv+dummybiasadd+add*+add* - return self.apply_newly_conv_biasadd_addn_fusion(new_match_node_name[:4] + [new_match_node_name[-1]]) + new_match_node_name = self._insert_dummy_biasadd(match_node_name, matched_node) + # after insert dummy biasadd, that is Conv+dummybiasadd+add*+add* + return self.apply_newly_conv_biasadd_addn_fusion(new_match_node_name[:4] + [new_match_node_name[-1]]) third_node = self.node_name_mapping[match_node_name[3]].node sumadd_a_node_name = helper.node_name_from_input(third_node.input[0]) sumadd_a_node = self.node_name_mapping[sumadd_a_node_name].node sumadd_b_node_name = helper.node_name_from_input(third_node.input[1]) sumadd_b_node = self.node_name_mapping[sumadd_b_node_name].node - if sumadd_a_node.op != 'Const' and sumadd_b_node.op == 'Const': + if sumadd_a_node.op != "Const" and sumadd_b_node.op == "Const": return self.apply_newly_conv_biasadd_fusion(match_node_name[:3] + [new_match_node_name[-1]]) - sum_index = 1 if match_node_name[2] == self.node_name_mapping[ - match_node_name[3]].node.input[0] else 0 + sum_index = 1 if match_node_name[2] == self.node_name_mapping[match_node_name[3]].node.input[0] else 0 sum_node_name = self.node_name_mapping[match_node_name[3]].node.input[sum_index] deq_node = self.node_name_mapping[sum_node_name].node - if deq_node.op != 'Dequantize' or deq_node.op.find("Quantize") != -1: - return self.apply_newly_conv_biasadd_fusion(match_node_name[:3]+[match_node_name[-1]]) + if deq_node.op != "Dequantize" or deq_node.op.find("Quantize") != -1: + return self.apply_newly_conv_biasadd_fusion(match_node_name[:3] + [match_node_name[-1]]) control_inputs, normal_inputs = self._get_node_input(matched_node.node.name) _, q_inputs = self._get_node_input(normal_inputs[0]) @@ -1708,9 +1791,9 @@ def apply_newly_conv_biasadd_addn_fusion(self, match_node_name): weights_min_name = weights_name[1] weights_max_name = weights_name[2] - q_weights_name, q_weights_min_name, q_weights_max_name = \ - self._intel_cpu_quantize_weight_eightbit( - matched_node.node.op, self.node_name_mapping[weights_name[0]].node, self.per_channel) + q_weights_name, q_weights_min_name, q_weights_max_name = self._intel_cpu_quantize_weight_eightbit( + matched_node.node.op, self.node_name_mapping[weights_name[0]].node, self.per_channel + ) all_input_names = q_inputs[:1] + [q_weights_name] + q_inputs[1:] all_input_names.append(q_weights_min_name) @@ -1732,54 +1815,56 @@ def apply_newly_conv_biasadd_addn_fusion(self, match_node_name): quantized_node_name = node.name + "_eightbit_quantized_conv" bias_node_name = 
self.node_name_mapping[match_node_name[2]].node.input[1] - quantized_node_input_names = all_input_names[:2] + [ - bias_node_name - ] + all_input_names[2:] + [ - sum_node_name - ] + control_inputs - node_op = "_FusedQuantizedConv2D" if node.op == 'Conv2D' \ - else '_FusedQuantizedDepthwiseConv2D' + quantized_node_input_names = ( + all_input_names[:2] + [bias_node_name] + all_input_names[2:] + [sum_node_name] + control_inputs + ) + node_op = "_FusedQuantizedConv2D" if node.op == "Conv2D" else "_FusedQuantizedDepthwiseConv2D" - quantized_conv_node = helper.create_node(node_op, quantized_node_name, - quantized_node_input_names) + quantized_conv_node = helper.create_node(node_op, quantized_node_name, quantized_node_input_names) helper.copy_attr(quantized_conv_node, "strides", node.attr["strides"]) helper.copy_attr(quantized_conv_node, "padding", node.attr["padding"]) helper.copy_attr(quantized_conv_node, "data_format", node.attr["data_format"]) if "explicit_paddings" in node.attr: - helper.copy_attr(quantized_conv_node, "explicit_paddings", - node.attr["explicit_paddings"]) + helper.copy_attr(quantized_conv_node, "explicit_paddings", node.attr["explicit_paddings"]) helper.copy_attr(quantized_conv_node, "dilations", node.attr["dilations"]) - input_data_type = dtypes.quint8 if self._find_relu_node( - node) else dtypes.qint8 + input_data_type = dtypes.quint8 if self._find_relu_node(node) else dtypes.qint8 helper.set_attr_dtype(quantized_conv_node, "Tinput", input_data_type) helper.set_attr_dtype(quantized_conv_node, "Tfilter", dtypes.qint8) helper.set_attr_dtype(quantized_conv_node, "out_type", dtypes.qint32) - helper.set_attr_string_list(quantized_conv_node, 'fused_ops', [b'BiasAdd', b'Sum']) + helper.set_attr_string_list(quantized_conv_node, "fused_ops", [b"BiasAdd", b"Sum"]) helper.set_attr_dtype(quantized_conv_node, "Tbias", dtypes.float32) - # if self.device == 'gpu' else dtypes.qint32) + # if self.device == 'gpu' else dtypes.qint32) helper.set_attr_dtype(quantized_conv_node, "Tsummand", dtypes.qint32) - helper.set_attr_type_list(quantized_conv_node, 'Thost_inputs', [ - input_data_type.as_datatype_enum, - dtypes.qint8.as_datatype_enum, - dtypes.float32.as_datatype_enum,# if self.device == 'gpu' else dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) - helper.set_attr_type_list(quantized_conv_node, 'Thost_outputs', [ - dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, ]) + helper.set_attr_type_list( + quantized_conv_node, + "Thost_inputs", + [ + input_data_type.as_datatype_enum, + dtypes.qint8.as_datatype_enum, + dtypes.float32.as_datatype_enum, # if self.device == 'gpu' else dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + helper.set_attr_type_list( + quantized_conv_node, + "Thost_outputs", + [ + dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) self.add_output_graph_node(quantized_conv_node) - quantize_down_name = self._add_quantize_down_nodes( - node, quantized_node_name, dtypes.qint8, False) + quantize_down_name = self._add_quantize_down_nodes(node, quantized_node_name, dtypes.qint8, False) self._intel_cpu_add_dequantize_result_node( - quantize_down_name, 
match_node_name[3], dtype=dtypes.qint8, - performance_only=self.performance_only) + quantize_down_name, match_node_name[3], dtype=dtypes.qint8, performance_only=self.performance_only + ) else: new_node = node_def_pb2.NodeDef() new_node.CopyFrom(node) @@ -1798,20 +1883,19 @@ def apply_the_transform(self): matched_rule, matched_node_name = self._is_match_conv(self.sorted_patterns, True) if matched_node_name: self.output_graph = graph_pb2.GraphDef() - fusion_name = ''.join(matched_rule) + fusion_name = "".join(matched_rule) if fusion_name in self.fusion_mapping: - if fusion_name.find('DequantizeConv2DAddReluQuantizeV2') != -1: + if fusion_name.find("DequantizeConv2DAddReluQuantizeV2") != -1: for input_name in self.node_name_mapping[matched_node_name[2]].node.input: input_node_name = helper.node_name_from_input(input_name) if input_node_name != matched_node_name[1]: add_const_input_node = self.node_name_mapping[input_node_name].node - add_node_content = tensor_util.MakeNdarray( - add_const_input_node.attr["value"].tensor) + add_node_content = tensor_util.MakeNdarray(add_const_input_node.attr["value"].tensor) if add_node_content.ndim != 1: - fusion_name = 'DequantizeConv2DQuantizeV2' - matched_node_name = matched_node_name[:2]+[matched_node_name[-1]] + fusion_name = "DequantizeConv2DQuantizeV2" + matched_node_name = matched_node_name[:2] + [matched_node_name[-1]] self.fusion_mapping[fusion_name](matched_node_name) - else: # pragma: no cover + else: # pragma: no cover self.logger.info("Unknown fusion pattern {}.".format(fusion_name)) if self.remove_redundant_quant_flag: self.input_graph = self.remove_redundant_quantization(self.input_graph) @@ -1839,19 +1923,18 @@ def _is_match_conv(self, patterns, qdq_inserted=False): for k, v in enumerate(self.op_list): if v in set(fusion[1] for fusion in patterns): - cur_node = self.node_name_mapping[list( - self.node_name_mapping.keys())[k]].node + cur_node = self.node_name_mapping[list(self.node_name_mapping.keys())[k]].node if cur_node.name != self.start_node_name: continue - if ((v in ("Conv2D", "DepthwiseConv2dNative") - and not self.enable_s8) - ) and not self._find_relu_node(cur_node): + if ((v in ("Conv2D", "DepthwiseConv2dNative") and not self.enable_s8)) and not self._find_relu_node( + cur_node + ): self.exclude_conv_nodes.append(cur_node.name) continue _, normal_inputs = self._get_node_input(cur_node.name) - if self.node_name_mapping[normal_inputs[1].rsplit(':')[0]].node.op == 'Split': + if self.node_name_mapping[normal_inputs[1].rsplit(":")[0]].node.op == "Split": self.exclude_conv_nodes.append(cur_node.name) continue @@ -1864,8 +1947,10 @@ def _is_match_conv(self, patterns, qdq_inserted=False): continue if qdq_inserted: - if self.node_name_mapping[normal_inputs[0]].node.op != "Dequantize" or \ - self.node_name_mapping[normal_inputs[1]].node.op != "Dequantize": + if ( + self.node_name_mapping[normal_inputs[0]].node.op != "Dequantize" + or self.node_name_mapping[normal_inputs[1]].node.op != "Dequantize" + ): continue sub_rule_len = len(sub_rule) - 2 @@ -1884,27 +1969,22 @@ def _is_match_conv(self, patterns, qdq_inserted=False): self.logger.debug("Fail to match {}".format(sub_rule)) break - next_node_name = self.node_name_mapping[ - cur_node_name].output[0] + next_node_name = self.node_name_mapping[cur_node_name].output[0] - is_shared_output = True if len( - self.node_name_mapping[cur_node_name].output - ) > 1 else False + is_shared_output = True if len(self.node_name_mapping[cur_node_name].output) > 1 else False add_op_quantizable = True 
is_hardswish = False if is_shared_output: - if next_node_name.find('hard_swish') != -1: + if next_node_name.find("hard_swish") != -1: self.logger.debug("Find Hard Swish pattern ......") is_hardswish = True count = count + 1 - if next_node_name.find('add') == -1: - next_node_name = self.node_name_mapping[ - cur_node_name].output[1] + if next_node_name.find("add") == -1: + next_node_name = self.node_name_mapping[cur_node_name].output[1] else: add_op_quantizable = False - next_node_op = self.node_name_mapping[ - next_node_name].node.op + next_node_op = self.node_name_mapping[next_node_name].node.op if add_op_quantizable and next_node_op == sub_rule[-sub_rule_len]: if not is_shared_output: matched_node_name.append(next_node_name) @@ -1925,14 +2005,18 @@ def _is_match_conv(self, patterns, qdq_inserted=False): if sub_rule_len == 1: matched_node_name.append(sub_rule[-1]) - if check_hardswish and sub_rule[-2] == 'Mul' and \ - sub_rule[-3] == 'Mul' and sub_rule[-4] == 'Relu6' and \ - sub_rule[-5] == 'Add' and count != 1: + if ( + check_hardswish + and sub_rule[-2] == "Mul" + and sub_rule[-3] == "Mul" + and sub_rule[-4] == "Relu6" + and sub_rule[-5] == "Add" + and count != 1 + ): matched_node_name.clear() self.logger.debug("Fail to match {}.".format(sub_rule)) break - self.logger.debug("Match {} on nodes {}.". - format(sub_rule, matched_node_name)) + self.logger.debug("Match {} on nodes {}.".format(sub_rule, matched_node_name)) return sub_rule, matched_node_name return None, None diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_deconv.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_deconv.py index ff5e59ee241..62ef524543b 100644 --- a/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_deconv.py +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_deconv.py @@ -16,32 +16,31 @@ # limitations under the License. 
"""Quantize Conv2DBackpropInput and Conv3DBackpropInputV2.""" +import numpy as np import tensorflow as tf -from tensorflow.core.framework import graph_pb2 -from tensorflow.core.framework import node_def_pb2 -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import tensor_util +from tensorflow.core.framework import graph_pb2, node_def_pb2 +from tensorflow.python.framework import dtypes, tensor_util from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper as helper + from ..quantize_graph_base import QuantizeNodeBase -import numpy as np + class FuseNodeStartWithDeconv2d(QuantizeNodeBase): """Quantize Conv2DBackpropInput and Conv3DBackpropInputV2 and apply the fusion.""" + exclude_deconv_nodes = [] def __init__(self, **kwargs): """Initilization.""" super().__init__(**kwargs) - self.sorted_patterns = sorted(self.patterns, - key=lambda i: len(i), - reverse=True) + self.sorted_patterns = sorted(self.patterns, key=lambda i: len(i), reverse=True) if self.new_api: self.fusion_mapping = { - 'DequantizeConv2DBackpropInputQuantizeV2': self.apply_single_deconv2d_fusion, - 'DequantizeConv2DBackpropInputBiasAddQuantizeV2': self.apply_deconv2d_biasadd_fusion, - 'DequantizeConv3DBackpropInputV2QuantizeV2': self.apply_single_deconv3d_fusion, - 'DequantizeConv3DBackpropInputV2BiasAddQuantizeV2': self.apply_deconv3d_biasadd_fusion + "DequantizeConv2DBackpropInputQuantizeV2": self.apply_single_deconv2d_fusion, + "DequantizeConv2DBackpropInputBiasAddQuantizeV2": self.apply_deconv2d_biasadd_fusion, + "DequantizeConv3DBackpropInputV2QuantizeV2": self.apply_single_deconv3d_fusion, + "DequantizeConv3DBackpropInputV2BiasAddQuantizeV2": self.apply_deconv3d_biasadd_fusion, } def apply_single_deconv2d_fusion(self, match_node_name): @@ -61,9 +60,9 @@ def apply_single_deconv2d_fusion(self, match_node_name): weights_min_name = weights_name[1] weights_max_name = weights_name[2] - q_weights_name, q_weights_min_name, q_weights_max_name = \ - self._intel_cpu_quantize_weight_eightbit( - matched_node.node.op, self.node_name_mapping[weights_name[0]].node, self.per_channel) + q_weights_name, q_weights_min_name, q_weights_max_name = self._intel_cpu_quantize_weight_eightbit( + matched_node.node.op, self.node_name_mapping[weights_name[0]].node, self.per_channel + ) all_input_names = [normal_inputs[0]] + [q_weights_name] + q_inputs[:1] all_input_names.append(q_weights_min_name) @@ -84,46 +83,50 @@ def apply_single_deconv2d_fusion(self, match_node_name): quantized_node_name = node.name + "_eightbit_quantized_deconv" - quantized_node_input_names = all_input_names[:2] + \ - all_input_names[2:] + control_inputs + quantized_node_input_names = all_input_names[:2] + all_input_names[2:] + control_inputs node_op = "_FusedQuantizedDeconv2D" - quantized_deconv_node = helper.create_node(node_op, quantized_node_name, - quantized_node_input_names) + quantized_deconv_node = helper.create_node(node_op, quantized_node_name, quantized_node_input_names) helper.copy_attr(quantized_deconv_node, "strides", node.attr["strides"]) helper.copy_attr(quantized_deconv_node, "padding", node.attr["padding"]) helper.copy_attr(quantized_deconv_node, "data_format", node.attr["data_format"]) if "explicit_paddings" in node.attr: - helper.copy_attr(quantized_deconv_node, "explicit_paddings", - node.attr["explicit_paddings"]) + helper.copy_attr(quantized_deconv_node, "explicit_paddings", node.attr["explicit_paddings"]) helper.copy_attr(quantized_deconv_node, "dilations", node.attr["dilations"]) 
input_data_type = dtypes.quint8 if self._find_relu_node(node) else dtypes.qint8 helper.set_attr_dtype(quantized_deconv_node, "Tinput", input_data_type) - helper.set_attr_dtype(quantized_deconv_node, "Tfilter",dtypes.qint8) + helper.set_attr_dtype(quantized_deconv_node, "Tfilter", dtypes.qint8) # helper.set_attr_string(quantized_conv_node, '_kernel', b'QuantizedMklOp') helper.set_attr_dtype(quantized_deconv_node, "out_type", dtypes.qint32) # helper.set_attr_dtype(quantized_conv_node, "alpha", dtypes.quint8) - helper.set_attr_type_list(quantized_deconv_node, 'Thost_inputs', [ - dtypes.int32.as_datatype_enum, - dtypes.qint8.as_datatype_enum, - input_data_type.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) - helper.set_attr_type_list(quantized_deconv_node, 'Thost_outputs', [ - dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, ]) + helper.set_attr_type_list( + quantized_deconv_node, + "Thost_inputs", + [ + dtypes.int32.as_datatype_enum, + dtypes.qint8.as_datatype_enum, + input_data_type.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + helper.set_attr_type_list( + quantized_deconv_node, + "Thost_outputs", + [ + dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) self.add_output_graph_node(quantized_deconv_node) - quantize_down_name = self._add_quantize_down_nodes( - node, quantized_node_name, dtypes.qint8, False) - self._intel_cpu_add_dequantize_result_node( - quantize_down_name, match_node_name[1], dtypes.qint8) + quantize_down_name = self._add_quantize_down_nodes(node, quantized_node_name, dtypes.qint8, False) + self._intel_cpu_add_dequantize_result_node(quantize_down_name, match_node_name[1], dtypes.qint8) else: new_node = node_def_pb2.NodeDef() @@ -161,9 +164,9 @@ def apply_deconv2d_biasadd_fusion(self, match_node_name): weights_min_name = weights_name[1] weights_max_name = weights_name[2] - q_weights_name, q_weights_min_name, q_weights_max_name = \ - self._intel_cpu_quantize_weight_eightbit( - matched_node.node.op, self.node_name_mapping[weights_name[0]].node, self.per_channel) + q_weights_name, q_weights_min_name, q_weights_max_name = self._intel_cpu_quantize_weight_eightbit( + matched_node.node.op, self.node_name_mapping[weights_name[0]].node, self.per_channel + ) all_input_names = [normal_inputs[0]] + [q_weights_name] + q_inputs[:1] all_input_names.append(q_weights_min_name) @@ -185,50 +188,56 @@ def apply_deconv2d_biasadd_fusion(self, match_node_name): quantized_node_name = node.name + "_eightbit_quantized_deconv" bias_node_name = self.node_name_mapping[match_node_name[2]].node.input[1] - quantized_node_input_names = all_input_names[:3] + \ - [bias_node_name] + all_input_names[3:] + control_inputs + quantized_node_input_names = ( + all_input_names[:3] + [bias_node_name] + all_input_names[3:] + control_inputs + ) node_op = "_FusedQuantizedDeconv2D" - quantized_deconv_node = helper.create_node(node_op, quantized_node_name, - quantized_node_input_names) + quantized_deconv_node = helper.create_node(node_op, quantized_node_name, quantized_node_input_names) helper.copy_attr(quantized_deconv_node, "strides", node.attr["strides"]) helper.copy_attr(quantized_deconv_node, "padding", node.attr["padding"]) helper.copy_attr(quantized_deconv_node, 
"data_format", node.attr["data_format"]) if "explicit_paddings" in node.attr: - helper.copy_attr(quantized_deconv_node, "explicit_paddings", - node.attr["explicit_paddings"]) + helper.copy_attr(quantized_deconv_node, "explicit_paddings", node.attr["explicit_paddings"]) helper.copy_attr(quantized_deconv_node, "dilations", node.attr["dilations"]) input_data_type = dtypes.quint8 if self._find_relu_node(node) else dtypes.qint8 helper.set_attr_dtype(quantized_deconv_node, "Tinput", input_data_type) - helper.set_attr_dtype(quantized_deconv_node, "Tfilter",dtypes.qint8) + helper.set_attr_dtype(quantized_deconv_node, "Tfilter", dtypes.qint8) # helper.set_attr_string(quantized_conv_node, '_kernel', b'QuantizedMklOp') helper.set_attr_dtype(quantized_deconv_node, "out_type", dtypes.qint32) # helper.set_attr_dtype(quantized_conv_node, "alpha", dtypes.quint8) helper.set_attr_dtype(quantized_deconv_node, "Tbias", dtypes.float32) - #if self.device == 'gpu' else dtypes.qint32) - helper.set_attr_string_list(quantized_deconv_node, 'fused_ops', [b'BiasAdd']) - - helper.set_attr_type_list(quantized_deconv_node, 'Thost_inputs', [ - dtypes.int32.as_datatype_enum, - dtypes.qint8.as_datatype_enum, - input_data_type.as_datatype_enum, - dtypes.float32.as_datatype_enum,# if self.device == 'gpu' else dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) - helper.set_attr_type_list(quantized_deconv_node, 'Thost_outputs', [ - dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, ]) + # if self.device == 'gpu' else dtypes.qint32) + helper.set_attr_string_list(quantized_deconv_node, "fused_ops", [b"BiasAdd"]) + + helper.set_attr_type_list( + quantized_deconv_node, + "Thost_inputs", + [ + dtypes.int32.as_datatype_enum, + dtypes.qint8.as_datatype_enum, + input_data_type.as_datatype_enum, + dtypes.float32.as_datatype_enum, # if self.device == 'gpu' else dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + helper.set_attr_type_list( + quantized_deconv_node, + "Thost_outputs", + [ + dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) self.add_output_graph_node(quantized_deconv_node) - quantize_down_name = self._add_quantize_down_nodes( - node, quantized_node_name, dtypes.qint8, False) - self._intel_cpu_add_dequantize_result_node( - quantize_down_name, match_node_name[2], dtypes.qint8) + quantize_down_name = self._add_quantize_down_nodes(node, quantized_node_name, dtypes.qint8, False) + self._intel_cpu_add_dequantize_result_node(quantize_down_name, match_node_name[2], dtypes.qint8) else: new_node = node_def_pb2.NodeDef() @@ -252,9 +261,9 @@ def apply_single_deconv3d_fusion(self, match_node_name): weights_min_name = weights_name[1] weights_max_name = weights_name[2] - q_weights_name, q_weights_min_name, q_weights_max_name = \ - self._intel_cpu_quantize_weight_eightbit( - matched_node.node.op, self.node_name_mapping[weights_name[0]].node, self.per_channel) + q_weights_name, q_weights_min_name, q_weights_max_name = self._intel_cpu_quantize_weight_eightbit( + matched_node.node.op, self.node_name_mapping[weights_name[0]].node, self.per_channel + ) all_input_names = [normal_inputs[0]] + [q_weights_name] + q_inputs[:1] all_input_names.append(q_weights_min_name) @@ -275,46 +284,50 
@@ def apply_single_deconv3d_fusion(self, match_node_name): quantized_node_name = node.name + "_eightbit_quantized_deconv" - quantized_node_input_names = all_input_names[:2] + \ - all_input_names[2:] + control_inputs + quantized_node_input_names = all_input_names[:2] + all_input_names[2:] + control_inputs node_op = "_FusedQuantizedDeconv3D" - quantized_deconv_node = helper.create_node(node_op, quantized_node_name, - quantized_node_input_names) + quantized_deconv_node = helper.create_node(node_op, quantized_node_name, quantized_node_input_names) helper.copy_attr(quantized_deconv_node, "strides", node.attr["strides"]) helper.copy_attr(quantized_deconv_node, "padding", node.attr["padding"]) helper.copy_attr(quantized_deconv_node, "data_format", node.attr["data_format"]) if "explicit_paddings" in node.attr: - helper.copy_attr(quantized_deconv_node, "explicit_paddings", - node.attr["explicit_paddings"]) + helper.copy_attr(quantized_deconv_node, "explicit_paddings", node.attr["explicit_paddings"]) helper.copy_attr(quantized_deconv_node, "dilations", node.attr["dilations"]) input_data_type = dtypes.quint8 if self._find_relu_node(node) else dtypes.qint8 helper.set_attr_dtype(quantized_deconv_node, "Tinput", input_data_type) - helper.set_attr_dtype(quantized_deconv_node, "Tfilter",dtypes.qint8) + helper.set_attr_dtype(quantized_deconv_node, "Tfilter", dtypes.qint8) # helper.set_attr_string(quantized_conv_node, '_kernel', b'QuantizedMklOp') helper.set_attr_dtype(quantized_deconv_node, "out_type", dtypes.qint32) # helper.set_attr_dtype(quantized_conv_node, "alpha", dtypes.quint8) - helper.set_attr_type_list(quantized_deconv_node, 'Thost_inputs', [ - dtypes.int32.as_datatype_enum, - dtypes.qint8.as_datatype_enum, - input_data_type.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) - helper.set_attr_type_list(quantized_deconv_node, 'Thost_outputs', [ - dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, ]) + helper.set_attr_type_list( + quantized_deconv_node, + "Thost_inputs", + [ + dtypes.int32.as_datatype_enum, + dtypes.qint8.as_datatype_enum, + input_data_type.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + helper.set_attr_type_list( + quantized_deconv_node, + "Thost_outputs", + [ + dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) self.add_output_graph_node(quantized_deconv_node) - quantize_down_name = self._add_quantize_down_nodes( - node, quantized_node_name, dtypes.qint8, False) - self._intel_cpu_add_dequantize_result_node( - quantize_down_name, match_node_name[1], dtypes.qint8) + quantize_down_name = self._add_quantize_down_nodes(node, quantized_node_name, dtypes.qint8, False) + self._intel_cpu_add_dequantize_result_node(quantize_down_name, match_node_name[1], dtypes.qint8) else: new_node = node_def_pb2.NodeDef() @@ -338,9 +351,9 @@ def apply_deconv3d_biasadd_fusion(self, match_node_name): weights_min_name = weights_name[1] weights_max_name = weights_name[2] - q_weights_name, q_weights_min_name, q_weights_max_name = \ - self._intel_cpu_quantize_weight_eightbit( - matched_node.node.op, self.node_name_mapping[weights_name[0]].node, self.per_channel) + q_weights_name, q_weights_min_name, q_weights_max_name = self._intel_cpu_quantize_weight_eightbit( + 
matched_node.node.op, self.node_name_mapping[weights_name[0]].node, self.per_channel + ) all_input_names = [normal_inputs[0]] + [q_weights_name] + q_inputs[:1] all_input_names.append(q_weights_min_name) @@ -362,50 +375,56 @@ def apply_deconv3d_biasadd_fusion(self, match_node_name): quantized_node_name = node.name + "_eightbit_quantized_deconv" bias_node_name = self.node_name_mapping[match_node_name[2]].node.input[1] - quantized_node_input_names = all_input_names[:3] + \ - [bias_node_name] + all_input_names[3:] + control_inputs + quantized_node_input_names = ( + all_input_names[:3] + [bias_node_name] + all_input_names[3:] + control_inputs + ) node_op = "_FusedQuantizedDeconv3D" - quantized_deconv_node = helper.create_node(node_op, quantized_node_name, - quantized_node_input_names) + quantized_deconv_node = helper.create_node(node_op, quantized_node_name, quantized_node_input_names) helper.copy_attr(quantized_deconv_node, "strides", node.attr["strides"]) helper.copy_attr(quantized_deconv_node, "padding", node.attr["padding"]) helper.copy_attr(quantized_deconv_node, "data_format", node.attr["data_format"]) if "explicit_paddings" in node.attr: - helper.copy_attr(quantized_deconv_node, "explicit_paddings", - node.attr["explicit_paddings"]) + helper.copy_attr(quantized_deconv_node, "explicit_paddings", node.attr["explicit_paddings"]) helper.copy_attr(quantized_deconv_node, "dilations", node.attr["dilations"]) input_data_type = dtypes.quint8 if self._find_relu_node(node) else dtypes.qint8 helper.set_attr_dtype(quantized_deconv_node, "Tinput", input_data_type) - helper.set_attr_dtype(quantized_deconv_node, "Tfilter",dtypes.qint8) + helper.set_attr_dtype(quantized_deconv_node, "Tfilter", dtypes.qint8) # helper.set_attr_string(quantized_conv_node, '_kernel', b'QuantizedMklOp') helper.set_attr_dtype(quantized_deconv_node, "out_type", dtypes.qint32) # helper.set_attr_dtype(quantized_conv_node, "alpha", dtypes.quint8) helper.set_attr_dtype(quantized_deconv_node, "Tbias", dtypes.float32) - #if self.device == 'gpu' else dtypes.qint32) - helper.set_attr_string_list(quantized_deconv_node, 'fused_ops', [b'BiasAdd']) - - helper.set_attr_type_list(quantized_deconv_node, 'Thost_inputs', [ - dtypes.int32.as_datatype_enum, - dtypes.qint8.as_datatype_enum, - input_data_type.as_datatype_enum, - dtypes.float32.as_datatype_enum,# if self.device == 'gpu' else dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) - helper.set_attr_type_list(quantized_deconv_node, 'Thost_outputs', [ - dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, ]) + # if self.device == 'gpu' else dtypes.qint32) + helper.set_attr_string_list(quantized_deconv_node, "fused_ops", [b"BiasAdd"]) + + helper.set_attr_type_list( + quantized_deconv_node, + "Thost_inputs", + [ + dtypes.int32.as_datatype_enum, + dtypes.qint8.as_datatype_enum, + input_data_type.as_datatype_enum, + dtypes.float32.as_datatype_enum, # if self.device == 'gpu' else dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + helper.set_attr_type_list( + quantized_deconv_node, + "Thost_outputs", + [ + dtypes.qint32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) self.add_output_graph_node(quantized_deconv_node) - quantize_down_name = 
self._add_quantize_down_nodes( - node, quantized_node_name, dtypes.qint8, False) - self._intel_cpu_add_dequantize_result_node( - quantize_down_name, match_node_name[2], dtypes.qint8) + quantize_down_name = self._add_quantize_down_nodes(node, quantized_node_name, dtypes.qint8, False) + self._intel_cpu_add_dequantize_result_node(quantize_down_name, match_node_name[2], dtypes.qint8) else: new_node = node_def_pb2.NodeDef() @@ -425,10 +444,10 @@ def apply_the_transform(self): matched_rule, matched_node_name = self._is_match_deconv(self.sorted_patterns, True) if matched_node_name: self.output_graph = graph_pb2.GraphDef() - fusion_name = ''.join(matched_rule) + fusion_name = "".join(matched_rule) if fusion_name in self.fusion_mapping: self.fusion_mapping[fusion_name](matched_node_name) - else: # pragma: no cover + else: # pragma: no cover self.logger.info("Unknown fusion pattern {}.".format(fusion_name)) if self.remove_redundant_quant_flag: self.input_graph = self.remove_redundant_quantization(self.input_graph) @@ -456,8 +475,7 @@ def _is_match_deconv(self, patterns, qdq_inserted=False): for k, v in enumerate(self.op_list): if v in set(fusion[1] for fusion in patterns): - cur_node = self.node_name_mapping[list( - self.node_name_mapping.keys())[k]].node + cur_node = self.node_name_mapping[list(self.node_name_mapping.keys())[k]].node if cur_node.name != self.start_node_name: continue @@ -476,8 +494,10 @@ def _is_match_deconv(self, patterns, qdq_inserted=False): input_index = 0 if sub_rule[1] in ("Conv2DBackpropInput", "Conv3DBackpropInputV2"): input_index = 2 - if self.node_name_mapping[normal_inputs[input_index]].node.op != "Dequantize" or \ - self.node_name_mapping[normal_inputs[1]].node.op != "Dequantize": + if ( + self.node_name_mapping[normal_inputs[input_index]].node.op != "Dequantize" + or self.node_name_mapping[normal_inputs[1]].node.op != "Dequantize" + ): continue sub_rule_len = len(sub_rule) - 2 @@ -494,15 +514,11 @@ def _is_match_deconv(self, patterns, qdq_inserted=False): self.logger.debug("Fail to match {}".format(sub_rule)) break - next_node_name = self.node_name_mapping[ - cur_node_name].output[0] + next_node_name = self.node_name_mapping[cur_node_name].output[0] - is_shared_output = True if len( - self.node_name_mapping[cur_node_name].output - ) > 1 else False + is_shared_output = True if len(self.node_name_mapping[cur_node_name].output) > 1 else False - next_node_op = self.node_name_mapping[ - next_node_name].node.op + next_node_op = self.node_name_mapping[next_node_name].node.op if next_node_op == sub_rule[-sub_rule_len]: if not is_shared_output: matched_node_name.append(next_node_name) @@ -519,8 +535,7 @@ def _is_match_deconv(self, patterns, qdq_inserted=False): if sub_rule_len == 1: matched_node_name.append(sub_rule[-1]) - self.logger.debug("Match {} on nodes {}.". - format(sub_rule, matched_node_name)) + self.logger.debug("Match {} on nodes {}.".format(sub_rule, matched_node_name)) return sub_rule, matched_node_name return None, None diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_in.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_in.py index 9e794509d63..23db0ce7db4 100644 --- a/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_in.py +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_in.py @@ -16,27 +16,26 @@ # limitations under the License. 
"""Quantize FusedInstanceNorm.""" -from tensorflow.core.framework import graph_pb2 -from tensorflow.core.framework import node_def_pb2 +from tensorflow.core.framework import graph_pb2, node_def_pb2 from tensorflow.python.framework import dtypes from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper as helper + from ..quantize_graph_base import QuantizeNodeBase + class FuseNodeStartWithFusedInstanceNorm(QuantizeNodeBase): """Quantize FusedInstanceNorm and apply the fusion.""" def __init__(self, **kwargs): """Initilization.""" super().__init__(**kwargs) - self.sorted_patterns = sorted(self.patterns, - key=lambda i: len(i), - reverse=True) + self.sorted_patterns = sorted(self.patterns, key=lambda i: len(i), reverse=True) if self.new_api: self.fusion_mapping = { - '_MklFusedInstanceNormLeakyRelu': self.apply_newly_in_relu_fusion, - '_MklFusedInstanceNormRelu': self.apply_newly_in_relu_fusion, - '_MklFusedInstanceNorm': self.apply_newly_in_relu_fusion + "_MklFusedInstanceNormLeakyRelu": self.apply_newly_in_relu_fusion, + "_MklFusedInstanceNormRelu": self.apply_newly_in_relu_fusion, + "_MklFusedInstanceNorm": self.apply_newly_in_relu_fusion, } else: self.fusion_mapping = {} @@ -45,8 +44,7 @@ def apply_newly_in_relu_fusion(self, match_node_name): """Apply FusedInstanceNorm Relu/LeakyRelu fusion.""" matched_node = self.node_name_mapping[match_node_name[0]] skip_node_name = match_node_name[1:] - control_inputs, normal_inputs = self._get_node_input( - matched_node.node.name) + control_inputs, normal_inputs = self._get_node_input(matched_node.node.name) scale_name = normal_inputs[1] offset_name = normal_inputs[2] mean_name = normal_inputs[3] @@ -54,13 +52,13 @@ def apply_newly_in_relu_fusion(self, match_node_name): all_input_names = self._add_eightbit_prologue_nodes(matched_node.node.name) all_input_names = [ - all_input_names[0], + all_input_names[0], scale_name, offset_name, mean_name, variance_name, all_input_names[1], - all_input_names[2] + all_input_names[2], ] for _, node in enumerate(self.input_graph.node): @@ -69,34 +67,34 @@ def apply_newly_in_relu_fusion(self, match_node_name): elif node.name == match_node_name[0]: self.logger.debug("Matched node {} with input {}.".format(node.name, node.input)) - relu_node_name = match_node_name[1] if len(match_node_name)==2 else None + relu_node_name = match_node_name[1] if len(match_node_name) == 2 else None - node_op = '_QuantizedFusedInstanceNorm' + node_op = "_QuantizedFusedInstanceNorm" quantized_node_name = node.name + "_eightbit_quantized_in" output_min_node_name = quantized_node_name + "_input7_output_min" output_max_node_name = quantized_node_name + "_input8_output_max" - quantized_node_input_names = all_input_names + \ - [output_min_node_name] + [output_max_node_name] + control_inputs - output_min_node = helper.create_constant_node(output_min_node_name, -1., dtypes.float32) - output_max_node = helper.create_constant_node(output_max_node_name, 1., dtypes.float32) - quantized_in_node = helper.create_node(node_op, quantized_node_name, - quantized_node_input_names) - + quantized_node_input_names = ( + all_input_names + [output_min_node_name] + [output_max_node_name] + control_inputs + ) + output_min_node = helper.create_constant_node(output_min_node_name, -1.0, dtypes.float32) + output_max_node = helper.create_constant_node(output_max_node_name, 1.0, dtypes.float32) + quantized_in_node = helper.create_node(node_op, quantized_node_name, quantized_node_input_names) + if relu_node_name is not None: relu_node = 
self.node_name_mapping[relu_node_name].node if relu_node.op == "Relu": - helper.set_attr_string(quantized_in_node, "activation_mode", b'Relu') + helper.set_attr_string(quantized_in_node, "activation_mode", b"Relu") elif relu_node.op == "LeakyRelu": - helper.set_attr_string(quantized_in_node, "activation_mode", b'LeakyRelu') - helper.set_attr_float(quantized_in_node, "leakyrelu_alpha", relu_node.attr['alpha'].f) + helper.set_attr_string(quantized_in_node, "activation_mode", b"LeakyRelu") + helper.set_attr_float(quantized_in_node, "leakyrelu_alpha", relu_node.attr["alpha"].f) helper.set_attr_dtype(quantized_in_node, "T", dtypes.qint8) helper.set_attr_dtype(quantized_in_node, "U", dtypes.float32) helper.set_attr_dtype(quantized_in_node, "Tout", dtypes.qint8) - helper.copy_attr(quantized_in_node, 'reduction_axes', node.attr['reduction_axes']) + helper.copy_attr(quantized_in_node, "reduction_axes", node.attr["reduction_axes"]) + """# 0. - """ - # 0. x + x # 1. scale # 2. offset # 3. mean @@ -106,38 +104,45 @@ def apply_newly_in_relu_fusion(self, match_node_name): # 7. {output_min} # 8. {output_max} """ - helper.set_attr_type_list(quantized_in_node, 'input_types', [ - dtypes.qint8.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) - - - """ - # 0. output + helper.set_attr_type_list( + quantized_in_node, + "input_types", + [ + dtypes.qint8.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + """# 0. + + output # 1. output_min # 2. 
output_max """ - helper.set_attr_type_list(quantized_in_node, 'out_types', [ - dtypes.qint8.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) + helper.set_attr_type_list( + quantized_in_node, + "out_types", + [ + dtypes.qint8.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) self.add_output_graph_node(output_min_node) self.add_output_graph_node(output_max_node) self.add_output_graph_node(quantized_in_node) self._intel_cpu_add_dequantize_result_node( - quantized_output_name = quantized_node_name, - original_node_name = match_node_name[-1], - dtype = dtypes.qint8, - min_tensor_index = 1 - ) + quantized_output_name=quantized_node_name, + original_node_name=match_node_name[-1], + dtype=dtypes.qint8, + min_tensor_index=1, + ) else: new_node = node_def_pb2.NodeDef() @@ -147,7 +152,7 @@ def apply_newly_in_relu_fusion(self, match_node_name): def get_longest_fuse(self): """Get the longest fusion pattern.""" self._get_op_list() - real_patterns = [pattern[1 :-1] for pattern in self.sorted_patterns] + real_patterns = [pattern[1:-1] for pattern in self.sorted_patterns] # Cannot match if: self._is_match([['Q','IN','LeakyRelu','DQ'],['Q','IN','Relu','DQ'],['Q','IN','DQ']]) matched_rule, matched_node_name = self._is_match(real_patterns) return matched_rule, matched_node_name @@ -155,12 +160,12 @@ def get_longest_fuse(self): def apply_the_transform(self): """Quantize FusedInstanceNorm and apply the fusion pattern.""" self._get_op_list() - real_patterns = [pattern[1 :-1] for pattern in self.sorted_patterns] + real_patterns = [pattern[1:-1] for pattern in self.sorted_patterns] # Cannot match if: self._is_match([['Q','IN','LeakyRelu','DQ'],['Q','IN','Relu','DQ'],['Q','IN','DQ']]) matched_rule, matched_node_name = self._is_match(real_patterns) if matched_node_name: self.output_graph = graph_pb2.GraphDef() - fusion_name = ''.join(matched_rule) + fusion_name = "".join(matched_rule) if fusion_name in self.fusion_mapping: self.fusion_mapping[fusion_name](matched_node_name) else: diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_matmul.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_matmul.py index d275f6e9ed0..60aaf973767 100644 --- a/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_matmul.py +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_matmul.py @@ -17,64 +17,61 @@ """Quantize MatMul/BatchMatMul/BatchMatMulV2.""" import numpy as np - -from tensorflow.core.framework import graph_pb2 -from tensorflow.core.framework import node_def_pb2 -from tensorflow.python.framework import dtypes +from tensorflow.core.framework import graph_pb2, node_def_pb2 +from tensorflow.python.framework import dtypes, tensor_util from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper as helper + from ..quantize_graph_base import QuantizeNodeBase -from tensorflow.python.framework import tensor_util + class FuseNodeStartWithMatmul(QuantizeNodeBase): """Quantize MatMul/BatchMatMul/BatchMatMulV2 and apply the fusion.""" + exclude_matmul_nodes = [] def __init__(self, **kwargs): """Initilization.""" super().__init__(**kwargs) - self.sorted_patterns = sorted(self.patterns, - key=lambda i: len(i), - reverse=True) + self.sorted_patterns = sorted(self.patterns, key=lambda i: len(i), reverse=True) self.fusion_op_type = set(fusion[1] for fusion in self.patterns) self.fusion_mapping = { - 'DequantizeMatMulBiasAddQuantizeV2': 
self.apply_matmul_biasadd_fusion, - 'DequantizeMatMulQuantizeV2': self.apply_matmul_biasadd_fusion, - 'DequantizeMatMulBiasAddAddQuantizeV2': self.apply_matmul_biasadd_fusion, - 'DequantizeMatMulAddQuantizeV2': self.apply_matmul_biasadd_fusion, - 'DequantizeMatMulBiasAddAddV2QuantizeV2': self.apply_matmul_biasadd_fusion, - 'DequantizeMatMulAddV2QuantizeV2': self.apply_matmul_biasadd_fusion, - 'DequantizeMatMulBiasAddReluQuantizeV2': self.apply_matmul_biasadd_relu_fusion, - 'DequantizeMatMulBiasAddRelu6QuantizeV2': self.apply_matmul_biasadd_relu_fusion, - 'DequantizeMatMulBiasAddLeakyReluQuantizeV2': self.apply_matmul_biasadd_relu_fusion, - 'DequantizeMatMulBiasAddGeluQuantizeV2': self.apply_matmul_biasadd_relu_fusion, - 'DequantizeMatMulBiasAddEluQuantizeV2': self.apply_matmul_biasadd_relu_fusion, - 'DequantizeMatMulBiasAddTanhQuantizeV2': self.apply_matmul_biasadd_relu_fusion, - 'DequantizeMatMulBiasAddSigmoidQuantizeV2': self.apply_matmul_biasadd_relu_fusion, - 'DequantizeMatMulReluQuantizeV2': self.apply_matmul_biasadd_relu_fusion, - 'DequantizeMatMulRelu6QuantizeV2': self.apply_matmul_biasadd_relu_fusion, - 'DequantizeMatMulLeakyReluQuantizeV2': self.apply_matmul_biasadd_relu_fusion, - 'DequantizeMatMulGeluQuantizeV2': self.apply_matmul_biasadd_relu_fusion, - 'DequantizeMatMulEluQuantizeV2': self.apply_matmul_biasadd_relu_fusion, - 'DequantizeMatMulTanhQuantizeV2': self.apply_matmul_biasadd_relu_fusion, - 'DequantizeMatMulSigmoidQuantizeV2': self.apply_matmul_biasadd_relu_fusion, - 'DequantizeBatchMatMulQuantizeV2': self.apply_batchmatmulv2_fusion, - 'DequantizeBatchMatMulV2QuantizeV2': self.apply_batchmatmulv2_fusion, - 'DequantizeBatchMatMulMulQuantizeV2': self.apply_batchmatmulv2_mul_add_fusion, - 'DequantizeBatchMatMulV2MulQuantizeV2': self.apply_batchmatmulv2_mul_add_fusion, - 'DequantizeBatchMatMulAddQuantizeV2': self.apply_batchmatmulv2_mul_add_fusion, - 'DequantizeBatchMatMulV2AddQuantizeV2': self.apply_batchmatmulv2_mul_add_fusion, - 'DequantizeBatchMatMulAddV2QuantizeV2': self.apply_batchmatmulv2_mul_add_fusion, - 'DequantizeBatchMatMulV2AddV2QuantizeV2': self.apply_batchmatmulv2_mul_add_fusion, - 'DequantizeBatchMatMulMulAddV2QuantizeV2': self.apply_batchmatmulv2_mul_add_fusion, - 'DequantizeBatchMatMulV2MulAddV2QuantizeV2': self.apply_batchmatmulv2_mul_add_fusion, - 'DequantizeBatchMatMulMulAddQuantizeV2': self.apply_batchmatmulv2_mul_add_fusion, - 'DequantizeBatchMatMulV2MulAddQuantizeV2': self.apply_batchmatmulv2_mul_add_fusion + "DequantizeMatMulBiasAddQuantizeV2": self.apply_matmul_biasadd_fusion, + "DequantizeMatMulQuantizeV2": self.apply_matmul_biasadd_fusion, + "DequantizeMatMulBiasAddAddQuantizeV2": self.apply_matmul_biasadd_fusion, + "DequantizeMatMulAddQuantizeV2": self.apply_matmul_biasadd_fusion, + "DequantizeMatMulBiasAddAddV2QuantizeV2": self.apply_matmul_biasadd_fusion, + "DequantizeMatMulAddV2QuantizeV2": self.apply_matmul_biasadd_fusion, + "DequantizeMatMulBiasAddReluQuantizeV2": self.apply_matmul_biasadd_relu_fusion, + "DequantizeMatMulBiasAddRelu6QuantizeV2": self.apply_matmul_biasadd_relu_fusion, + "DequantizeMatMulBiasAddLeakyReluQuantizeV2": self.apply_matmul_biasadd_relu_fusion, + "DequantizeMatMulBiasAddGeluQuantizeV2": self.apply_matmul_biasadd_relu_fusion, + "DequantizeMatMulBiasAddEluQuantizeV2": self.apply_matmul_biasadd_relu_fusion, + "DequantizeMatMulBiasAddTanhQuantizeV2": self.apply_matmul_biasadd_relu_fusion, + "DequantizeMatMulBiasAddSigmoidQuantizeV2": self.apply_matmul_biasadd_relu_fusion, + "DequantizeMatMulReluQuantizeV2": 
self.apply_matmul_biasadd_relu_fusion, + "DequantizeMatMulRelu6QuantizeV2": self.apply_matmul_biasadd_relu_fusion, + "DequantizeMatMulLeakyReluQuantizeV2": self.apply_matmul_biasadd_relu_fusion, + "DequantizeMatMulGeluQuantizeV2": self.apply_matmul_biasadd_relu_fusion, + "DequantizeMatMulEluQuantizeV2": self.apply_matmul_biasadd_relu_fusion, + "DequantizeMatMulTanhQuantizeV2": self.apply_matmul_biasadd_relu_fusion, + "DequantizeMatMulSigmoidQuantizeV2": self.apply_matmul_biasadd_relu_fusion, + "DequantizeBatchMatMulQuantizeV2": self.apply_batchmatmulv2_fusion, + "DequantizeBatchMatMulV2QuantizeV2": self.apply_batchmatmulv2_fusion, + "DequantizeBatchMatMulMulQuantizeV2": self.apply_batchmatmulv2_mul_add_fusion, + "DequantizeBatchMatMulV2MulQuantizeV2": self.apply_batchmatmulv2_mul_add_fusion, + "DequantizeBatchMatMulAddQuantizeV2": self.apply_batchmatmulv2_mul_add_fusion, + "DequantizeBatchMatMulV2AddQuantizeV2": self.apply_batchmatmulv2_mul_add_fusion, + "DequantizeBatchMatMulAddV2QuantizeV2": self.apply_batchmatmulv2_mul_add_fusion, + "DequantizeBatchMatMulV2AddV2QuantizeV2": self.apply_batchmatmulv2_mul_add_fusion, + "DequantizeBatchMatMulMulAddV2QuantizeV2": self.apply_batchmatmulv2_mul_add_fusion, + "DequantizeBatchMatMulV2MulAddV2QuantizeV2": self.apply_batchmatmulv2_mul_add_fusion, + "DequantizeBatchMatMulMulAddQuantizeV2": self.apply_batchmatmulv2_mul_add_fusion, + "DequantizeBatchMatMulV2MulAddQuantizeV2": self.apply_batchmatmulv2_mul_add_fusion, } - def apply_matmul_biasadd_relu_fusion(self, match_node_name): """Apply dequantize + matmul + biasadd + activation + quantizev2 fusion. @@ -111,14 +108,12 @@ def apply_matmul_biasadd_relu_fusion(self, match_node_name): weights_min_name = None weights_max_name = None # no QDQ inserted for 'Enter' node in phase 1 - if weight_node.op == 'Enter': # pragma: no cover - parent_node = self.node_name_mapping[ - helper.node_name_from_input(weight_node.input[0])].node + if weight_node.op == "Enter": # pragma: no cover + parent_node = self.node_name_mapping[helper.node_name_from_input(weight_node.input[0])].node # FIXME We only quantize the MatMul op which second input node type is const. This is a # workaround for RNN model like LTSM. - if parent_node.op != 'Const': - self.logger.debug( \ - 'The weight node of matched_node {} is not Const or Const + Enter, skipped') + if parent_node.op != "Const": + self.logger.debug("The weight node of matched_node {} is not Const or Const + Enter, skipped") self.exclude_matmul_nodes.append(matched_node.node.name) self.output_graph = self.input_graph return [] @@ -136,16 +131,16 @@ def apply_matmul_biasadd_relu_fusion(self, match_node_name): weight_node = self.node_name_mapping[helper.node_name_from_input(weights_name[0])].node weight_name = weight_node.name - if weight_node.op == 'Const': - weights_content = tensor_util.MakeNdarray(weight_node.attr['value'].tensor) + if weight_node.op == "Const": + weights_content = tensor_util.MakeNdarray(weight_node.attr["value"].tensor) - if np.any(np.isnan(weights_content)): # pragma: no cover + if np.any(np.isnan(weights_content)): # pragma: no cover self.exclude_matmul_nodes.append(matched_node.node.name) self.output_graph = self.input_graph return [] # If weight node non const, can't insert dummy biasadd to do matmul fusion. 
- if weight_node.op != 'Const' and len(match_node_name) == 3: + if weight_node.op != "Const" and len(match_node_name) == 3: self.exclude_matmul_nodes.append(matched_node.node.name) self.output_graph = self.input_graph return [] @@ -160,19 +155,19 @@ def apply_matmul_biasadd_relu_fusion(self, match_node_name): add_a_node = self.node_name_mapping[add_a_node_name].node add_b_node_name = helper.node_name_from_input(second_node.input[1]) add_b_node = self.node_name_mapping[add_b_node_name].node - if (add_a_node.op != 'Const' and add_b_node.op == 'Const') or \ - (add_a_node.op != 'Const' and add_b_node.op == 'Enter'): + if (add_a_node.op != "Const" and add_b_node.op == "Const") or ( + add_a_node.op != "Const" and add_b_node.op == "Enter" + ): need_insert_dummy_biasadd = 0 offset = 0 if need_insert_dummy_biasadd: - self.apply_matmul_biasadd_fusion(match_node_name[:2]+[match_node_name[-1]]) + self.apply_matmul_biasadd_fusion(match_node_name[:2] + [match_node_name[-1]]) return match_node_name[1:2] - if weight_node.op == 'Const': - q_weights_name, q_weights_min_name, q_weights_max_name = \ - self._intel_cpu_quantize_weight_eightbit( - matched_node.node.op, self.node_name_mapping[weight_name].node, - self.per_channel, enter_node) + if weight_node.op == "Const": + q_weights_name, q_weights_min_name, q_weights_max_name = self._intel_cpu_quantize_weight_eightbit( + matched_node.node.op, self.node_name_mapping[weight_name].node, self.per_channel, enter_node + ) if weights_min_name: skip_node_name.append(weights_min_name) if weights_max_name: @@ -186,7 +181,7 @@ def apply_matmul_biasadd_relu_fusion(self, match_node_name): q_weights_max_name = q_weights_inputs[2] skip_node_name.append(normal_inputs[0]) - if enter_node: # pragma: no cover + if enter_node: # pragma: no cover skip_node_name.append(enter_node.name) else: skip_node_name.append(normal_inputs[1]) @@ -198,24 +193,23 @@ def apply_matmul_biasadd_relu_fusion(self, match_node_name): self.logger.debug("Matched node {} with input {}.".format(node.name, node.input)) quantized_node_name = node.name + "_eightbit_quantized_mat_mul" - if need_insert_dummy_biasadd and weight_node.op == 'Const': - t_b_index = 0 if matched_node.node.attr['transpose_b'].b else 1 - weights_content = tensor_util.MakeNdarray(weight_node.attr['value'].tensor) + if need_insert_dummy_biasadd and weight_node.op == "Const": + t_b_index = 0 if matched_node.node.attr["transpose_b"].b else 1 + weights_content = tensor_util.MakeNdarray(weight_node.attr["value"].tensor) bias_size = weights_content.shape[t_b_index] bias_node_name = node.name + "_fake_bias" bias_node = helper.create_constant_node( bias_node_name, [0] * bias_size, dtypes.float32, shape=[bias_size] ) - if enter_node: # pragma: no cover - bias_enter_node = helper.create_node( - 'Enter', bias_node_name + '_enter', [bias_node_name]) - helper.set_attr_string(bias_enter_node, - 'frame_name', enter_node.attr['frame_name'].s) - helper.set_attr_dtype(bias_enter_node, 'T', dtypes.float32) - helper.set_attr_bool(bias_enter_node, 'is_constant', True) - helper.set_attr_int(bias_enter_node, 'parallel_iterations', - enter_node.attr['parallel_iterations'].i) + if enter_node: # pragma: no cover + bias_enter_node = helper.create_node("Enter", bias_node_name + "_enter", [bias_node_name]) + helper.set_attr_string(bias_enter_node, "frame_name", enter_node.attr["frame_name"].s) + helper.set_attr_dtype(bias_enter_node, "T", dtypes.float32) + helper.set_attr_bool(bias_enter_node, "is_constant", True) + helper.set_attr_int( + bias_enter_node, 
"parallel_iterations", enter_node.attr["parallel_iterations"].i + ) self.add_output_graph_node(bias_enter_node) bias_node_name = bias_enter_node.name @@ -223,75 +217,76 @@ def apply_matmul_biasadd_relu_fusion(self, match_node_name): self.add_output_graph_node(bias_node) else: bias_node_name = self.node_name_mapping[match_node_name[2]].node.input[1] - relu_node_name = match_node_name[3-offset] + relu_node_name = match_node_name[3 - offset] all_input_names = q_inputs[:1] + [q_weights_name] + q_inputs[1:] all_input_names.append(q_weights_min_name) all_input_names.append(q_weights_max_name) - quantized_node_input_names = all_input_names[:2] + [ - bias_node_name - ] + all_input_names[2:] + control_inputs + quantized_node_input_names = ( + all_input_names[:2] + [bias_node_name] + all_input_names[2:] + control_inputs + ) quantized_matmul_node = helper.create_node( - "_QuantizedMatMul", quantized_node_name, - quantized_node_input_names) + "_QuantizedMatMul", quantized_node_name, quantized_node_input_names + ) helper.copy_attr(quantized_matmul_node, "transpose_a", node.attr["transpose_a"]) helper.copy_attr(quantized_matmul_node, "transpose_b", node.attr["transpose_b"]) helper.set_attr_dtype(quantized_matmul_node, "T1", dtypes.quint8) helper.set_attr_dtype(quantized_matmul_node, "T2", dtypes.qint8) helper.set_attr_dtype(quantized_matmul_node, "Tout", dtypes.qint32) - helper.set_attr_string(quantized_matmul_node, 'input_quant_mode', - b'MIN_FIRST' if self.is_asymmetric else b'SCALED') - helper.set_attr_string(quantized_matmul_node, 'output_quant_mode', - b'MIN_FIRST' if self.is_asymmetric else b'SCALED') + helper.set_attr_string( + quantized_matmul_node, "input_quant_mode", b"MIN_FIRST" if self.is_asymmetric else b"SCALED" + ) + helper.set_attr_string( + quantized_matmul_node, "output_quant_mode", b"MIN_FIRST" if self.is_asymmetric else b"SCALED" + ) if self.node_name_mapping[relu_node_name].node.op == "Relu": - helper.set_attr_string_list(quantized_matmul_node, - 'fused_ops', [b'BiasAdd', b'Relu']) + helper.set_attr_string_list(quantized_matmul_node, "fused_ops", [b"BiasAdd", b"Relu"]) elif self.node_name_mapping[relu_node_name].node.op == "Relu6": - helper.set_attr_string_list(quantized_matmul_node, - 'fused_ops', [b'BiasAdd', b'Relu6']) + helper.set_attr_string_list(quantized_matmul_node, "fused_ops", [b"BiasAdd", b"Relu6"]) elif self.node_name_mapping[relu_node_name].node.op == "LeakyRelu": - helper.set_attr_string_list(quantized_matmul_node, - 'fused_ops', [b'BiasAdd', b'LeakyRelu']) + helper.set_attr_string_list(quantized_matmul_node, "fused_ops", [b"BiasAdd", b"LeakyRelu"]) elif self.node_name_mapping[relu_node_name].node.op == "Gelu": if self.node_name_mapping[relu_node_name].node.attr["approximate"].b: - helper.set_attr_string_list(quantized_matmul_node, 'fused_ops', \ - [b'BiasAdd', b'GeluApproximate']) + helper.set_attr_string_list( + quantized_matmul_node, "fused_ops", [b"BiasAdd", b"GeluApproximate"] + ) else: - helper.set_attr_string_list(quantized_matmul_node, 'fused_ops', \ - [b'BiasAdd', b'GeluExact']) + helper.set_attr_string_list(quantized_matmul_node, "fused_ops", [b"BiasAdd", b"GeluExact"]) elif self.node_name_mapping[relu_node_name].node.op == "Elu": - helper.set_attr_string_list(quantized_matmul_node, 'fused_ops', - [b'BiasAdd', b'Elu']) + helper.set_attr_string_list(quantized_matmul_node, "fused_ops", [b"BiasAdd", b"Elu"]) elif self.node_name_mapping[relu_node_name].node.op == "Tanh": - helper.set_attr_string_list(quantized_matmul_node, 'fused_ops', - [b'BiasAdd', b'Tanh']) + 
helper.set_attr_string_list(quantized_matmul_node, "fused_ops", [b"BiasAdd", b"Tanh"]) elif self.node_name_mapping[relu_node_name].node.op == "Sigmoid": - helper.set_attr_string_list(quantized_matmul_node, 'fused_ops', - [b'BiasAdd', b'Sigmoid']) - helper.set_attr_dtype(quantized_matmul_node, 'Tbias', dtypes.float32) - helper.set_attr_dtype(quantized_matmul_node, 'U', dtypes.float32) + helper.set_attr_string_list(quantized_matmul_node, "fused_ops", [b"BiasAdd", b"Sigmoid"]) + helper.set_attr_dtype(quantized_matmul_node, "Tbias", dtypes.float32) + helper.set_attr_dtype(quantized_matmul_node, "U", dtypes.float32) - helper.set_attr_type_list(quantized_matmul_node, 'Thost_inputs', [ + helper.set_attr_type_list( + quantized_matmul_node, + "Thost_inputs", + [ dtypes.quint8.as_datatype_enum, dtypes.qint8.as_datatype_enum, dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum - ]) - helper.set_attr_type_list(quantized_matmul_node, 'Thost_outputs', [ - dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum]) + dtypes.float32.as_datatype_enum, + ], + ) + helper.set_attr_type_list( + quantized_matmul_node, + "Thost_outputs", + [dtypes.qint32.as_datatype_enum, dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum], + ) self.add_output_graph_node(quantized_matmul_node) - quantize_down_name = self._add_quantize_down_nodes( - node, quantized_node_name, dtypes.quint8, False) - self._intel_cpu_add_dequantize_result_node(quantize_down_name, relu_node_name, \ - performance_only=self.performance_only) + quantize_down_name = self._add_quantize_down_nodes(node, quantized_node_name, dtypes.quint8, False) + self._intel_cpu_add_dequantize_result_node( + quantize_down_name, relu_node_name, performance_only=self.performance_only + ) else: new_node = node_def_pb2.NodeDef() new_node.CopyFrom(node) @@ -300,8 +295,7 @@ def apply_matmul_biasadd_relu_fusion(self, match_node_name): if new_node.name in matmul_node_output: for idx, node_input in enumerate(new_node.input): if helper.node_name_from_input(node_input) == matmul_node.name: - new_node.input[idx] = node_input.replace( - matmul_node.name, quantized_node_name) + new_node.input[idx] = node_input.replace(matmul_node.name, quantized_node_name) self.add_output_graph_node(new_node) return match_node_name @@ -317,8 +311,7 @@ def apply_matmul_biasadd_fusion(self, match_node_name): """ skip_node_name = match_node_name[2:] matched_node = self.node_name_mapping[match_node_name[1]] - control_inputs, normal_inputs = self._get_node_input( - matched_node.node.name) + control_inputs, normal_inputs = self._get_node_input(matched_node.node.name) # QDQ inserted for input0 in phase 1 _, q_inputs = self._get_node_input(normal_inputs[0]) @@ -331,14 +324,12 @@ def apply_matmul_biasadd_fusion(self, match_node_name): quantizev2_weights_name = None # no QDQ inserted for 'Enter' node in phase 1 - if weight_node.op == 'Enter': # pragma: no cover - parent_node = self.node_name_mapping[ - helper.node_name_from_input(weight_node.input[0])].node + if weight_node.op == "Enter": # pragma: no cover + parent_node = self.node_name_mapping[helper.node_name_from_input(weight_node.input[0])].node # FIXME We only quantize the MatMul op which second input node type is const. This is a # workaround for RNN model like LTSM. 
- if parent_node.op != 'Const': - self.logger.debug( - 'The weight node of matched_node {} is not Const or Const + Enter, skipped') + if parent_node.op != "Const": + self.logger.debug("The weight node of matched_node {} is not Const or Const + Enter, skipped") self.exclude_matmul_nodes.append(matched_node.node.name) self.output_graph = self.input_graph return [] @@ -358,15 +349,15 @@ def apply_matmul_biasadd_fusion(self, match_node_name): # TODO Remove below two lines once the TF enabled the QuantizedMatMul while # transpose_a could be set to True. - if matched_node.node.attr["transpose_a"].b is True: # pragma: no cover + if matched_node.node.attr["transpose_a"].b is True: # pragma: no cover self.exclude_matmul_nodes.append(matched_node.node.name) self.output_graph = self.input_graph return [] - if weight_node.op == 'Const': - weights_content = tensor_util.MakeNdarray(weight_node.attr['value'].tensor) + if weight_node.op == "Const": + weights_content = tensor_util.MakeNdarray(weight_node.attr["value"].tensor) - if np.any(np.isnan(weights_content)): # pragma: no cover + if np.any(np.isnan(weights_content)): # pragma: no cover self.exclude_matmul_nodes.append(matched_node.node.name) self.output_graph = self.input_graph return [] @@ -374,8 +365,10 @@ def apply_matmul_biasadd_fusion(self, match_node_name): len_output = len(matched_node.output) is_shared_output = False if len_output == 2: - if self.node_name_mapping[matched_node.output[0]].node.op == 'Reshape' or \ - self.node_name_mapping[matched_node.output[1]].node.op == 'Reshape': + if ( + self.node_name_mapping[matched_node.output[0]].node.op == "Reshape" + or self.node_name_mapping[matched_node.output[1]].node.op == "Reshape" + ): is_shared_output = False else: is_shared_output = True @@ -394,39 +387,33 @@ def apply_matmul_biasadd_fusion(self, match_node_name): add_a_node = self.node_name_mapping[add_a_node_name].node add_b_node_name = helper.node_name_from_input(second_node.input[1]) add_b_node = self.node_name_mapping[add_b_node_name].node - if (add_a_node.op != 'Const' and add_b_node.op == 'Const') or\ - (add_a_node.op != 'Const' and add_b_node.op == 'Enter'): + if (add_a_node.op != "Const" and add_b_node.op == "Const") or ( + add_a_node.op != "Const" and add_b_node.op == "Enter" + ): single_matmul_fusion = False else: - return self.apply_matmul_biasadd_fusion(match_node_name[:2]+[match_node_name[-1]]) + return self.apply_matmul_biasadd_fusion(match_node_name[:2] + [match_node_name[-1]]) sum_node_name = "" if len(match_node_name) == 4: if self.node_name_mapping[match_node_name[2]].node.op in ("Add", "AddV2"): - sum_index = 1 \ - if match_node_name[1] == self.node_name_mapping[match_node_name[2]].node.input[0] \ - else 0 + sum_index = 1 if match_node_name[1] == self.node_name_mapping[match_node_name[2]].node.input[0] else 0 sum_node_name = self.node_name_mapping[match_node_name[2]].node.input[sum_index] deq_node = self.node_name_mapping[sum_node_name].node - if deq_node.op != 'Dequantize' or deq_node.op.find("Quantize") != -1: - return self.apply_matmul_biasadd_fusion( - match_node_name[:2]+[match_node_name[-1]]) + if deq_node.op != "Dequantize" or deq_node.op.find("Quantize") != -1: + return self.apply_matmul_biasadd_fusion(match_node_name[:2] + [match_node_name[-1]]) if len(match_node_name) == 5: if self.node_name_mapping[match_node_name[3]].node.op in ("Add", "AddV2"): - sum_index = 1 \ - if match_node_name[2] == self.node_name_mapping[match_node_name[3]].node.input[0] \ - else 0 + sum_index = 1 if match_node_name[2] == 
self.node_name_mapping[match_node_name[3]].node.input[0] else 0 sum_node_name = self.node_name_mapping[match_node_name[3]].node.input[sum_index] deq_node = self.node_name_mapping[sum_node_name].node - if deq_node.op != 'Dequantize' or deq_node.op.find("Quantize") != -1: - return self.apply_matmul_biasadd_fusion( - match_node_name[:3]+[match_node_name[-1]]) - - if weight_node.op == 'Const': - q_weights_name, q_weights_min_name, q_weights_max_name = \ - self._intel_cpu_quantize_weight_eightbit( - matched_node.node.op, self.node_name_mapping[weight_name].node, - self.per_channel, enter_node) + if deq_node.op != "Dequantize" or deq_node.op.find("Quantize") != -1: + return self.apply_matmul_biasadd_fusion(match_node_name[:3] + [match_node_name[-1]]) + + if weight_node.op == "Const": + q_weights_name, q_weights_min_name, q_weights_max_name = self._intel_cpu_quantize_weight_eightbit( + matched_node.node.op, self.node_name_mapping[weight_name].node, self.per_channel, enter_node + ) if weights_min_name: skip_node_name.append(weights_min_name) if weights_max_name: @@ -441,8 +428,7 @@ def apply_matmul_biasadd_fusion(self, match_node_name): skip_node_name.append(normal_inputs[0]) if enter_node: - if len(self.node_name_mapping[ - helper.node_name_from_input(enter_node.name)].output) == 1: + if len(self.node_name_mapping[helper.node_name_from_input(enter_node.name)].output) == 1: skip_node_name.append(enter_node.name) else: skip_node_name.append(normal_inputs[1]) @@ -460,43 +446,48 @@ def apply_matmul_biasadd_fusion(self, match_node_name): if single_matmul_fusion: if sum_node_name: - quantized_node_input_names = all_input_names[:2] + [ - sum_node_name - ] + all_input_names[2:] + control_inputs + quantized_node_input_names = ( + all_input_names[:2] + [sum_node_name] + all_input_names[2:] + control_inputs + ) else: - quantized_node_input_names = all_input_names[:2] + \ - all_input_names[2:] + control_inputs + quantized_node_input_names = all_input_names[:2] + all_input_names[2:] + control_inputs else: bias_node_name = self.node_name_mapping[match_node_name[2]].node.input[1] if sum_node_name: - quantized_node_input_names = all_input_names[:2] + [ - bias_node_name - ] + [ - sum_node_name - ] + all_input_names[2:] + control_inputs + quantized_node_input_names = ( + all_input_names[:2] + + [bias_node_name] + + [sum_node_name] + + all_input_names[2:] + + control_inputs + ) else: - quantized_node_input_names = all_input_names[:2] + [ - bias_node_name - ] + all_input_names[2:] + control_inputs + quantized_node_input_names = ( + all_input_names[:2] + [bias_node_name] + all_input_names[2:] + control_inputs + ) quantized_matmul_node = helper.create_node( - "_QuantizedMatMul", quantized_node_name, - quantized_node_input_names) + "_QuantizedMatMul", quantized_node_name, quantized_node_input_names + ) helper.copy_attr(quantized_matmul_node, "transpose_a", node.attr["transpose_a"]) helper.copy_attr(quantized_matmul_node, "transpose_b", node.attr["transpose_b"]) helper.set_attr_dtype(quantized_matmul_node, "T1", dtypes.quint8) helper.set_attr_dtype(quantized_matmul_node, "T2", dtypes.qint8) helper.set_attr_dtype(quantized_matmul_node, "Tout", dtypes.qint32) - helper.set_attr_dtype(quantized_matmul_node, 'U', dtypes.float32) - helper.set_attr_string(quantized_matmul_node, 'input_quant_mode', - b'MIN_FIRST' if self.is_asymmetric else b'SCALED') - helper.set_attr_string(quantized_matmul_node, 'output_quant_mode', - b'MIN_FIRST' if self.is_asymmetric else b'SCALED') - helper.set_attr_dtype(quantized_matmul_node, 'Tbias', 
dtypes.float32) + helper.set_attr_dtype(quantized_matmul_node, "U", dtypes.float32) + helper.set_attr_string( + quantized_matmul_node, "input_quant_mode", b"MIN_FIRST" if self.is_asymmetric else b"SCALED" + ) + helper.set_attr_string( + quantized_matmul_node, "output_quant_mode", b"MIN_FIRST" if self.is_asymmetric else b"SCALED" + ) + helper.set_attr_dtype(quantized_matmul_node, "Tbias", dtypes.float32) if sum_node_name: - helper.set_attr_string_list(quantized_matmul_node, 'fused_ops', - [b'BiasAdd', b'Add']) - helper.set_attr_type_list(quantized_matmul_node, 'Thost_inputs', [ + helper.set_attr_string_list(quantized_matmul_node, "fused_ops", [b"BiasAdd", b"Add"]) + helper.set_attr_type_list( + quantized_matmul_node, + "Thost_inputs", + [ dtypes.quint8.as_datatype_enum, dtypes.qint8.as_datatype_enum, dtypes.float32.as_datatype_enum, @@ -504,48 +495,62 @@ def apply_matmul_biasadd_fusion(self, match_node_name): dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum - ]) + dtypes.float32.as_datatype_enum, + ], + ) else: if not single_matmul_fusion: - helper.set_attr_string_list(quantized_matmul_node, 'fused_ops', - [b'BiasAdd']) - helper.set_attr_type_list(quantized_matmul_node, 'Thost_inputs', [ + helper.set_attr_string_list(quantized_matmul_node, "fused_ops", [b"BiasAdd"]) + helper.set_attr_type_list( + quantized_matmul_node, + "Thost_inputs", + [ dtypes.quint8.as_datatype_enum, dtypes.qint8.as_datatype_enum, dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum - ]) + dtypes.float32.as_datatype_enum, + ], + ) else: - helper.set_attr_type_list(quantized_matmul_node, 'Thost_inputs', [ + helper.set_attr_type_list( + quantized_matmul_node, + "Thost_inputs", + [ dtypes.quint8.as_datatype_enum, dtypes.qint8.as_datatype_enum, dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum - ]) - helper.set_attr_type_list(quantized_matmul_node, 'Thost_outputs', [ - dtypes.qint32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum]) + dtypes.float32.as_datatype_enum, + ], + ) + helper.set_attr_type_list( + quantized_matmul_node, + "Thost_outputs", + [dtypes.qint32.as_datatype_enum, dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum], + ) self.add_output_graph_node(quantized_matmul_node) requantize_type = dtypes.qint8 - quantize_down_name = self._add_quantize_down_nodes( - node, quantized_node_name, requantize_type, False) + quantize_down_name = self._add_quantize_down_nodes(node, quantized_node_name, requantize_type, False) if sum_node_name: self._intel_cpu_add_dequantize_result_node( - quantize_down_name, match_node_name[2] if single_matmul_fusion else \ - match_node_name[3], requantize_type, performance_only=self.performance_only) + quantize_down_name, + match_node_name[2] if single_matmul_fusion else match_node_name[3], + requantize_type, + performance_only=self.performance_only, + ) else: self._intel_cpu_add_dequantize_result_node( - quantize_down_name, match_node_name[1] if single_matmul_fusion else \ - match_node_name[2], requantize_type, performance_only=self.performance_only) + quantize_down_name, + match_node_name[1] if single_matmul_fusion else match_node_name[2], + requantize_type, + performance_only=self.performance_only, + ) else: new_node = node_def_pb2.NodeDef() 
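Note on the _add_quantize_down_nodes / _intel_cpu_add_dequantize_result_node pair above: it performs the usual requantization step, rescaling the qint32 accumulator of the quantized MatMul into an 8-bit range and appending a Dequantize so the graph's public tensor stays float32. A rough numpy illustration of the arithmetic only (not the TF kernels; the function name is illustrative):

    import numpy as np

    def requantize_int32_to_uint8(acc_int32, input_scale, weight_scale, out_min, out_max):
        """Rescale a qint32 accumulator into quint8 for a requested float output range."""
        real_values = acc_int32.astype(np.float64) * input_scale * weight_scale
        out_scale = (out_max - out_min) / 255.0
        quantized = np.round((real_values - out_min) / out_scale)
        return np.clip(quantized, 0, 255).astype(np.uint8)
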
new_node.CopyFrom(node) @@ -559,8 +564,7 @@ def apply_batchmatmulv2_fusion(self, match_node_name): # pragma: no cover """ skip_node_name = match_node_name[2:] matched_node = self.node_name_mapping[match_node_name[1]] - control_inputs, normal_inputs = self._get_node_input( - matched_node.node.name) + control_inputs, normal_inputs = self._get_node_input(matched_node.node.name) _, q_x_inputs = self._get_node_input(normal_inputs[0]) quantizev2_input_name = q_x_inputs[0] @@ -571,14 +575,12 @@ def apply_batchmatmulv2_fusion(self, match_node_name): # pragma: no cover weights_max_name = None quantizev2_y_name = None - if weight_node.op == 'Enter': - parent_node = self.node_name_mapping[ - helper.node_name_from_input(weight_node.input[0])].node + if weight_node.op == "Enter": + parent_node = self.node_name_mapping[helper.node_name_from_input(weight_node.input[0])].node # FIXME We only quantize the MatMul op which second input node type is const. This is a # workaround for RNN model like LTSM. - if parent_node.op != 'Const': - self.logger.debug( - 'The weight node of matched_node {} is not Const or Const + Enter, skipped') + if parent_node.op != "Const": + self.logger.debug("The weight node of matched_node {} is not Const or Const + Enter, skipped") self.exclude_matmul_nodes.append(matched_node.node.name) self.output_graph = self.input_graph return [] @@ -594,8 +596,8 @@ def apply_batchmatmulv2_fusion(self, match_node_name): # pragma: no cover weight_node = self.node_name_mapping[helper.node_name_from_input(weights_name[0])].node weight_name = weight_node.name - if weight_node.op == 'Const': - weights_content = tensor_util.MakeNdarray(weight_node.attr['value'].tensor) + if weight_node.op == "Const": + weights_content = tensor_util.MakeNdarray(weight_node.attr["value"].tensor) if np.any(np.isnan(weights_content)): self.output_graph = self.input_graph @@ -603,16 +605,18 @@ def apply_batchmatmulv2_fusion(self, match_node_name): # pragma: no cover return [] for i in self.node_name_mapping: - if weight_node.input and not weight_node.input[0].startswith('^') \ - and weight_node.name in self.node_name_mapping[i].output: + if ( + weight_node.input + and not weight_node.input[0].startswith("^") + and weight_node.name in self.node_name_mapping[i].output + ): self.output_graph = self.input_graph self.exclude_matmul_nodes.append(matched_node.node.name) return [] - q_weights_name, q_weights_min_name, q_weights_max_name = \ - self._intel_cpu_quantize_weight_eightbit( - matched_node.node.op, self.node_name_mapping[weight_name].node, - self.per_channel, enter_node) + q_weights_name, q_weights_min_name, q_weights_max_name = self._intel_cpu_quantize_weight_eightbit( + matched_node.node.op, self.node_name_mapping[weight_name].node, self.per_channel, enter_node + ) if weights_min_name: skip_node_name.append(weights_min_name) @@ -648,8 +652,8 @@ def apply_batchmatmulv2_fusion(self, match_node_name): # pragma: no cover quantized_node_input_names = all_input_names + control_inputs quantized_matmul_node = helper.create_node( - "_QuantizedBatchMatMul", quantized_node_name, - quantized_node_input_names) + "_QuantizedBatchMatMul", quantized_node_name, quantized_node_input_names + ) helper.copy_attr(quantized_matmul_node, "adj_x", node.attr["adj_x"]) helper.copy_attr(quantized_matmul_node, "adj_y", node.attr["adj_y"]) @@ -657,19 +661,22 @@ def apply_batchmatmulv2_fusion(self, match_node_name): # pragma: no cover helper.set_attr_dtype(quantized_matmul_node, "T2", dtypes.qint8) helper.set_attr_dtype(quantized_matmul_node, "U", 
dtypes.float32) helper.set_attr_dtype(quantized_matmul_node, "Tout", dtypes.float32) - helper.set_attr_string(quantized_matmul_node, 'input_quant_mode', b'SCALED') - helper.set_attr_string(quantized_matmul_node, 'output_quant_mode', b'SCALED') - helper.set_attr_string_list(quantized_matmul_node, 'fused_ops', [b'Dequantize']) - helper.set_attr_type_list(quantized_matmul_node, 'Thost_inputs', [ + helper.set_attr_string(quantized_matmul_node, "input_quant_mode", b"SCALED") + helper.set_attr_string(quantized_matmul_node, "output_quant_mode", b"SCALED") + helper.set_attr_string_list(quantized_matmul_node, "fused_ops", [b"Dequantize"]) + helper.set_attr_type_list( + quantized_matmul_node, + "Thost_inputs", + [ dtypes.qint8.as_datatype_enum, dtypes.qint8.as_datatype_enum, dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum - ]) - helper.set_attr_type_list(quantized_matmul_node, 'Thost_outputs', [ - dtypes.float32.as_datatype_enum]) + dtypes.float32.as_datatype_enum, + ], + ) + helper.set_attr_type_list(quantized_matmul_node, "Thost_outputs", [dtypes.float32.as_datatype_enum]) self.add_output_graph_node(quantized_matmul_node) for i in self.node_name_mapping[node.name].output: @@ -684,7 +691,7 @@ def apply_batchmatmulv2_fusion(self, match_node_name): # pragma: no cover self.add_output_graph_node(new_node) return match_node_name - def apply_batchmatmulv2_mul_add_fusion(self, match_node_name): # pragma: no cover + def apply_batchmatmulv2_mul_add_fusion(self, match_node_name): # pragma: no cover """Apply dequantize + batchmatmul/batchmatmulv2 + mul + add + quantizev2 fusion. Dequantize + BatchMatMulV2 + Mul + QuantizeV2 @@ -696,21 +703,19 @@ def apply_batchmatmulv2_mul_add_fusion(self, match_node_name): # pragma: no cove skip_node_name = match_node_name[2:] matched_node = self.node_name_mapping[match_node_name[1]] # oneDNN limitation: add tensor ndim must be 4 - if len(match_node_name) == 4 and \ - self.node_name_mapping[match_node_name[2]].node.op in ("Add","AddV2"): + if len(match_node_name) == 4 and self.node_name_mapping[match_node_name[2]].node.op in ("Add", "AddV2"): add_node_input_name = self.node_name_mapping[match_node_name[2]].node.input[1] if add_node_input_name == matched_node.node.name: add_node_input_name = self.node_name_mapping[match_node_name[2]].node.input[0] add_input_node = self.node_name_mapping[add_node_input_name].node - if add_input_node.op != 'Const': - return self.apply_batchmatmulv2_fusion(match_node_name[:2]+[match_node_name[-1]]) + if add_input_node.op != "Const": + return self.apply_batchmatmulv2_fusion(match_node_name[:2] + [match_node_name[-1]]) shape = tensor_util.MakeNdarray(add_input_node.attr["value"].tensor) if shape.ndim != 4: - return self.apply_batchmatmulv2_fusion(match_node_name[:2]+[match_node_name[-1]]) + return self.apply_batchmatmulv2_fusion(match_node_name[:2] + [match_node_name[-1]]) - control_inputs, normal_inputs = self._get_node_input( - matched_node.node.name) + control_inputs, normal_inputs = self._get_node_input(matched_node.node.name) weight_name = normal_inputs[1] weight_node = self.node_name_mapping[helper.node_name_from_input(weight_name)].node @@ -719,14 +724,12 @@ def apply_batchmatmulv2_mul_add_fusion(self, match_node_name): # pragma: no cove weights_min_name = None weights_max_name = None quantizev2_weights_name = None - if weight_node.op == 'Enter': - parent_node = self.node_name_mapping[ - helper.node_name_from_input(weight_node.input[0])].node + if 
weight_node.op == "Enter": + parent_node = self.node_name_mapping[helper.node_name_from_input(weight_node.input[0])].node # FIXME We only quantize the MatMul op which second input node type is const. This is a # workaround for RNN model like LTSM. - if parent_node.op != 'Const': - self.logger.debug( - 'The weight node of matched_node {} is not Const or Const + Enter, skipped') + if parent_node.op != "Const": + self.logger.debug("The weight node of matched_node {} is not Const or Const + Enter, skipped") self.exclude_matmul_nodes.append(matched_node.node.name) self.output_graph = self.input_graph return [] @@ -742,8 +745,8 @@ def apply_batchmatmulv2_mul_add_fusion(self, match_node_name): # pragma: no cove weight_node = self.node_name_mapping[helper.node_name_from_input(weights_name[0])].node weight_name = weight_node.name - if weight_node.op == 'Const': - weights_content = tensor_util.MakeNdarray(weight_node.attr['value'].tensor) + if weight_node.op == "Const": + weights_content = tensor_util.MakeNdarray(weight_node.attr["value"].tensor) if np.any(np.isnan(weights_content)): self.exclude_matmul_nodes.append(matched_node.node.name) @@ -751,16 +754,18 @@ def apply_batchmatmulv2_mul_add_fusion(self, match_node_name): # pragma: no cove return [] for i in self.node_name_mapping: - if weight_node.input and not weight_node.input[0].startswith('^') \ - and weight_node.name in self.node_name_mapping[i].output: + if ( + weight_node.input + and not weight_node.input[0].startswith("^") + and weight_node.name in self.node_name_mapping[i].output + ): self.output_graph = self.input_graph self.exclude_matmul_nodes.append(matched_node.node.name) return [] - q_weights_name, q_weights_min_name, q_weights_max_name = \ - self._intel_cpu_quantize_weight_eightbit( - matched_node.node.op, self.node_name_mapping[weight_name].node, - self.per_channel, enter_node) + q_weights_name, q_weights_min_name, q_weights_max_name = self._intel_cpu_quantize_weight_eightbit( + matched_node.node.op, self.node_name_mapping[weight_name].node, self.per_channel, enter_node + ) if weights_min_name: skip_node_name.append(weights_min_name) @@ -791,14 +796,12 @@ def apply_batchmatmulv2_mul_add_fusion(self, match_node_name): # pragma: no cove if len(match_node_name) == 4: if self.node_name_mapping[match_node_name[2]].node.op == "Mul": mul_node_name = self.node_name_mapping[match_node_name[2]].node.input[1] - all_input_names = q_x_inputs[:1] + [q_weights_name] + [mul_node_name] \ - + q_x_inputs[1:] + all_input_names = q_x_inputs[:1] + [q_weights_name] + [mul_node_name] + q_x_inputs[1:] all_input_names.append(q_weights_min_name) all_input_names.append(q_weights_max_name) else: add_node_name = self.node_name_mapping[match_node_name[2]].node.input[1] - all_input_names = q_x_inputs[:1] + [q_weights_name] + [add_node_name] \ - + q_x_inputs[1:] + all_input_names = q_x_inputs[:1] + [q_weights_name] + [add_node_name] + q_x_inputs[1:] all_input_names.append(q_weights_min_name) all_input_names.append(q_weights_max_name) skip_node_name.append(match_node_name[2]) @@ -807,16 +810,17 @@ def apply_batchmatmulv2_mul_add_fusion(self, match_node_name): # pragma: no cove add_node_name = self.node_name_mapping[match_node_name[3]].node.input[1] skip_node_name.append(match_node_name[2]) skip_node_name.append(match_node_name[3]) - all_input_names = q_x_inputs[:1] + [q_weights_name] + [mul_node_name] \ - + [add_node_name] + q_x_inputs[1:] + all_input_names = ( + q_x_inputs[:1] + [q_weights_name] + [mul_node_name] + [add_node_name] + q_x_inputs[1:] + ) 
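Note on the input lists assembled in this fusion: the _QuantizedBatchMatMul inputs follow one layout, quantized x, quantized weights, the optional Mul and/or Add tensors, then the activation min/max followed by the weight min/max. A minimal sketch of that ordering with illustrative names (not part of the patch):

    def batchmatmul_fused_inputs(q_x, q_x_min_max, q_weights, q_w_min, q_w_max,
                                 mul_name=None, add_name=None):
        """Order the inputs of _QuantizedBatchMatMul with optional Mul/Add post-ops."""
        names = [q_x, q_weights]
        if mul_name is not None:
            names.append(mul_name)        # scale tensor consumed by the fused Mul
        if add_name is not None:
            names.append(add_name)        # 4-D tensor consumed by the fused Add
        names.extend(q_x_min_max)         # min/max of the quantized activation
        names.extend([q_w_min, q_w_max])  # min/max of the quantized weights
        return names
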
all_input_names.append(q_weights_min_name) all_input_names.append(q_weights_max_name) quantized_node_input_names = all_input_names + control_inputs quantized_matmul_node = helper.create_node( - "_QuantizedBatchMatMul", quantized_node_name, - quantized_node_input_names) + "_QuantizedBatchMatMul", quantized_node_name, quantized_node_input_names + ) helper.copy_attr(quantized_matmul_node, "adj_x", node.attr["adj_x"]) helper.copy_attr(quantized_matmul_node, "adj_y", node.attr["adj_y"]) @@ -824,28 +828,32 @@ def apply_batchmatmulv2_mul_add_fusion(self, match_node_name): # pragma: no cove helper.set_attr_dtype(quantized_matmul_node, "T2", dtypes.qint8) helper.set_attr_dtype(quantized_matmul_node, "U", dtypes.float32) helper.set_attr_dtype(quantized_matmul_node, "Tout", dtypes.float32) - helper.set_attr_string(quantized_matmul_node, 'input_quant_mode', b'SCALED') - helper.set_attr_string(quantized_matmul_node, 'output_quant_mode', b'SCALED') + helper.set_attr_string(quantized_matmul_node, "input_quant_mode", b"SCALED") + helper.set_attr_string(quantized_matmul_node, "output_quant_mode", b"SCALED") if len(match_node_name) == 4: if self.node_name_mapping[match_node_name[2]].node.op == "Mul": - helper.set_attr_string_list(quantized_matmul_node, 'fused_ops', - [b'Mul', b'Dequantize']) + helper.set_attr_string_list(quantized_matmul_node, "fused_ops", [b"Mul", b"Dequantize"]) else: - helper.set_attr_string_list(quantized_matmul_node, 'fused_ops', - [b'Add', b'Dequantize']) - helper.set_attr_type_list(quantized_matmul_node, 'Thost_inputs', [ + helper.set_attr_string_list(quantized_matmul_node, "fused_ops", [b"Add", b"Dequantize"]) + helper.set_attr_type_list( + quantized_matmul_node, + "Thost_inputs", + [ dtypes.qint8.as_datatype_enum, dtypes.qint8.as_datatype_enum, dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum - ]) + dtypes.float32.as_datatype_enum, + ], + ) else: - helper.set_attr_string_list(quantized_matmul_node, 'fused_ops', - [b'Mul', b'Add', b'Dequantize']) - helper.set_attr_type_list(quantized_matmul_node, 'Thost_inputs', [ + helper.set_attr_string_list(quantized_matmul_node, "fused_ops", [b"Mul", b"Add", b"Dequantize"]) + helper.set_attr_type_list( + quantized_matmul_node, + "Thost_inputs", + [ dtypes.qint8.as_datatype_enum, dtypes.qint8.as_datatype_enum, dtypes.float32.as_datatype_enum, @@ -853,15 +861,20 @@ def apply_batchmatmulv2_mul_add_fusion(self, match_node_name): # pragma: no cove dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum - ]) - helper.set_attr_type_list(quantized_matmul_node, 'Thost_outputs', [ - dtypes.float32.as_datatype_enum]) + dtypes.float32.as_datatype_enum, + ], + ) + helper.set_attr_type_list(quantized_matmul_node, "Thost_outputs", [dtypes.float32.as_datatype_enum]) self.add_output_graph_node(quantized_matmul_node) - attr_fused_ops = ''.join( - x for x in quantized_matmul_node.attr["fused_ops"].SerializeToString() - .decode('UTF-8', 'ignore').strip() if x.isprintable()) + attr_fused_ops = "".join( + x + for x in quantized_matmul_node.attr["fused_ops"] + .SerializeToString() + .decode("UTF-8", "ignore") + .strip() + if x.isprintable() + ) if "MulAdd" in attr_fused_ops: for i in self.node_name_mapping[match_node_name[3]].output: batchmatmul_next_node[i] = (quantized_node_name, match_node_name[3]) @@ -890,13 +903,12 @@ def apply_the_transform(self): matched_rule, 
matched_node_name = self._is_match_matmul(self.sorted_patterns, True) if matched_node_name: _, normal_inputs = self._get_node_input(matched_node_name[1]) - if matched_node_name and \ - self.node_name_mapping[normal_inputs[0]].node.op == matched_node_name[0]: + if matched_node_name and self.node_name_mapping[normal_inputs[0]].node.op == matched_node_name[0]: self.output_graph = graph_pb2.GraphDef() - fusion_name = ''.join(matched_rule) + fusion_name = "".join(matched_rule) if fusion_name in self.fusion_mapping: _ = self.fusion_mapping[fusion_name](matched_node_name) - else: # pragma: no cover + else: # pragma: no cover self.logger.debug("Unknown fusion pattern {}.".format(fusion_name)) if self.remove_redundant_quant_flag: self.input_graph = self.remove_redundant_quantization(self.input_graph) @@ -923,14 +935,13 @@ def _is_match_matmul(self, patterns, qdq_inserted=False): for k, v in enumerate(self.op_list): if v in set(fusion[1] for fusion in patterns): - cur_node = self.node_name_mapping[list( - self.node_name_mapping.keys())[k]].node + cur_node = self.node_name_mapping[list(self.node_name_mapping.keys())[k]].node if cur_node.name != self.start_node_name: continue # Disable BatchMatMul quantization temporarily for its bad performance # Enable it again when the performance issue is fixed. - if cur_node.op in ('BatchMatMulV2', 'BatchMatMul'): + if cur_node.op in ("BatchMatMulV2", "BatchMatMul"): self.exclude_matmul_nodes.append(cur_node.name) continue @@ -942,22 +953,21 @@ def _is_match_matmul(self, patterns, qdq_inserted=False): # This is a workaround for RNN model like LTSM. parent_node = None if cur_node.op == "MatMul" and not self.itex_mode: - if control_inputs: # pragma: no cover + if control_inputs: # pragma: no cover self.exclude_matmul_nodes.append(cur_node.name) continue - if weight_node.op != 'Const': + if weight_node.op != "Const": if weight_node.input: - parent_node = self.node_name_mapping \ - [helper.node_name_from_input(weight_node.input[0])].node - if weight_node.op == 'Enter': # pragma: no cover - if len(self.node_name_mapping \ - [helper.node_name_from_input(weight_name)].output)>1: + parent_node = self.node_name_mapping[ + helper.node_name_from_input(weight_node.input[0]) + ].node + if weight_node.op == "Enter": # pragma: no cover + if len(self.node_name_mapping[helper.node_name_from_input(weight_name)].output) > 1: self.exclude_matmul_nodes.append(cur_node.name) continue - if parent_node.op == 'Const': + if parent_node.op == "Const": weight_node = parent_node - weights_content = \ - tensor_util.MakeNdarray(weight_node.attr['value'].tensor) + weights_content = tensor_util.MakeNdarray(weight_node.attr["value"].tensor) if np.any(np.isnan(weights_content)): self.exclude_matmul_nodes.append(cur_node.name) continue @@ -965,15 +975,14 @@ def _is_match_matmul(self, patterns, qdq_inserted=False): self.exclude_matmul_nodes.append(cur_node.name) continue else: - weights_content = \ - tensor_util.MakeNdarray(weight_node.attr['value'].tensor) - if np.any(np.isnan(weights_content)): # pragma: no cover + weights_content = tensor_util.MakeNdarray(weight_node.attr["value"].tensor) + if np.any(np.isnan(weights_content)): # pragma: no cover self.exclude_matmul_nodes.append(cur_node.name) continue # TODO Remove below two lines once the TF enabled the QuantizedMatMul while # transpose_a could be set to True. 
- if cur_node.attr["transpose_a"].b is True: # pragma: no cover + if cur_node.attr["transpose_a"].b is True: # pragma: no cover self.exclude_matmul_nodes.append(cur_node.name) continue @@ -986,12 +995,12 @@ def _is_match_matmul(self, patterns, qdq_inserted=False): continue if qdq_inserted: - if control_inputs: # pragma: no cover + if control_inputs: # pragma: no cover self.exclude_matmul_nodes.append(cur_node.name) continue - if self.node_name_mapping[normal_inputs[0]].node.op != "Dequantize" or \ - self.node_name_mapping[normal_inputs[1]].node.op not in \ - ("Dequantize", "Enter"): + if self.node_name_mapping[normal_inputs[0]].node.op != "Dequantize" or self.node_name_mapping[ + normal_inputs[1] + ].node.op not in ("Dequantize", "Enter"): self.exclude_matmul_nodes.append(cur_node.name) continue @@ -1009,11 +1018,9 @@ def _is_match_matmul(self, patterns, qdq_inserted=False): self.logger.debug("Fail to match {}".format(sub_rule)) break - next_node_name = self.node_name_mapping[ - cur_node_name].output[0] + next_node_name = self.node_name_mapping[cur_node_name].output[0] - next_node_op = self.node_name_mapping[ - next_node_name].node.op + next_node_op = self.node_name_mapping[next_node_name].node.op if next_node_op == sub_rule[-sub_rule_len]: matched_node_name.append(next_node_name) @@ -1026,8 +1033,7 @@ def _is_match_matmul(self, patterns, qdq_inserted=False): if sub_rule_len == 1: matched_node_name.append(sub_rule[-1]) - self.logger.debug("Match {} on nodes {}.".format - (sub_rule, matched_node_name)) + self.logger.debug("Match {} on nodes {}.".format(sub_rule, matched_node_name)) return sub_rule, matched_node_name return None, None diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_pooling.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_pooling.py index 0c9aea4a718..c3477dc945c 100644 --- a/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_pooling.py +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_pooling.py @@ -20,9 +20,11 @@ from tensorflow.core.framework import node_def_pb2 from tensorflow.python.framework import dtypes -from ..quantize_graph_base import QuantizeNodeBase from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper as helper -from neural_compressor.adaptor.tf_utils.util import version1_gt_version2, version1_lt_version2, version1_eq_version2 +from neural_compressor.adaptor.tf_utils.util import version1_eq_version2, version1_gt_version2, version1_lt_version2 + +from ..quantize_graph_base import QuantizeNodeBase + class FuseNodeStartWithPooling(QuantizeNodeBase): """Quantize the AvgPool and MaxPool.""" @@ -30,14 +32,15 @@ class FuseNodeStartWithPooling(QuantizeNodeBase): def __init__(self, **kwargs): """Initilization.""" super().__init__(**kwargs) - self.sorted_patterns = sorted(self.patterns, - key=lambda i: len(i), - reverse=True) + self.sorted_patterns = sorted(self.patterns, key=lambda i: len(i), reverse=True) def _add_pool_function(self, original_node, quantized_op_node): """Set quantized pooling node attributes.""" - pooling_type = dtypes.quint8 if version1_lt_version2(tf.version.VERSION, '2.6.0') or \ - self._find_relu_node(original_node) else dtypes.qint8 + pooling_type = ( + dtypes.quint8 + if version1_lt_version2(tf.version.VERSION, "2.6.0") or self._find_relu_node(original_node) + else dtypes.qint8 + ) helper.set_attr_dtype(quantized_op_node, "T", pooling_type) helper.copy_attr(quantized_op_node, "ksize", original_node.attr["ksize"]) 
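Note on the pooling dtype selection reformatted above: it reduces to one rule, use unsigned quint8 when TensorFlow is older than 2.6.0 or the pool's input is known non-negative (a preceding Relu was found), otherwise signed qint8. A standalone sketch of that rule, using an illustrative version parse in place of the module's version1_lt_version2 helper:

    from tensorflow.python.framework import dtypes

    def pooling_quantized_dtype(tf_version, follows_relu):
        """Pick the quantized element type for an AvgPool/MaxPool fusion."""
        # Older TF pooling kernels only accept unsigned inputs; a preceding Relu
        # also guarantees a non-negative range, so quint8 is safe in both cases.
        older_than_2_6 = tuple(int(p) for p in tf_version.split(".")[:2]) < (2, 6)
        return dtypes.quint8 if older_than_2_6 or follows_relu else dtypes.qint8
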
helper.copy_attr(quantized_op_node, "strides", original_node.attr["strides"]) @@ -74,7 +77,8 @@ def _apply_pool_quantization(self, match_node_name): self.add_output_graph_node(quantized_pool_node) deq_type = dtypes.quint8 if self._find_relu_node(node) else dtypes.qint8 self._intel_cpu_add_dequantize_result_node( - quantized_op_name, node.name, dtype=deq_type, performance_only=self.performance_only) + quantized_op_name, node.name, dtype=deq_type, performance_only=self.performance_only + ) else: new_node = node_def_pb2.NodeDef() new_node.CopyFrom(node) @@ -87,8 +91,7 @@ def get_longest_fuse(self): for k, v in enumerate(self.op_list): if v in set(fusion[1] for fusion in self.sorted_patterns): - cur_node = self.node_name_mapping[list( - self.node_name_mapping.keys())[k]].node + cur_node = self.node_name_mapping[list(self.node_name_mapping.keys())[k]].node if cur_node.name != self.start_node_name: continue @@ -110,12 +113,14 @@ def apply_the_transform(self): self._get_op_list() matched_rule, matched_node_name = self.get_longest_fuse() if matched_node_name: - fusion_name = ''.join(matched_rule) - if fusion_name == "DequantizeMaxPoolQuantizeV2" or \ - fusion_name == "DequantizeMaxPool3DQuantizeV2" or \ - fusion_name == "DequantizeAvgPoolQuantizeV2": + fusion_name = "".join(matched_rule) + if ( + fusion_name == "DequantizeMaxPoolQuantizeV2" + or fusion_name == "DequantizeMaxPool3DQuantizeV2" + or fusion_name == "DequantizeAvgPoolQuantizeV2" + ): self._apply_pool_quantization(matched_node_name) - else: # pragma: no cover + else: # pragma: no cover self.logger.info("Unknown fusion pattern {}.".format(fusion_name)) if self.remove_redundant_quant_flag: self.input_graph = self.remove_redundant_quantization(self.input_graph) diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/optimize_qdq.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/optimize_qdq.py index e88b5d4e8bb..7e89d3e23b4 100644 --- a/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/optimize_qdq.py +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/optimize_qdq.py @@ -18,23 +18,36 @@ from tensorflow.core.framework import graph_pb2 from tensorflow.python.platform import gfile + +from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper from neural_compressor.utils.utility import dump_elapsed_time from ..quantize_graph_base import QuantizeGraphBase -from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper -from .fuse_qdq_conv import FuseNodeStartWithConv2d from .fuse_qdq_bn import FuseNodeStartWithFusedBatchNormV3 -from .fuse_qdq_in import FuseNodeStartWithFusedInstanceNorm from .fuse_qdq_concatv2 import FuseNodeStartWithConcatV2 +from .fuse_qdq_conv import FuseNodeStartWithConv2d +from .fuse_qdq_deconv import FuseNodeStartWithDeconv2d +from .fuse_qdq_in import FuseNodeStartWithFusedInstanceNorm from .fuse_qdq_matmul import FuseNodeStartWithMatmul from .fuse_qdq_pooling import FuseNodeStartWithPooling -from .fuse_qdq_deconv import FuseNodeStartWithDeconv2d + class OptimizeQDQGraph(QuantizeGraphBase): """Apply the fusion DQ + OP + Q pattern.""" - def __init__(self, input_graph, input_node_names, output_node_names, op_wise_config, op_wise_sequences, device, \ - fake_quant=False, new_api=False, performance_only=False, itex_mode=False): + def __init__( + self, + input_graph, + input_node_names, + output_node_names, + op_wise_config, + op_wise_sequences, + device, + fake_quant=False, + new_api=False, + performance_only=False, + itex_mode=False, + ): 
"""Optimize QDQ Graph.""" super().__init__(output_node_names) self.op_wise_config = op_wise_config @@ -43,13 +56,14 @@ def __init__(self, input_graph, input_node_names, output_node_names, op_wise_con self.input_graph = input_graph else: self.input_graph = graph_pb2.GraphDef() - with gfile.Open(input_graph, 'rb') as f: + with gfile.Open(input_graph, "rb") as f: self.input_graph.ParseFromString(f.read()) - + input_output_names = input_node_names + output_node_names self.input_graph = QuantizeGraphHelper().remove_training_nodes( - self.input_graph, protected_nodes=input_output_names) - + self.input_graph, protected_nodes=input_output_names + ) + self.op_wise_seq = op_wise_sequences self.device = device @@ -87,22 +101,25 @@ def get_quantized_nodes(self): if count == all_node_length: remove_redundant_quant_flag = True _, quantizable_nodes = self.transformers[node.op]( - input_graph=self.input_graph, - patterns=self.op_wise_seq[node.op], - remove_redundant_quant_flag=remove_redundant_quant_flag, - op_wise_config_name_list=op_wise_config_name_list, - op_wise_cfg=self.op_wise_config[node.name], - start_node_name=node.name, device=self.device, - fake_quant=self.fake_quant, new_api=self.new_api, - performance_only=self.performance_only, - itex_mode=self.itex_mode).get_longest_fuse() - + input_graph=self.input_graph, + patterns=self.op_wise_seq[node.op], + remove_redundant_quant_flag=remove_redundant_quant_flag, + op_wise_config_name_list=op_wise_config_name_list, + op_wise_cfg=self.op_wise_config[node.name], + start_node_name=node.name, + device=self.device, + fake_quant=self.fake_quant, + new_api=self.new_api, + performance_only=self.performance_only, + itex_mode=self.itex_mode, + ).get_longest_fuse() + if quantizable_nodes: if quantizable_nodes[-1] == "QuantizeV2": quantizable_nodes.pop() if quantizable_nodes[0] == "Dequantize": quantizable_nodes.pop(0) - if node.op in ('ConcatV2', 'MaxPool', 'MaxPool3D', 'AvgPool'): + if node.op in ("ConcatV2", "MaxPool", "MaxPool3D", "AvgPool"): self.all_quantizable_node.extend([[i] for i in quantizable_nodes]) else: self.all_quantizable_node.append(quantizable_nodes) @@ -116,8 +133,7 @@ def do_transform(self): op_wise_config_name_list = list(self.op_wise_config.keys()) all_node_length = len(self.op_wise_config) for _, node in enumerate(self.input_graph.node): - if node in self.input_graph.node and node.op in self.transformers \ - and node.name in self.op_wise_config: + if node in self.input_graph.node and node.op in self.transformers and node.name in self.op_wise_config: count += 1 if count == all_node_length: remove_redundant_quant_flag = True @@ -128,14 +144,15 @@ def do_transform(self): remove_redundant_quant_flag=remove_redundant_quant_flag, op_wise_cfg=self.op_wise_config[node.name], op_wise_config_name_list=op_wise_config_name_list, - start_node_name=node.name, device=self.device, + start_node_name=node.name, + device=self.device, fake_quant=self.fake_quant, new_api=self.new_api, performance_only=self.performance_only, - itex_mode=self.itex_mode).apply_the_transform() + itex_mode=self.itex_mode, + ).apply_the_transform() if exclude_nodes: self.exclude_node_list.extend(exclude_nodes) - return self.remove_dead_nodes(self.input_graph, self.output_node_names), self.exclude_node_list diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_base.py b/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_base.py index 6c5a17d3fc8..0a1fab07e26 100644 --- a/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_base.py +++ 
b/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_base.py @@ -18,16 +18,16 @@ import logging from collections import namedtuple -import tensorflow as tf +import tensorflow as tf from tensorflow.core.framework import graph_pb2 from tensorflow.python.framework import dtypes + from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper as helper -from neural_compressor.adaptor.tf_utils.util import version1_gt_version2 -from neural_compressor.adaptor.tf_utils.util import version1_lt_version2 -from neural_compressor.adaptor.tf_utils.util import version1_eq_version2 +from neural_compressor.adaptor.tf_utils.util import version1_eq_version2, version1_gt_version2, version1_lt_version2 -class QuantizeGraphBase(): + +class QuantizeGraphBase: """This is the base class for quantize graph.""" def __init__(self, output_node_names): @@ -51,21 +51,22 @@ def do_transform(self): def remove_dead_nodes(self, input_graph, output_names): """Removes nodes that are no longer needed for inference from the graph.""" - return tf.compat.v1.graph_util.extract_sub_graph( - input_graph, output_names) + return tf.compat.v1.graph_util.extract_sub_graph(input_graph, output_names) + -class QuantizeNodeBase(): +class QuantizeNodeBase: """This is the base class for nodes fusion. Arguments: object {[type]} -- [description] """ - node_details = namedtuple('node_details', ['node', 'output']) + + node_details = namedtuple("node_details", ["node", "output"]) def __init__(self, **kwargs): """Initilizaiton.""" self.logger = logging.getLogger("neural_compressor") - input_graph = kwargs['input_graph'] + input_graph = kwargs["input_graph"] assert isinstance(input_graph, graph_pb2.GraphDef) @@ -75,21 +76,23 @@ def __init__(self, **kwargs): self.output_node_maps = {} self.output_graph = graph_pb2.GraphDef() self.quantized_node_dict = {} - self.patterns = kwargs['patterns'] - self.remove_redundant_quant_flag = kwargs['remove_redundant_quant_flag'] - self.fake_quant = kwargs['fake_quant'] if 'fake_quant' in kwargs else False - self.frame_info = kwargs['frame_info'] if 'frame_info' in kwargs else None - self.per_channel, self.is_asymmetric = kwargs['op_wise_cfg'][0], kwargs['op_wise_cfg'][2] - self.op_wise_config_name_list = kwargs['op_wise_config_name_list'] - self.weight_bit = kwargs['op_wise_cfg'][3] - self.start_node_name = kwargs['start_node_name'] - self.device = kwargs['device'] - self.new_api = kwargs['new_api'] - self.performance_only = kwargs['performance_only'] - self.itex_mode = kwargs['itex_mode'] - self.enable_s8 = bool(version1_gt_version2(tf.version.VERSION, '2.1.0') or \ - version1_eq_version2(tf.version.VERSION, '2.1.0') or \ - tf.version.VERSION.find('1.15.0-up') != -1) + self.patterns = kwargs["patterns"] + self.remove_redundant_quant_flag = kwargs["remove_redundant_quant_flag"] + self.fake_quant = kwargs["fake_quant"] if "fake_quant" in kwargs else False + self.frame_info = kwargs["frame_info"] if "frame_info" in kwargs else None + self.per_channel, self.is_asymmetric = kwargs["op_wise_cfg"][0], kwargs["op_wise_cfg"][2] + self.op_wise_config_name_list = kwargs["op_wise_config_name_list"] + self.weight_bit = kwargs["op_wise_cfg"][3] + self.start_node_name = kwargs["start_node_name"] + self.device = kwargs["device"] + self.new_api = kwargs["new_api"] + self.performance_only = kwargs["performance_only"] + self.itex_mode = kwargs["itex_mode"] + self.enable_s8 = bool( + version1_gt_version2(tf.version.VERSION, "2.1.0") + or version1_eq_version2(tf.version.VERSION, "2.1.0") + or 
tf.version.VERSION.find("1.15.0-up") != -1 + ) def apply_the_transform(self): """This is the virtual interface to be implemented by derived class. @@ -102,7 +105,7 @@ def get_longest_fuse(self): """This is the virtual interface to be implemented by derived class.""" pass - def _insert_dummy_biasadd(self, match_node_name, matched_node): # pragma: no cover + def _insert_dummy_biasadd(self, match_node_name, matched_node): # pragma: no cover """Insert dummy BiasAdd op.""" target_node_name = matched_node.node.name matmul_a_node_name = helper.node_name_from_input(matched_node.node.input[0]) @@ -110,48 +113,53 @@ def _insert_dummy_biasadd(self, match_node_name, matched_node): # pragma: no cov matmul_b_node_name = helper.node_name_from_input(matched_node.node.input[1]) matmul_b_node = self.node_name_mapping[matmul_b_node_name].node - if matmul_a_node.op == 'Const' and matmul_b_node.op != 'Const': + if matmul_a_node.op == "Const" and matmul_b_node.op != "Const": pass else: from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer + g = GraphAnalyzer() g.graph = self.input_graph graph_info = g.parse_graph() next_node_names = graph_info[matched_node.node.name].outputs - bias_node_name = target_node_name + '_dummy_biasadd' - bias_const_node_name = target_node_name + '_fake_const' - - if matched_node.node.op == 'MatMul': - t_b_index = 0 if matched_node.node.attr['transpose_b'].b else 1 - if matched_node.node.op in ('Conv2D' or 'DepthwiseConv2dNative') and \ - matched_node.node.attr['data_format'].s == b'NHWC': + bias_node_name = target_node_name + "_dummy_biasadd" + bias_const_node_name = target_node_name + "_fake_const" + + if matched_node.node.op == "MatMul": + t_b_index = 0 if matched_node.node.attr["transpose_b"].b else 1 + if ( + matched_node.node.op in ("Conv2D" or "DepthwiseConv2dNative") + and matched_node.node.attr["data_format"].s == b"NHWC" + ): t_b_index = 3 - elif matched_node.node.op in ('Conv2D' or 'DepthwiseConv2dNative') and \ - matched_node.node.op.attr['data_format'].s == b'NCHW': + elif ( + matched_node.node.op in ("Conv2D" or "DepthwiseConv2dNative") + and matched_node.node.op.attr["data_format"].s == b"NCHW" + ): t_b_index = 1 - elif matched_node.node.op == 'Conv3D' and matched_node.node.attr['data_format'].s == b'NDHWC': + elif matched_node.node.op == "Conv3D" and matched_node.node.attr["data_format"].s == b"NDHWC": t_b_index = 4 - elif matched_node.node.op == 'Conv3D' and matched_node.node.attr['data_format'].s == b'NCDHW': + elif matched_node.node.op == "Conv3D" and matched_node.node.attr["data_format"].s == b"NCDHW": t_b_index = 1 - bias_add_length = matmul_b_node.attr['value'].tensor.tensor_shape.dim[t_b_index].size + bias_add_length = matmul_b_node.attr["value"].tensor.tensor_shape.dim[t_b_index].size - bias_add_content = [0.] 
* bias_add_length + bias_add_content = [0.0] * bias_add_length bias_const_node = helper.create_constant_node( - bias_const_node_name, bias_add_content, dtypes.float32, shape=[bias_add_length]) - bias_node = helper.create_node('BiasAdd', bias_node_name, [target_node_name, bias_const_node_name]) + bias_const_node_name, bias_add_content, dtypes.float32, shape=[bias_add_length] + ) + bias_node = helper.create_node("BiasAdd", bias_node_name, [target_node_name, bias_const_node_name]) helper.set_attr_dtype(bias_node, "T", dtypes.float32) g.add_node(bias_node, target_node_name, next_node_names) g.add_node(bias_const_node, None, [bias_node_name]) self.input_graph = g.dump_graph() self._parse_graph(self.input_graph) - new_match_node_name=match_node_name[:1]+[bias_node_name]+match_node_name[1:] - new_match_node_name=match_node_name[:1]+[bias_node_name]+match_node_name[1:] + new_match_node_name = match_node_name[:1] + [bias_node_name] + match_node_name[1:] + new_match_node_name = match_node_name[:1] + [bias_node_name] + match_node_name[1:] return new_match_node_name - def _is_match(self, patterns): """Detect the rule matched nodes collections. @@ -163,22 +171,21 @@ def _is_match(self, patterns): for k, v in enumerate(self.op_list): if v in set(fusion[0] for fusion in patterns): - cur_node = self.node_name_mapping[list( - self.node_name_mapping.keys())[k]].node + cur_node = self.node_name_mapping[list(self.node_name_mapping.keys())[k]].node if cur_node.name != self.start_node_name: continue - if ((v in ("Conv2D", "DepthwiseConv2dNative") - and not self.enable_s8) - ) and not self._find_relu_node(cur_node): + if ((v in ("Conv2D", "DepthwiseConv2dNative") and not self.enable_s8)) and not self._find_relu_node( + cur_node + ): continue - + for sub_rule in patterns: if v != sub_rule[0]: continue sub_rule_len = len(sub_rule) - check_hardswish = True if sub_rule_len > 4 else False + check_hardswish = True if sub_rule_len > 4 else False self.logger.debug("Try to apply rule: {}".format(sub_rule)) cur_node_name = list(self.node_name_mapping.keys())[k] @@ -192,50 +199,40 @@ def _is_match(self, patterns): self.logger.debug("Fail to match {}".format(sub_rule)) break - next_node_name = self.node_name_mapping[ - cur_node_name].output[0] + next_node_name = self.node_name_mapping[cur_node_name].output[0] - is_shared_output = True if len( - self.node_name_mapping[cur_node_name].output - ) > 1 else False + is_shared_output = True if len(self.node_name_mapping[cur_node_name].output) > 1 else False - add_op_quantizable = True + add_op_quantizable = True is_hardswish = False if is_shared_output: # pragma: no cover - if next_node_name.find('hard_swish') != -1: + if next_node_name.find("hard_swish") != -1: self.logger.debug("Find Hard Swish pattern ......") is_hardswish = True count = count + 1 - if next_node_name.find('add') == -1: - next_node_name = self.node_name_mapping[ - cur_node_name].output[1] + if next_node_name.find("add") == -1: + next_node_name = self.node_name_mapping[cur_node_name].output[1] else: add_op_quantizable = False - next_node_op = self.node_name_mapping[ - next_node_name].node.op + next_node_op = self.node_name_mapping[next_node_name].node.op if next_node_op in ("Add", "AddV2", "AddN"): - next_node = self.node_name_mapping[ - next_node_name].node + next_node = self.node_name_mapping[next_node_name].node next_node_inputs = list(next_node.input) - cur_node_index = next_node_inputs.index( - cur_node_name) - - for index, input_name in enumerate( - next_node_inputs): - node_type = 
self.node_name_mapping[helper.node_name_from_input( - input_name)].node.op - if input_name != cur_node_name and index < cur_node_index and \ - node_type != 'Dequantize': + cur_node_index = next_node_inputs.index(cur_node_name) + + for index, input_name in enumerate(next_node_inputs): + node_type = self.node_name_mapping[helper.node_name_from_input(input_name)].node.op + if input_name != cur_node_name and index < cur_node_index and node_type != "Dequantize": add_op_quantizable = False break if add_op_quantizable and next_node_op == sub_rule[1 - sub_rule_len]: - if not is_shared_output: + if not is_shared_output: matched_node_name.append(next_node_name) sub_rule_len -= 1 cur_node_name = next_node_name - elif is_hardswish and self.new_api: # pragma: no cover + elif is_hardswish and self.new_api: # pragma: no cover matched_node_name.append(next_node_name) sub_rule_len -= 1 cur_node_name = next_node_name @@ -249,54 +246,77 @@ def _is_match(self, patterns): break if sub_rule_len == 1: - if check_hardswish and sub_rule[-1] == 'Mul' and \ - sub_rule[-2] == 'Mul' and sub_rule[-3] == 'Relu6' and \ - sub_rule[-4] == 'Add' and count != 1: - matched_node_name.clear() - self.logger.debug("Fail to match {}.".format(sub_rule)) - break - self.logger.debug("Match {} on nodes {}.". - format(sub_rule, matched_node_name)) - return sub_rule, matched_node_name + if ( + check_hardswish + and sub_rule[-1] == "Mul" + and sub_rule[-2] == "Mul" + and sub_rule[-3] == "Relu6" + and sub_rule[-4] == "Add" + and count != 1 + ): + matched_node_name.clear() + self.logger.debug("Fail to match {}.".format(sub_rule)) + break + self.logger.debug("Match {} on nodes {}.".format(sub_rule, matched_node_name)) + return sub_rule, matched_node_name return None, None def _need_to_check(self, node_type): """Check op list.""" - op_list = ("ConcatV2", "Conv2D", "Conv3D", "DepthwiseConv2D", "QuantizeV2", "DepthwiseConv2dNative", - "MaxPool", "MaxPool3D", "FusedBatchNormV3", "Requantize", "RequantizePerChannel", "AvgPool", "Pad", - "CropAndResize", "Dequantize", "Mean", "MatMul", "BatchMatMulV2", "FakeQuantWithMinMaxVars", - "_MklFusedInstanceNorm") + op_list = ( + "ConcatV2", + "Conv2D", + "Conv3D", + "DepthwiseConv2D", + "QuantizeV2", + "DepthwiseConv2dNative", + "MaxPool", + "MaxPool3D", + "FusedBatchNormV3", + "Requantize", + "RequantizePerChannel", + "AvgPool", + "Pad", + "CropAndResize", + "Dequantize", + "Mean", + "MatMul", + "BatchMatMulV2", + "FakeQuantWithMinMaxVars", + "_MklFusedInstanceNorm", + ) return any([node_type.find(i) != -1 for i in op_list]) def _find_relu_node(self, node): """Find relu node algorithm to identify the poistive input.""" - #if node.op.find("HardSwish") != -1: + # if node.op.find("HardSwish") != -1: # return False - if (node.op in ("Relu", "Relu6") or \ - (node.op.find("AndRelu") != -1 and \ - ('alpha' not in node.attr or ('alpha' in node.attr and node.attr['alpha'].f == 0)))) \ - and (node.op != "Relu" - or not self.new_api - or not self.performance_only - or self.node_name_mapping \ - [helper.node_name_from_input(node.input[0])].node.op.find("FusedBatchNorm") == -1 - or self.node_name_mapping \ - [helper.node_name_from_input(node.input[0])].node.attr['is_training'].b - or len(self.node_name_mapping \ - [helper.node_name_from_input(node.input[0])].output) > 1): - return True - elif 'T' in node.attr and node.attr['T'].type in (dtypes.quint8, dtypes.uint8): + if ( + node.op in ("Relu", "Relu6") + or ( + node.op.find("AndRelu") != -1 + and ("alpha" not in node.attr or ("alpha" in node.attr and 
node.attr["alpha"].f == 0)) + ) + ) and ( + node.op != "Relu" + or not self.new_api + or not self.performance_only + or self.node_name_mapping[helper.node_name_from_input(node.input[0])].node.op.find("FusedBatchNorm") == -1 + or self.node_name_mapping[helper.node_name_from_input(node.input[0])].node.attr["is_training"].b + or len(self.node_name_mapping[helper.node_name_from_input(node.input[0])].output) > 1 + ): return True - elif (node.op.find("QuantizedConv") != -1 - or node.op.find("QuantizedDepthwiseConv") != -1 or - node.op.find("QuantizedMatMul") != -1 - ) and (node.op.find("Relu") == -1 or \ - ('alpha' in node.attr and node.attr['alpha'].f > 0)): + elif "T" in node.attr and node.attr["T"].type in (dtypes.quint8, dtypes.uint8): + return True + elif ( + node.op.find("QuantizedConv") != -1 + or node.op.find("QuantizedDepthwiseConv") != -1 + or node.op.find("QuantizedMatMul") != -1 + ) and (node.op.find("Relu") == -1 or ("alpha" in node.attr and node.attr["alpha"].f > 0)): return False elif self._need_to_check(node.op): - input_node = self.node_name_mapping[helper.node_name_from_input( - node.input[0])] + input_node = self.node_name_mapping[helper.node_name_from_input(node.input[0])] return self._find_relu_node(input_node.node) else: return False @@ -313,66 +333,53 @@ def _get_op_list(self): def _get_node_input(self, node_name): """Return control_input name, non-control_input node name.""" - return [ - i for i in self.node_name_mapping[node_name].node.input - if i[0] == '^' - ], [ - i for i in self.node_name_mapping[node_name].node.input - if i[0] != '^' + return [i for i in self.node_name_mapping[node_name].node.input if i[0] == "^"], [ + i for i in self.node_name_mapping[node_name].node.input if i[0] != "^" ] - def _intel_cpu_add_dequantize_result_node(self, - quantized_output_name, - original_node_name, - dtype=dtypes.quint8, - min_tensor_index=1, - performance_only=False): + def _intel_cpu_add_dequantize_result_node( + self, quantized_output_name, original_node_name, dtype=dtypes.quint8, min_tensor_index=1, performance_only=False + ): """Add Dequantize node after the quantized node.""" min_max_inputs = [ "%s:%s" % (quantized_output_name, min_tensor_index), - "%s:%s" % (quantized_output_name, min_tensor_index + 1) + "%s:%s" % (quantized_output_name, min_tensor_index + 1), ] dequantize_name = original_node_name dequantize_node = helper.create_node( - "Dequantize", dequantize_name, - [quantized_output_name, min_max_inputs[0], min_max_inputs[1]]) + "Dequantize", dequantize_name, [quantized_output_name, min_max_inputs[0], min_max_inputs[1]] + ) helper.set_attr_dtype(dequantize_node, "T", dtype) if performance_only: helper.set_attr_string(dequantize_node, "mode", b"SCALED") else: - helper.set_attr_string(dequantize_node, "mode", - b"MIN_FIRST" if self.is_asymmetric else b"SCALED") + helper.set_attr_string(dequantize_node, "mode", b"MIN_FIRST" if self.is_asymmetric else b"SCALED") self.add_output_graph_node(dequantize_node) - def eightbitize_single_input_tensor_node(self, original_node, - add_op_function): + def eightbitize_single_input_tensor_node(self, original_node, add_op_function): """Quantize the single input tensor node.""" quantized_op_name = original_node.name + "_eightbit_quantized" quantized_op_type = "Quantized" + original_node.op - if version1_gt_version2(tf.version.VERSION, '2.7.0') and original_node.op == "MaxPool3D": + if version1_gt_version2(tf.version.VERSION, "2.7.0") and original_node.op == "MaxPool3D": quantized_op_type = "_Quantized" + original_node.op all_input_names 
= self._add_eightbit_prologue_nodes(original_node.name) - quantized_op_node = helper.create_node(quantized_op_type, - quantized_op_name, - all_input_names) + quantized_op_node = helper.create_node(quantized_op_type, quantized_op_name, all_input_names) add_op_function(original_node, quantized_op_node) self.add_output_graph_node(quantized_op_node) deq_type = dtypes.quint8 if self._find_relu_node(original_node) else dtypes.qint8 - self._intel_cpu_add_dequantize_result_node(quantized_op_name, - original_node.name, - dtype=deq_type) + self._intel_cpu_add_dequantize_result_node(quantized_op_name, original_node.name, dtype=deq_type) def _add_eightbit_prologue_nodes(self, original_node): """Add quantized prologue nodes.""" namespace_prefix = original_node + "_eightbit" reshape_dims_name, reduction_dims_name = self._add_common_quantization_nodes( - namespace_prefix, helper.node_name_from_input( - self.node_name_mapping[original_node].node.input[0])) + namespace_prefix, helper.node_name_from_input(self.node_name_mapping[original_node].node.input[0]) + ) input_names = [] min_max_names = [] for each_input_name in self.node_name_mapping[original_node].node.input[:1]: - if each_input_name[0] == '^': + if each_input_name[0] == "^": continue if self.node_name_mapping[original_node].node.op == "MatMul": @@ -383,24 +390,22 @@ def _add_eightbit_prologue_nodes(self, original_node): input_node_name = helper.node_name_from_input(each_input_name) if input_node_name in self.output_node_maps: if self.output_node_maps[input_node_name].op == "Dequantize": - dtype = dtypes.DType( - self.output_node_maps[input_node_name].attr["T"].type) + dtype = dtypes.DType(self.output_node_maps[input_node_name].attr["T"].type) elif self._find_relu_node(self.node_name_mapping[original_node].node): dtype = dtypes.quint8 else: dtype = dtypes.qint8 else: - dtype = dtypes.quint8 if self._find_relu_node( - self.node_name_mapping[original_node].node - ) else dtypes.qint8 - if 'FusedBatchNorm' in self.node_name_mapping[original_node].node.op: + dtype = ( + dtypes.quint8 + if self._find_relu_node(self.node_name_mapping[original_node].node) + else dtypes.qint8 + ) + if "FusedBatchNorm" in self.node_name_mapping[original_node].node.op: dtype = dtypes.qint8 - quantize_input_name, min_input_name, max_input_name = ( - self._eightbitize_input_to_node(namespace_prefix, - each_input_name, - reshape_dims_name, - reduction_dims_name, - dtype=dtype)) + quantize_input_name, min_input_name, max_input_name = self._eightbitize_input_to_node( + namespace_prefix, each_input_name, reshape_dims_name, reduction_dims_name, dtype=dtype + ) input_names.append(quantize_input_name) min_max_names.append(min_input_name) min_max_names.append(max_input_name) @@ -409,9 +414,8 @@ def _add_eightbit_prologue_nodes(self, original_node): if min_max_names: all_input_names.extend(min_max_names) - for original_input_name in self.node_name_mapping[ - original_node].node.input: - if original_input_name[0] == '^': + for original_input_name in self.node_name_mapping[original_node].node.input: + if original_input_name[0] == "^": all_input_names.append(original_input_name) return all_input_names @@ -419,12 +423,14 @@ def _add_eightbit_prologue_nodes_for_enter(self, original_node, enter_node=None) """Add quantized prologue nodes for control edge case.""" namespace_prefix = original_node + "_eightbit" reshape_dims_name, reduction_dims_name = self._add_common_quantization_nodes( - namespace_prefix, helper.node_name_from_input( - self.node_name_mapping[original_node].node.input[0]), 
enter_node=enter_node) + namespace_prefix, + helper.node_name_from_input(self.node_name_mapping[original_node].node.input[0]), + enter_node=enter_node, + ) input_names = [] min_max_names = [] enter_input_name = self.node_name_mapping[original_node].node.input[1] - if enter_input_name[0] != '^': + if enter_input_name[0] != "^": if self.node_name_mapping[original_node].node.op == "MatMul": # mkl ops _MklQuantizedMatMulWithBiasAndRelu|AndRequantize # requires the T1 data type as quint8 @@ -433,24 +439,22 @@ def _add_eightbit_prologue_nodes_for_enter(self, original_node, enter_node=None) input_node_name = helper.node_name_from_input(enter_input_name) if input_node_name in self.output_node_maps: if self.output_node_maps[input_node_name].op == "Dequantize": - dtype = dtypes.DType( - self.output_node_maps[input_node_name].attr["T"].type) + dtype = dtypes.DType(self.output_node_maps[input_node_name].attr["T"].type) elif self._find_relu_node(self.node_name_mapping[original_node].node): dtype = dtypes.quint8 else: dtype = dtypes.qint8 else: - dtype = dtypes.quint8 if self._find_relu_node( - self.node_name_mapping[original_node].node - ) else dtypes.qint8 - if 'FusedBatchNorm' in self.node_name_mapping[original_node].node.op: + dtype = ( + dtypes.quint8 + if self._find_relu_node(self.node_name_mapping[original_node].node) + else dtypes.qint8 + ) + if "FusedBatchNorm" in self.node_name_mapping[original_node].node.op: dtype = dtypes.qint8 - quantize_input_name, min_input_name, max_input_name = ( - self._eightbitize_input_to_node(namespace_prefix, - enter_input_name, - reshape_dims_name, - reduction_dims_name, - dtype=dtype)) + quantize_input_name, min_input_name, max_input_name = self._eightbitize_input_to_node( + namespace_prefix, enter_input_name, reshape_dims_name, reduction_dims_name, dtype=dtype + ) input_names.append(quantize_input_name) min_max_names.append(min_input_name) min_max_names.append(max_input_name) @@ -460,52 +464,48 @@ def _add_eightbit_prologue_nodes_for_enter(self, original_node, enter_node=None) if min_max_names: all_input_names.extend(min_max_names) - for original_input_name in self.node_name_mapping[ - original_node].node.input: - if original_input_name[0] == '^': + for original_input_name in self.node_name_mapping[original_node].node.input: + if original_input_name[0] == "^": all_input_names.append(original_input_name) return all_input_names - def _add_common_quantization_nodes(self, - namespace_prefix, - control_input_names=None, - enter_node=None): + def _add_common_quantization_nodes(self, namespace_prefix, control_input_names=None, enter_node=None): """Builds constant nodes needed for quantization of inputs.""" reshape_dims_name = namespace_prefix + "_reshape_dims" reduction_dims_name = namespace_prefix + "_reduction_dims" - reshape_dims_node = helper.create_constant_node( - reshape_dims_name, -1, dtypes.int32, [1]) - - if enter_node: # pragma: no cover - reshape_dims_enter_node = helper.create_node( - 'Enter', reshape_dims_name+'_enter', [reshape_dims_name]) - helper.set_attr_string(reshape_dims_enter_node, - 'frame_name', enter_node.attr['frame_name'].s) - helper.set_attr_dtype(reshape_dims_enter_node, 'T', dtypes.int32) - helper.set_attr_bool(reshape_dims_enter_node, 'is_constant', True) - helper.set_attr_int(reshape_dims_enter_node, 'parallel_iterations', \ - enter_node.attr['parallel_iterations'].i) + reshape_dims_node = helper.create_constant_node(reshape_dims_name, -1, dtypes.int32, [1]) + + if enter_node: # pragma: no cover + reshape_dims_enter_node = 
helper.create_node("Enter", reshape_dims_name + "_enter", [reshape_dims_name]) + helper.set_attr_string(reshape_dims_enter_node, "frame_name", enter_node.attr["frame_name"].s) + helper.set_attr_dtype(reshape_dims_enter_node, "T", dtypes.int32) + helper.set_attr_bool(reshape_dims_enter_node, "is_constant", True) + helper.set_attr_int( + reshape_dims_enter_node, "parallel_iterations", enter_node.attr["parallel_iterations"].i + ) self.add_output_graph_node(reshape_dims_enter_node) self.add_output_graph_node(reshape_dims_node) - reduction_dims_node = helper.create_constant_node( - reduction_dims_name, 0, dtypes.int32, [1]) + reduction_dims_node = helper.create_constant_node(reduction_dims_name, 0, dtypes.int32, [1]) - if enter_node: # pragma: no cover + if enter_node: # pragma: no cover reduction_dims_enter_node = helper.create_node( - 'Enter', reduction_dims_name+'_enter', [reduction_dims_name]) - helper.set_attr_string(reduction_dims_enter_node, - 'frame_name', enter_node.attr['frame_name'].s) - helper.set_attr_dtype(reduction_dims_enter_node, 'T', dtypes.int32) - helper.set_attr_bool(reduction_dims_enter_node, 'is_constant', True) - helper.set_attr_int(reduction_dims_enter_node, 'parallel_iterations', \ - enter_node.attr['parallel_iterations'].i) + "Enter", reduction_dims_name + "_enter", [reduction_dims_name] + ) + helper.set_attr_string(reduction_dims_enter_node, "frame_name", enter_node.attr["frame_name"].s) + helper.set_attr_dtype(reduction_dims_enter_node, "T", dtypes.int32) + helper.set_attr_bool(reduction_dims_enter_node, "is_constant", True) + helper.set_attr_int( + reduction_dims_enter_node, "parallel_iterations", enter_node.attr["parallel_iterations"].i + ) self.add_output_graph_node(reduction_dims_enter_node) self.add_output_graph_node(reduction_dims_node) - return reshape_dims_enter_node.name if enter_node else reshape_dims_name, reduction_dims_enter_node.name \ - if enter_node else reduction_dims_name + return ( + reshape_dims_enter_node.name if enter_node else reshape_dims_name, + reduction_dims_enter_node.name if enter_node else reduction_dims_name, + ) def add_output_graph_node(self, output_node): """Inserts one node into the new graph.""" @@ -523,14 +523,11 @@ def _parse_graph(self, input_graph=None): for node in graph.node: # each_node = self.node_details(node=node, output=[]) if node.name in self.node_name_mapping: - raise ValueError( - "Duplicate Node Found when _parse_graph, the node name is {}" .format( - node.name)) + raise ValueError("Duplicate Node Found when _parse_graph, the node name is {}".format(node.name)) self.node_name_mapping[node.name] = self.node_details(node=node, output=[]) for node_name in self.node_name_mapping: for each_input in self.node_name_mapping[node_name].node.input: - self.node_name_mapping[helper.node_name_from_input(each_input)].output.\ - append(node_name) + self.node_name_mapping[helper.node_name_from_input(each_input)].output.append(node_name) def remove_redundant_quantization(self, old_graph): """Remove the redundant Quantize/QuantizeV2 nodes.""" @@ -547,8 +544,9 @@ def remove_redundant_quantization(self, old_graph): dequantize_node_name = helper.node_name_from_input(node.input[0]) - assert dequantize_node_name in old_nodes_map, "Input node name '" + \ - dequantize_node_name + "' not found in node '" + node.name + "'" + assert dequantize_node_name in old_nodes_map, ( + "Input node name '" + dequantize_node_name + "' not found in node '" + node.name + "'" + ) dequantize_node = old_nodes_map[dequantize_node_name] # Do we have a Dequantize 
feeding in, with the same type as the @@ -564,16 +562,13 @@ def remove_redundant_quantization(self, old_graph): max_node_name = helper.node_name_from_input(node.input[2]) min_node = old_nodes_map[min_node_name] max_node = old_nodes_map[max_node_name] - is_min_right_type = (min_node.op in ["Min", "Dequantize"]) - is_max_right_type = (max_node.op in ["Max", "Dequantize"]) + is_min_right_type = min_node.op in ["Min", "Dequantize"] + is_max_right_type = max_node.op in ["Max", "Dequantize"] if not is_min_right_type or not is_max_right_type: - self.logger.info("Not find expected types on inputs {}, {}.". - format(min_node.op, max_node.op)) + self.logger.info("Not find expected types on inputs {}, {}.".format(min_node.op, max_node.op)) continue - min_node_input_name = helper.node_name_from_input( - min_node.input[0]) - max_node_input_name = helper.node_name_from_input( - max_node.input[0]) + min_node_input_name = helper.node_name_from_input(min_node.input[0]) + max_node_input_name = helper.node_name_from_input(max_node.input[0]) # There are two different patterns for Min nodes we can recognize, one # where the input comes directly from the same one as the Max, and # another where we run it through another Min first, so check for @@ -584,21 +579,17 @@ def remove_redundant_quantization(self, old_graph): else: first_min_node_input = old_nodes_map[min_node_input_name] if first_min_node_input.op == "Concat": - second_min_node_name = helper.node_name_from_input( - first_min_node_input.input[1]) + second_min_node_name = helper.node_name_from_input(first_min_node_input.input[1]) second_min_node = old_nodes_map[second_min_node_name] if second_min_node.op == "Min": - second_min_node_input_name = helper.node_name_from_input( - second_min_node.input[0]) - is_same_input = ( - second_min_node_input_name == max_node_input_name) + second_min_node_input_name = helper.node_name_from_input(second_min_node.input[0]) + is_same_input = second_min_node_input_name == max_node_input_name if not is_same_input: self.logger.info("Different min/max inputs {}.".format(min_node_input_name)) continue # We recognize this pattern, so mark the graph edges to be rewired to # route around it entirely, since we know it's a no-op. - dequantize_source_name = helper.node_name_from_input( - dequantize_node.input[0]) + dequantize_source_name = helper.node_name_from_input(dequantize_node.input[0]) node_tensor_name = helper.ensure_tensor_name_has_port(node.name) min_tensor_name = node.name + ":1" max_tensor_name = node.name + ":2" @@ -609,8 +600,7 @@ def remove_redundant_quantization(self, old_graph): # Finally we apply all the rewiring we've marked to the graph. 
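The rewiring marked above boils down to an input-renaming pass over the GraphDef, which the loop below then performs. A minimal standalone sketch of that pass, assuming a hypothetical inputs_to_rename map keyed by full tensor names (output port included); this is an illustration, not code from the patch:

    from tensorflow.core.framework import graph_pb2

    def apply_rewiring(graph_def, inputs_to_rename):
        """Return a copy of graph_def with matching input tensor names rewritten."""
        out = graph_pb2.GraphDef()
        for node in graph_def.node:
            new_node = out.node.add()
            new_node.CopyFrom(node)
            for i in range(len(new_node.input)):
                # "conv:0" and "conv:1" are different output ports, so match the full name.
                if new_node.input[i] in inputs_to_rename:
                    new_node.input[i] = inputs_to_rename[new_node.input[i]]
        return out

    # Example: route consumers of a redundant QuantizeV2 back to the Dequantize source.
    # rewired = apply_rewiring(graph, {"fc/quantize:0": "fc/dequantize:0"})
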
for node in old_graph.node: for index, input_full_name in enumerate(node.input): - input_name = helper.ensure_tensor_name_has_port( - input_full_name) + input_name = helper.ensure_tensor_name_has_port(input_full_name) if input_name in inputs_to_rename: node.input[index] = inputs_to_rename[input_name] self.add_output_graph_node(node) @@ -625,59 +615,49 @@ def create_nodes_map(self, graph): return nodes_map - def _add_quantize_down_nodes(self, - original_node, - quantized_output_name, - requantize_type=dtypes.quint8, - is_relu6=False): + def _add_quantize_down_nodes( + self, original_node, quantized_output_name, requantize_type=dtypes.quint8, is_relu6=False + ): """Add quantize down nodes.""" - quantized_outputs = [ - quantized_output_name, quantized_output_name + ":1", - quantized_output_name + ":2" - ] + quantized_outputs = [quantized_output_name, quantized_output_name + ":1", quantized_output_name + ":2"] if not self.fake_quant: # Add a RequantizationRange node for finding the min and max values. requant_range_node = helper.create_node( "RequantizationRangePerChannel" - if self.per_channel or original_node.op == 'DepthwiseConv2dNative' else "RequantizationRange", - original_node.name + "_eightbit_requant_range", quantized_outputs) + if self.per_channel or original_node.op == "DepthwiseConv2dNative" + else "RequantizationRange", + original_node.name + "_eightbit_requant_range", + quantized_outputs, + ) - if self.per_channel or original_node.op == 'DepthwiseConv2dNative': + if self.per_channel or original_node.op == "DepthwiseConv2dNative": helper.set_attr_dtype(requant_range_node, "T", dtypes.qint32) if is_relu6: - helper.set_attr_float(requant_range_node, "clip_value_max", - 6.0) + helper.set_attr_float(requant_range_node, "clip_value_max", 6.0) else: - helper.set_attr_float(requant_range_node, "clip_value_max", - 1e30) + helper.set_attr_float(requant_range_node, "clip_value_max", 1e30) else: helper.set_attr_dtype(requant_range_node, "Tinput", dtypes.qint32) self.add_output_graph_node(requant_range_node) - min_max_inputs = [ - requant_range_node.name + ":0", requant_range_node.name + ":1" - ] + min_max_inputs = [requant_range_node.name + ":0", requant_range_node.name + ":1"] else: max_input_name = original_node.name + "_max" - max_node = helper.create_constant_node( - max_input_name, 1., dtypes.float32) + max_node = helper.create_constant_node(max_input_name, 1.0, dtypes.float32) self.add_output_graph_node(max_node) min_input_name = original_node.name + "_min" - min_node = helper.create_constant_node( - min_input_name, -1., dtypes.float32) + min_node = helper.create_constant_node(min_input_name, -1.0, dtypes.float32) self.add_output_graph_node(min_node) - min_max_inputs = [ - min_input_name, max_input_name - ] + min_max_inputs = [min_input_name, max_input_name] requantize_node = helper.create_node( - "RequantizePerChannel" if self.per_channel or original_node.op == 'DepthwiseConv2dNative' \ - else "Requantize", + "RequantizePerChannel" if self.per_channel or original_node.op == "DepthwiseConv2dNative" else "Requantize", original_node.name + "_eightbit_requantize", - quantized_outputs + min_max_inputs) - if self.per_channel or original_node.op == 'DepthwiseConv2dNative': + quantized_outputs + min_max_inputs, + ) + if self.per_channel or original_node.op == "DepthwiseConv2dNative": helper.set_attr_dtype(requantize_node, "T", dtypes.qint32) else: helper.set_attr_dtype(requantize_node, "Tinput", dtypes.qint32) @@ -686,61 +666,52 @@ def _add_quantize_down_nodes(self, 
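The quantize-down step assembled above pairs a RequantizationRange node (which finds the min/max of the qint32 accumulator output) with a Requantize node (which rescales it to 8 bits using that range). A minimal sketch of that pair built from raw NodeDef protos; the node names and the non-per-channel attribute set are illustrative assumptions, not lifted from this patch:

    from tensorflow.core.framework import node_def_pb2
    from tensorflow.python.framework import dtypes

    def make_requantize_pair(conv_name):
        """Sketch: requantize a quantized conv's qint32 output down to quint8."""
        # value, min, max outputs of the quantized conv node
        outputs = [conv_name, conv_name + ":1", conv_name + ":2"]

        rng = node_def_pb2.NodeDef()
        rng.op, rng.name = "RequantizationRange", conv_name + "_requant_range"
        rng.input.extend(outputs)
        rng.attr["Tinput"].type = dtypes.qint32.as_datatype_enum

        req = node_def_pb2.NodeDef()
        req.op, req.name = "Requantize", conv_name + "_requantize"
        req.input.extend(outputs + [rng.name + ":0", rng.name + ":1"])
        req.attr["Tinput"].type = dtypes.qint32.as_datatype_enum
        req.attr["out_type"].type = dtypes.quint8.as_datatype_enum
        return rng, req
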
self.add_output_graph_node(requantize_node) return requantize_node.name - def _eightbitize_input_to_node(self, - namespace_prefix, - original_input_name, - reshape_dims_name, - reduction_dims_name, - dtype=dtypes.quint8): + def _eightbitize_input_to_node( + self, namespace_prefix, original_input_name, reshape_dims_name, reduction_dims_name, dtype=dtypes.quint8 + ): """Takes one float input to an op, and converts it to quantized form.""" - unique_input_name = helper.unique_node_name_from_input( - original_input_name) + unique_input_name = helper.unique_node_name_from_input(original_input_name) if unique_input_name in self.quantized_node_dict: quantized_tuple = self.quantized_node_dict[unique_input_name] return quantized_tuple[0], quantized_tuple[1], quantized_tuple[2] if self.fake_quant: min_input_name = namespace_prefix + "_min_" + unique_input_name - min_node = helper.create_constant_node( - min_input_name, -1., dtypes.float32) + min_node = helper.create_constant_node(min_input_name, -1.0, dtypes.float32) self.add_output_graph_node(min_node) max_input_name = namespace_prefix + "_max_" + unique_input_name - max_node = helper.create_constant_node( - max_input_name, 1., dtypes.float32) + max_node = helper.create_constant_node(max_input_name, 1.0, dtypes.float32) self.add_output_graph_node(max_node) quantize_input_name = namespace_prefix + "_quantize_" + unique_input_name quantize_input_node = helper.create_node( - "QuantizeV2", quantize_input_name, - [original_input_name, min_input_name, max_input_name]) + "QuantizeV2", quantize_input_name, [original_input_name, min_input_name, max_input_name] + ) else: reshape_input_name = namespace_prefix + "_reshape_" + unique_input_name min_input_name = namespace_prefix + "_min_" + unique_input_name max_input_name = namespace_prefix + "_max_" + unique_input_name quantize_input_name = namespace_prefix + "_quantize_" + unique_input_name reshape_input_node = helper.create_node( - "Reshape", reshape_input_name, - [original_input_name, reshape_dims_name]) + "Reshape", reshape_input_name, [original_input_name, reshape_dims_name] + ) helper.set_attr_dtype(reshape_input_node, "T", dtypes.float32) self.add_output_graph_node(reshape_input_node) - min_input_node = helper.create_node( - "Min", min_input_name, [reshape_input_name, reduction_dims_name]) + min_input_node = helper.create_node("Min", min_input_name, [reshape_input_name, reduction_dims_name]) helper.set_attr_dtype(min_input_node, "T", dtypes.float32) helper.set_attr_dtype(min_input_node, "Tidx", dtypes.int32) helper.set_attr_bool(min_input_node, "keep_dims", False) self.add_output_graph_node(min_input_node) - max_input_node = helper.create_node( - "Max", max_input_name, [reshape_input_name, reduction_dims_name]) + max_input_node = helper.create_node("Max", max_input_name, [reshape_input_name, reduction_dims_name]) helper.set_attr_dtype(max_input_node, "T", dtypes.float32) helper.set_attr_dtype(max_input_node, "Tidx", dtypes.int32) helper.set_attr_bool(max_input_node, "keep_dims", False) self.add_output_graph_node(max_input_node) quantize_input_node = helper.create_node( - "QuantizeV2", quantize_input_name, - [original_input_name, min_input_name, max_input_name]) + "QuantizeV2", quantize_input_name, [original_input_name, min_input_name, max_input_name] + ) helper.set_attr_dtype(quantize_input_node, "T", dtype) - helper.set_attr_string(quantize_input_node, "mode", - b"MIN_FIRST" if self.is_asymmetric else b"SCALED") + helper.set_attr_string(quantize_input_node, "mode", b"MIN_FIRST" if self.is_asymmetric 
else b"SCALED") if not self.is_asymmetric: helper.set_attr_string(quantize_input_node, "round_mode", b"HALF_TO_EVEN") # if FLAGS.model_name in ["wide_deep_large_ds"]: @@ -754,22 +725,22 @@ def _eightbitize_input_to_node(self, self.add_output_graph_node(quantize_input_node) min_output_name = quantize_input_name + ":1" max_output_name = quantize_input_name + ":2" - self.quantized_node_dict[unique_input_name] = (quantize_input_name, - min_output_name, - max_output_name) + self.quantized_node_dict[unique_input_name] = (quantize_input_name, min_output_name, max_output_name) return quantize_input_name, min_output_name, max_output_name - - def _intel_cpu_quantize_weight_eightbit(self, - parent, - input_node, - per_channel, - enter_node=None): + def _intel_cpu_quantize_weight_eightbit(self, parent, input_node, per_channel, enter_node=None): """Quantize weight node.""" - qint8_const_node, min_node, max_node, qint8_const_enter_node, min_enter_node, max_enter_node = \ - helper.generate_quantized_weight_node(parent, input_node, per_channel, - self.weight_bit, self.device, enter_node) - if enter_node: # pragma: no cover + ( + qint8_const_node, + min_node, + max_node, + qint8_const_enter_node, + min_enter_node, + max_enter_node, + ) = helper.generate_quantized_weight_node( + parent, input_node, per_channel, self.weight_bit, self.device, enter_node + ) + if enter_node: # pragma: no cover self.add_output_graph_node(qint8_const_node) self.add_output_graph_node(qint8_const_enter_node) self.add_output_graph_node(min_node) diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_bn.py b/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_bn.py index cf456c93592..31b5a7915d6 100644 --- a/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_bn.py +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_bn.py @@ -16,27 +16,26 @@ # limitations under the License. 
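The per-input prologue assembled by _eightbitize_input_to_node above follows a Reshape -> Min/Max -> QuantizeV2 pattern. A condensed sketch of that pattern, assuming the reshape/reduction dims Const nodes (values -1 and 0) already exist under the given prefix, as they do in the pass; names are illustrative only:

    from tensorflow.core.framework import node_def_pb2
    from tensorflow.python.framework import dtypes

    def _node(op, name, inputs):
        n = node_def_pb2.NodeDef()
        n.op, n.name = op, name
        n.input.extend(inputs)
        return n

    def quantize_input(x, prefix, asymmetric=False):
        """Sketch of the dynamic-range QuantizeV2 prologue built above."""
        reshape = _node("Reshape", prefix + "_reshape", [x, prefix + "_reshape_dims"])
        vmin = _node("Min", prefix + "_min", [reshape.name, prefix + "_reduction_dims"])
        vmax = _node("Max", prefix + "_max", [reshape.name, prefix + "_reduction_dims"])
        for n in (vmin, vmax):
            n.attr["T"].type = dtypes.float32.as_datatype_enum
            n.attr["Tidx"].type = dtypes.int32.as_datatype_enum
            n.attr["keep_dims"].b = False
        q = _node("QuantizeV2", prefix + "_quantize", [x, vmin.name, vmax.name])
        q.attr["T"].type = dtypes.quint8.as_datatype_enum
        q.attr["mode"].s = b"MIN_FIRST" if asymmetric else b"SCALED"
        return [reshape, vmin, vmax, q]
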
"""Quantize FusedBatchNormV3 to int8 op.""" -from tensorflow.core.framework import graph_pb2 -from tensorflow.core.framework import node_def_pb2 +from tensorflow.core.framework import graph_pb2, node_def_pb2 from tensorflow.python.framework import dtypes from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper as helper + from .quantize_graph_base import QuantizeNodeBase -class FuseNodeStartWithFusedBatchNormV3(QuantizeNodeBase): # pragma: no cover + +class FuseNodeStartWithFusedBatchNormV3(QuantizeNodeBase): # pragma: no cover """Quantize FusedBatchNormV3 to int8 op and apply the fusion.""" def __init__(self, **kwargs): """Initilization.""" super().__init__(**kwargs) - self.sorted_patterns = sorted(self.patterns, - key=lambda i: len(i), - reverse=True) + self.sorted_patterns = sorted(self.patterns, key=lambda i: len(i), reverse=True) if self.new_api: self.fusion_mapping = { - 'FusedBatchNormV3': self.apply_newly_bn_relu_fusion, - 'FusedBatchNormV3Relu': self.apply_newly_bn_relu_fusion, - 'FusedBatchNormV3LeakyRelu': self.apply_newly_bn_leakyrelu_fusion + "FusedBatchNormV3": self.apply_newly_bn_relu_fusion, + "FusedBatchNormV3Relu": self.apply_newly_bn_relu_fusion, + "FusedBatchNormV3LeakyRelu": self.apply_newly_bn_leakyrelu_fusion, } else: self.fusion_mapping = {} @@ -46,22 +45,21 @@ def apply_newly_bn_relu_fusion(self, match_node_name): """Apply the FusedBatchNormV3 Relu fusion.""" matched_node = self.node_name_mapping[match_node_name[0]] skip_node_name = match_node_name[1:] - control_inputs, normal_inputs = self._get_node_input( - matched_node.node.name) + control_inputs, normal_inputs = self._get_node_input(matched_node.node.name) scale_name = normal_inputs[1] offset_name = normal_inputs[2] mean_name = normal_inputs[3] - variance_name = normal_inputs[4] + variance_name = normal_inputs[4] all_input_names = self._add_eightbit_prologue_nodes(matched_node.node.name) all_input_names = [ - all_input_names[0], + all_input_names[0], scale_name, offset_name, mean_name, variance_name, all_input_names[1], - all_input_names[2] + all_input_names[2], ] for _, node in enumerate(self.input_graph.node): @@ -70,19 +68,20 @@ def apply_newly_bn_relu_fusion(self, match_node_name): elif node.name == match_node_name[0]: self.logger.debug("Matched node {} with input {}.".format(node.name, node.input)) - relu_node_name = match_node_name[1] if len(match_node_name)==2 else None + relu_node_name = match_node_name[1] if len(match_node_name) == 2 else None - node_op = '_QuantizedFusedBatchNorm' + node_op = "_QuantizedFusedBatchNorm" quantized_node_name = node.name + "_eightbit_quantized_bn" output_min_node_name = quantized_node_name + "_input7_output_min" output_max_node_name = quantized_node_name + "_input8_output_max" - quantized_node_input_names = all_input_names + \ - [output_min_node_name] + [output_max_node_name] + control_inputs - output_min_node = helper.create_constant_node(output_min_node_name, -1., dtypes.float32) - output_max_node = helper.create_constant_node(output_max_node_name, 1., dtypes.float32) + quantized_node_input_names = ( + all_input_names + [output_min_node_name] + [output_max_node_name] + control_inputs + ) + output_min_node = helper.create_constant_node(output_min_node_name, -1.0, dtypes.float32) + output_max_node = helper.create_constant_node(output_max_node_name, 1.0, dtypes.float32) quantized_bn_node = helper.create_node(node_op, quantized_node_name, quantized_node_input_names) if relu_node_name is not None: - helper.set_attr_string(quantized_bn_node, 
"activation_mode", b'Relu') + helper.set_attr_string(quantized_bn_node, "activation_mode", b"Relu") if self.node_name_mapping[offset_name].node.op == "Const": helper.set_attr_bool(quantized_bn_node, "is_offset_const", True) else: @@ -94,9 +93,9 @@ def apply_newly_bn_relu_fusion(self, match_node_name): helper.set_attr_dtype(quantized_bn_node, "T", dtypes.qint8) helper.set_attr_dtype(quantized_bn_node, "U", dtypes.float32) helper.set_attr_dtype(quantized_bn_node, "Tout", dtypes.qint8) + """# 0. - """ - # 0. x + x # 1. scale # 2. offset # 3. mean @@ -106,38 +105,45 @@ def apply_newly_bn_relu_fusion(self, match_node_name): # 7. {output_min} # 8. {output_max} """ - helper.set_attr_type_list(quantized_bn_node, 'input_types', [ - dtypes.qint8.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) - + helper.set_attr_type_list( + quantized_bn_node, + "input_types", + [ + dtypes.qint8.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + """# 0. - """ - # 0. output + output # 1. output_min # 2. output_max """ - helper.set_attr_type_list(quantized_bn_node, 'out_types', [ - dtypes.qint8.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) + helper.set_attr_type_list( + quantized_bn_node, + "out_types", + [ + dtypes.qint8.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) self.add_output_graph_node(output_min_node) self.add_output_graph_node(output_max_node) self.add_output_graph_node(quantized_bn_node) self._intel_cpu_add_dequantize_result_node( - quantized_output_name = quantized_node_name, - original_node_name = match_node_name[-1], - dtype = dtypes.qint8, - min_tensor_index = 1 - ) + quantized_output_name=quantized_node_name, + original_node_name=match_node_name[-1], + dtype=dtypes.qint8, + min_tensor_index=1, + ) else: new_node = node_def_pb2.NodeDef() @@ -148,8 +154,7 @@ def apply_newly_bn_leakyrelu_fusion(self, match_node_name): """Apply the FusedBatchNormV3 LeakyRelu fusion.""" matched_node = self.node_name_mapping[match_node_name[0]] skip_node_name = match_node_name[1:] - control_inputs, normal_inputs = self._get_node_input( - matched_node.node.name) + control_inputs, normal_inputs = self._get_node_input(matched_node.node.name) scale_name = normal_inputs[1] offset_name = normal_inputs[2] mean_name = normal_inputs[3] @@ -163,7 +168,7 @@ def apply_newly_bn_leakyrelu_fusion(self, match_node_name): mean_name, variance_name, all_input_names[1], - all_input_names[2] + all_input_names[2], ] for _, node in enumerate(self.input_graph.node): @@ -172,19 +177,21 @@ def apply_newly_bn_leakyrelu_fusion(self, match_node_name): elif node.name == match_node_name[0]: self.logger.debug("Matched node {} with input {}.".format(node.name, node.input)) leakyrelu_node_name = match_node_name[1] - node_op = '_QuantizedFusedBatchNorm' + node_op = "_QuantizedFusedBatchNorm" quantized_node_name = node.name + "_eightbit_quantized_bn" output_min_node_name = quantized_node_name + "_input7_output_min" output_max_node_name = quantized_node_name + 
"_input8_output_max" - quantized_node_input_names = all_input_names + \ - [output_min_node_name] + [output_max_node_name] + control_inputs - output_min_node = helper.create_constant_node(output_min_node_name, -1., dtypes.float32) - output_max_node = helper.create_constant_node(output_max_node_name, 1., dtypes.float32) + quantized_node_input_names = ( + all_input_names + [output_min_node_name] + [output_max_node_name] + control_inputs + ) + output_min_node = helper.create_constant_node(output_min_node_name, -1.0, dtypes.float32) + output_max_node = helper.create_constant_node(output_max_node_name, 1.0, dtypes.float32) quantized_bn_node = helper.create_node(node_op, quantized_node_name, quantized_node_input_names) - helper.set_attr_string(quantized_bn_node, "activation_mode", b'LeakyRelu') - helper.copy_attr(quantized_bn_node, "alpha", \ - self.node_name_mapping[leakyrelu_node_name].node.attr["alpha"]) + helper.set_attr_string(quantized_bn_node, "activation_mode", b"LeakyRelu") + helper.copy_attr( + quantized_bn_node, "alpha", self.node_name_mapping[leakyrelu_node_name].node.attr["alpha"] + ) if self.node_name_mapping[offset_name].node.op == "Const": helper.set_attr_bool(quantized_bn_node, "is_offset_const", True) else: @@ -196,9 +203,9 @@ def apply_newly_bn_leakyrelu_fusion(self, match_node_name): helper.set_attr_dtype(quantized_bn_node, "T", dtypes.qint8) helper.set_attr_dtype(quantized_bn_node, "U", dtypes.float32) helper.set_attr_dtype(quantized_bn_node, "Tout", dtypes.qint8) + """# 0. - """ - # 0. x + x # 1. scale # 2. offset # 3. mean @@ -208,39 +215,46 @@ def apply_newly_bn_leakyrelu_fusion(self, match_node_name): # 7. {output_min} # 8. {output_max} """ - helper.set_attr_type_list(quantized_bn_node, 'input_types', [ - dtypes.qint8.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) - + helper.set_attr_type_list( + quantized_bn_node, + "input_types", + [ + dtypes.qint8.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) + """# 0. - """ - # 0. output + output # 1. output_min # 2. 
output_max """ - helper.set_attr_type_list(quantized_bn_node, 'out_types', [ - dtypes.qint8.as_datatype_enum, - dtypes.float32.as_datatype_enum, - dtypes.float32.as_datatype_enum, - ]) + helper.set_attr_type_list( + quantized_bn_node, + "out_types", + [ + dtypes.qint8.as_datatype_enum, + dtypes.float32.as_datatype_enum, + dtypes.float32.as_datatype_enum, + ], + ) self.add_output_graph_node(output_min_node) self.add_output_graph_node(output_max_node) self.add_output_graph_node(quantized_bn_node) self._intel_cpu_add_dequantize_result_node( - quantized_output_name = quantized_node_name, - original_node_name = match_node_name[-1], - dtype = dtypes.qint8, - min_tensor_index = 1, - performance_only=self.performance_only - ) + quantized_output_name=quantized_node_name, + original_node_name=match_node_name[-1], + dtype=dtypes.qint8, + min_tensor_index=1, + performance_only=self.performance_only, + ) else: new_node = node_def_pb2.NodeDef() @@ -259,16 +273,16 @@ def apply_the_transform(self): matched_rule, matched_node_name = self._is_match(self.sorted_patterns) if matched_node_name: self.output_graph = graph_pb2.GraphDef() - fusion_name = ''.join(matched_rule) + fusion_name = "".join(matched_rule) bn_node = self.node_name_mapping[matched_node_name[0]].node - is_training = bn_node.attr['is_training'].b - if fusion_name in self.fusion_mapping and is_training == False: + is_training = bn_node.attr["is_training"].b + if fusion_name in self.fusion_mapping and is_training is False: self.fusion_mapping[fusion_name](matched_node_name) else: - if is_training == True: - self.logger.info \ - ("Skip quantizing the BN node '{}' due to the attr 'is_training == true'." \ - .format(bn_node.name)) + if is_training is True: + self.logger.info( + "Skip quantizing the BN node '{}' due to the attr 'is_training == true'.".format(bn_node.name) + ) self.exclude_bn_nodes.append(bn_node.name) elif self.new_api: self.logger.info("Unknown fusion pattern {} .".format(fusion_name)) diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_concatv2.py b/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_concatv2.py index bb9ed6f6ec1..2e3fcba4400 100644 --- a/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_concatv2.py +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_concatv2.py @@ -18,11 +18,13 @@ import re -from tensorflow.python.framework import dtypes from tensorflow.core.framework import node_def_pb2 -from .quantize_graph_base import QuantizeNodeBase +from tensorflow.python.framework import dtypes + from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper as helper +from .quantize_graph_base import QuantizeNodeBase + class FuseNodeStartWithConcatV2(QuantizeNodeBase): """Quantize ConcatV2 to int8 op QuantizedConcatV2.""" @@ -32,22 +34,20 @@ def _apply_concatv2_transform(self, original_node): namespace_prefix = original_node.name + "_eightbit" quantized_concat_name = namespace_prefix + "_quantized_concatv2" reshape_dims_name, reduction_dims_name = self._add_common_quantization_nodes( - namespace_prefix, helper.node_name_from_input(original_node.input[-1])) + namespace_prefix, helper.node_name_from_input(original_node.input[-1]) + ) num_input = len(original_node.input) shape_input_name = original_node.input[num_input - 1] - original_inputs = original_node.input[0:num_input - 1] + original_inputs = original_node.input[0 : num_input - 1] input_names = [] min_names = [] max_names = [] for original_input_name in original_inputs: 
original_input_node = self.node_name_mapping[original_input_name].node input_data_type = dtypes.quint8 if self._find_relu_node(original_input_node) else dtypes.qint8 - quantize_input_name, min_input_name, max_input_name = ( - self._eightbitize_input_to_node(namespace_prefix, - original_input_name, - reshape_dims_name, - reduction_dims_name, - dtype=input_data_type)) + quantize_input_name, min_input_name, max_input_name = self._eightbitize_input_to_node( + namespace_prefix, original_input_name, reshape_dims_name, reduction_dims_name, dtype=input_data_type + ) input_names.append(quantize_input_name) min_names.append(min_input_name) max_names.append(max_input_name) @@ -55,9 +55,7 @@ def _apply_concatv2_transform(self, original_node): all_input_names.append(shape_input_name) all_input_names.extend(min_names) all_input_names.extend(max_names) - quantized_concat_node = helper.create_node("QuantizedConcatV2", - quantized_concat_name, - all_input_names) + quantized_concat_node = helper.create_node("QuantizedConcatV2", quantized_concat_name, all_input_names) helper.set_attr_int(quantized_concat_node, "N", len(original_inputs)) helper.set_attr_dtype(quantized_concat_node, "T", input_data_type) self.add_output_graph_node(quantized_concat_node) @@ -66,12 +64,12 @@ def _apply_concatv2_transform(self, original_node): def _quantizable_concat(self, node): """Check if the ConcatV2 is quantizable.""" deq_type = [] - for input_node_name in node.input[:node.attr['N'].i]: + for input_node_name in node.input[: node.attr["N"].i]: node_name = helper.node_name_from_input(input_node_name) if self.node_name_mapping[node_name].node.op != "Dequantize": return False - deq_type.append(self.node_name_mapping[node_name].node.attr['T'].type) + deq_type.append(self.node_name_mapping[node_name].node.attr["T"].type) if len(set(deq_type)) != 1: return False @@ -81,9 +79,12 @@ def _quantizable_concat(self, node): def _apply_concatv2_quantization(self): """Quantize ConcatV2 if it's quantizable.""" for _, v in self.node_name_mapping.items(): - if v.node.op in ("ConcatV2",) and self._quantizable_concat(v.node) and \ - dtypes.as_dtype(v.node.attr["T"].type) == dtypes.float32 and \ - not re.search(r'map(_\d+)?/while', v.node.name): + if ( + v.node.op in ("ConcatV2",) + and self._quantizable_concat(v.node) + and dtypes.as_dtype(v.node.attr["T"].type) == dtypes.float32 + and not re.search(r"map(_\d+)?/while", v.node.name) + ): self._apply_concatv2_transform(v.node) self.quantizable_node_names.append(v.node.name) else: diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_conv.py b/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_conv.py index c79d3d589d0..0da8b43734c 100644 --- a/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_conv.py +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_conv.py @@ -16,16 +16,16 @@ # limitations under the License. 
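Before the ConcatV2 transform above is applied, _quantizable_concat requires every data input to come from a Dequantize node, all sharing one dequantize dtype, and it skips concats inside map/while frames. A simplified standalone sketch of that check; name_to_node and the port-stripping are assumptions made for illustration:

    import re

    def concat_is_quantizable(concat_node, name_to_node):
        """Sketch of the ConcatV2 check: all N data inputs must be Dequantize
        nodes that agree on a single dequantize dtype."""
        n = concat_node.attr["N"].i                      # number of data inputs
        deq_types = set()
        for inp in concat_node.input[:n]:
            src = name_to_node[inp.split(":")[0]]        # strip the output port
            if src.op != "Dequantize":
                return False
            deq_types.add(src.attr["T"].type)
        # ConcatV2 inside tf.map_fn/while frames is excluded by the pass as well.
        return len(deq_types) == 1 and not re.search(r"map(_\d+)?/while", concat_node.name)
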
"""Quantize Conv2D/DepthwiseConv2dNative.""" +import numpy as np import tensorflow as tf -from tensorflow.core.framework import graph_pb2 -from tensorflow.core.framework import node_def_pb2 -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import tensor_util +from tensorflow.core.framework import graph_pb2, node_def_pb2 +from tensorflow.python.framework import dtypes, tensor_util from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper as helper -from .quantize_graph_base import QuantizeNodeBase from neural_compressor.adaptor.tf_utils.util import version1_gte_version2 -import numpy as np + +from .quantize_graph_base import QuantizeNodeBase + class FuseNodeStartWithConv2d(QuantizeNodeBase): """Quantize Conv2D/DepthwiseConv2dNative to int8 op.""" @@ -33,33 +33,28 @@ class FuseNodeStartWithConv2d(QuantizeNodeBase): def __init__(self, **kwargs): """Initilization.""" super().__init__(**kwargs) - self.sorted_patterns = sorted(self.patterns, - key=lambda i: len(i), - reverse=True) + self.sorted_patterns = sorted(self.patterns, key=lambda i: len(i), reverse=True) self.exclude_conv_name = [] self.fusion_mapping = { - 'Conv2DBiasAdd': self.apply_conv_biasadd_fusion, - 'Conv2DBiasAddAddNRelu': self.apply_conv_biasadd_addn_relu_fusion, - 'Conv2DBiasAddAddNRelu6': self.apply_conv_biasadd_addn_relu_fusion, - 'Conv2DBiasAddAddV2Relu': self.apply_conv_biasadd_addn_relu_fusion, - 'Conv2DBiasAddAddV2Relu6': self.apply_conv_biasadd_addn_relu_fusion, - 'Conv2DBiasAddAddRelu': self.apply_conv_biasadd_addn_relu_fusion, - 'Conv2DBiasAddRelu6': self.apply_conv_biasadd_relu_fusion, - 'Conv2DBiasAddRelu': self.apply_conv_biasadd_relu_fusion, - 'Conv2DBiasAddLeakyRelu': self.apply_conv_biasadd_relu_fusion, - 'Conv2DBiasAddLeakyReluAddV2': self.apply_conv_biasadd_addn_relu_fusion, - 'Conv2DAddRelu6': self.apply_conv_biasadd_relu_fusion, - 'Conv2DAddRelu': self.apply_conv_biasadd_relu_fusion, - 'DepthwiseConv2dNativeAddRelu6': - self.apply_conv_biasadd_relu_fusion, - 'DepthwiseConv2dNativeBiasAddRelu': - self.apply_conv_biasadd_relu_fusion, - 'DepthwiseConv2dNativeBiasAdd': self.apply_conv_biasadd_fusion, - 'DepthwiseConv2dNativeBiasAddRelu6': - self.apply_conv_biasadd_relu_fusion, - 'Conv2D': self.apply_conv_single_fusion, - 'DepthwiseConv2dNative': self.apply_conv_single_fusion + "Conv2DBiasAdd": self.apply_conv_biasadd_fusion, + "Conv2DBiasAddAddNRelu": self.apply_conv_biasadd_addn_relu_fusion, + "Conv2DBiasAddAddNRelu6": self.apply_conv_biasadd_addn_relu_fusion, + "Conv2DBiasAddAddV2Relu": self.apply_conv_biasadd_addn_relu_fusion, + "Conv2DBiasAddAddV2Relu6": self.apply_conv_biasadd_addn_relu_fusion, + "Conv2DBiasAddAddRelu": self.apply_conv_biasadd_addn_relu_fusion, + "Conv2DBiasAddRelu6": self.apply_conv_biasadd_relu_fusion, + "Conv2DBiasAddRelu": self.apply_conv_biasadd_relu_fusion, + "Conv2DBiasAddLeakyRelu": self.apply_conv_biasadd_relu_fusion, + "Conv2DBiasAddLeakyReluAddV2": self.apply_conv_biasadd_addn_relu_fusion, + "Conv2DAddRelu6": self.apply_conv_biasadd_relu_fusion, + "Conv2DAddRelu": self.apply_conv_biasadd_relu_fusion, + "DepthwiseConv2dNativeAddRelu6": self.apply_conv_biasadd_relu_fusion, + "DepthwiseConv2dNativeBiasAddRelu": self.apply_conv_biasadd_relu_fusion, + "DepthwiseConv2dNativeBiasAdd": self.apply_conv_biasadd_fusion, + "DepthwiseConv2dNativeBiasAddRelu6": self.apply_conv_biasadd_relu_fusion, + "Conv2D": self.apply_conv_single_fusion, + "DepthwiseConv2dNative": self.apply_conv_single_fusion, } def 
apply_conv_single_fusion(self, match_node_name): @@ -75,10 +70,9 @@ def apply_conv_single_fusion(self, match_node_name): self.output_graph = self.input_graph return - q_weights_name, q_weights_min_name, q_weights_max_name = \ - self._intel_cpu_quantize_weight_eightbit(matched_node.node.op, - self.node_name_mapping[weight_name].node, - self.per_channel) + q_weights_name, q_weights_min_name, q_weights_max_name = self._intel_cpu_quantize_weight_eightbit( + matched_node.node.op, self.node_name_mapping[weight_name].node, self.per_channel + ) all_input_names = self._add_eightbit_prologue_nodes(matched_node.node.name) all_input_names = all_input_names[:1] + [q_weights_name] + all_input_names[1:] @@ -96,37 +90,28 @@ def apply_conv_single_fusion(self, match_node_name): quantized_node_name = node.name + postfix if node.op == "Conv2D": quantized_conv_node = helper.create_node( - "QuantizedConv2DPerChannel" - if self.per_channel else "QuantizedConv2D", - quantized_node_name, all_input_names) + "QuantizedConv2DPerChannel" if self.per_channel else "QuantizedConv2D", + quantized_node_name, + all_input_names, + ) elif node.op == "DepthwiseConv2dNative": quantized_conv_node = helper.create_node( - "QuantizedDepthwiseConv2D", quantized_node_name, - all_input_names) - - helper.copy_attr(quantized_conv_node, "strides", - node.attr["strides"]) - helper.copy_attr(quantized_conv_node, "padding", - node.attr["padding"]) + "QuantizedDepthwiseConv2D", quantized_node_name, all_input_names + ) + + helper.copy_attr(quantized_conv_node, "strides", node.attr["strides"]) + helper.copy_attr(quantized_conv_node, "padding", node.attr["padding"]) if "padding_list" in node.attr: - helper.copy_attr(quantized_conv_node, "padding_list", - node.attr["padding_list"]) - helper.copy_attr(quantized_conv_node, "dilations", - node.attr["dilations"]) - input_data_type = dtypes.quint8 if self._find_relu_node( - node) else dtypes.qint8 - helper.set_attr_dtype(quantized_conv_node, "Tinput", - input_data_type) - helper.set_attr_dtype(quantized_conv_node, "Tfilter", - dtypes.qint8) - helper.set_attr_dtype(quantized_conv_node, "out_type", - dtypes.qint32) + helper.copy_attr(quantized_conv_node, "padding_list", node.attr["padding_list"]) + helper.copy_attr(quantized_conv_node, "dilations", node.attr["dilations"]) + input_data_type = dtypes.quint8 if self._find_relu_node(node) else dtypes.qint8 + helper.set_attr_dtype(quantized_conv_node, "Tinput", input_data_type) + helper.set_attr_dtype(quantized_conv_node, "Tfilter", dtypes.qint8) + helper.set_attr_dtype(quantized_conv_node, "out_type", dtypes.qint32) self.add_output_graph_node(quantized_conv_node) - quantize_down_name = self._add_quantize_down_nodes( - node, quantized_node_name, dtypes.qint8) - self._intel_cpu_add_dequantize_result_node( - quantize_down_name, node.name, dtypes.qint8) + quantize_down_name = self._add_quantize_down_nodes(node, quantized_node_name, dtypes.qint8) + self._intel_cpu_add_dequantize_result_node(quantize_down_name, node.name, dtypes.qint8) else: new_node = node_def_pb2.NodeDef() new_node.CopyFrom(node) @@ -139,9 +124,9 @@ def apply_conv_biasadd_relu_fusion(self, match_node_name): control_inputs, normal_inputs = self._get_node_input(matched_node.node.name) weight_name = normal_inputs[1] - q_weights_name, q_weights_min_name, q_weights_max_name = \ - self._intel_cpu_quantize_weight_eightbit( - matched_node.node.op, self.node_name_mapping[weight_name].node, self.per_channel) + q_weights_name, q_weights_min_name, q_weights_max_name = 
self._intel_cpu_quantize_weight_eightbit( + matched_node.node.op, self.node_name_mapping[weight_name].node, self.per_channel + ) all_input_names = self._add_eightbit_prologue_nodes(matched_node.node.name) all_input_names = all_input_names[:1] + [q_weights_name] + all_input_names[1:] @@ -153,7 +138,6 @@ def apply_conv_biasadd_relu_fusion(self, match_node_name): if node.name in skip_node_name: self.logger.debug("Skip node {}.".format(node.name)) elif node.name == match_node_name[0]: - postfix = "_eightbit_quantized_depthwise_conv" if node.op == "Conv2D": postfix = "_eightbit_quantized_conv" @@ -161,39 +145,38 @@ def apply_conv_biasadd_relu_fusion(self, match_node_name): bias_node_name = self.node_name_mapping[match_node_name[1]].node.input[1] relu_node_name = match_node_name[2] is_relu6 = self.node_name_mapping[relu_node_name].node.op == "Relu6" - quantized_node_input_names = all_input_names[:2] + \ - [bias_node_name] + all_input_names[2:] + control_inputs + quantized_node_input_names = ( + all_input_names[:2] + [bias_node_name] + all_input_names[2:] + control_inputs + ) is_leakyrelu = self.node_name_mapping[relu_node_name].node.op == "LeakyRelu" - quantized_conv_node_op = 'QuantizedDepthwiseConv2DWithBiasAndRelu' + quantized_conv_node_op = "QuantizedDepthwiseConv2DWithBiasAndRelu" if node.op == "Conv2D" or is_leakyrelu: quantized_conv_node_op = "QuantizedConv2DWithBiasAndRelu" quantized_conv_node = helper.create_node( - quantized_conv_node_op, - quantized_node_name, quantized_node_input_names) + quantized_conv_node_op, quantized_node_name, quantized_node_input_names + ) helper.copy_attr(quantized_conv_node, "strides", node.attr["strides"]) helper.copy_attr(quantized_conv_node, "padding", node.attr["padding"]) if "alpha" in self.node_name_mapping[relu_node_name].node.attr: - helper.copy_attr(quantized_conv_node, "alpha", - self.node_name_mapping[relu_node_name].node.attr["alpha"]) + helper.copy_attr( + quantized_conv_node, "alpha", self.node_name_mapping[relu_node_name].node.attr["alpha"] + ) if "padding_list" in node.attr: - helper.copy_attr(quantized_conv_node, "padding_list", - node.attr["padding_list"]) + helper.copy_attr(quantized_conv_node, "padding_list", node.attr["padding_list"]) helper.copy_attr(quantized_conv_node, "dilations", node.attr["dilations"]) input_data_type = dtypes.quint8 if self._find_relu_node(node) else dtypes.qint8 helper.set_attr_dtype(quantized_conv_node, "Tinput", input_data_type) - helper.set_attr_dtype(quantized_conv_node, "Tfilter",dtypes.qint8) + helper.set_attr_dtype(quantized_conv_node, "Tfilter", dtypes.qint8) helper.set_attr_dtype(quantized_conv_node, "out_type", dtypes.qint32) self.add_output_graph_node(quantized_conv_node) if not is_leakyrelu: quantize_down_name = self._add_quantize_down_nodes( - node, quantized_node_name, dtypes.quint8, is_relu6) - self._intel_cpu_add_dequantize_result_node( - quantize_down_name, relu_node_name) + node, quantized_node_name, dtypes.quint8, is_relu6 + ) + self._intel_cpu_add_dequantize_result_node(quantize_down_name, relu_node_name) else: - quantize_down_name = self._add_quantize_down_nodes( - node, quantized_node_name, dtypes.qint8, False) - self._intel_cpu_add_dequantize_result_node( - quantize_down_name, relu_node_name, dtype=dtypes.qint8) + quantize_down_name = self._add_quantize_down_nodes(node, quantized_node_name, dtypes.qint8, False) + self._intel_cpu_add_dequantize_result_node(quantize_down_name, relu_node_name, dtype=dtypes.qint8) else: new_node = node_def_pb2.NodeDef() new_node.CopyFrom(node) @@ -203,13 
+186,12 @@ def apply_conv_biasadd_fusion(self, match_node_name): """Apply Conv BiasAdd fusion.""" skip_node_name = match_node_name[1:] matched_node = self.node_name_mapping[match_node_name[0]] - control_inputs, normal_inputs = self._get_node_input( - matched_node.node.name) + control_inputs, normal_inputs = self._get_node_input(matched_node.node.name) weight_name = normal_inputs[1] - q_weights_name, q_weights_min_name, q_weights_max_name = \ - self._intel_cpu_quantize_weight_eightbit( - matched_node.node.op, self.node_name_mapping[weight_name].node, self.per_channel) + q_weights_name, q_weights_min_name, q_weights_max_name = self._intel_cpu_quantize_weight_eightbit( + matched_node.node.op, self.node_name_mapping[weight_name].node, self.per_channel + ) all_input_names = self._add_eightbit_prologue_nodes(matched_node.node.name) all_input_names = all_input_names[:1] + [q_weights_name] + all_input_names[1:] @@ -227,17 +209,16 @@ def apply_conv_biasadd_fusion(self, match_node_name): if node.op == "DepthwiseConv2dNative": quantized_node_name = node.name + "_eightbit_quantized_depthwise_conv" - bias_node_name = self.node_name_mapping[ - match_node_name[1]].node.input[1] - quantized_node_input_names = all_input_names[:2] + [ - bias_node_name - ] + all_input_names[2:] + control_inputs + bias_node_name = self.node_name_mapping[match_node_name[1]].node.input[1] + quantized_node_input_names = ( + all_input_names[:2] + [bias_node_name] + all_input_names[2:] + control_inputs + ) quantized_conv_node = helper.create_node( - "QuantizedConv2DWithBias" if node.op == 'Conv2D' \ - else 'QuantizedDepthwiseConv2DWithBias', + "QuantizedConv2DWithBias" if node.op == "Conv2D" else "QuantizedDepthwiseConv2DWithBias", quantized_node_name, - quantized_node_input_names) + quantized_node_input_names, + ) helper.copy_attr(quantized_conv_node, "strides", node.attr["strides"]) helper.copy_attr(quantized_conv_node, "padding", node.attr["padding"]) @@ -253,10 +234,8 @@ def apply_conv_biasadd_fusion(self, match_node_name): self.add_output_graph_node(quantized_conv_node) requantize_type = dtypes.qint8 - quantize_down_name = self._add_quantize_down_nodes( - node, quantized_node_name, requantize_type, False) - self._intel_cpu_add_dequantize_result_node( - quantize_down_name, match_node_name[1], requantize_type) + quantize_down_name = self._add_quantize_down_nodes(node, quantized_node_name, requantize_type, False) + self._intel_cpu_add_dequantize_result_node(quantize_down_name, match_node_name[1], requantize_type) else: new_node = node_def_pb2.NodeDef() new_node.CopyFrom(node) @@ -266,20 +245,19 @@ def apply_conv_biasadd_addn_relu_fusion(self, match_node_name): """Apply Conv BiasAdd AddN Relu fusion.""" skip_node_name = match_node_name[1:] matched_node = self.node_name_mapping[match_node_name[0]] - control_inputs, normal_inputs = self._get_node_input( - matched_node.node.name) + control_inputs, normal_inputs = self._get_node_input(matched_node.node.name) weight_name = normal_inputs[1] third_node = self.node_name_mapping[match_node_name[2]].node forth_node = self.node_name_mapping[match_node_name[3]].node - if third_node.op != 'LeakyRelu' and not self._find_relu_node(matched_node.node): + if third_node.op != "LeakyRelu" and not self._find_relu_node(matched_node.node): return self.apply_conv_biasadd_fusion(match_node_name[:2]) - is_leakyrelu_add_fusion = third_node.op == 'LeakyRelu' and forth_node.op.find('Add') != -1 + is_leakyrelu_add_fusion = third_node.op == "LeakyRelu" and forth_node.op.find("Add") != -1 - q_weights_name, 
q_weights_min_name, q_weights_max_name = \ - self._intel_cpu_quantize_weight_eightbit( - matched_node.node.op, self.node_name_mapping[weight_name].node, self.per_channel) + q_weights_name, q_weights_min_name, q_weights_max_name = self._intel_cpu_quantize_weight_eightbit( + matched_node.node.op, self.node_name_mapping[weight_name].node, self.per_channel + ) all_input_names = self._add_eightbit_prologue_nodes(matched_node.node.name) all_input_names = all_input_names[:1] + [q_weights_name] + all_input_names[1:] @@ -303,51 +281,57 @@ def apply_conv_biasadd_addn_relu_fusion(self, match_node_name): relu_node_name = match_node_name[3] is_relu6 = self.node_name_mapping[relu_node_name].node.op == "Relu6" - sum_index = 1 if match_node_name[1 + leaky_offset] == self.node_name_mapping[ - match_node_name[2 + leaky_offset]].node.input[0] else 0 - quantized_node_input_names = all_input_names[:2] + [ - bias_node_name - ] + all_input_names[2:] + [ - self.node_name_mapping[ - match_node_name[2 + leaky_offset]].node.input[sum_index] - ] + control_inputs - - node_op = "QuantizedConv2DWithBiasReluAndSum" if is_leakyrelu_add_fusion \ - else "QuantizedConv2DWithBiasSumAndRelu" - - quantized_conv_node = helper.create_node(node_op, quantized_node_name, - quantized_node_input_names) + sum_index = ( + 1 + if match_node_name[1 + leaky_offset] + == self.node_name_mapping[match_node_name[2 + leaky_offset]].node.input[0] + else 0 + ) + quantized_node_input_names = ( + all_input_names[:2] + + [bias_node_name] + + all_input_names[2:] + + [self.node_name_mapping[match_node_name[2 + leaky_offset]].node.input[sum_index]] + + control_inputs + ) + + node_op = ( + "QuantizedConv2DWithBiasReluAndSum" + if is_leakyrelu_add_fusion + else "QuantizedConv2DWithBiasSumAndRelu" + ) + + quantized_conv_node = helper.create_node(node_op, quantized_node_name, quantized_node_input_names) helper.copy_attr(quantized_conv_node, "strides", node.attr["strides"]) helper.copy_attr(quantized_conv_node, "padding", node.attr["padding"]) if "padding_list" in node.attr: - helper.copy_attr(quantized_conv_node, "padding_list", - node.attr["padding_list"]) + helper.copy_attr(quantized_conv_node, "padding_list", node.attr["padding_list"]) helper.copy_attr(quantized_conv_node, "dilations", node.attr["dilations"]) - input_data_type = dtypes.quint8 if self._find_relu_node( - node) else dtypes.qint8 + input_data_type = dtypes.quint8 if self._find_relu_node(node) else dtypes.qint8 helper.set_attr_dtype(quantized_conv_node, "Tinput", input_data_type) helper.set_attr_dtype(quantized_conv_node, "Tfilter", dtypes.qint8) helper.set_attr_dtype(quantized_conv_node, "out_type", dtypes.qint32) if "alpha" in self.node_name_mapping[relu_node_name].node.attr: - helper.copy_attr(quantized_conv_node, "alpha", - self.node_name_mapping[relu_node_name].node.attr["alpha"]) + helper.copy_attr( + quantized_conv_node, "alpha", self.node_name_mapping[relu_node_name].node.attr["alpha"] + ) self.add_output_graph_node(quantized_conv_node) if is_leakyrelu_add_fusion: - quantize_down_name = self._add_quantize_down_nodes( - node, quantized_node_name, dtypes.qint8, False) + quantize_down_name = self._add_quantize_down_nodes(node, quantized_node_name, dtypes.qint8, False) self._intel_cpu_add_dequantize_result_node( - quantize_down_name, match_node_name[3], dtype=dtypes.qint8) + quantize_down_name, match_node_name[3], dtype=dtypes.qint8 + ) else: dtype = dtypes.quint8 - if [i for i in self.node_name_mapping[relu_node_name].output \ - if 'FusedBatchNorm' in self.node_name_mapping[i].node.op 
and \ - i in self.op_wise_config_name_list]: - dtype = dtypes.qint8 - quantize_down_name = self._add_quantize_down_nodes( - node, quantized_node_name, dtype, is_relu6) - self._intel_cpu_add_dequantize_result_node( - quantize_down_name, relu_node_name, dtype) + if [ + i + for i in self.node_name_mapping[relu_node_name].output + if "FusedBatchNorm" in self.node_name_mapping[i].node.op and i in self.op_wise_config_name_list + ]: + dtype = dtypes.qint8 + quantize_down_name = self._add_quantize_down_nodes(node, quantized_node_name, dtype, is_relu6) + self._intel_cpu_add_dequantize_result_node(quantize_down_name, relu_node_name, dtype) else: new_node = node_def_pb2.NodeDef() new_node.CopyFrom(node) @@ -366,20 +350,19 @@ def apply_the_transform(self): matched_rule, matched_node_name = self._is_match(self.sorted_patterns) if matched_node_name: self.output_graph = graph_pb2.GraphDef() - fusion_name = ''.join(matched_rule) + fusion_name = "".join(matched_rule) if fusion_name in self.fusion_mapping: - if fusion_name.find('Conv2DAddRelu') != -1: + if fusion_name.find("Conv2DAddRelu") != -1: for input_name in self.node_name_mapping[matched_node_name[1]].node.input: input_node_name = helper.node_name_from_input(input_name) if input_node_name != matched_node_name[0]: add_const_input_node = self.node_name_mapping[input_node_name].node - add_node_content = tensor_util.MakeNdarray( - add_const_input_node.attr["value"].tensor) + add_node_content = tensor_util.MakeNdarray(add_const_input_node.attr["value"].tensor) if add_node_content.ndim != 1: - fusion_name = 'Conv2D' + fusion_name = "Conv2D" matched_node_name = matched_node_name[:1] self.fusion_mapping[fusion_name](matched_node_name) - else: # pragma: no cover + else: # pragma: no cover self.logger.info("Unknown fusion pattern {}.".format(fusion_name)) if self.remove_redundant_quant_flag: self.input_graph = self.remove_redundant_quantization(self.input_graph) diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_for_intel_cpu.py b/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_for_intel_cpu.py index b6a2c61cdf6..4fd235500f6 100644 --- a/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_for_intel_cpu.py +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_for_intel_cpu.py @@ -18,22 +18,35 @@ from tensorflow.core.framework import graph_pb2 from tensorflow.python.platform import gfile -from neural_compressor.utils.utility import dump_elapsed_time + from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper +from neural_compressor.utils.utility import dump_elapsed_time from .quantize_graph_base import QuantizeGraphBase -from .quantize_graph_conv import FuseNodeStartWithConv2d +from .quantize_graph_bn import FuseNodeStartWithFusedBatchNormV3 from .quantize_graph_concatv2 import FuseNodeStartWithConcatV2 +from .quantize_graph_conv import FuseNodeStartWithConv2d from .quantize_graph_matmul import FuseNodeStartWithMatmul from .quantize_graph_pooling import FuseNodeStartWithPooling -from .quantize_graph_bn import FuseNodeStartWithFusedBatchNormV3 + class QuantizeGraphForIntel(QuantizeGraphBase): """Quantize the graph.""" - def __init__(self, input_graph, input_node_names, output_node_names, op_wise_config, op_wise_sequences, device, \ - fake_quant=False, new_api=False, performance_only=False, itex_mode=False): + def __init__( + self, + input_graph, + input_node_names, + output_node_names, + 
op_wise_config, + op_wise_sequences, + device, + fake_quant=False, + new_api=False, + performance_only=False, + itex_mode=False, + ): """Quantize Graph For Intel Cpu.""" super().__init__(output_node_names) self.op_wise_config = op_wise_config @@ -42,12 +55,13 @@ def __init__(self, input_graph, input_node_names, output_node_names, op_wise_con self.input_graph = input_graph else: self.input_graph = graph_pb2.GraphDef() - with gfile.Open(input_graph, 'rb') as f: + with gfile.Open(input_graph, "rb") as f: self.input_graph.ParseFromString(f.read()) - + input_output_names = input_node_names + output_node_names self.input_graph = QuantizeGraphHelper().remove_training_nodes( - self.input_graph, protected_nodes=input_output_names) + self.input_graph, protected_nodes=input_output_names + ) self.graph_analyzer = GraphAnalyzer() self.graph_analyzer.graph = self.input_graph @@ -83,29 +97,34 @@ def do_transform(self): op_wise_config_name_list = list(self.op_wise_config.keys()) all_node_length = len(self.op_wise_config) for _, node in enumerate(self.input_graph.node): - if node in self.input_graph.node and node.op in self.transformers \ - and node.name in self.op_wise_config: + if node in self.input_graph.node and node.op in self.transformers and node.name in self.op_wise_config: count += 1 if count == all_node_length: remove_redundant_quant_flag = True - self.input_graph, quantizable_node_names, exclude_node_names= self.transformers[node.op]( + self.input_graph, quantizable_node_names, exclude_node_names = self.transformers[node.op]( input_graph=self.input_graph, patterns=self.op_wise_seq[node.op], remove_redundant_quant_flag=remove_redundant_quant_flag, op_wise_cfg=self.op_wise_config[node.name], op_wise_config_name_list=op_wise_config_name_list, - start_node_name=node.name, device=self.device, \ - fake_quant=self.fake_quant, new_api=self.new_api, + start_node_name=node.name, + device=self.device, + fake_quant=self.fake_quant, + new_api=self.new_api, performance_only=self.performance_only, itex_mode=self.itex_mode, - frame_info=self.graph_analyzer.parent_frame_details).apply_the_transform() + frame_info=self.graph_analyzer.parent_frame_details, + ).apply_the_transform() if quantizable_node_names: - if node.op in ('ConcatV2', 'MaxPool', 'MaxPool3D', 'AvgPool'): + if node.op in ("ConcatV2", "MaxPool", "MaxPool3D", "AvgPool"): self.all_quantizable_node.extend([[i] for i in quantizable_node_names]) else: self.all_quantizable_node.append(quantizable_node_names) if exclude_node_names: self.exclude_node_names.extend(exclude_node_names) - return self.remove_dead_nodes(self.input_graph, self.output_node_names), \ - self.all_quantizable_node, self.exclude_node_names + return ( + self.remove_dead_nodes(self.input_graph, self.output_node_names), + self.all_quantizable_node, + self.exclude_node_names, + ) diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_matmul.py b/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_matmul.py index 1bd33de9357..e2473482295 100644 --- a/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_matmul.py +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_matmul.py @@ -17,14 +17,13 @@ """Quantize MatMul.""" import numpy as np - -from tensorflow.core.framework import graph_pb2 -from tensorflow.core.framework import node_def_pb2 -from tensorflow.python.framework import dtypes +from tensorflow.core.framework import graph_pb2, node_def_pb2 +from tensorflow.python.framework import dtypes, tensor_util from 
neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper as helper + from .quantize_graph_base import QuantizeNodeBase -from tensorflow.python.framework import tensor_util + class FuseNodeStartWithMatmul(QuantizeNodeBase): """Quantize MatMul and apply the fusion.""" @@ -33,16 +32,14 @@ def __init__(self, **kwargs): """Initilization.""" super().__init__(**kwargs) - self.sorted_patterns = sorted(self.patterns, - key=lambda i: len(i), - reverse=True) + self.sorted_patterns = sorted(self.patterns, key=lambda i: len(i), reverse=True) self.exclude_matmul_name = [] self.fusion_op_type = set(fusion[0] for fusion in self.patterns) self.fusion_mapping = { - 'MatMulBiasAdd': self.apply_matmul_biasadd_fusion, - 'MatMul': self.apply_matmul_biasadd_fusion, - 'MatMulBiasAddRelu': self.apply_matmul_biasadd_relu_fusion, - 'MatMulRelu': self.apply_matmul_biasadd_relu_fusion, + "MatMulBiasAdd": self.apply_matmul_biasadd_fusion, + "MatMul": self.apply_matmul_biasadd_fusion, + "MatMulBiasAddRelu": self.apply_matmul_biasadd_relu_fusion, + "MatMulRelu": self.apply_matmul_biasadd_relu_fusion, } def apply_matmul_biasadd_relu_fusion(self, match_node_name): @@ -54,11 +51,11 @@ def apply_matmul_biasadd_relu_fusion(self, match_node_name): # FIXME We only quantize the MatMul op which second input node type is const. This is a # workaround for RNN model like LTSM. - if weight_node.op != 'Const': + if weight_node.op != "Const": self.output_graph = self.input_graph return [] - weights_content = tensor_util.MakeNdarray(weight_node.attr['value'].tensor) + weights_content = tensor_util.MakeNdarray(weight_node.attr["value"].tensor) if np.any(np.isnan(weights_content)): self.output_graph = self.input_graph @@ -74,20 +71,20 @@ def apply_matmul_biasadd_relu_fusion(self, match_node_name): need_insert_dummy_biasadd = 1 offset = 1 if len(match_node_name) == 3: - add_a_node_name = helper.node_name_from_input(second_node.input[0]) - add_a_node = self.node_name_mapping[add_a_node_name].node - add_b_node_name = helper.node_name_from_input(second_node.input[1]) - add_b_node = self.node_name_mapping[add_b_node_name].node - if add_a_node.op != 'Const' and add_b_node.op == 'Const': - need_insert_dummy_biasadd = 0 - offset = 0 - if need_insert_dummy_biasadd: - self.apply_matmul_biasadd_fusion(match_node_name[:1]) - return match_node_name[:1] - - q_weights_name, q_weights_min_name, q_weights_max_name = \ - self._intel_cpu_quantize_weight_eightbit( - matched_node.node.op, self.node_name_mapping[weight_name].node, self.per_channel) + add_a_node_name = helper.node_name_from_input(second_node.input[0]) + add_a_node = self.node_name_mapping[add_a_node_name].node + add_b_node_name = helper.node_name_from_input(second_node.input[1]) + add_b_node = self.node_name_mapping[add_b_node_name].node + if add_a_node.op != "Const" and add_b_node.op == "Const": + need_insert_dummy_biasadd = 0 + offset = 0 + if need_insert_dummy_biasadd: + self.apply_matmul_biasadd_fusion(match_node_name[:1]) + return match_node_name[:1] + + q_weights_name, q_weights_min_name, q_weights_max_name = self._intel_cpu_quantize_weight_eightbit( + matched_node.node.op, self.node_name_mapping[weight_name].node, self.per_channel + ) skip_node_name.append(weight_name) @@ -99,7 +96,7 @@ def apply_matmul_biasadd_relu_fusion(self, match_node_name): quantized_node_name = node.name + "_eightbit_quantized_mat_mul" if need_insert_dummy_biasadd: - t_b_index = 0 if matched_node.node.attr['transpose_b'].b else 1 + t_b_index = 0 if matched_node.node.attr["transpose_b"].b 
else 1 bias_size = weights_content.shape[t_b_index] bias_node_name = node.name + "_fake_bias" bias_node = helper.create_constant_node( @@ -109,32 +106,31 @@ def apply_matmul_biasadd_relu_fusion(self, match_node_name): else: bias_node_name = self.node_name_mapping[match_node_name[1]].node.input[1] - relu_node_name = match_node_name[2-offset] - all_input_names = self._add_eightbit_prologue_nodes( - matched_node.node.name) + relu_node_name = match_node_name[2 - offset] + all_input_names = self._add_eightbit_prologue_nodes(matched_node.node.name) all_input_names = all_input_names[:1] + [q_weights_name] + all_input_names[1:] all_input_names.append(q_weights_min_name) all_input_names.append(q_weights_max_name) - quantized_node_input_names = all_input_names[:2] + [ - bias_node_name - ] + all_input_names[2:] + control_inputs + quantized_node_input_names = ( + all_input_names[:2] + [bias_node_name] + all_input_names[2:] + control_inputs + ) quantized_matmul_node = helper.create_node( - "QuantizedMatMulWithBiasAndRelu", quantized_node_name, - quantized_node_input_names) + "QuantizedMatMulWithBiasAndRelu", quantized_node_name, quantized_node_input_names + ) helper.copy_attr(quantized_matmul_node, "transpose_a", node.attr["transpose_a"]) helper.copy_attr(quantized_matmul_node, "transpose_b", node.attr["transpose_b"]) helper.set_attr_dtype(quantized_matmul_node, "T1", dtypes.quint8) helper.set_attr_dtype(quantized_matmul_node, "T2", dtypes.qint8) helper.set_attr_dtype(quantized_matmul_node, "Toutput", dtypes.qint32) - helper.set_attr_string(quantized_matmul_node, 'input_quant_mode', - b'MIN_FIRST' if self.is_asymmetric else b'SCALED') + helper.set_attr_string( + quantized_matmul_node, "input_quant_mode", b"MIN_FIRST" if self.is_asymmetric else b"SCALED" + ) self.add_output_graph_node(quantized_matmul_node) - quantize_down_name = self._add_quantize_down_nodes( - node, quantized_node_name, dtypes.quint8, False) + quantize_down_name = self._add_quantize_down_nodes(node, quantized_node_name, dtypes.quint8, False) self._intel_cpu_add_dequantize_result_node(quantize_down_name, relu_node_name) else: new_node = node_def_pb2.NodeDef() @@ -146,57 +142,58 @@ def apply_matmul_biasadd_fusion(self, match_node_name): """Apply MatMul BiasAdd fusion.""" skip_node_name = match_node_name[1:] matched_node = self.node_name_mapping[match_node_name[0]] - control_inputs, normal_inputs = self._get_node_input( - matched_node.node.name) + control_inputs, normal_inputs = self._get_node_input(matched_node.node.name) weight_name = normal_inputs[1] weight_node = self.node_name_mapping[helper.node_name_from_input(weight_name)].node enter_node = None - if weight_node.op == 'Enter': - parent_node = self.node_name_mapping[ - helper.node_name_from_input(weight_node.input[0])].node + if weight_node.op == "Enter": + parent_node = self.node_name_mapping[helper.node_name_from_input(weight_node.input[0])].node # FIXME We only quantize the MatMul op which second input node type is const. This is a # workaround for RNN model like LTSM. 
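For reference, the input_quant_mode attribute attached to the quantized MatMul nodes in this file switches between asymmetric (MIN_FIRST) and symmetric (SCALED) activation quantization. The numpy sketch below only illustrates that distinction; it is not the TensorFlow kernel implementation, and the function name and the 1e-7 floor are invented for the example.

import numpy as np

def quantize_activation(x, mode="SCALED"):
    """Illustrative 8-bit activation quantization (not the TF kernel)."""
    if mode == "MIN_FIRST":
        # Asymmetric: the full [min, max] range is mapped onto uint8 [0, 255].
        x_min, x_max = float(x.min()), float(x.max())
        scale = max((x_max - x_min) / 255.0, 1e-7)
        return np.round((x - x_min) / scale).astype(np.uint8), x_min, x_max
    # Symmetric (SCALED): |x| is mapped onto int8 [-127, 127] around zero.
    amax = max(float(np.abs(x).max()), 1e-7)
    return np.clip(np.round(x * 127.0 / amax), -127, 127).astype(np.int8), -amax, amax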
- if parent_node.op != 'Const': - self.logger.debug( - 'The weight node of matched_node {} is not Const or Const + Enter, skipped') + if parent_node.op != "Const": + self.logger.debug("The weight node of matched_node {} is not Const or Const + Enter, skipped") self.output_graph = self.input_graph return [] enter_node = weight_node weight_node = parent_node weight_name = weight_node.name - if weight_node.op != 'Const': + if weight_node.op != "Const": self.output_graph = self.input_graph return [] - #TODO Remove below two lines once the TF enabled the old QuantizedMatMul while + # TODO Remove below two lines once the TF enabled the old QuantizedMatMul while # transpose_a/transpose_b could be set to True. - if matched_node.node.attr["transpose_a"].b == True or \ - matched_node.node.attr["transpose_b"].b == True: + if matched_node.node.attr["transpose_a"].b is True or matched_node.node.attr["transpose_b"].b is True: self.exclude_matmul_name.append(match_node_name[0]) self.output_graph = self.input_graph return [] - if weight_node.op == 'Const': - weights_content = tensor_util.MakeNdarray(weight_node.attr['value'].tensor) + if weight_node.op == "Const": + weights_content = tensor_util.MakeNdarray(weight_node.attr["value"].tensor) if np.any(np.isnan(weights_content)): self.output_graph = self.input_graph return [] for i in self.node_name_mapping: - if weight_node.input and not weight_node.input[0].startswith('^') \ - and weight_node.name in self.node_name_mapping[i].output: + if ( + weight_node.input + and not weight_node.input[0].startswith("^") + and weight_node.name in self.node_name_mapping[i].output + ): self.output_graph = self.input_graph return [] len_output = len(matched_node.output) is_shared_output = False if len_output == 2: - if self.node_name_mapping[matched_node.output[0]].node.op == 'Reshape' or \ - self.node_name_mapping[matched_node.output[1]].node.op == 'Reshape': + if ( + self.node_name_mapping[matched_node.output[0]].node.op == "Reshape" + or self.node_name_mapping[matched_node.output[1]].node.op == "Reshape" + ): is_shared_output = False else: is_shared_output = True @@ -214,29 +211,28 @@ def apply_matmul_biasadd_fusion(self, match_node_name): add_a_node = self.node_name_mapping[add_a_node_name].node add_b_node_name = helper.node_name_from_input(second_node.input[1]) add_b_node = self.node_name_mapping[add_b_node_name].node - if add_a_node.op != 'Const' and add_b_node.op in ('Const', 'Enter'): - need_insert_dummy_biasadd = 0 + if add_a_node.op != "Const" and add_b_node.op in ("Const", "Enter"): + need_insert_dummy_biasadd = 0 if need_insert_dummy_biasadd: - self.apply_matmul_biasadd_fusion(match_node_name[:1]) - return match_node_name[:1] + self.apply_matmul_biasadd_fusion(match_node_name[:1]) + return match_node_name[:1] if self.frame_info and not enter_node: from collections import OrderedDict + frame_info = OrderedDict(self.frame_info) if match_node_name[0] in frame_info and frame_info[match_node_name[0]]: - enter_node = helper.create_node( - 'Enter', weight_name+'_enter', [weight_name]) - helper.set_attr_string(enter_node, - 'frame_name', frame_info[weight_name].attr['frame_name'].s) - helper.set_attr_dtype(enter_node, 'T', dtypes.float32) - helper.set_attr_bool(enter_node, 'is_constant', True) - helper.set_attr_int(enter_node, 'parallel_iterations', - frame_info[weight_name].attr['parallel_iterations'].i) - - q_weights_name, q_weights_min_name, q_weights_max_name = \ - self._intel_cpu_quantize_weight_eightbit( - matched_node.node.op, 
self.node_name_mapping[weight_name].node, - self.per_channel, enter_node) + enter_node = helper.create_node("Enter", weight_name + "_enter", [weight_name]) + helper.set_attr_string(enter_node, "frame_name", frame_info[weight_name].attr["frame_name"].s) + helper.set_attr_dtype(enter_node, "T", dtypes.float32) + helper.set_attr_bool(enter_node, "is_constant", True) + helper.set_attr_int( + enter_node, "parallel_iterations", frame_info[weight_name].attr["parallel_iterations"].i + ) + + q_weights_name, q_weights_min_name, q_weights_max_name = self._intel_cpu_quantize_weight_eightbit( + matched_node.node.op, self.node_name_mapping[weight_name].node, self.per_channel, enter_node + ) skip_node_name.append(weight_name) if enter_node: @@ -251,21 +247,20 @@ def apply_matmul_biasadd_fusion(self, match_node_name): quantized_node_name = node.name + "_eightbit_quantized_mat_mul" if need_insert_dummy_biasadd: - t_b_index = 0 if matched_node.node.attr['transpose_b'].b else 1 + t_b_index = 0 if matched_node.node.attr["transpose_b"].b else 1 bias_size = weights_content.shape[t_b_index] bias_node_name = node.name + "_fake_bias" bias_node = helper.create_constant_node( bias_node_name, [0] * bias_size, dtypes.float32, shape=[bias_size] ) if enter_node: - bias_enter_node = helper.create_node( - 'Enter', bias_node_name + '_enter', [bias_node_name]) - helper.set_attr_string(bias_enter_node, - 'frame_name', enter_node.attr['frame_name'].s) - helper.set_attr_dtype(bias_enter_node, 'T', dtypes.float32) - helper.set_attr_bool(bias_enter_node, 'is_constant', True) - helper.set_attr_int(bias_enter_node, 'parallel_iterations', - enter_node.attr['parallel_iterations'].i) + bias_enter_node = helper.create_node("Enter", bias_node_name + "_enter", [bias_node_name]) + helper.set_attr_string(bias_enter_node, "frame_name", enter_node.attr["frame_name"].s) + helper.set_attr_dtype(bias_enter_node, "T", dtypes.float32) + helper.set_attr_bool(bias_enter_node, "is_constant", True) + helper.set_attr_int( + bias_enter_node, "parallel_iterations", enter_node.attr["parallel_iterations"].i + ) self.add_output_graph_node(bias_enter_node) bias_node_name = bias_enter_node.name @@ -273,28 +268,33 @@ def apply_matmul_biasadd_fusion(self, match_node_name): self.add_output_graph_node(bias_node) else: bias_node_name = self.node_name_mapping[match_node_name[1]].node.input[1] - if self.node_name_mapping[bias_node_name].node.op == 'Enter': - bias_enter_node = helper.create_node( - 'Enter', bias_node_name+'_enter', [bias_node_name]) - helper.set_attr_string(bias_enter_node, 'frame_name', - self.node_name_mapping[bias_node_name].node.attr['frame_name'].s) - helper.set_attr_dtype(bias_enter_node, 'T', dtypes.float32) - helper.set_attr_bool(bias_enter_node, 'is_constant', True) - helper.set_attr_int(bias_enter_node, 'parallel_iterations', \ - self.node_name_mapping[bias_node_name].node.attr['parallel_iterations'].i) + if self.node_name_mapping[bias_node_name].node.op == "Enter": + bias_enter_node = helper.create_node("Enter", bias_node_name + "_enter", [bias_node_name]) + helper.set_attr_string( + bias_enter_node, + "frame_name", + self.node_name_mapping[bias_node_name].node.attr["frame_name"].s, + ) + helper.set_attr_dtype(bias_enter_node, "T", dtypes.float32) + helper.set_attr_bool(bias_enter_node, "is_constant", True) + helper.set_attr_int( + bias_enter_node, + "parallel_iterations", + self.node_name_mapping[bias_node_name].node.attr["parallel_iterations"].i, + ) self.add_output_graph_node(bias_enter_node) all_input_names = 
self._add_eightbit_prologue_nodes(matched_node.node.name) all_input_names = all_input_names[:1] + [q_weights_name] + all_input_names[1:] all_input_names.append(q_weights_min_name) all_input_names.append(q_weights_max_name) - quantized_node_input_names = all_input_names[:2] + [ - bias_node_name - ] + all_input_names[2:] + control_inputs + quantized_node_input_names = ( + all_input_names[:2] + [bias_node_name] + all_input_names[2:] + control_inputs + ) quantized_matmul_node = helper.create_node( - "QuantizedMatMulWithBias", quantized_node_name, - quantized_node_input_names) + "QuantizedMatMulWithBias", quantized_node_name, quantized_node_input_names + ) helper.copy_attr(quantized_matmul_node, "transpose_a", node.attr["transpose_a"]) helper.copy_attr(quantized_matmul_node, "transpose_b", node.attr["transpose_b"]) @@ -302,17 +302,19 @@ def apply_matmul_biasadd_fusion(self, match_node_name): helper.set_attr_dtype(quantized_matmul_node, "T2", dtypes.qint8) helper.set_attr_dtype(quantized_matmul_node, "Toutput", dtypes.qint32) helper.set_attr_dtype(quantized_matmul_node, "Tbias", dtypes.float32) - helper.set_attr_string(quantized_matmul_node, 'input_quant_mode', - b'MIN_FIRST' if self.is_asymmetric else b'SCALED') + helper.set_attr_string( + quantized_matmul_node, "input_quant_mode", b"MIN_FIRST" if self.is_asymmetric else b"SCALED" + ) self.add_output_graph_node(quantized_matmul_node) requantize_type = dtypes.qint8 - quantize_down_name = self._add_quantize_down_nodes( - node, quantized_node_name, requantize_type, False) + quantize_down_name = self._add_quantize_down_nodes(node, quantized_node_name, requantize_type, False) self._intel_cpu_add_dequantize_result_node( - quantize_down_name, match_node_name[0] if need_insert_dummy_biasadd else \ - match_node_name[1], requantize_type) + quantize_down_name, + match_node_name[0] if need_insert_dummy_biasadd else match_node_name[1], + requantize_type, + ) else: new_node = node_def_pb2.NodeDef() new_node.CopyFrom(node) @@ -332,10 +334,10 @@ def apply_the_transform(self): if matched_node_name: self.output_graph = graph_pb2.GraphDef() - fusion_name = ''.join(matched_rule) + fusion_name = "".join(matched_rule) if fusion_name in self.fusion_mapping: matched_nodes = self.fusion_mapping[fusion_name](matched_node_name) - else: # pragma: no cover + else: # pragma: no cover self.logger.debug("Unknown fusion pattern {}.".format(fusion_name)) if self.remove_redundant_quant_flag: self.input_graph = self.remove_redundant_quantization(self.input_graph) diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_pooling.py b/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_pooling.py index 6bc2e2d396c..79cebad9d28 100644 --- a/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_pooling.py +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_pooling.py @@ -20,19 +20,22 @@ from tensorflow.core.framework import node_def_pb2 from tensorflow.python.framework import dtypes -from .quantize_graph_base import QuantizeNodeBase from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper as helper -from neural_compressor.adaptor.tf_utils.util import version1_gt_version2 -from neural_compressor.adaptor.tf_utils.util import version1_lt_version2 -from neural_compressor.adaptor.tf_utils.util import version1_eq_version2 +from neural_compressor.adaptor.tf_utils.util import version1_eq_version2, version1_gt_version2, version1_lt_version2 + +from .quantize_graph_base import QuantizeNodeBase + 
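Taken together, the MatMul fusions rewritten above always assemble the same runtime chain: quantize the inputs, run an 8-bit MatMul that accumulates into qint32, requantize the accumulator down to 8 bits, and dequantize back to float for the next fp32 consumer. A rough numpy approximation of that chain, assuming symmetric (SCALED) quantization and per-tensor scales s_x, s_w and s_out (names chosen for the example, not real op attributes):

import numpy as np

def int8_matmul_chain(x_q, w_q, bias_fp32, s_x, s_w, s_out):
    # The 8-bit MatMul accumulates in 32-bit integers (the qint32 "Toutput" above).
    acc = x_q.astype(np.int32) @ w_q.astype(np.int32)
    # Bias is folded into the integer domain with the combined input scale.
    acc = acc + np.round(bias_fp32 / (s_x * s_w)).astype(np.int32)
    # Roughly what the requantize-down step does: rescale to the 8-bit output range.
    q_out = np.clip(np.round(acc * (s_x * s_w) / s_out), -128, 127).astype(np.int8)
    # Dequantize the result so downstream float ops are unaffected.
    return q_out.astype(np.float32) * s_out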
class FuseNodeStartWithPooling(QuantizeNodeBase): """Quantize the AvgPool and MaxPool.""" def _add_pool_function(self, original_node, quantized_op_node): """Set quantized pooling node attributes.""" - pooling_type = dtypes.quint8 if version1_lt_version2(tf.version.VERSION, '2.6.0') or \ - self._find_relu_node(original_node) else dtypes.qint8 + pooling_type = ( + dtypes.quint8 + if version1_lt_version2(tf.version.VERSION, "2.6.0") or self._find_relu_node(original_node) + else dtypes.qint8 + ) helper.set_attr_dtype(quantized_op_node, "T", pooling_type) helper.copy_attr(quantized_op_node, "ksize", original_node.attr["ksize"]) helper.copy_attr(quantized_op_node, "strides", original_node.attr["strides"]) @@ -44,11 +47,12 @@ def _apply_pool_quantization(self): # Tensorflow 2.5.0 enabled the s8 input for pooling op. # If the tf version is lower than 2.5.0, we need to confirm the input # data type of pooling is unsigned. - if v.node.name == self.start_node_name and \ - (version1_gt_version2(tf.version.VERSION, '2.5.0') or - version1_lt_version2(tf.version.VERSION, '2.6.0') and self._find_relu_node(v.node)): - self.eightbitize_single_input_tensor_node( - v.node, self._add_pool_function) + if v.node.name == self.start_node_name and ( + version1_gt_version2(tf.version.VERSION, "2.5.0") + or version1_lt_version2(tf.version.VERSION, "2.6.0") + and self._find_relu_node(v.node) + ): + self.eightbitize_single_input_tensor_node(v.node, self._add_pool_function) self.quantizable_node_names.append(v.node.name) else: new_node = node_def_pb2.NodeDef() diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph_common.py b/neural_compressor/adaptor/tf_utils/quantize_graph_common.py index 033b5a07f9d..5db1ce8bf00 100644 --- a/neural_compressor/adaptor/tf_utils/quantize_graph_common.py +++ b/neural_compressor/adaptor/tf_utils/quantize_graph_common.py @@ -17,16 +17,15 @@ """Quantize Graph Common Utils Herlper Class.""" import re + import numpy as np +from tensorflow.core.framework import attr_value_pb2, graph_pb2, node_def_pb2 +from tensorflow.python.framework import dtypes, tensor_util -from tensorflow.core.framework import node_def_pb2 -from tensorflow.core.framework import attr_value_pb2 -from tensorflow.core.framework import graph_pb2 -from tensorflow.python.framework import tensor_util -from tensorflow.python.framework import dtypes -class QuantizeGraphHelper(): +class QuantizeGraphHelper: """This class contains several staticmethod functions.""" + node_name_cache = {} node_name_port_cache = {} @@ -36,8 +35,7 @@ def __init__(self): def _recursive_graph_sorting(self, node_name): """Recursive sort the graph.""" - if node_name in self.op_list or not self.node_name_mapping[ - node_name].input: + if node_name in self.op_list or not self.node_name_mapping[node_name].input: return for input_name in self.node_name_mapping[node_name].input: @@ -106,8 +104,7 @@ def split_shared_inputs(input_graph_def): for node_name in node_map.keys(): node = node_map[node_name] for input_idx, input_node_name in enumerate(node.input): - if node_map[QuantizeGraphHelper.node_name_from_input( - input_node_name)].op == 'Const': + if node_map[QuantizeGraphHelper.node_name_from_input(input_node_name)].op == "Const": # is shared and current node is not the first one # sharing the input if input_node_name in input_map.keys(): @@ -115,8 +112,7 @@ def split_shared_inputs(input_graph_def): input_map[input_node_name].append(node.name) new_input_node = node_def_pb2.NodeDef() new_input_node.CopyFrom(node_map[input_node_name]) - new_input_node.name 
= input_node_name + '_' + str( - len(input_map[input_node_name])) + new_input_node.name = input_node_name + "_" + str(len(input_map[input_node_name])) node.input[input_idx] = new_input_node.name output_graph_def.node.extend([new_input_node]) else: @@ -126,8 +122,7 @@ def split_shared_inputs(input_graph_def): return output_graph_def if is_shared_input else input_graph_def @staticmethod - def remove_training_nodes(input_graph, protected_nodes=[], - types_to_splice=['Identity', 'CheckNumerics']): + def remove_training_nodes(input_graph, protected_nodes=[], types_to_splice=["Identity", "CheckNumerics"]): """Prunes out nodes that aren't needed for inference. Args: @@ -158,11 +153,7 @@ def remove_training_nodes(input_graph, protected_nodes=[], names_to_splice[node.name] = node.input[0] # We also don't want to remove nodes which are used as control edge inputs. - names_to_splice = { - name: value - for name, value in names_to_splice.items() - if name not in control_input_names - } + names_to_splice = {name: value for name, value in names_to_splice.items() if name not in control_input_names} nodes_after_splicing = [] @@ -210,7 +201,7 @@ def create_node(op, name, inputs): return new_node @staticmethod - def create_constant_node(name, value, dtype, shape=None, device='cpu'): + def create_constant_node(name, value, dtype, shape=None, device="cpu"): """Create constant node. Args: @@ -224,8 +215,7 @@ def create_constant_node(name, value, dtype, shape=None, device='cpu'): Returns: [type]: [description] """ - node = QuantizeGraphHelper.create_node("Const" if device == 'cpu' else "HostConst", name, - []) + node = QuantizeGraphHelper.create_node("Const" if device == "cpu" else "HostConst", name, []) QuantizeGraphHelper.set_attr_dtype(node, "dtype", dtype) QuantizeGraphHelper.set_attr_tensor(node, "value", value, dtype, shape) return node @@ -258,8 +248,8 @@ def set_attr_tensor(node, key, value, dtype, shape=None): shape (int list, optional): the input tensor's shape. Defaults to None. 
""" node.attr[key].CopyFrom( - attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto(value, dtype=dtype, shape=shape))) + attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(value, dtype=dtype, shape=shape)) + ) @staticmethod def set_attr_string_list(node, key, value): @@ -334,12 +324,9 @@ def ensure_tensor_name_has_port(node_name): return QuantizeGraphHelper.node_name_port_cache[node_name] @staticmethod - def generate_quantized_weight_node(host_op_type, - input_node, - per_channel, - weight_bit=7.0, - device='cpu', - enter_node=None): + def generate_quantized_weight_node( + host_op_type, input_node, per_channel, weight_bit=7.0, device="cpu", enter_node=None + ): """Generated the quantized weight node.""" base_name = input_node.name + "_" qint8_const_name = base_name + "qint8_const" @@ -347,16 +334,22 @@ def generate_quantized_weight_node(host_op_type, max_name = base_name + "max" float_tensor = tensor_util.MakeNdarray(input_node.attr["value"].tensor) epsilon = 1e-4 # Needs to be set empirically if accuracy is not satisfactory - range_coefficent = 127 / (2 ** weight_bit - 1) - if host_op_type in ("Conv2D", "MatMul", "Conv3D", "BatchMatMulV2", \ - "Conv2DBackpropInput", "Conv3DBackpropInputV2"): + range_coefficent = 127 / (2**weight_bit - 1) + if host_op_type in ( + "Conv2D", + "MatMul", + "Conv3D", + "BatchMatMulV2", + "Conv2DBackpropInput", + "Conv3DBackpropInputV2", + ): if per_channel: - if host_op_type in ('Conv3D', 'Conv3DBackpropInputV2'): + if host_op_type in ("Conv3D", "Conv3DBackpropInputV2"): ranges = np.abs(float_tensor).max(axis=(0, 1, 2, 3)) - elif host_op_type in ('Conv2D', 'Conv2DBackpropInput'): + elif host_op_type in ("Conv2D", "Conv2DBackpropInput"): ranges = np.abs(float_tensor).max(axis=(0, 1, 2)) - elif host_op_type in ('MatMul'): - if 'transpose_b' in input_node.attr and input_node.attr["transpose_b"].b: # pragma: no cover + elif host_op_type in ("MatMul"): + if "transpose_b" in input_node.attr and input_node.attr["transpose_b"].b: # pragma: no cover ranges = np.abs(float_tensor).max(axis=(1)) else: ranges = np.abs(float_tensor).max(axis=(0)) @@ -370,13 +363,13 @@ def generate_quantized_weight_node(host_op_type, ranges[ranges < epsilon] = epsilon min_value[np.abs(min_value) < epsilon] = -epsilon max_value[np.abs(max_value) < epsilon] = epsilon - if 'transpose_b' in input_node.attr and input_node.attr["transpose_b"].b: # pragma: no cover + if "transpose_b" in input_node.attr and input_node.attr["transpose_b"].b: # pragma: no cover # transpose for broadcasting float_tensor = np.transpose(float_tensor, [1, 0]) - qint8_tensor = (np.around(float_tensor *127.0/ranges)).astype(np.int8) + qint8_tensor = (np.around(float_tensor * 127.0 / ranges)).astype(np.int8) qint8_tensor = np.transpose(qint8_tensor, [1, 0]) else: - qint8_tensor = (np.around(float_tensor *127.0/ranges)).astype(np.int8) + qint8_tensor = (np.around(float_tensor * 127.0 / ranges)).astype(np.int8) else: min_value = np.min(float_tensor) max_value = np.max(float_tensor) @@ -410,49 +403,46 @@ def generate_quantized_weight_node(host_op_type, # When divide by range, qint8_tensor needs to be 3 dim # where, 3rd dim should be same dim of ranges a, b, c, d = float_tensor.shape - qint8_tensor = (np.around(float_tensor.reshape(a, b, c * d) * 127.0 / - ranges)).astype(np.int8) + qint8_tensor = (np.around(float_tensor.reshape(a, b, c * d) * 127.0 / ranges)).astype(np.int8) # get the shape back to 4 dim qint8_tensor = qint8_tensor.reshape(a, b, c, d) shape = 
tensor_util.TensorShapeProtoToList(input_node.attr["value"].tensor.tensor_shape) - qint8_const_node = QuantizeGraphHelper.create_constant_node(qint8_const_name, - qint8_tensor, - dtypes.qint8, - shape=shape) - min_node = QuantizeGraphHelper.create_constant_node(min_name, min_value, - dtypes.float32, device="cpu") + qint8_const_node = QuantizeGraphHelper.create_constant_node( + qint8_const_name, qint8_tensor, dtypes.qint8, shape=shape + ) + min_node = QuantizeGraphHelper.create_constant_node(min_name, min_value, dtypes.float32, device="cpu") - max_node = QuantizeGraphHelper.create_constant_node(max_name, max_value, - dtypes.float32, device="cpu") + max_node = QuantizeGraphHelper.create_constant_node(max_name, max_value, dtypes.float32, device="cpu") qint8_const_enter_node = None min_enter_node = None max_enter_node = None if enter_node: - qint8_const_enter_node = QuantizeGraphHelper.create_node('Enter', \ - qint8_const_name + '_enter', [qint8_const_name]) - QuantizeGraphHelper.set_attr_string(qint8_const_enter_node, - 'frame_name', enter_node.attr['frame_name'].s) - QuantizeGraphHelper.set_attr_dtype(qint8_const_enter_node, 'T', dtypes.qint8) - QuantizeGraphHelper.set_attr_bool(qint8_const_enter_node, 'is_constant', True) - QuantizeGraphHelper.set_attr_int(qint8_const_enter_node, \ - 'parallel_iterations', enter_node.attr['parallel_iterations'].i) - - min_enter_node = QuantizeGraphHelper.create_node('Enter', min_name + '_enter', [min_name]) - QuantizeGraphHelper.set_attr_string(min_enter_node, - 'frame_name', enter_node.attr['frame_name'].s) - QuantizeGraphHelper.set_attr_dtype(min_enter_node, 'T', dtypes.float32) - QuantizeGraphHelper.set_attr_bool(min_enter_node, 'is_constant', True) - QuantizeGraphHelper.set_attr_int(min_enter_node, 'parallel_iterations', \ - enter_node.attr['parallel_iterations'].i) - - max_enter_node = QuantizeGraphHelper.create_node('Enter', max_name + '_enter', [max_name]) - QuantizeGraphHelper.set_attr_string(max_enter_node, - 'frame_name', enter_node.attr['frame_name'].s) - QuantizeGraphHelper.set_attr_dtype(max_enter_node, 'T', dtypes.float32) - QuantizeGraphHelper.set_attr_bool(max_enter_node, 'is_constant', True) - QuantizeGraphHelper.set_attr_int(max_enter_node, 'parallel_iterations',\ - enter_node.attr['parallel_iterations'].i) + qint8_const_enter_node = QuantizeGraphHelper.create_node( + "Enter", qint8_const_name + "_enter", [qint8_const_name] + ) + QuantizeGraphHelper.set_attr_string(qint8_const_enter_node, "frame_name", enter_node.attr["frame_name"].s) + QuantizeGraphHelper.set_attr_dtype(qint8_const_enter_node, "T", dtypes.qint8) + QuantizeGraphHelper.set_attr_bool(qint8_const_enter_node, "is_constant", True) + QuantizeGraphHelper.set_attr_int( + qint8_const_enter_node, "parallel_iterations", enter_node.attr["parallel_iterations"].i + ) + + min_enter_node = QuantizeGraphHelper.create_node("Enter", min_name + "_enter", [min_name]) + QuantizeGraphHelper.set_attr_string(min_enter_node, "frame_name", enter_node.attr["frame_name"].s) + QuantizeGraphHelper.set_attr_dtype(min_enter_node, "T", dtypes.float32) + QuantizeGraphHelper.set_attr_bool(min_enter_node, "is_constant", True) + QuantizeGraphHelper.set_attr_int( + min_enter_node, "parallel_iterations", enter_node.attr["parallel_iterations"].i + ) + + max_enter_node = QuantizeGraphHelper.create_node("Enter", max_name + "_enter", [max_name]) + QuantizeGraphHelper.set_attr_string(max_enter_node, "frame_name", enter_node.attr["frame_name"].s) + QuantizeGraphHelper.set_attr_dtype(max_enter_node, "T", 
dtypes.float32) + QuantizeGraphHelper.set_attr_bool(max_enter_node, "is_constant", True) + QuantizeGraphHelper.set_attr_int( + max_enter_node, "parallel_iterations", enter_node.attr["parallel_iterations"].i + ) return qint8_const_node, min_node, max_node, qint8_const_enter_node, min_enter_node, max_enter_node diff --git a/neural_compressor/adaptor/tf_utils/smooth_quant_calibration.py b/neural_compressor/adaptor/tf_utils/smooth_quant_calibration.py index 1fd0890f772..2e7dad4afab 100644 --- a/neural_compressor/adaptor/tf_utils/smooth_quant_calibration.py +++ b/neural_compressor/adaptor/tf_utils/smooth_quant_calibration.py @@ -16,18 +16,21 @@ # limitations under the License. """Tensorflow model calibration process for Smooth Quantization.""" -import os import logging -import numpy as np +import os from collections import OrderedDict, UserDict + +import numpy as np from tensorflow.core.framework import graph_pb2 from tensorflow.python.framework import tensor_util + from .quantize_graph_common import QuantizeGraphHelper from .util import iterator_sess_run logger = logging.getLogger("neural_compressor") debug = bool(logger.level == logging.DEBUG) + class SmoothQuantCalibration: """A class for performing smooth quantization calibration on a Tensorflow model. @@ -39,6 +42,7 @@ class SmoothQuantCalibration: percentile (float): The percentile of calibration to remove outliers. black_nodes (List[str]): A list of node names to be ignored during calibration. """ + def __init__(self, model, dataloader, iterations, op_types, percentile, black_nodes): """Initializes a SmoothQuantCalibration object.""" self.model = model @@ -50,7 +54,7 @@ def __init__(self, model, dataloader, iterations, op_types, percentile, black_no self.black_nodes = black_nodes self._sq_input_node_names = [] self._sq_output_tensor_dict = {} - self._sq_weight_node_names = {} # mapping from its weight node name to the concrete output node name + self._sq_weight_node_names = {} # mapping from its weight node name to the concrete output node name def _inference_for_calibration(self, model): """Run the calibration on the input graph. 
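The generate_quantized_weight_node helper reformatted above reduces, in its per-channel branch, to a symmetric int8 quantization of the weight with one range per output channel. A condensed numpy sketch of that branch (epsilon clamping kept; the Enter-node copies, transpose_b handling and the depthwise reshape path are omitted, so this is an approximation rather than a drop-in replacement):

import numpy as np

def quantize_weight_per_channel(weight, epsilon=1e-4):
    # Reduce |W| over every axis except the last one, which holds the output
    # channel for Conv2D/MatMul weights, giving one range per output channel.
    reduce_axes = tuple(range(weight.ndim - 1))
    ranges = np.abs(weight).max(axis=reduce_axes)
    ranges[ranges < epsilon] = epsilon            # avoid dividing by near-zero channels
    qint8_tensor = np.round(weight * 127.0 / ranges).astype(np.int8)
    # The real helper additionally emits min/max Const nodes (and optional Enter
    # copies when the weight lives inside a while-loop frame).
    return qint8_tensor, ranges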
@@ -65,25 +69,28 @@ def _inference_for_calibration(self, model): sess = model.sess iter_op = model.iter_op input_tensor = model.input_tensor - output_tensor = [item + ':0' for item in self._sq_input_node_names] + output_tensor = [item + ":0" for item in self._sq_input_node_names] # TF table initialization: https://github.com/tensorflow/tensorflow/issues/8665 node_names = [node.name for node in sess.graph.as_graph_def().node] - if 'init_all_tables' in node_names: # pragma: no cover - init_table_op = sess.graph.get_operation_by_name('init_all_tables') + if "init_all_tables" in node_names: # pragma: no cover + init_table_op = sess.graph.get_operation_by_name("init_all_tables") sess.run(init_table_op) logger.info("Start sampling on calibration dataset for Smooth Quantization.") if hasattr(self.dataloader, "__len__") and len(self.dataloader) == 0: # pragma: no cover feed_dict = {} - for output_idx, output in enumerate(sess.run(output_tensor, feed_dict) if iter_op==[] \ - else iterator_sess_run(sess, iter_op, feed_dict, output_tensor, self.iterations)): - self._sq_output_tensor_dict.setdefault( - self._sq_input_node_names[output_idx], []).append(output) + for output_idx, output in enumerate( + sess.run(output_tensor, feed_dict) + if iter_op == [] + else iterator_sess_run(sess, iter_op, feed_dict, output_tensor, self.iterations) + ): + self._sq_output_tensor_dict.setdefault(self._sq_input_node_names[output_idx], []).append(output) for idx, (inputs, labels) in enumerate(self.dataloader): if len(input_tensor) == 1: feed_dict = {} - if isinstance(inputs, dict) or isinstance(inputs, OrderedDict) \ - or isinstance(inputs, UserDict): # pragma: no cover + if ( + isinstance(inputs, dict) or isinstance(inputs, OrderedDict) or isinstance(inputs, UserDict) + ): # pragma: no cover for name in inputs: for tensor in input_tensor: pos = tensor.name.rfind(":") @@ -93,12 +100,10 @@ def _inference_for_calibration(self, model): break else: feed_dict = {input_tensor[0]: inputs} # get raw tensor using index [0] - else: # pragma: no cover - assert len(input_tensor) == len(inputs), \ - 'inputs len must equal with input_tensor' + else: # pragma: no cover + assert len(input_tensor) == len(inputs), "inputs len must equal with input_tensor" feed_dict = {} - if isinstance(inputs, dict) or isinstance(inputs, OrderedDict) \ - or isinstance(inputs, UserDict): + if isinstance(inputs, dict) or isinstance(inputs, OrderedDict) or isinstance(inputs, UserDict): for name in inputs: for tensor in input_tensor: pos = tensor.name.rfind(":") @@ -111,9 +116,7 @@ def _inference_for_calibration(self, model): # we should check and pair them def check_shape(tensor, data): # scalar or 1 dim default True - if tensor.shape == None or \ - len(tensor.shape.dims) == 1 or \ - not hasattr(data, 'shape'): + if tensor.shape is None or len(tensor.shape.dims) == 1 or not hasattr(data, "shape"): return True tensor_shape = tuple(tensor.shape) data_shape = tuple(data.shape) @@ -125,7 +128,7 @@ def check_shape(tensor, data): disorder_tensors = [] disorder_inputs = [] for idx, sort_tensor in enumerate(input_tensor): - sort_input = inputs[idx] + sort_input = inputs[idx] if check_shape(sort_tensor, sort_input): feed_dict.update({sort_tensor: sort_input}) else: @@ -136,10 +139,12 @@ def check_shape(tensor, data): if check_shape(dis_tensor, dis_input): feed_dict.update({dis_tensor: dis_input}) break - for output_idx, output in enumerate(sess.run(output_tensor, feed_dict) if iter_op==[] \ - else iterator_sess_run(sess, iter_op, feed_dict, output_tensor, 
self.iterations)): - self._sq_output_tensor_dict.setdefault( - self._sq_input_node_names[output_idx], []).append(output) + for output_idx, output in enumerate( + sess.run(output_tensor, feed_dict) + if iter_op == [] + else iterator_sess_run(sess, iter_op, feed_dict, output_tensor, self.iterations) + ): + self._sq_output_tensor_dict.setdefault(self._sq_input_node_names[output_idx], []).append(output) if idx + 1 == self.iterations: break os.environ["ITEX_REMAPPER"] = "1" @@ -147,15 +152,14 @@ def check_shape(tensor, data): def _generate_calibration_data(self): """Generate the calibration data.""" sorted_graph = QuantizeGraphHelper().get_sorted_graph( - self.model.graph_def, - self.model.input_node_names, - self.model.output_node_names) + self.model.graph_def, self.model.input_node_names, self.model.output_node_names + ) for node in sorted_graph.node: if node.op not in self.op_types or node.name in self.black_nodes: continue # Fix retval already been set issue - if 'while' in node.input[0]: # pragma: no cover + if "while" in node.input[0]: # pragma: no cover continue self._sq_input_node_names.append(node.input[0]) self._sq_weight_node_names[node.input[1]] = node.name @@ -173,19 +177,19 @@ def _get_maxval_per_channel(self, tensor_data, percentile): The max values per input channel """ permute_datas = [] - for data in tensor_data: # iteration_num * (N, H, W, C) + for data in tensor_data: # iteration_num * (N, H, W, C) if len(data.shape) == 3: # pragma: no cover # TODO matmul batchsize*seq*inchannel tensor = np.abs(np.reshape(data, (-1, data.shape[-1]))) permute_datas.append(tensor) - elif len(data.shape) == 4: # already NHWC + elif len(data.shape) == 4: # already NHWC # tensor = np.transpose(data, [0, 3, 1, 2]) tensor = data tensor = np.abs(np.reshape(tensor, (-1, tensor.shape[-1]))) permute_datas.append(tensor) elif len(data.shape) == 2: # (?, ic) permute_datas.append(np.abs(data)) - else: # pragma: no cover + else: # pragma: no cover assert False, "not supported" permute_datas = np.concatenate(permute_datas, axis=0) permute_datas = permute_datas.reshape(-1, permute_datas.shape[-1]) @@ -210,7 +214,7 @@ def __call__(self): max_vals_per_channel = {} for key in self._sq_output_tensor_dict.keys(): max_val_per_channel = self._get_maxval_per_channel( - self._sq_output_tensor_dict[key], percentile=self.percentile) + self._sq_output_tensor_dict[key], percentile=self.percentile + ) max_vals_per_channel[key] = max_val_per_channel return max_vals_per_channel, self._sq_weight_node_names - diff --git a/neural_compressor/adaptor/tf_utils/smooth_quant_scaler.py b/neural_compressor/adaptor/tf_utils/smooth_quant_scaler.py index dd7eb203c80..be5152dd7e7 100644 --- a/neural_compressor/adaptor/tf_utils/smooth_quant_scaler.py +++ b/neural_compressor/adaptor/tf_utils/smooth_quant_scaler.py @@ -16,18 +16,20 @@ # limitations under the License. """Tensorflow scaling model weights and activations for Smooth Quantization.""" +import logging + import numpy as np import tensorflow as tf -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import tensor_util +from tensorflow.python.framework import dtypes, tensor_util + from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer -import logging logger = logging.getLogger("neural_compressor") + class SmoothQuantScaler: """A class for scaling model weights using Smooth Quantization method. 
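_get_maxval_per_channel above flattens every sampled activation batch to a (samples, input_channels) matrix of absolute values before taking the calibration statistic. The reduction can be pictured as below; the final per-channel reduction is not visible in this hunk, so treating it as np.percentile along axis 0 is an assumption based on the class docstring ("the percentile of calibration to remove outliers"):

import numpy as np

def maxval_per_channel(tensor_data, percentile):
    permuted = []
    for data in tensor_data:                       # one array per calibration iteration
        # Collapse batch/spatial dims so only the channel axis (last dim) survives.
        permuted.append(np.abs(data.reshape(-1, data.shape[-1])))
    stacked = np.concatenate(permuted, axis=0)
    # Assumed: a high percentile of |x| per channel, trimming calibration outliers.
    return np.percentile(stacked, percentile, axis=0)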
- + Args: model: Tensorflow model to be scaled dataloader: Tensorflow dataloader for the dataset @@ -56,10 +58,14 @@ def _adjust_activation(self, scale, input_node_name, output_node_name, w_i): w_i: distinguish between different output weight nodes on different branches when naming """ from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper + node_suffix = str(w_i) mul_const_node = Helper.create_constant_node(input_node_name + "/scale_mul" + node_suffix, scale, tf.float32) - mul_node = Helper.create_node('Mul', input_node_name + "_mul" + node_suffix, - [input_node_name + "/scale_mul" + node_suffix, input_node_name]) + mul_node = Helper.create_node( + "Mul", + input_node_name + "_mul" + node_suffix, + [input_node_name + "/scale_mul" + node_suffix, input_node_name], + ) Helper.set_attr_dtype(mul_node, "T", dtypes.float32) self.mul_list.append(mul_node.name) self.g_analyzer.add_node(mul_node, input_node_name, [output_node_name]) @@ -75,16 +81,16 @@ def _adjust_weight(self, scale, weight_node, original_weight): """ # scale: (ic,) original_shape = original_weight.shape - if len(original_shape) == 4: # (fh, hw, ic, oc) - W = np.transpose(original_weight, [0, 1, 3, 2]) # move input channel to last dimension + if len(original_shape) == 4: # (fh, hw, ic, oc) + W = np.transpose(original_weight, [0, 1, 3, 2]) # move input channel to last dimension W *= scale - W = np.transpose(W, [0, 1, 3, 2]) # move input channel back - weight_node.attr['value'].tensor.CopyFrom(tensor_util.make_tensor_proto(W)) + W = np.transpose(W, [0, 1, 3, 2]) # move input channel back + weight_node.attr["value"].tensor.CopyFrom(tensor_util.make_tensor_proto(W)) elif len(original_shape) == 2: # (ic, oc) if transpose_a == transpose_b == false W = np.transpose(original_weight, [1, 0]) W *= scale W = np.transpose(W, [1, 0]) - weight_node.attr['value'].tensor.CopyFrom(tensor_util.make_tensor_proto(W)) + weight_node.attr["value"].tensor.CopyFrom(tensor_util.make_tensor_proto(W)) def transform(self, max_vals_per_channel, sq_weight_tensors, sq_weights_nodes, sq_weight_node_names): """Apply scaling to weights and activations based on the maximum values per channel. @@ -94,7 +100,7 @@ def transform(self, max_vals_per_channel, sq_weight_tensors, sq_weights_nodes, s sq_weight_tensors (dict): A dictionary containing the name -> weight tensors mapping for each input node. sq_weights_nodes (dict): A dictionary containing the name -> constant nodes mapping for each input node. sq_weight_node_names (dict): A dictionary from weight node name to the its concrete output node name. - + Returns: tuple: A tuple containing the modified model and a list of the inserted multiplication nodes. 
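The _adjust_weight and _adjust_activation helpers above each apply one per-input-channel factor; the transform body below derives that factor from the calibration maxima as scale = A_max**alpha / W_max**(1 - alpha). A numpy restatement of the MatMul case follows. It is a sketch only: the real code edits Const nodes and inserts a Mul in front of the activation instead of returning arrays, the conv case transposes the 4-D filter first, and the exact value fed to _adjust_activation is not shown in these hunks, so the reciprocal used here is the standard SmoothQuant choice rather than a quote of the code.

import numpy as np

def smooth_quant_matmul(act_max_per_ic, weight, alpha=0.5):
    # Per-input-channel weight maxima for an (ic, oc) MatMul weight.
    w_max_per_ic = np.abs(weight).max(axis=1)
    scale = np.power(act_max_per_ic, alpha) / np.power(w_max_per_ic, 1.0 - alpha)
    scaled_weight = weight * scale[:, None]        # weight absorbs the scale (cf. _adjust_weight)
    act_multiplier = 1.0 / scale                   # activation side takes the reciprocal so the
                                                   # MatMul output stays numerically unchanged
    return scaled_weight, act_multiplier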
""" @@ -118,17 +124,18 @@ def transform(self, max_vals_per_channel, sq_weight_tensors, sq_weights_nodes, s tensor = np.abs(np.transpose(W, [0, 1, 3, 2])) # reduce weight max to (in_channel, ), aligned with activation max W_max_per_in_channel = np.max(np.reshape(tensor, (-1, tensor.shape[-1])), axis=0) - elif len(W.shape) == 2: # matmul + elif len(W.shape) == 2: # matmul # reduce weight max to (in_channel, ), aligned with activation max tensor = np.abs(W) W_max_per_in_channel = np.max(tensor, axis=1) - else: # pragma: no cover + else: # pragma: no cover assert False, "not supported" cur_const_node = W_const_node_dict[W_name] try: - scale = np.power(A_max_per_in_channel, self.alpha) / \ - np.power(W_max_per_in_channel, (1-self.alpha)) - except ValueError as e: # pragma: no cover + scale = np.power(A_max_per_in_channel, self.alpha) / np.power( + W_max_per_in_channel, (1 - self.alpha) + ) + except ValueError as e: # pragma: no cover logger.info(e) logger.info("Skip smoothing the node: {}".format(cur_const_node.name)) continue diff --git a/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py b/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py index 0f3fe3cfdc4..fa923cfe411 100644 --- a/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py +++ b/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py @@ -18,26 +18,36 @@ """Tensorflow QDQ model convert to ONNX QDQ model.""" import logging -import tensorflow as tf + import numpy as np +import tensorflow as tf from onnx import helper from packaging.version import Version -from tensorflow.core.framework import tensor_pb2, node_def_pb2 +from tensorflow.core.framework import node_def_pb2, tensor_pb2 from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer -from neural_compressor.utils.utility import dump_elapsed_time, LazyImport +from neural_compressor.utils.utility import LazyImport, dump_elapsed_time + from .graph_rewriter.onnx import tf2onnx_utils as utils from .graph_rewriter.onnx.onnx_graph import OnnxGraph -t2o = LazyImport('tf2onnx') +t2o = LazyImport("tf2onnx") logger = logging.getLogger("neural_compressor") class TensorflowQDQToOnnxQDQConverter: """Convert tensorflow QDQ graph to ONNX QDQ graph.""" - def __init__(self, model, input_names, output_names, shape_override, - inputs_as_nchw=None, opset_version=utils.DEFAULT_OPSET_VERSION): + + def __init__( + self, + model, + input_names, + output_names, + shape_override, + inputs_as_nchw=None, + opset_version=utils.DEFAULT_OPSET_VERSION, + ): """Constructor, initilization. 
Args: @@ -52,7 +62,7 @@ def __init__(self, model, input_names, output_names, shape_override, graph = tf.Graph() with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") self.graph = graph self.opset_version = opset_version @@ -66,12 +76,11 @@ def __init__(self, model, input_names, output_names, shape_override, for name, shape in self.shape_override.items(): logger.info("\tSet %s shape to %s", name, shape) self.graph.get_tensor_by_name(name).set_shape(shape) - graph_def = self.graph.as_graph_def(add_shapes=True) + graph_def = self.graph.as_graph_def(add_shapes=True) with tf.Graph().as_default() as inferred_graph: tf.import_graph_def(graph_def, name="") self.graph = inferred_graph - def duplicate_tf_quantizev2_nodes(self, model): """Duplicate QuantizeV2 nodes if the Dequantize nodes share the same QuantizeV2.""" cur_graph = GraphAnalyzer() @@ -79,7 +88,7 @@ def duplicate_tf_quantizev2_nodes(self, model): graph_info = cur_graph.parse_graph() # Scan the QDQ pairs - patterns = [['QuantizeV2'], ['Dequantize']] + patterns = [["QuantizeV2"], ["Dequantize"]] matched_nodes = cur_graph.query_fusion_pattern_nodes(patterns) # Append the QDQ pairs to QuantizeV2 nodes map and Dequantize nodes map @@ -118,13 +127,12 @@ def duplicate_tf_quantizev2_nodes(self, model): dequantize_node = dequantize_nodes[index + 1] new_quantizev2_node = node_def_pb2.NodeDef() new_quantizev2_node.CopyFrom(quantizev2_nodes[0]) - new_quantizev2_node.name = quantizev2_nodes[0].name + '_copy_' + str(index+1) - cur_graph.add_node( - new_quantizev2_node, input_map_node_name, [dequantize_node.name]) - cur_graph.node_name_details[dequantize_node.name].node.ClearField('input') - cur_graph.node_name_details[dequantize_node.name].node.input.extend([ - new_quantizev2_node.name, new_quantizev2_node.name + ':1', - new_quantizev2_node.name + ':2']) + new_quantizev2_node.name = quantizev2_nodes[0].name + "_copy_" + str(index + 1) + cur_graph.add_node(new_quantizev2_node, input_map_node_name, [dequantize_node.name]) + cur_graph.node_name_details[dequantize_node.name].node.ClearField("input") + cur_graph.node_name_details[dequantize_node.name].node.input.extend( + [new_quantizev2_node.name, new_quantizev2_node.name + ":1", new_quantizev2_node.name + ":2"] + ) return cur_graph.dump_graph() @@ -132,8 +140,8 @@ def tf_graph_optimize(self, model): """Pre optimize the tensorflow graphdef to make ONNX QDQ model convert more easier.""" # Convert HostConst to Const for node in model.node: - if node.op == 'HostConst': - node.op = 'Const' + if node.op == "HostConst": + node.op = "Const" # Duplicate the QuantizeV2 node if it has multi Dequantize nodes model = self.duplicate_tf_quantizev2_nodes(model) @@ -186,8 +194,7 @@ def convert(self, save_path): node_list = self.graph.get_operations() - outputs_to_values, _ = utils.compute_const_folding_using_tf( - self.graph, None, self.output_names) + outputs_to_values, _ = utils.compute_const_folding_using_tf(self.graph, None, self.output_names) # Create dict with output to shape mappings for node in node_list: @@ -218,8 +225,7 @@ def convert(self, save_path): attr_dict[each_attr] = nattr.name functions[nattr.name] = input_shapes elif isinstance(value, tensor_pb2.TensorProto): - onnx_tensor = utils.convert_tensorflow_tensor_to_onnx( - value, name=utils.add_port_to_name(node.name)) + onnx_tensor = utils.convert_tensorflow_tensor_to_onnx(value, name=utils.add_port_to_name(node.name)) attr_dict[each_attr] = onnx_tensor node_type = node.type node_input_names = [i.name 
for i in node.inputs] @@ -227,16 +233,18 @@ def convert(self, save_path): if convert_to_onnx: try: - onnx_node = helper.make_node(node_type, node_input_names, node_output_names, - name=node.name, **attr_dict) + onnx_node = helper.make_node( + node_type, node_input_names, node_output_names, name=node.name, **attr_dict + ) onnx_nodes.append(onnx_node) except Exception as ex: logger.error("tensorflow node convert to onnx failed for %s, ex=%s", node.name, ex) raise # Build ONNX Graph using onnx_nodes, output_shapes and dtypes - onnx_graph = OnnxGraph(onnx_nodes, output_shapes, dtypes, input_names=self.input_names, - output_names=self.output_names) + onnx_graph = OnnxGraph( + onnx_nodes, output_shapes, dtypes, input_names=self.input_names, output_names=self.output_names + ) t2o.tfonnx.fold_constants_using_tf(onnx_graph, outputs_to_values) if self.inputs_as_nchw: @@ -244,10 +252,10 @@ def convert(self, save_path): # Convert TF QDQ pattern to ONNX QDQ format for node in onnx_graph.get_nodes(): - if node.type == 'Dequantize': - parent_node = onnx_graph.get_node_by_name(node.input[0].rsplit(':', 1)[0]) + if node.type == "Dequantize": + parent_node = onnx_graph.get_node_by_name(node.input[0].rsplit(":", 1)[0]) if parent_node: - if parent_node.type == 'QuantizeV2': + if parent_node.type == "QuantizeV2": onnx_graph.convert_qdq_nodes(parent_node, node) # Create ops mapping for the desired opsets @@ -279,7 +287,7 @@ def convert(self, save_path): t2o.rewriter.rewrite_generic_loop, t2o.rewriter.rewrite_cond, # rewrite_biasadd_with_conv2d introduces accuracy issue - #t2o.rewriter.rewrite_biasadd_with_conv2d, + # t2o.rewriter.rewrite_biasadd_with_conv2d, t2o.rewriter.rewrite_layer_normalization, t2o.rewriter.rewrite_gemm, t2o.rewriter.rewrite_ragged_variant_shape, @@ -291,8 +299,7 @@ def convert(self, save_path): onnx_graph.delete_unused_nodes(onnx_graph.outputs) t2o.tfonnx.topological_sort(onnx_graph, False) - mapped_op, unmapped_op, exceptions = \ - t2o.tfonnx.tensorflow_onnx_mapping(onnx_graph, ops_mapping) + mapped_op, unmapped_op, exceptions = t2o.tfonnx.tensorflow_onnx_mapping(onnx_graph, ops_mapping) if unmapped_op: logger.error("Unsupported ops: %s", unmapped_op) if exceptions: @@ -309,7 +316,8 @@ def convert(self, save_path): "\ttensorflow ops: {}\n" "\ttensorflow attr: {}\n" "\tonnx mapped: {}\n" - "\tonnx unmapped: {}".format(op_cnt, attr_cnt, mapped_op, unmapped_op)) + "\tonnx unmapped: {}".format(op_cnt, attr_cnt, mapped_op, unmapped_op) + ) onnx_graph = t2o.optimizer.optimize_graph(onnx_graph) diff --git a/neural_compressor/adaptor/tf_utils/transform_graph/bias_correction.py b/neural_compressor/adaptor/tf_utils/transform_graph/bias_correction.py index b93b5b8b808..9ae6e793406 100644 --- a/neural_compressor/adaptor/tf_utils/transform_graph/bias_correction.py +++ b/neural_compressor/adaptor/tf_utils/transform_graph/bias_correction.py @@ -17,14 +17,12 @@ # """Bias correction graph transform.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import numpy as np -from tensorflow.python.framework import tensor_util from tensorflow.core.framework import attr_value_pb2 -from tensorflow.python.framework import dtypes +from tensorflow.python.framework import dtypes, tensor_util + from .graph_transform_base import GraphTransformBase @@ -42,12 +40,12 @@ class BiasCorrection(GraphTransformBase): scale_c = r * scale and shift = u with this we don't change the min/max value, and 
correct the weight """ - def __init__(self, input_graph, fp32_graph, method='weight_empirical', new_api=False): + + def __init__(self, input_graph, fp32_graph, method="weight_empirical", new_api=False): """Initilization.""" # only support weight_empirical now - self.bias_correct_map = {'weight_empirical': self._weight_empirical} - assert method in self.bias_correct_map, \ - 'only support weight empirical correction method' + self.bias_correct_map = {"weight_empirical": self._weight_empirical} + assert method in self.bias_correct_map, "only support weight empirical correction method" super(BiasCorrection, self).__init__(input_graph) self.fp32_graph = fp32_graph @@ -67,76 +65,71 @@ def _weight_empirical(self): node = self.node_mapping[node_name] node_op = node.op if self.new_api: - if '_FusedQuantizedConv2D' not in node_op: + if "_FusedQuantizedConv2D" not in node_op: continue else: - if 'QuantizedConv2D' not in node_op: + if "QuantizedConv2D" not in node_op: continue - int8_filter = self.node_mapping[self.get_node_name_from_input( - node.input[1])] + int8_filter = self.node_mapping[self.get_node_name_from_input(node.input[1])] - int8_value = tensor_util.MakeNdarray( - int8_filter.attr['value'].tensor) + int8_value = tensor_util.MakeNdarray(int8_filter.attr["value"].tensor) tr_int8_value = int8_value.transpose([3, 0, 1, 2]) - fp32_filter_name = self.get_node_name_from_input( - node.input[1]).split('_qint8_const')[0] + fp32_filter_name = self.get_node_name_from_input(node.input[1]).split("_qint8_const")[0] fp32_filter = self.fp32_node_mapping[fp32_filter_name] - fp32_value = tensor_util.MakeNdarray( - fp32_filter.attr['value'].tensor) + fp32_value = tensor_util.MakeNdarray(fp32_filter.attr["value"].tensor) tr_fp32_value = fp32_value.transpose([3, 0, 1, 2]) # if bias fused, then offset to min/max filter should be 5 if self.new_api: offset = 5 else: - offset = 5 if 'Bias' in node_op else 4 - min_filter_node = self.node_mapping[ - node.input[offset]] - max_filter_node = self.node_mapping[ - node.input[offset + 1]] + offset = 5 if "Bias" in node_op else 4 + min_filter_node = self.node_mapping[node.input[offset]] + max_filter_node = self.node_mapping[node.input[offset + 1]] - channel_size = 1 if not min_filter_node.attr[ - 'value'].tensor.tensor_shape.dim else min_filter_node.attr[ - 'value'].tensor.tensor_shape.dim[0].size + channel_size = ( + 1 + if not min_filter_node.attr["value"].tensor.tensor_shape.dim + else min_filter_node.attr["value"].tensor.tensor_shape.dim[0].size + ) if channel_size == 1: max_filter_tensor = [] min_filter_tensor = [] - max_filter_tensor.append( - (max_filter_node.attr['value'].tensor.float_val)[0]) - min_filter_tensor.append( - (min_filter_node.attr['value'].tensor.float_val)[0]) + max_filter_tensor.append((max_filter_node.attr["value"].tensor.float_val)[0]) + min_filter_tensor.append((min_filter_node.attr["value"].tensor.float_val)[0]) else: - max_filter_tensor = tensor_util.MakeNdarray( - max_filter_node.attr['value'].tensor) - min_filter_tensor = tensor_util.MakeNdarray( - min_filter_node.attr['value'].tensor) + max_filter_tensor = tensor_util.MakeNdarray(max_filter_node.attr["value"].tensor) + min_filter_tensor = tensor_util.MakeNdarray(min_filter_node.attr["value"].tensor) tr_quantized_fp32_value = np.zeros_like(tr_fp32_value) tr_corrected_int8_value = np.zeros_like(tr_int8_value) for i in range(channel_size): - scale = max(abs(max_filter_tensor[i]), - abs(min_filter_tensor[i])) / 127 + scale = max(abs(max_filter_tensor[i]), abs(min_filter_tensor[i])) / 127 
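The per-channel loop that starts here and continues below carries out the weight-empirical correction described in the BiasCorrection docstring: dequantize the int8 filter, then match the fp32 filter's mean and variance channel by channel. Restated in isolation for one channel (illustrative only; the real code works on NodeDef tensors and writes the corrected values back into the qint8 Const node):

import numpy as np

def correct_channel(fp32_w, int8_w, ch_min, ch_max):
    scale = max(abs(ch_max), abs(ch_min)) / 127.0
    quantized_fp32 = int8_w.astype(np.float64) * scale         # what the int8 weights represent
    delta_mean = np.mean(fp32_w - quantized_fp32)              # first-moment mismatch
    q_std = np.std(quantized_fp32)
    var_ratio = np.std(fp32_w) / q_std if q_std != 0 else 1.0  # second-moment mismatch
    # Fold both corrections back into the int8 domain, as the loop does per channel.
    return (var_ratio / scale) * (fp32_w + delta_mean)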
tr_quantized_fp32_value[i] = tr_int8_value[i].astype(np.float64) * scale delta_mean = np.mean((tr_fp32_value[i] - tr_quantized_fp32_value[i]).flatten()) - var_ratio = np.std(tr_fp32_value[i].flatten()) / \ - np.std(tr_quantized_fp32_value[i].flatten()) if \ - np.std(tr_quantized_fp32_value[i].flatten()) != 0 else 1 + var_ratio = ( + np.std(tr_fp32_value[i].flatten()) / np.std(tr_quantized_fp32_value[i].flatten()) + if np.std(tr_quantized_fp32_value[i].flatten()) != 0 + else 1 + ) tr_corrected_int8_value[i] = (var_ratio / scale) * (tr_fp32_value[i] + delta_mean) correct_int8_value = tr_int8_value.transpose([1, 2, 3, 0]) - assert int8_value.shape == correct_int8_value.shape, \ - 'correct filter shape should equal with origin filter shape' + assert ( + int8_value.shape == correct_int8_value.shape + ), "correct filter shape should equal with origin filter shape" bias = int8_value.astype(np.float32) - correct_int8_value.astype(np.float32) - if np.sum(bias) != 0 : - int8_filter.attr['value'].CopyFrom( + if np.sum(bias) != 0: + int8_filter.attr["value"].CopyFrom( attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - correct_int8_value, dtypes.qint8, int8_value.shape))) + tensor=tensor_util.make_tensor_proto(correct_int8_value, dtypes.qint8, int8_value.shape) + ) + ) return self.input_graph def do_transformation(self): diff --git a/neural_compressor/adaptor/tf_utils/transform_graph/graph_transform_base.py b/neural_compressor/adaptor/tf_utils/transform_graph/graph_transform_base.py index 9a885341d6c..09d393fab4e 100644 --- a/neural_compressor/adaptor/tf_utils/transform_graph/graph_transform_base.py +++ b/neural_compressor/adaptor/tf_utils/transform_graph/graph_transform_base.py @@ -17,17 +17,17 @@ # """GraphTransform Base Class.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import logging import re + from tensorflow.core.framework import graph_pb2 from tensorflow.python.platform import gfile logger = logging.getLogger("neural_compressor") + class GraphTransformBase(object): """GraphTransform Base Class.""" @@ -41,11 +41,10 @@ def __init__(self, input_pb): self.input_graph = input_pb else: try: - with gfile.Open(input_pb, 'rb') as f: + with gfile.Open(input_pb, "rb") as f: self.input_graph.ParseFromString(f.read()) except Exception as e: - logger.error("Fail to read input pb from {} due to {}.".format( - input_pb, str(e))) + logger.error("Fail to read input pb from {} due to {}.".format(input_pb, str(e))) self.node_mapping = {} self.node_name_list = [] @@ -74,8 +73,7 @@ def generate_input_map(self): if node_name not in self.input_node_map: self.input_node_map[node_name] = node else: - raise ValueError("Duplicate node names detected for ", - node.name) + raise ValueError("Duplicate node names detected for ", node.name) def node_name_from_input(self, node_name): """Get the original node name from input string. @@ -86,7 +84,7 @@ def node_name_from_input(self, node_name): Returns: node's name """ - if node_name.startswith("^"): # pragma: no cover + if node_name.startswith("^"): # pragma: no cover node_name = node_name[1:] m = re.search(r"(.*):\d+$", node_name) if m: @@ -102,9 +100,12 @@ def get_node_name_from_input(self, node_name): Returns: node's name """ - node_names = node_name.split(':') + node_names = node_name.split(":") return node_names[0] def do_transformation(self): - """Virtual Interface. 
Each transformation should implement it.""" + """Virtual Interface. + + Each transformation should implement it. + """ pass diff --git a/neural_compressor/adaptor/tf_utils/transform_graph/insert_logging.py b/neural_compressor/adaptor/tf_utils/transform_graph/insert_logging.py index 712cef8a4c2..4f2a721431f 100644 --- a/neural_compressor/adaptor/tf_utils/transform_graph/insert_logging.py +++ b/neural_compressor/adaptor/tf_utils/transform_graph/insert_logging.py @@ -17,34 +17,26 @@ # """Insert logging graph transformation.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function -from tensorflow.core.framework import attr_value_pb2 -from tensorflow.core.framework import node_def_pb2 +from tensorflow.core.framework import attr_value_pb2, node_def_pb2 from tensorflow.python.framework import dtypes + from .graph_transform_base import GraphTransformBase class InsertLogging(GraphTransformBase): """Insert logging graph transformation.""" + op_output_type_mapping = { - "RequantizationRange": - [dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum], - "RequantizationRangePerChannel": - [dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum], + "RequantizationRange": [dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum], + "RequantizationRangePerChannel": [dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum], "QuantizedConv2DWithBiasAndRelu": [dtypes.qint32.as_datatype_enum], - "QuantizedConv2DWithBiasAndReluAndRequantize": - [dtypes.quint8.as_datatype_enum], - "QuantizedConv2DWithBiasAndRequantize": - [dtypes.qint8.as_datatype_enum], - "QuantizedConv2DWithBiasSignedSumAndReluAndRequantize": - [dtypes.qint8.as_datatype_enum], - "QuantizedConv2DWithBiasSumAndReluAndRequantize": - [dtypes.quint8.as_datatype_enum], - "QuantizedDepthwiseConv2DWithBiasAndReluAndRequantize": - [dtypes.quint8.as_datatype_enum], + "QuantizedConv2DWithBiasAndReluAndRequantize": [dtypes.quint8.as_datatype_enum], + "QuantizedConv2DWithBiasAndRequantize": [dtypes.qint8.as_datatype_enum], + "QuantizedConv2DWithBiasSignedSumAndReluAndRequantize": [dtypes.qint8.as_datatype_enum], + "QuantizedConv2DWithBiasSumAndReluAndRequantize": [dtypes.quint8.as_datatype_enum], + "QuantizedDepthwiseConv2DWithBiasAndReluAndRequantize": [dtypes.quint8.as_datatype_enum], "QuantizedConv2DWithBias": [dtypes.qint32.as_datatype_enum], "Relu": [dtypes.float32.as_datatype_enum], "Relu6": [dtypes.float32.as_datatype_enum], @@ -52,19 +44,21 @@ class InsertLogging(GraphTransformBase): "MaxPool": [dtypes.float32.as_datatype_enum], "BiasAdd": [dtypes.float32.as_datatype_enum], "Max": [dtypes.float32.as_datatype_enum], - "Min": [dtypes.float32.as_datatype_enum] + "Min": [dtypes.float32.as_datatype_enum], } - def __init__(self, - input_pb, - ops=[], - node_name_list=[], - show_name=True, - show_op=False, - first_n=-1, - summarize=1024, - message="", - dump_fp32=False): + def __init__( + self, + input_pb, + ops=[], + node_name_list=[], + show_name=True, + show_op=False, + first_n=-1, + summarize=1024, + message="", + dump_fp32=False, + ): """Initilization.""" super(InsertLogging, self).__init__(input_pb) @@ -87,7 +81,7 @@ def _get_suffix(self, input_str): Pure string name without suffix Index of the node """ - splitted_str = input_str.split(':') + splitted_str = input_str.split(":") if len(splitted_str) < 2: return input_str, 0 @@ -108,20 +102,20 @@ def _insert_node(self): """Insert the Print 
OP into the graph.""" for node_name in self.node_mapping: if node_name not in self.output_name_index_mapping or ( - not self.dump_fp32 and node_name.find("eightbit") == -1): + not self.dump_fp32 and node_name.find("eightbit") == -1 + ): continue - if self.ops and self.node_mapping[ - node_name].op in self.ops or node_name in self.node_name_list: + if self.ops and self.node_mapping[node_name].op in self.ops or node_name in self.node_name_list: name_suffix = "__print__" print_node = node_def_pb2.NodeDef() print_node.op = "Print" print_node.name = node_name + name_suffix - node_message = '' + node_message = "" if self.show_op: - node_message += ';' + self.node_mapping[node_name].op + ';' + node_message += ";" + self.node_mapping[node_name].op + ";" if self.show_name: - node_message += ';' + print_node.name + ';' + node_message += ";" + print_node.name + ";" node_message += self.message print_node.attr["message"].s = node_message.encode() @@ -130,84 +124,68 @@ def _insert_node(self): print_node.input.append(node_name + ":0") print_node.attr["T"].CopyFrom( - attr_value_pb2.AttrValue(type=self.op_output_type_mapping[ - self.node_mapping[node_name].op][0])) - - if self.node_mapping[node_name].op in ( - "QuantizedConv2DWithBias", - "QuantizedConv2DWithBiasAndRelu"): + attr_value_pb2.AttrValue(type=self.op_output_type_mapping[self.node_mapping[node_name].op][0]) + ) - for index in sorted( - self.output_name_index_mapping[node_name])[:1]: + if self.node_mapping[node_name].op in ("QuantizedConv2DWithBias", "QuantizedConv2DWithBiasAndRelu"): + for index in sorted(self.output_name_index_mapping[node_name])[:1]: print_node.input.append(node_name + ":" + str(index)) print_node_1 = node_def_pb2.NodeDef() print_node_1.op = "Print" print_node_1.name = node_name + name_suffix + "_min_output" - print_node_1.attr["message"].s = ( - node_message + "_min_output").encode() + print_node_1.attr["message"].s = (node_message + "_min_output").encode() print_node_1.attr["first_n"].i = self.first_n print_node_1.attr["summarize"].i = self.summarize print_node_1.attr["U"].list.CopyFrom( - attr_value_pb2.AttrValue.ListValue( - type=[dtypes.float32.as_datatype_enum])) - print_node_1.attr["T"].CopyFrom( - attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + attr_value_pb2.AttrValue.ListValue(type=[dtypes.float32.as_datatype_enum]) + ) + print_node_1.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) print_node_1.input.append(node_name + ":1") print_node_1.input.append(node_name + ":1") self.input_graph.node.extend([print_node_1]) - self.input_rename[node_name + - ':1'] = print_node_1.name + ':0' + self.input_rename[node_name + ":1"] = print_node_1.name + ":0" print_node_2 = node_def_pb2.NodeDef() print_node_2.op = "Print" print_node_2.name = node_name + name_suffix + "_max_output" - print_node_2.attr["message"].s = ( - node_message + "_max_output").encode() + print_node_2.attr["message"].s = (node_message + "_max_output").encode() print_node_2.attr["first_n"].i = self.first_n print_node_2.attr["summarize"].i = self.summarize print_node_2.attr["U"].list.CopyFrom( - attr_value_pb2.AttrValue.ListValue( - type=[dtypes.float32.as_datatype_enum])) - print_node_2.attr["T"].CopyFrom( - attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + attr_value_pb2.AttrValue.ListValue(type=[dtypes.float32.as_datatype_enum]) + ) + print_node_2.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) print_node_2.input.append(node_name + ":2") 
print_node_2.input.append(node_name + ":2") self.input_graph.node.extend([print_node_2]) - self.input_rename[node_name + - ':2'] = print_node_2.name + ':0' + self.input_rename[node_name + ":2"] = print_node_2.name + ":0" else: - for index in range( - len(self.op_output_type_mapping[ - self.node_mapping[node_name].op])): - print_node.input.append(node_name + ":" + str( - sorted(self.output_name_index_mapping[node_name]) - [index])) + for index in range(len(self.op_output_type_mapping[self.node_mapping[node_name].op])): + print_node.input.append( + node_name + ":" + str(sorted(self.output_name_index_mapping[node_name])[index]) + ) print_node.attr["U"].list.CopyFrom( attr_value_pb2.AttrValue.ListValue( - type=self.op_output_type_mapping[ - self.node_mapping[node_name].op])) + type=self.op_output_type_mapping[self.node_mapping[node_name].op] + ) + ) self.input_graph.node.extend([print_node]) - self.input_rename[node_name + ':0'] = print_node.name + ':0' + self.input_rename[node_name + ":0"] = print_node.name + ":0" def _rename_node(self): """Rename the original input node and connect to new added print node.""" for node_name in self.node_mapping: - for index, input_name in enumerate( - self.node_mapping[node_name].input): + for index, input_name in enumerate(self.node_mapping[node_name].input): if input_name in self.input_rename: - self.node_mapping[node_name].input[ - index] = self.input_rename[input_name] - elif input_name + ':0' in self.input_rename: - self.node_mapping[node_name].input[ - index] = self.input_rename[input_name + ':0'] + self.node_mapping[node_name].input[index] = self.input_rename[input_name] + elif input_name + ":0" in self.input_rename: + self.node_mapping[node_name].input[index] = self.input_rename[input_name + ":0"] def do_transformation(self): """Execute the insert logging transformation. 
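For reference, a minimal sketch of the Print-node construction that _insert_node performs; the tensor name "relu:0" and the standalone GraphDef are placeholders, not taken from this patch:

    from tensorflow.core.framework import attr_value_pb2, graph_pb2, node_def_pb2
    from tensorflow.python.framework import dtypes

    graph_def = graph_pb2.GraphDef()  # in practice, the graph being transformed

    print_node = node_def_pb2.NodeDef()
    print_node.op = "Print"
    print_node.name = "relu__print__"
    print_node.attr["message"].s = ";relu;".encode()
    print_node.attr["first_n"].i = -1        # log on every execution
    print_node.attr["summarize"].i = 1024    # max elements dumped per tensor
    print_node.input.append("relu:0")        # pass-through input
    print_node.input.append("relu:0")        # tensor to be printed
    print_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum))
    print_node.attr["U"].list.CopyFrom(
        attr_value_pb2.AttrValue.ListValue(type=[dtypes.float32.as_datatype_enum])
    )
    graph_def.node.extend([print_node])
    # Consumers of "relu:0" are then rewired to "relu__print__:0" (see _rename_node).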
diff --git a/neural_compressor/adaptor/tf_utils/transform_graph/rerange_quantized_concat.py b/neural_compressor/adaptor/tf_utils/transform_graph/rerange_quantized_concat.py index f317c625bce..9a60dadc0d5 100644 --- a/neural_compressor/adaptor/tf_utils/transform_graph/rerange_quantized_concat.py +++ b/neural_compressor/adaptor/tf_utils/transform_graph/rerange_quantized_concat.py @@ -17,17 +17,16 @@ # """ConcatV2 rerange transform.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import numpy as np -from tensorflow.python.framework import tensor_util from tensorflow.core.framework import attr_value_pb2 -from tensorflow.python.framework import dtypes -from .graph_transform_base import GraphTransformBase +from tensorflow.python.framework import dtypes, tensor_util + from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper +from .graph_transform_base import GraphTransformBase + class RerangeQuantizedConcat(GraphTransformBase): """This class implements the rerange_quantize concat graph transform.""" @@ -36,20 +35,21 @@ class RerangeQuantizedConcat(GraphTransformBase): "QuantizedConv2DWithBiasAndRequantize", "QuantizedConv2DWithBiasAndReluAndRequantize", "QuantizedConv2DWithBiasSumAndReluAndRequantize", - "QuantizedConv2DWithBiasSignedSumAndReluAndRequantize") - fuse_requantized_bias_op_new_api =( - [b'BiasAdd', b'Requantize'], - [b'BiasAdd', b'Relu', b'Requantize'], - [b'BiasAdd', b'LeakyRelu', b'Requantize'], - [b'BiasAdd', b'Sum', b'Relu', b'Requantize'], - [b'BiasAdd', b'Sum', b'Requantize'], - [b'BiasAdd', b'Elu', b'Requantize'], + "QuantizedConv2DWithBiasSignedSumAndReluAndRequantize", + ) + fuse_requantized_bias_op_new_api = ( + [b"BiasAdd", b"Requantize"], + [b"BiasAdd", b"Relu", b"Requantize"], + [b"BiasAdd", b"LeakyRelu", b"Requantize"], + [b"BiasAdd", b"Sum", b"Relu", b"Requantize"], + [b"BiasAdd", b"Sum", b"Requantize"], + [b"BiasAdd", b"Elu", b"Requantize"], ) - fuse_requantized_relu_op_new_api =( - [b'BiasAdd', b'Relu', b'Requantize'], - [b'BiasAdd', b'LeakyRelu', b'Requantize'], - #[b'BiasAdd', b'Sum', b'Relu', b'Requantize'] - [b'BiasAdd', b'Elu', b'Requantize'], + fuse_requantized_relu_op_new_api = ( + [b"BiasAdd", b"Relu", b"Requantize"], + [b"BiasAdd", b"LeakyRelu", b"Requantize"], + # [b'BiasAdd', b'Sum', b'Relu', b'Requantize'] + [b"BiasAdd", b"Elu", b"Requantize"], ) offset_map = { "QuantizedConv2DAndRequantize": 6, @@ -57,20 +57,19 @@ class RerangeQuantizedConcat(GraphTransformBase): "QuantizedConv2DWithBiasAndRequantize": 7, "QuantizedConv2DWithBiasAndReluAndRequantize": 7, "QuantizedConv2DWithBiasSumAndReluAndRequantize": 7, - "QuantizedConv2DWithBiasSignedSumAndReluAndRequantize": 7 - } + "QuantizedConv2DWithBiasSignedSumAndReluAndRequantize": 7, + } offset_map_new_api = { - str([b'Requantize']): 6, - str([b'Relu', b'Requantize']): 6, - str([b'LeakyRelu', b'Requantize']): 6, - str([b'BiasAdd', b'Requantize']): 7, - str([b'BiasAdd', b'Relu', b'Requantize']): 7, - str([b'BiasAdd', b'LeakyRelu', b'Requantize']): 7, - str([b'BiasAdd', b'Elu', b'Requantize']): 7, - str([b'BiasAdd', b'Sum', b'Relu', b'Requantize']): 10, - str([b'BiasAdd', b'Sum', b'Requantize']): 10 + str([b"Requantize"]): 6, + str([b"Relu", b"Requantize"]): 6, + str([b"LeakyRelu", b"Requantize"]): 6, + str([b"BiasAdd", b"Requantize"]): 7, + str([b"BiasAdd", b"Relu", b"Requantize"]): 7, + str([b"BiasAdd", b"LeakyRelu", b"Requantize"]): 7, + 
str([b"BiasAdd", b"Elu", b"Requantize"]): 7, + str([b"BiasAdd", b"Sum", b"Relu", b"Requantize"]): 10, + str([b"BiasAdd", b"Sum", b"Requantize"]): 10, } - def __init__(self, input_pb, device, performance_only=False): """Initilization.""" @@ -82,41 +81,38 @@ def __init__(self, input_pb, device, performance_only=False): self.device = device self.performance_only = performance_only - def _analyze_concat_node_recursively(self, quantized_conv_nodes, - input_node): + def _analyze_concat_node_recursively(self, quantized_conv_nodes, input_node): """Analyze all the ConcatV2 nodes recursively.""" op_type = input_node.op if op_type == "QuantizedConcatV2": can_rerange = True - concat_input_num = input_node.attr['N'].i + concat_input_num = input_node.attr["N"].i for index in range(concat_input_num): - concat_input_node = self.node_mapping[ - self.get_node_name_from_input(input_node.input[index])] + concat_input_node = self.node_mapping[self.get_node_name_from_input(input_node.input[index])] concat_input_node_op_type = concat_input_node.op if concat_input_node_op_type in self.offset_map: quantized_conv_nodes.append(concat_input_node) - elif concat_input_node.op == "_FusedQuantizedConv2D" and \ - 'fused_ops' in concat_input_node.attr and \ - str(concat_input_node.attr['fused_ops'].list.s) in self.offset_map_new_api: - quantized_conv_nodes.append(concat_input_node) - elif concat_input_node_op_type in ("QuantizedMaxPool", - "QuantizedAvgPool"): - another_concat_node = self.node_mapping[ - self.get_node_name_from_input( - concat_input_node.input[0])] + elif ( + concat_input_node.op == "_FusedQuantizedConv2D" + and "fused_ops" in concat_input_node.attr + and str(concat_input_node.attr["fused_ops"].list.s) in self.offset_map_new_api + ): + quantized_conv_nodes.append(concat_input_node) + elif concat_input_node_op_type in ("QuantizedMaxPool", "QuantizedAvgPool"): + another_concat_node = self.node_mapping[self.get_node_name_from_input(concat_input_node.input[0])] if self.performance_only: - if another_concat_node.op == "_FusedQuantizedConv2D" and \ - 'fused_ops' in another_concat_node.attr and \ - str(another_concat_node.attr['fused_ops'].list.s) in self.offset_map_new_api: + if ( + another_concat_node.op == "_FusedQuantizedConv2D" + and "fused_ops" in another_concat_node.attr + and str(another_concat_node.attr["fused_ops"].list.s) in self.offset_map_new_api + ): quantized_conv_nodes.append(another_concat_node) else: - if not self._analyze_concat_node_recursively( - quantized_conv_nodes, another_concat_node): + if not self._analyze_concat_node_recursively(quantized_conv_nodes, another_concat_node): can_rerange = False break elif concat_input_node_op_type == "QuantizedConcatV2": - if not self._analyze_concat_node_recursively( - quantized_conv_nodes, concat_input_node): + if not self._analyze_concat_node_recursively(quantized_conv_nodes, concat_input_node): can_rerange = False break elif self.performance_only and concat_input_node_op_type == "QuantizeV2": @@ -126,10 +122,11 @@ def _analyze_concat_node_recursively(self, quantized_conv_nodes, break return can_rerange - elif op_type == "QuantizedConv2DWithBiasAndReluAndRequantize" or \ - (input_node.op == "_FusedQuantizedConv2D" and \ - 'fused_ops' in input_node.attr and \ - input_node.attr['fused_ops'].list.s in self.fuse_requantized_relu_op_new_api): + elif op_type == "QuantizedConv2DWithBiasAndReluAndRequantize" or ( + input_node.op == "_FusedQuantizedConv2D" + and "fused_ops" in input_node.attr + and input_node.attr["fused_ops"].list.s in 
self.fuse_requantized_relu_op_new_api + ): can_rerange = True quantized_conv_nodes.append(input_node) return can_rerange @@ -142,8 +139,7 @@ def do_transformation(self): if node.op != "QuantizedConcatV2": continue quantized_conv_nodes = [] - can_rerange = self._analyze_concat_node_recursively( - quantized_conv_nodes, node) + can_rerange = self._analyze_concat_node_recursively(quantized_conv_nodes, node) if not can_rerange: continue @@ -152,21 +148,21 @@ def do_transformation(self): combined_max = -combined_min for node in quantized_conv_nodes: - - offset_value = 6 - if node.op == "_FusedQuantizedConv2D" and \ - 'fused_ops' in node.attr and \ - str(node.attr['fused_ops'].list.s) in self.offset_map_new_api: - offset_value = self.offset_map_new_api[str(node.attr['fused_ops'].list.s)] + offset_value = 6 + if ( + node.op == "_FusedQuantizedConv2D" + and "fused_ops" in node.attr + and str(node.attr["fused_ops"].list.s) in self.offset_map_new_api + ): + offset_value = self.offset_map_new_api[str(node.attr["fused_ops"].list.s)] elif self.performance_only and node.op == "QuantizeV2": offset_value = 1 else: offset_value = self.offset_map[node.op] min_value_node = self.node_mapping[node.input[offset_value]] - max_value_node = self.node_mapping[node.input[offset_value + - 1]] - min_value = min_value_node.attr['value'].tensor.float_val[0] - max_value = max_value_node.attr['value'].tensor.float_val[0] + max_value_node = self.node_mapping[node.input[offset_value + 1]] + min_value = min_value_node.attr["value"].tensor.float_val[0] + max_value = max_value_node.attr["value"].tensor.float_val[0] if min_value < combined_min: combined_min = min_value if max_value > combined_max: @@ -179,28 +175,31 @@ def do_transformation(self): for node in quantized_conv_nodes: offset_value = 6 - if node.op == "_FusedQuantizedConv2D" and \ - 'fused_ops' in node.attr and \ - str(node.attr['fused_ops'].list.s) in self.offset_map_new_api: - offset_value = self.offset_map_new_api[str(node.attr['fused_ops'].list.s)] + if ( + node.op == "_FusedQuantizedConv2D" + and "fused_ops" in node.attr + and str(node.attr["fused_ops"].list.s) in self.offset_map_new_api + ): + offset_value = self.offset_map_new_api[str(node.attr["fused_ops"].list.s)] elif self.performance_only and node.op == "QuantizeV2": offset_value = 1 else: offset_value = self.offset_map[node.op] min_value_node = self.node_mapping[node.input[offset_value]] - max_value_node = self.node_mapping[node.input[offset_value + - 1]] + max_value_node = self.node_mapping[node.input[offset_value + 1]] min_value_node.attr["value"].CopyFrom( attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - float(combined_min), dtypes.float32, []))) + tensor=tensor_util.make_tensor_proto(float(combined_min), dtypes.float32, []) + ) + ) max_value_node.attr["value"].CopyFrom( attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - float(combined_max), dtypes.float32, []))) - if self.device == 'cpu': + tensor=tensor_util.make_tensor_proto(float(combined_max), dtypes.float32, []) + ) + ) + if self.device == "cpu": self._update_bias() return self.input_graph @@ -209,105 +208,106 @@ def _update_bias(self): for node_name in self.node_mapping: current_node = self.node_mapping[node_name] current_node_op = current_node.op - if (current_node_op in self.fused_requantized_bias_op) or \ - (current_node_op == "_FusedQuantizedConv2D" and \ - 'fused_ops' in current_node.attr and \ - current_node.attr['fused_ops'].list.s in self.fuse_requantized_bias_op_new_api): + if (current_node_op in 
self.fused_requantized_bias_op) or ( + current_node_op == "_FusedQuantizedConv2D" + and "fused_ops" in current_node.attr + and current_node.attr["fused_ops"].list.s in self.fuse_requantized_bias_op_new_api + ): done = False another_conv_node = None original_conv_node = current_node while not done: - current_node = self.node_mapping[ - self.get_node_name_from_input(current_node.input[0])] + current_node = self.node_mapping[self.get_node_name_from_input(current_node.input[0])] if current_node.op in self.offset_map: another_conv_node = current_node done = True - elif current_node.op == "_FusedQuantizedConv2D" and \ - 'fused_ops' in current_node.attr and \ - str(current_node.attr['fused_ops'].list.s) in self.offset_map_new_api: + elif ( + current_node.op == "_FusedQuantizedConv2D" + and "fused_ops" in current_node.attr + and str(current_node.attr["fused_ops"].list.s) in self.offset_map_new_api + ): another_conv_node = current_node done = True elif current_node.op == "QuantizedConcatV2": if current_node.name not in self.rerange_concat_node: done = True - elif current_node.op not in ("QuantizedMaxPool", - "QuantizedAvgPool"): + elif current_node.op not in ("QuantizedMaxPool", "QuantizedAvgPool"): done = True if not another_conv_node: continue - bias_node = self.node_mapping[self.get_node_name_from_input( - original_conv_node.input[2])] - bias_node_type = original_conv_node.attr['Tbias'] + bias_node = self.node_mapping[self.get_node_name_from_input(original_conv_node.input[2])] + bias_node_type = original_conv_node.attr["Tbias"] if bias_node_type.type != dtypes.float32 or bias_node_type.type == dtypes.qint32: continue - sum_off_set = 0 + sum_off_set = 0 if original_conv_node.op == "_FusedQuantizedConv2D": - if str(original_conv_node.attr['fused_ops'].list.s) == \ - str([b'BiasAdd', b'Sum', b'Relu', b'Requantize']) \ - or str(original_conv_node.attr['fused_ops'].list.s) == \ - str([b'BiasAdd', b'Sum', b'Requantize']): + if str(original_conv_node.attr["fused_ops"].list.s) == str( + [b"BiasAdd", b"Sum", b"Relu", b"Requantize"] + ) or str(original_conv_node.attr["fused_ops"].list.s) == str([b"BiasAdd", b"Sum", b"Requantize"]): sum_off_set = 1 - #else: + # else: # print(str(original_conv_node.attr['fused_ops'].list.s)) - min_filter_node = self.node_mapping[original_conv_node.input[5+sum_off_set]] - max_filter_node = self.node_mapping[original_conv_node.input[6+sum_off_set]] + min_filter_node = self.node_mapping[original_conv_node.input[5 + sum_off_set]] + max_filter_node = self.node_mapping[original_conv_node.input[6 + sum_off_set]] - channel_size = 1 if not min_filter_node.attr[ - 'value'].tensor.tensor_shape.dim else min_filter_node.attr[ - 'value'].tensor.tensor_shape.dim[0].size + channel_size = ( + 1 + if not min_filter_node.attr["value"].tensor.tensor_shape.dim + else min_filter_node.attr["value"].tensor.tensor_shape.dim[0].size + ) if channel_size == 1: max_filter_tensor = [] min_filter_tensor = [] - max_filter_tensor.append( - (max_filter_node.attr['value'].tensor.float_val)[0]) - min_filter_tensor.append( - (min_filter_node.attr['value'].tensor.float_val)[0]) + max_filter_tensor.append((max_filter_node.attr["value"].tensor.float_val)[0]) + min_filter_tensor.append((min_filter_node.attr["value"].tensor.float_val)[0]) else: - max_filter_tensor = tensor_util.MakeNdarray( - max_filter_node.attr['value'].tensor) - min_filter_tensor = tensor_util.MakeNdarray( - min_filter_node.attr['value'].tensor) - + max_filter_tensor = tensor_util.MakeNdarray(max_filter_node.attr["value"].tensor) + 
min_filter_tensor = tensor_util.MakeNdarray(min_filter_node.attr["value"].tensor) + offset_value = 6 - if another_conv_node.op == "_FusedQuantizedConv2D" and \ - 'fused_ops' in another_conv_node.attr and \ - str(another_conv_node.attr['fused_ops'].list.s) in self.offset_map_new_api: - offset_value = self.offset_map_new_api[str(another_conv_node.attr['fused_ops'].list.s)] + if ( + another_conv_node.op == "_FusedQuantizedConv2D" + and "fused_ops" in another_conv_node.attr + and str(another_conv_node.attr["fused_ops"].list.s) in self.offset_map_new_api + ): + offset_value = self.offset_map_new_api[str(another_conv_node.attr["fused_ops"].list.s)] else: offset_value = self.offset_map[another_conv_node.op] - min_freezed_output_node = self.node_mapping[ - another_conv_node.input[offset_value]] - max_freezed_output_node = self.node_mapping[ - another_conv_node.input[offset_value + 1]] - min_input = min_freezed_output_node.attr['value'].tensor.float_val[0] - max_input = max_freezed_output_node.attr['value'].tensor.float_val[0] + min_freezed_output_node = self.node_mapping[another_conv_node.input[offset_value]] + max_freezed_output_node = self.node_mapping[another_conv_node.input[offset_value + 1]] + min_input = min_freezed_output_node.attr["value"].tensor.float_val[0] + max_input = max_freezed_output_node.attr["value"].tensor.float_val[0] # To avoid generating int32 bias exception for corner case if min_input == 0 and max_input == 0: continue - bias_tensor = (tensor_util.MakeNdarray(bias_node.attr['value'].tensor)) + bias_tensor = tensor_util.MakeNdarray(bias_node.attr["value"].tensor) - activation_range = 127.0 if current_node.attr['out_type'].type == dtypes.qint8 \ - else 255.0 + activation_range = 127.0 if current_node.attr["out_type"].type == dtypes.qint8 else 255.0 int32_bias = Helper.generate_int32_bias_for_conv( - bias_tensor, channel_size, max_input, min_input, - max_filter_tensor, min_filter_tensor, activation_range) + bias_tensor, + channel_size, + max_input, + min_input, + max_filter_tensor, + min_filter_tensor, + activation_range, + ) - original_conv_node.attr['Tbias'].CopyFrom( - attr_value_pb2.AttrValue(type=dtypes.qint32.as_datatype_enum)) - bias_node.attr['dtype'].CopyFrom( - attr_value_pb2.AttrValue(type=dtypes.qint32.as_datatype_enum)) + original_conv_node.attr["Tbias"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.qint32.as_datatype_enum)) + bias_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.qint32.as_datatype_enum)) - bias_node.attr['value'].CopyFrom( + bias_node.attr["value"].CopyFrom( attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - int32_bias, dtypes.int32, bias_tensor.shape))) - bias_node.attr['value'].tensor.dtype = dtypes.qint32.as_datatype_enum - if 'Thost_inputs' in original_conv_node.attr: - original_conv_node.attr['Thost_inputs'].list.type[2] = original_conv_node.attr['Tbias'].type + tensor=tensor_util.make_tensor_proto(int32_bias, dtypes.int32, bias_tensor.shape) + ) + ) + bias_node.attr["value"].tensor.dtype = dtypes.qint32.as_datatype_enum + if "Thost_inputs" in original_conv_node.attr: + original_conv_node.attr["Thost_inputs"].list.type[2] = original_conv_node.attr["Tbias"].type diff --git a/neural_compressor/adaptor/tf_utils/util.py b/neural_compressor/adaptor/tf_utils/util.py index 9d7145a6d01..06122d55fc6 100644 --- a/neural_compressor/adaptor/tf_utils/util.py +++ b/neural_compressor/adaptor/tf_utils/util.py @@ -17,53 +17,63 @@ # """Tensorflow Utils Helper functions.""" -from collections import OrderedDict, UserDict import 
os +from collections import OrderedDict, UserDict + import numpy as np -from google.protobuf import text_format import tensorflow as tf -from tensorflow.core.framework import graph_pb2 +from google.protobuf import text_format +from pkg_resources import parse_version +from tensorflow.core.framework import attr_value_pb2, graph_pb2, node_def_pb2 from tensorflow.python.platform import gfile -from tensorflow.core.framework import node_def_pb2 -from tensorflow.core.framework import attr_value_pb2 + from neural_compressor.utils import logger -from .graph_util import GraphAnalyzer -from .graph_util import GraphRewriterHelper -from pkg_resources import parse_version -TF_SPR_BASE_VERSIONS = ('2.11.0202242', '2.11.0202250', '2.11.0202317', '2.11.0202323') +from .graph_util import GraphAnalyzer, GraphRewriterHelper + +TF_SPR_BASE_VERSIONS = ("2.11.0202242", "2.11.0202250", "2.11.0202317", "2.11.0202323") + def version1_lt_version2(version1, version2): """Check if version1 is less than version2.""" return parse_version(version1) < parse_version(version2) + def version1_gt_version2(version1, version2): """Check if version1 is greater than version2.""" return parse_version(version1) > parse_version(version2) + def version1_eq_version2(version1, version2): """Check if version1 is equal to version2.""" return parse_version(version1) == parse_version(version2) + def version1_gte_version2(version1, version2): """Check if version1 is greater than or equal to version2.""" return parse_version(version1) > parse_version(version2) or parse_version(version1) == parse_version(version2) + def version1_lte_version2(version1, version2): """Check if version1 is less than or equal to version2.""" return parse_version(version1) < parse_version(version2) or parse_version(version1) == parse_version(version2) + def disable_random(seed=1): """A Decorator to disable tf random seed.""" + def decorator(func): def wrapper(*args, **kw): tf.compat.v1.disable_eager_execution() tf.compat.v1.reset_default_graph() tf.compat.v1.set_random_seed(seed) return func(*args, **kw) + return wrapper + return decorator + def read_graph(in_graph, in_graph_is_binary=True): """Reads input graph file as GraphDef. @@ -71,7 +81,7 @@ def read_graph(in_graph, in_graph_is_binary=True): :param in_graph_is_binary: whether input graph is binary, default True. :return: input graphDef. """ - assert gfile.Exists(in_graph), 'Input graph pb file %s does not exist.' % in_graph + assert gfile.Exists(in_graph), "Input graph pb file %s does not exist." % in_graph input_graph_def = graph_pb2.GraphDef() mode = "rb" if in_graph_is_binary else "r" @@ -84,6 +94,7 @@ def read_graph(in_graph, in_graph_is_binary=True): return input_graph_def + def write_graph(out_graph_def, out_graph_file): """Write output graphDef to file. @@ -91,14 +102,13 @@ def write_graph(out_graph_def, out_graph_file): :param out_graph_file: path to output graph file. :return: None. """ - assert isinstance( - out_graph_def, - tf.compat.v1.GraphDef), 'out_graph_def is not instance of TensorFlow GraphDef.' + assert isinstance(out_graph_def, tf.compat.v1.GraphDef), "out_graph_def is not instance of TensorFlow GraphDef." - assert out_graph_file and os.path.exists(os.path.dirname( - out_graph_file)), '"output_graph" directory does not exists.' + assert out_graph_file and os.path.exists( + os.path.dirname(out_graph_file) + ), '"output_graph" directory does not exists.' 
- f = gfile.GFile(out_graph_file, 'wb') + f = gfile.GFile(out_graph_file, "wb") f.write(out_graph_def.SerializeToString()) @@ -112,10 +122,11 @@ def is_ckpt_format(model_path): string: return the ckpt prefix if the model_path contains ckpt format data else None. """ file_list = [os.path.splitext(i)[-1] for i in os.listdir(model_path)] - if file_list.count('.meta') == 1 and file_list.count('.index') == 1: + if file_list.count(".meta") == 1 and file_list.count(".index") == 1: return True return False + def _parse_ckpt_bn_input(graph_def): """Parse ckpt batch norm inputs to match correct moving mean and variance. @@ -125,28 +136,29 @@ def _parse_ckpt_bn_input(graph_def): graph_def: well linked graph_def """ for node in graph_def.node: - if node.op == 'FusedBatchNorm': + if node.op == "FusedBatchNorm": moving_mean_op_name = node.input[3] moving_var_op_name = node.input[4] moving_mean_op = _get_nodes_from_name(moving_mean_op_name, graph_def)[0] moving_var_op = _get_nodes_from_name(moving_var_op_name, graph_def)[0] - if moving_mean_op.op == 'Const': - name_part = moving_mean_op_name.rsplit('/', 1)[0] - real_moving_mean_op_name = name_part + '/moving_mean' + if moving_mean_op.op == "Const": + name_part = moving_mean_op_name.rsplit("/", 1)[0] + real_moving_mean_op_name = name_part + "/moving_mean" if len(_get_nodes_from_name(real_moving_mean_op_name, graph_def)) > 0: # replace the real moving mean op name node.input[3] = real_moving_mean_op_name - if moving_var_op.op == 'Const': - name_part = moving_var_op_name.rsplit('/', 1)[0] - real_moving_var_op_name = name_part + '/moving_variance' + if moving_var_op.op == "Const": + name_part = moving_var_op_name.rsplit("/", 1)[0] + real_moving_var_op_name = name_part + "/moving_variance" if len(_get_nodes_from_name(real_moving_var_op_name, graph_def)) > 0: # replace the real moving mean op name node.input[4] = real_moving_var_op_name return graph_def + def _get_nodes_from_name(node_name, graph_def): """Get nodes from graph_def using node name. @@ -159,6 +171,7 @@ def _get_nodes_from_name(node_name, graph_def): """ return [node for node in graph_def.node if node.name == node_name] + def is_saved_model_format(model_path): """Check the model_path format is saved_model or not. @@ -170,7 +183,8 @@ def is_saved_model_format(model_path): """ file_list = [os.path.splitext(i)[-1] for i in os.listdir(model_path)] # TF 2.11.0 added a new fingerprint.pb to the SavedModel directory. - return bool(file_list.count('.pb') in [1, 2, 3] and ('variables') in os.listdir(model_path)) + return bool(file_list.count(".pb") in [1, 2, 3] and ("variables") in os.listdir(model_path)) + def get_estimator_graph(estimator, input_fn): """Get the graph of the estimator. 
@@ -183,14 +197,14 @@ def get_estimator_graph(estimator, input_fn): graph """ with tf.Graph().as_default() as g: - features, input_hooks = estimator._get_features_from_input_fn( - input_fn, tf.estimator.ModeKeys.PREDICT) - estimator_spec = estimator._call_model_fn(features, None, - tf.estimator.ModeKeys.PREDICT, estimator.config) - - outputs = [tensor.name for tensor in estimator_spec.predictions.values()] if\ - isinstance(estimator_spec.predictions, dict) else \ - [estimator_spec.predictions.name] + features, input_hooks = estimator._get_features_from_input_fn(input_fn, tf.estimator.ModeKeys.PREDICT) + estimator_spec = estimator._call_model_fn(features, None, tf.estimator.ModeKeys.PREDICT, estimator.config) + + outputs = ( + [tensor.name for tensor in estimator_spec.predictions.values()] + if isinstance(estimator_spec.predictions, dict) + else [estimator_spec.predictions.name] + ) logger.info("Estimator output tensor names is {}.".format(outputs)) with tf.compat.v1.Session(graph=g) as sess: sess.run(tf.compat.v1.global_variables_initializer()) @@ -199,18 +213,18 @@ def get_estimator_graph(estimator, input_fn): # dictionary # When a model uses Iterator, we need to have 'MakeIterator' (default # name used by TF) in the output_node_names as well. - output_nodes = list(set([output.split(':')[0] for output in outputs])) - if 'MakeIterator' in [node.op for node in g.as_graph_def().node]: - output_nodes.append('MakeIterator') + output_nodes = list(set([output.split(":")[0] for output in outputs])) + if "MakeIterator" in [node.op for node in g.as_graph_def().node]: + output_nodes.append("MakeIterator") - graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(sess, - g.as_graph_def(), output_nodes) + graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(sess, g.as_graph_def(), output_nodes) graph = tf.Graph() with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") return graph + def get_tensor_by_name(graph, name, try_cnt=3): """Get the tensor by name. @@ -225,14 +239,15 @@ def get_tensor_by_name(graph, name, try_cnt=3): Returns: tensor: tensor got by name. """ - if name.find(':') == -1: - name = name + ':0' + if name.find(":") == -1: + name = name + ":0" for _ in range(try_cnt): try: return graph.get_tensor_by_name(name) except BaseException: - name = 'import/' + name - raise ValueError('can not find tensor by name') + name = "import/" + name + raise ValueError("can not find tensor by name") + def iterator_sess_run(sess, iter_op, feed_dict, output_tensor, iteration=-1, measurer=None): """Run the graph that have iterator integrated in the graph. 
@@ -250,7 +265,7 @@ def iterator_sess_run(sess, iter_op, feed_dict, output_tensor, iteration=-1, mea sess.run(iter_op, feed_dict) preds = [] idx = 0 - while idx+1 != iteration: + while idx + 1 != iteration: try: if measurer: measurer.start() @@ -266,6 +281,7 @@ def iterator_sess_run(sess, iter_op, feed_dict, output_tensor, iteration=-1, mea preds = collate_tf_preds(preds) return preds + def collate_tf_preds(results): """Collate tbe prediction results.""" batch = results[0] @@ -282,6 +298,7 @@ def collate_tf_preds(results): return collate_results + def get_input_output_node_names(graph_def): """Get the input node name and output node name of the graph_def.""" g = GraphAnalyzer() @@ -289,29 +306,30 @@ def get_input_output_node_names(graph_def): g.parse_graph() return g.get_graph_input_output() + def fix_ref_type_of_graph_def(graph_def): """Fix ref type of the graph_def.""" # according to https://github.com/onnx/tensorflow-onnx/issues/77 for node in graph_def.node: - if node.op == 'RefSwitch': - node.op = 'Switch' + if node.op == "RefSwitch": + node.op = "Switch" for index in range(len(node.input)): - if 'moving_' in node.input[index]: - node.input[index] = node.input[index] + '/read' - elif node.op == 'AssignSub': - node.op = 'Sub' - if 'use_locking' in node.attr: - del node.attr['use_locking'] - elif node.op == 'AssignAdd': - node.op = 'Add' - if 'use_locking' in node.attr: - del node.attr['use_locking'] - elif node.op == 'Assign': - node.op = 'Identity' - if 'use_locking' in node.attr: - del node.attr['use_locking'] - if 'validate_shape' in node.attr: - del node.attr['validate_shape'] + if "moving_" in node.input[index]: + node.input[index] = node.input[index] + "/read" + elif node.op == "AssignSub": + node.op = "Sub" + if "use_locking" in node.attr: + del node.attr["use_locking"] + elif node.op == "AssignAdd": + node.op = "Add" + if "use_locking" in node.attr: + del node.attr["use_locking"] + elif node.op == "Assign": + node.op = "Identity" + if "use_locking" in node.attr: + del node.attr["use_locking"] + if "validate_shape" in node.attr: + del node.attr["validate_shape"] if len(node.input) == 2: # input0: ref: Should be from a Variable node. May be uninitialized. # input1: value: The value to be assigned to the variable. @@ -319,6 +337,7 @@ def fix_ref_type_of_graph_def(graph_def): del node.input[1] return graph_def + def strip_unused_nodes(graph_def, input_node_names, output_node_names): """Strip unused nodes of the graph_def. 
@@ -330,15 +349,15 @@ def strip_unused_nodes(graph_def, input_node_names, output_node_names): graph_info = cur_graph.parse_graph() type_attr = {"Sub": "T", "RealDiv": "T", "Identity": "T"} # this op should not be stripped for table initialization - if 'init_all_tables' in graph_info.keys(): - output_node_names.append('init_all_tables') + if "init_all_tables" in graph_info.keys(): + output_node_names.append("init_all_tables") not_found = {name for name in input_node_names} for node_name in list(graph_info.keys()): if node_name in not_found: not_found.remove(node_name) node = graph_info[node_name].node # skip the convertion to Placeholder that with type list - if 'component_types' in node.attr: + if "component_types" in node.attr: continue original_output = graph_info[node_name].outputs placeholder_node = node_def_pb2.NodeDef() @@ -346,14 +365,13 @@ def strip_unused_nodes(graph_def, input_node_names, output_node_names): placeholder_node.name = node.name if "dtype" in node.attr: - placeholder_node.attr["dtype"].CopyFrom( - attr_value_pb2.AttrValue(type=node.attr["dtype"].type)) + placeholder_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=node.attr["dtype"].type)) elif node.op in type_attr.keys(): placeholder_node.attr["dtype"].CopyFrom( - attr_value_pb2.AttrValue(type=node.attr[type_attr[node.op]].type)) + attr_value_pb2.AttrValue(type=node.attr[type_attr[node.op]].type) + ) else: - raise KeyError("%s op's type attribute is not found," - "you should add it to type_attr dict" % node.op) + raise KeyError("%s op's type attribute is not found," "you should add it to type_attr dict" % node.op) if "_output_shapes" in node.attr: placeholder_node.attr["_output_shapes"].CopyFrom(node.attr["_output_shapes"]) if "shape" in node.attr: @@ -363,23 +381,22 @@ def strip_unused_nodes(graph_def, input_node_names, output_node_names): cur_graph.replace_const_node(placeholder_node, [node_name], original_output) - return tf.compat.v1.graph_util.extract_sub_graph(cur_graph.dump_graph(), - output_node_names) + return tf.compat.v1.graph_util.extract_sub_graph(cur_graph.dump_graph(), output_node_names) + def strip_equivalent_nodes(graph_def, output_node_names): """Strip nodes with the same input and attr.""" stripped_graph = GraphAnalyzer() stripped_graph.graph = graph_def stripped_graph_info = stripped_graph.parse_graph() + def is_equivalent_input(input_tensor_list_1, input_tensor_list_2): if len(input_tensor_list_1) != len(input_tensor_list_2): return False const_num = 0 for input_tensor_1, input_tensor_2 in zip(input_tensor_list_1, input_tensor_list_2): - input_node_1 = \ - stripped_graph_info[GraphRewriterHelper.node_name_from_input(input_tensor_1)].node - input_node_2 = \ - stripped_graph_info[GraphRewriterHelper.node_name_from_input(input_tensor_2)].node + input_node_1 = stripped_graph_info[GraphRewriterHelper.node_name_from_input(input_tensor_1)].node + input_node_2 = stripped_graph_info[GraphRewriterHelper.node_name_from_input(input_tensor_2)].node if input_node_1.op in ["Const", "HostConst"] and input_node_2.op in ["Const", "HostConst"]: if input_node_1.attr != input_node_2.attr: return False @@ -394,60 +411,75 @@ def is_equivalent_input(input_tensor_list_1, input_tensor_list_2): replaced_nodes_type = {} stripped_graph_node_names = list(stripped_graph_info.keys()) len_nodes = len(stripped_graph_node_names) - for idx_1 in range(len_nodes-1): + for idx_1 in range(len_nodes - 1): node_name_1 = stripped_graph_node_names[idx_1] node_1 = stripped_graph_info[node_name_1].node - if node_1.op in ["Const", 
"HostConst", "MatMul", "TensorArrayV3"] \ - or node_name_1 in nodes_to_remove: + if node_1.op in ["Const", "HostConst", "MatMul", "TensorArrayV3"] or node_name_1 in nodes_to_remove: continue - for idx_2 in range(idx_1+1, len_nodes): + for idx_2 in range(idx_1 + 1, len_nodes): node_name_2 = stripped_graph_node_names[idx_2] node_2 = stripped_graph_info[node_name_2].node - if node_1.op == node_2.op \ - and node_name_1 != node_name_2 \ - and node_2 not in nodes_to_remove \ - and node_1.input \ - and is_equivalent_input(node_1.input, node_2.input) \ - and node_1.attr == node_2.attr: + if ( + node_1.op == node_2.op + and node_name_1 != node_name_2 + and node_2 not in nodes_to_remove + and node_1.input + and is_equivalent_input(node_1.input, node_2.input) + and node_1.attr == node_2.attr + ): for ouput_node_name in stripped_graph_info[node_name_2].outputs: output_node = stripped_graph_info[ouput_node_name].node for idx_output_node_input, output_node_input_name in enumerate(output_node.input): - if GraphRewriterHelper.node_name_from_input(output_node_input_name) == \ - node_name_2: + if GraphRewriterHelper.node_name_from_input(output_node_input_name) == node_name_2: new_input = output_node_input_name.replace(node_name_2, node_name_1) output_node.input[idx_output_node_input] = new_input - logger.debug("Replacing {} node '{}' with equivalent node '{}': " \ - "set {} node '{}'.input[{}] = '{}'" \ - .format(node_1.op, node_name_2, node_name_1, output_node.op, - output_node.name, idx_output_node_input, new_input)) + logger.debug( + "Replacing {} node '{}' with equivalent node '{}': " + "set {} node '{}'.input[{}] = '{}'".format( + node_1.op, + node_name_2, + node_name_1, + output_node.op, + output_node.name, + idx_output_node_input, + new_input, + ) + ) replaced_nodes_type[node_1.op] = replaced_nodes_type.get(node_1.op, 0) + 1 nodes_to_remove.append(node_name_2) for node_to_remove in nodes_to_remove: stripped_graph.remove_node(node_to_remove) - return tf.compat.v1.graph_util.extract_sub_graph \ - (stripped_graph.dump_graph(), list(set(stripped_graph_node_names).intersection(output_node_names))), \ - replaced_nodes_type + return ( + tf.compat.v1.graph_util.extract_sub_graph( + stripped_graph.dump_graph(), list(set(stripped_graph_node_names).intersection(output_node_names)) + ), + replaced_nodes_type, + ) + # THIS API IS TO BE DEPRECATED! def get_graph_def(model, outputs=[], auto_input_output=False): """Get the model's graph_def.""" from neural_compressor.model import Model as NCModel + if not isinstance(model, NCModel): model = NCModel(model) model.output_tensor_names = outputs return model.graph_def + def get_model_input_shape(model): """Get the inout shape of the input model.""" for node in model.graph_def.node: - if node.op == 'Placeholder': - _shape = list(tf.compat.v1.TensorShape(node.attr['shape'].shape)) - if tf.__version__ < '2.0.0': + if node.op == "Placeholder": + _shape = list(tf.compat.v1.TensorShape(node.attr["shape"].shape)) + if tf.__version__ < "2.0.0": _shape = [item.value for item in _shape] if len(_shape) > 1 and isinstance(_shape[0], int): return _shape[0] return 1 + def get_tensor_val_from_graph_node(graph_node_name_mapping, node_name): """Get the tensor value for given node name. 
@@ -457,35 +489,39 @@ def get_tensor_val_from_graph_node(graph_node_name_mapping, node_name): Returns: tensor_val: numpy array - """ from tensorflow.python.framework import tensor_util + node = graph_node_name_mapping[node_name] - node_tensor = node.attr['value'].tensor + node_tensor = node.attr["value"].tensor tensor_val = tensor_util.MakeNdarray(node_tensor) return tensor_val + def int8_node_name_reverse(node): """Reverse int8 node name.""" - int8_postfix = '_eightbit' + int8_postfix = "_eightbit" node_name = node.name - if 'Quantized' in node.op: + if "Quantized" in node.op: index_postfix = node_name.find(int8_postfix) if index_postfix != -1: node_name = node_name[:index_postfix] return node_name + def tf_diagnosis_helper(fp32_model, quan_model, tune_cfg, save_path): """Tensorflow diagnosis helper function.""" - from ...utils.utility import dump_data_to_local import tensorflow as tf + + from ...utils.utility import dump_data_to_local + fp32_node_mapping = {} qnode_mapping = {} for node in fp32_model.graph_def.node: fp32_node_mapping[node.name] = node for node in quan_model.graph_def.node: qnode_mapping[node.name] = node - supported_op_lst = set(['Conv2D', 'MatMul', 'ConcatV2', 'MaxPool', 'AvgPool', 'DepthwiseConv2dNative']) + supported_op_lst = set(["Conv2D", "MatMul", "ConcatV2", "MaxPool", "AvgPool", "DepthwiseConv2dNative"]) fp32_node_lst = set() for node in fp32_model.graph_def.node: if node.op in supported_op_lst: @@ -495,39 +531,40 @@ def tf_diagnosis_helper(fp32_model, quan_model, tune_cfg, save_path): for node in quan_model.graph_def.node: node_name = node.name node_name = int8_node_name_reverse(node) - if 'Quantized' in node.op: + if "Quantized" in node.op: int8_node_lst.add(node_name) - elif node.attr['value'].tensor.dtype == tf.dtypes.bfloat16.as_datatype_enum: # pragma: no cover + elif node.attr["value"].tensor.dtype == tf.dtypes.bfloat16.as_datatype_enum: # pragma: no cover bf16_node_lst.add(node.name) else: continue inspect_node_lst = fp32_node_lst.intersection(bf16_node_lst.union(int8_node_lst)) activation_min_max, updated_cfg = _parse_config(quan_model.q_config, tune_cfg, inspect_node_lst) - dump_data_to_local(activation_min_max, save_path, 'activation_min_max.pkl') - dump_data_to_local(updated_cfg, save_path, 'cfg.pkl') + dump_data_to_local(activation_min_max, save_path, "activation_min_max.pkl") + dump_data_to_local(updated_cfg, save_path, "cfg.pkl") return inspect_node_lst, updated_cfg + def _parse_config(q_config, cfg, op_list): """Parse q_config and get dequantize min max value.""" activation_min_max = {} - if '__requant_min_max' in q_config: - for node_name, val in q_config['__requant_min_max'].items(): - node_name = node_name.split('_eightbit_requant_range')[0] + if "__requant_min_max" in q_config: + for node_name, val in q_config["__requant_min_max"].items(): + node_name = node_name.split("_eightbit_requant_range")[0] if node_name in op_list: - activation_min_max[node_name] = {'min': val[0], 'max': val[1]} - updated_cfg = {'op' : {}} - for op_name_and_type in cfg['op'].keys(): + activation_min_max[node_name] = {"min": val[0], "max": val[1]} + updated_cfg = {"op": {}} + for op_name_and_type in cfg["op"].keys(): if op_name_and_type[0] in op_list: - updated_cfg['op'][op_name_and_type] = cfg['op'][op_name_and_type] + updated_cfg["op"][op_name_and_type] = cfg["op"][op_name_and_type] return activation_min_max, updated_cfg + def generate_feed_dict(input_tensor, inputs): """Generate feed dict helper function.""" if len(input_tensor) == 1: feed_dict = {} - if 
isinstance(inputs, dict) or isinstance(inputs, OrderedDict) \ - or isinstance(inputs, UserDict): + if isinstance(inputs, dict) or isinstance(inputs, OrderedDict) or isinstance(inputs, UserDict): for name in inputs: for tensor in input_tensor: pos = tensor.name.rfind(":") @@ -538,11 +575,9 @@ def generate_feed_dict(input_tensor, inputs): else: feed_dict = {input_tensor[0]: inputs} # get raw tensor using index [0] else: - assert len(input_tensor) == len(inputs), \ - 'inputs len must equal with input_tensor' + assert len(input_tensor) == len(inputs), "inputs len must equal with input_tensor" feed_dict = {} - if isinstance(inputs, dict) or isinstance(inputs, OrderedDict) \ - or isinstance(inputs, UserDict): + if isinstance(inputs, dict) or isinstance(inputs, OrderedDict) or isinstance(inputs, UserDict): for name in inputs: for tensor in input_tensor: pos = tensor.name.rfind(":") @@ -555,9 +590,7 @@ def generate_feed_dict(input_tensor, inputs): # we should check and pair them def check_shape(tensor, data): # scalar or 1 dim default True - if tensor.shape == None or \ - len(tensor.shape.dims) == 1 or \ - not hasattr(data, 'shape'): + if tensor.shape is None or len(tensor.shape.dims) == 1 or not hasattr(data, "shape"): return True tensor_shape = tuple(tensor.shape) data_shape = tuple(data.shape) @@ -567,21 +600,22 @@ def check_shape(tensor, data): return True disorder_tensors = [] - disorder_inputs = [] + disorder_inputs = [] for idx, sort_tensor in enumerate(input_tensor): - sort_input = inputs[idx] + sort_input = inputs[idx] if check_shape(sort_tensor, sort_input): - feed_dict.update({sort_tensor: sort_input}) + feed_dict.update({sort_tensor: sort_input}) else: disorder_tensors.append(sort_tensor) disorder_inputs.append(sort_input) for i, dis_tensor in enumerate(disorder_tensors): - for j, dis_input in enumerate(disorder_inputs): + for j, dis_input in enumerate(disorder_inputs): if check_shape(dis_tensor, dis_input): - feed_dict.update({dis_tensor: dis_input}) + feed_dict.update({dis_tensor: dis_input}) break return feed_dict + def get_weight_from_input_tensor(model, input_tensor_names, op_types): """Extracts weight tensors and their associated nodes from a smooth quant node's input tensor. @@ -605,6 +639,7 @@ def get_weight_from_input_tensor(model, input_tensor_names, op_types): sq_weights_nodes = {} from tensorflow.python.framework import tensor_util + for name in input_tensor_names: # Use dict rather than list to fix the QKV/VQK misorder issue curr_weight_tensors = {} diff --git a/neural_compressor/adaptor/torch_utils/awq.py b/neural_compressor/adaptor/torch_utils/awq.py index a59f461c623..26089981419 100644 --- a/neural_compressor/adaptor/torch_utils/awq.py +++ b/neural_compressor/adaptor/torch_utils/awq.py @@ -1,21 +1,38 @@ -import torch +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
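# Overview (illustrative): AWQ collects the inputs of each transformer block's Linear
# layers, grid-searches a per-channel scale that trades off activation and weight
# magnitudes, optionally searches a clip quantile, and quantizes the scaled weights.
# A minimal, assumed usage with a calibration dataloader:
#     awq = ActAwareWeightQuant(model, dataloader=calib_dataloader, bits=4, group_size=32)
#     q_model = awq.quantize(auto_scale=True, mse_range=True)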
+ import copy +from functools import partial + +import torch + from neural_compressor.adaptor.torch_utils.util import ( - fetch_module, - get_example_input, + fetch_module, get_absorb_layers, - get_module_input_output, + get_block_prefix, + get_example_input, get_hidden_states, - get_block_prefix + get_module_input_output, ) -from .model_wrapper import MulLinear + from ...utils import logger +from .model_wrapper import MulLinear from .smooth_quant import model_forward, set_module -from functools import partial def _get_absorb_per_block(model, example_inputs, folding=False, weight_config={}): - """Get absorbed layer per block. + """Get absorbed layer per block. Args: model (torch.nn.Module): input model @@ -24,23 +41,20 @@ def _get_absorb_per_block(model, example_inputs, folding=False, weight_config={} Returns: block_absorb_dict: dict of absorbed layer per block. eg. {0, [[absorbed_1, xx], [xx]], ...} """ - block_absorb_dict = {} # record absorbed layer per block - absorb_layer_dict = {} # record absorb layers for absorbed layers + block_absorb_dict = {} # record absorbed layer per block + absorb_layer_dict = {} # record absorb layers for absorbed layers absorb_to_layer, no_absorb_layers = get_absorb_layers( - model, example_inputs, - supported_layers=['Linear'], folding=False + model, example_inputs, supported_layers=["Linear"], folding=False ) logger.debug(f"The no absorb layers: {no_absorb_layers}") # skip ops when algorithm is not AWQ skip_op_set = set() for k, v in absorb_to_layer.items(): for vv in v: - if vv in weight_config and (weight_config[vv]['algorithm'] != 'AWQ' or \ - weight_config[vv]['bits'] == -1): + if vv in weight_config and (weight_config[vv]["algorithm"] != "AWQ" or weight_config[vv]["bits"] == -1): skip_op_set.add(k) for k in no_absorb_layers: - if k in weight_config and (weight_config[k]['algorithm'] != 'AWQ' or \ - weight_config[k]['bits'] == -1): + if k in weight_config and (weight_config[k]["algorithm"] != "AWQ" or weight_config[k]["bits"] == -1): skip_op_set.add(k) for k in skip_op_set: if k in absorb_to_layer: @@ -53,9 +67,9 @@ def _get_absorb_per_block(model, example_inputs, folding=False, weight_config={} block_prefix, block_num = get_block_prefix(model) for i in range(block_num): block_absorb_dict[i] = [] - block_name = block_prefix + '.' + str(i) + '.' + block_name = block_prefix + "." + str(i) + "." for k, v in absorb_to_layer.items(): - name_list =tuple(vv for vv in v if block_name in vv) + name_list = tuple(vv for vv in v if block_name in vv) if len(name_list) > 0: block_absorb_dict[i].append(name_list) absorb_layer_dict[name_list] = k @@ -90,14 +104,26 @@ def _get_act_scale(input_val): class ActAwareWeightQuant: """Implementation of Activation-aware Weight quantization (AWQ) algo.""" - def __init__(self, model, example_inputs=None, calib_func=None, dataloader=None, n_samples=128, - data_type='int', bits=4, group_size=32, scheme='asym', sym_full_range=False, - weight_config={},): + + def __init__( + self, + model, + example_inputs=None, + calib_func=None, + dataloader=None, + n_samples=128, + data_type="int", + bits=4, + group_size=32, + scheme="asym", + sym_full_range=False, + weight_config={}, + ): self.example_inputs = example_inputs if example_inputs is None: assert dataloader is not None, "datalaoder or example_inputs is required." self.example_inputs = get_example_input(dataloader) - # Step 1: get hidden states and kwargs of first block. + # Step 1: get hidden states and kwargs of first block. 
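        # The captured positional args / kwargs of the first block are replayed on each
        # block later (see block_calibration in quantize), so scale and clip search only
        # needs block-level inputs rather than full-model activations.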
self.total_block_args, self.total_block_kwargs = get_hidden_states( model, dataloader=dataloader, n_samples=n_samples, calib_func=calib_func ) @@ -118,9 +144,9 @@ def quantize(self, auto_scale=True, mse_range=True, folding=False, return_int=Fa Args: auto_scale (bool, optional): whether search scale. Defaults to True. mse_range (bool, optional): whether search clip range. Defaults to True. - folding (bool, optional): whether only allow update scale when it can be fold + folding (bool, optional): whether only allow update scale when it can be fold to upper layer. Defaults to False. - return_int (bool, optional): whether return int dtype with WeightOnlyLinear. + return_int (bool, optional): whether return int dtype with WeightOnlyLinear. Defaults to False. Returns: @@ -130,29 +156,32 @@ def quantize(self, auto_scale=True, mse_range=True, folding=False, return_int=Fa # block_absorb_dict is split per block, includes all absorb relationship. # absorb_layer_dict is the inverse of block_absorb_dict for all blocks self.block_absorb_dict, self.absorb_layer_dict = _get_absorb_per_block( - self.model, self.example_inputs, + self.model, + self.example_inputs, # for only mse_range, folding is useless. - folding = folding if auto_scale else False, + folding=folding if auto_scale else False, weight_config=self.weight_config, ) # process per block for i, module_list in self.block_absorb_dict.items(): logger.info(f"Processing block: {i+1}/{self.block_num}") if len(module_list) == 0: - logger.info(f"No need to process this block.") + logger.info("No need to process this block.") continue # Step 1: fetch all input values of each linear for scale calculation # use the first linear for QKV tuple - block_name = self.block_prefix + '.' + str(i) + block_name = self.block_prefix + "." + str(i) block = fetch_module(self.model, block_name) - module_hook_config = { - v[0].split(block_name + '.')[1]: ['input'] for v in module_list - } + module_hook_config = {v[0].split(block_name + ".")[1]: ["input"] for v in module_list} + def block_calibration(model): for args, kwargs in zip(self.total_block_args, self.total_block_kwargs): model(*args, **kwargs) + input_values = get_module_input_output( - block, module_hook_config, calib_func=block_calibration, + block, + module_hook_config, + calib_func=block_calibration, ) # Step 3: search best scale for linears in one block and apply it if auto_scale: @@ -177,7 +206,7 @@ def search_scale(self, block, block_name, module_list, input_values): Args: block (torch.nn.Module): a block of model block_name (str): the block name in model. - module_list (dict): contains all linear tuple in current block, + module_list (dict): contains all linear tuple in current block, linears in the same tuple shares scale. input_values (dict): contains all input values of linears in current block @@ -185,30 +214,33 @@ def search_scale(self, block, block_name, module_list, input_values): scale_info: a dict that contains input scales of linears in current block """ from .weight_only import quant_weight + scale_info = {} logger.info("Searching best scales with AWQ algorithm") for module_tuple in module_list: # Step 1: Initailize quantization configuration. 
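                # Per-module settings in weight_config override the global defaults
                # (data_type / bits / group_size / scheme) passed to __init__;
                # bits < 0 marks a module that is skipped.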
if module_tuple[0] in self.weight_config: - cur_dtype = self.weight_config[module_tuple[0]]['dtype'] - cur_bits = self.weight_config[module_tuple[0]]['bits'] - cur_group_size = self.weight_config[module_tuple[0]]['group_size'] - cur_scheme = self.weight_config[module_tuple[0]]['scheme'] + cur_dtype = self.weight_config[module_tuple[0]]["dtype"] + cur_bits = self.weight_config[module_tuple[0]]["bits"] + cur_group_size = self.weight_config[module_tuple[0]]["group_size"] + cur_scheme = self.weight_config[module_tuple[0]]["scheme"] else: - cur_dtype, cur_bits, cur_group_size, cur_scheme = \ - self.data_type, self.bits, self.group_size, self.scheme + cur_dtype, cur_bits, cur_group_size, cur_scheme = ( + self.data_type, + self.bits, + self.group_size, + self.scheme, + ) if cur_bits < 0: continue logger.info(f"[SCALE] Processing module: {module_tuple}") # Step 2: update module name in block - module_name_list = [i.split(block_name + '.')[1] for i in module_tuple] + module_name_list = [i.split(block_name + ".")[1] for i in module_tuple] # Step 3: collect w_max and x_max for scale calculation. - weight = torch.cat( - [fetch_module(block, _m).weight for _m in module_name_list], dim=0 - ) + weight = torch.cat([fetch_module(block, _m).weight for _m in module_name_list], dim=0) w_max = _get_weight_scale(weight, q_group_size=cur_group_size) del weight - input_val = input_values[module_name_list[0]]['input'] + input_val = input_values[module_name_list[0]]["input"] x_max = _get_act_scale(input_val) absorbed_modules = {_m: fetch_module(block, _m) for _m in module_name_list} # Step 4: collect origin output for MSE and state_dict for recover. @@ -220,7 +252,7 @@ def search_scale(self, block, block_name, module_list, input_values): module = absorbed_modules[module_name_list[0]] org_out = self.module_inference(module, input_val) # Step 5: collect origin output for MSE and state_dict for recover. - best_error = float('inf') + best_error = float("inf") best_scales = None best_scale_alpha = None n_grid = 20 @@ -228,16 +260,15 @@ def search_scale(self, block, block_name, module_list, input_values): # Step 6: set different alpha for scale and compare the MSE loss. for ratio in range(n_grid): ratio = ratio * 1 / n_grid - scales = (x_max.pow(ratio) / w_max.pow(1-ratio) - ).clamp(min=1e-4).view(-1) + scales = (x_max.pow(ratio) / w_max.pow(1 - ratio)).clamp(min=1e-4).view(-1) scales = scales / (scales.max() * scales.min()).sqrt() for name, module in absorbed_modules.items(): module.weight.data = module.weight.data.mul(scales.view(1, -1)) module.weight.data = quant_weight( module.weight.data, data_type=cur_dtype, - num_bits=cur_bits, - group_size=cur_group_size, + num_bits=cur_bits, + group_size=cur_group_size, scheme=cur_scheme, full_range=self.sym_full_range, ) / scales.view(1, -1) @@ -292,7 +323,7 @@ def apply_scale(self, scale_info): else: absorb_module.weight.div_(scale.view(-1, 1)) # hasattr is for LlamaRMSNorm - if hasattr(absorb_module, 'bias') and absorb_module.bias is not None: + if hasattr(absorb_module, "bias") and absorb_module.bias is not None: absorb_module.bias.div_(scale.view(-1)) for name in module_tuple: absorbed_module = fetch_module(self.model, name) @@ -303,25 +334,30 @@ def search_clip(self, block_name, module_list, input_values): Args: block_name (str): block name in model. - module_list (dict): contains all linear tuple in current block, + module_list (dict): contains all linear tuple in current block, linears in the same tuple shares scale. 
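Editor's note: the alpha grid in search_scale above balances activation magnitude against weight magnitude. Candidate scales are x_max**alpha / w_max**(1 - alpha), normalized so their geometric spread is centered, and the alpha with the lowest output MSE wins. A self-contained sketch of that search for a single linear, with the real quant_weight replaced by a crude round-to-nearest stand-in:

import torch

def fake_quant(w, n_bits=4):
    # Stand-in for quant_weight(): symmetric round-to-nearest, per tensor.
    q_max = 2 ** (n_bits - 1) - 1
    scale = w.abs().max() / q_max
    return (w / scale).round().clamp(-q_max - 1, q_max) * scale

x = torch.randn(64, 16)        # calibration activations for one linear
w = torch.randn(32, 16)        # weight (out_features, in_features)
x_max = x.abs().mean(dim=0)    # per-input-channel activation magnitude
w_max = w.abs().mean(dim=0)    # per-input-channel weight magnitude
org_out = x @ w.t()

best_err, best_scales, n_grid = float("inf"), None, 20
for step in range(n_grid):
    alpha = step / n_grid
    scales = (x_max.pow(alpha) / w_max.pow(1 - alpha)).clamp(min=1e-4)
    scales = scales / (scales.max() * scales.min()).sqrt()
    w_q = fake_quant(w * scales) / scales          # scale up, quantize, scale back
    err = (org_out - x @ w_q.t()).pow(2).mean().item()
    if err < best_err:
        best_err, best_scales = err, scales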
input_values (dict): contains all input values of linears in current block """ from .weight_only import quant_weight + logger.info("Searching the best clip range with AWQ algorithm") for module_tuple in module_list: - input_val = input_values[module_tuple[0].split(block_name + '.')[1]]['input'] + input_val = input_values[module_tuple[0].split(block_name + ".")[1]]["input"] # process linear modules one by one for module_name in module_tuple: # Step 1: Initailize quantization configuration. if module_name in self.weight_config: - cur_dtype = self.weight_config[module_name]['dtype'] - cur_bits = self.weight_config[module_name]['bits'] - cur_group_size = self.weight_config[module_name]['group_size'] - cur_scheme = self.weight_config[module_name]['scheme'] + cur_dtype = self.weight_config[module_name]["dtype"] + cur_bits = self.weight_config[module_name]["bits"] + cur_group_size = self.weight_config[module_name]["group_size"] + cur_scheme = self.weight_config[module_name]["scheme"] else: - cur_dtype, cur_bits, cur_group_size, cur_scheme = \ - self.data_type, self.bits, self.group_size, self.scheme + cur_dtype, cur_bits, cur_group_size, cur_scheme = ( + self.data_type, + self.bits, + self.group_size, + self.scheme, + ) if cur_bits < 0: continue logger.info(f"[CLIP] Processing module: {module_name}") @@ -332,19 +368,19 @@ def search_clip(self, block_name, module_list, input_values): org_out = self.module_inference(module, input_val) # Step 4: set different clip range for weight and compare the MSE loss. logger.info("Searching the best clip range with AWQ algorithm") - best_error = float('inf') + best_error = float("inf") best_clip_ratio = None n_grid = 100 max_shrink = 0.1 history = [] for i_s in range(int(max_shrink * n_grid)): - ratio = (1 - i_s / n_grid) # 1, 0.91-1.0 + ratio = 1 - i_s / n_grid # 1, 0.91-1.0 # MulLinear can also work with @weight.setter module.weight.data = quant_weight( module.weight.data, data_type=cur_dtype, - num_bits=cur_bits, - group_size=cur_group_size, + num_bits=cur_bits, + group_size=cur_group_size, scheme=cur_scheme, full_range=self.sym_full_range, quantile=ratio, @@ -362,13 +398,13 @@ def search_clip(self, block_name, module_list, input_values): logger.debug("The loss history of different clip range:{}".format(history)) if module_name not in self.weight_config: self.weight_config[module_name] = { - 'bits': cur_bits, - 'group_size': cur_group_size, - 'scheme': cur_scheme + "bits": cur_bits, + "group_size": cur_group_size, + "scheme": cur_scheme, } - self.weight_config[module_name]['quantile'] = best_clip_ratio + self.weight_config[module_name]["quantile"] = best_clip_ratio if isinstance(module, MulLinear): - self.weight_config[module_name+'.linear'] = self.weight_config[module_name] + self.weight_config[module_name + ".linear"] = self.weight_config[module_name] self.weight_config.pop(module_name) logger.debug("The best clip ratio for {}:{}".format(module_name, best_clip_ratio)) @@ -376,18 +412,19 @@ def apply_quantize_with_clip(self, return_int=False): """Quantize model with clip range. Args: - return_int (bool, optional): whether return int dtype with WeightOnlyLinear. + return_int (bool, optional): whether return int dtype with WeightOnlyLinear. Defaults to False. 
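Editor's note: search_clip is the second grid. Scales stay fixed and only the weight quantile used for the quantization range shrinks, from 1.0 down to 0.91, again keeping whichever ratio gives the smallest output MSE and recording it as "quantile" in weight_config. A toy version, with the same kind of stand-in quantizer as the previous sketch:

import torch

def fake_quant_clip(w, quantile=1.0, n_bits=4):
    # Stand-in for quant_weight(..., quantile=ratio): shrink the clip range, then round.
    q_max = 2 ** (n_bits - 1) - 1
    scale = w.abs().max() * quantile / q_max
    return (w / scale).round().clamp(-q_max - 1, q_max) * scale

x, w = torch.randn(64, 16), torch.randn(32, 16)
org_out = x @ w.t()

best_err, best_ratio = float("inf"), 1.0
n_grid, max_shrink = 100, 0.1
for i_s in range(int(max_shrink * n_grid)):
    ratio = 1 - i_s / n_grid                       # 1.0 down to 0.91
    err = (org_out - x @ fake_quant_clip(w, quantile=ratio).t()).pow(2).mean().item()
    if err < best_err:
        best_err, best_ratio = err, ratio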
""" # apply quantization and clip logger.info("Quantizing the AWQ optimized fp32 model") from .weight_only import rtn_quantize + self.model = rtn_quantize( - self.model, - num_bits=self.bits, + self.model, + num_bits=self.bits, group_size=self.group_size, scheme=self.scheme, - weight_config=self.weight_config, + weight_config=self.weight_config, return_int=return_int, sym_full_range=self.sym_full_range, ) @@ -402,8 +439,8 @@ def update_block_input(self, input_list): for i, inp in enumerate(input_list): if len(self.total_block_args[i]) > 0: self.total_block_args[i][0] = inp - elif 'hidden_states' in self.total_block_kwargs[i]: - self.total_block_kwargs[i]['hidden_states'] = inp + elif "hidden_states" in self.total_block_kwargs[i]: + self.total_block_kwargs[i]["hidden_states"] = inp else: # pragma: no cover assert False, "cannot find hidden_states position for next block" diff --git a/neural_compressor/adaptor/torch_utils/bf16_convert.py b/neural_compressor/adaptor/torch_utils/bf16_convert.py index 8a7c5f47d67..1c3b9ce789e 100644 --- a/neural_compressor/adaptor/torch_utils/bf16_convert.py +++ b/neural_compressor/adaptor/torch_utils/bf16_convert.py @@ -17,15 +17,18 @@ """Bf16 Convert for Torch Utils.""" import torch import torch.nn as nn -from ...utils import logger from torch.fx import symbolic_trace +from ...utils import logger + + class BF16ModuleWrapper(nn.Module): """BF16Module Wrapper Class.""" + def __init__(self, module): """Init a BF16ModuleWrapper object.""" super(BF16ModuleWrapper, self).__init__() - self.add_module('module', module) + self.add_module("module", module) self.train(module.training) def forward(self, X): @@ -35,29 +38,30 @@ def forward(self, X): X = self.module(X) return X.float() + def Convert(model, tune_cfg): - """Convert to bf16 model. + """Convert to bf16 model. - Args: - model (object): the input model. - tune_cfg (dict): dictionary of quantization configuration. + Args: + model (object): the input model. + tune_cfg (dict): dictionary of quantization configuration. + + Returns: + mixed_precision_model (object): model with mixed precision. + """ + bf16_ops_list = tune_cfg["bf16_ops_list"] + fx_sub_module_list = tune_cfg["fx_sub_module_list"] if "fx_sub_module_list" in tune_cfg.keys() else [] + if len(bf16_ops_list) > 0: + logger.info("Convert operators to bfloat16") + mixed_precision_model = _bf16_wrapper_model(model, bf16_ops_list) + if fx_sub_module_list is not None and len(fx_sub_module_list) > 0: + mixed_precision_model = bf16_symbolic_trace(mixed_precision_model, fx_sub_module_list) + return mixed_precision_model - Returns: - mixed_precision_model (object): model with mixed precision. - """ - bf16_ops_list = tune_cfg['bf16_ops_list'] - fx_sub_module_list = tune_cfg['fx_sub_module_list'] \ - if 'fx_sub_module_list' in tune_cfg.keys() else [] - if len(bf16_ops_list) > 0: - logger.info("Convert operators to bfloat16") - mixed_precision_model = _bf16_wrapper_model(model, bf16_ops_list) - if fx_sub_module_list is not None and len(fx_sub_module_list) > 0: - mixed_precision_model = bf16_symbolic_trace(mixed_precision_model, fx_sub_module_list) - return mixed_precision_model -def _bf16_wrapper_model(model, bf16_ops_list, prefix=''): +def _bf16_wrapper_model(model, bf16_ops_list, prefix=""): for name, child in model.named_children(): - op_name = prefix + '.' + name if prefix != '' else name + op_name = prefix + "." 
+ name if prefix != "" else name for bf16_op_name in bf16_ops_list: if op_name == bf16_op_name[0]: child = BF16ModuleWrapper(child) @@ -67,7 +71,7 @@ def _bf16_wrapper_model(model, bf16_ops_list, prefix=''): return model -def bf16_symbolic_trace(model, fx_sub_module_list, prefix=''): +def bf16_symbolic_trace(model, fx_sub_module_list, prefix=""): """Symbolic trace for bf16 models. Args: @@ -79,11 +83,11 @@ def bf16_symbolic_trace(model, fx_sub_module_list, prefix=''): model (object) """ for name, child in model.named_children(): - op_name = prefix + '.' + name if prefix != '' else name + op_name = prefix + "." + name if prefix != "" else name for fx_sub_module_name in fx_sub_module_list: if op_name == fx_sub_module_name: child = symbolic_trace(child) else: bf16_symbolic_trace(child, fx_sub_module_list, op_name) setattr(model, name, child) - return model \ No newline at end of file + return model diff --git a/neural_compressor/adaptor/torch_utils/gptq.py b/neural_compressor/adaptor/torch_utils/gptq.py index ba33954be40..4c8597cfd3c 100644 --- a/neural_compressor/adaptor/torch_utils/gptq.py +++ b/neural_compressor/adaptor/torch_utils/gptq.py @@ -1,4 +1,3 @@ - #!/usr/bin/env python # -*- coding: utf-8 -*- # @@ -17,25 +16,27 @@ # limitations under the License. import math -import time +import random import re +import time +from collections import UserDict, defaultdict +from functools import partial + import torch import torch.nn as nn import transformers from tqdm import tqdm -from functools import partial + from ...utils import logger -import random -from collections import UserDict, defaultdict -DEBUG = False +DEBUG = False + # ================ device related =================== -def move_input_to_device(input, device=torch.device('cpu')): +def move_input_to_device(input, device=torch.device("cpu")): if isinstance(input, dict) or isinstance(input, UserDict): for inp in input.keys(): - input[inp] = input[inp].to(device) \ - if isinstance(input[inp], torch.Tensor) else input[inp] + input[inp] = input[inp].to(device) if isinstance(input[inp], torch.Tensor) else input[inp] elif isinstance(input, list) or isinstance(input, tuple): input_res, prev_size = [], None for inp in input: @@ -47,14 +48,14 @@ def move_input_to_device(input, device=torch.device('cpu')): if torch.tensor(inp).size == prev_size: input_res.append(inp) else: - input_res.append(inp.to(device) \ - if isinstance(inp, torch.Tensor) else inp) + input_res.append(inp.to(device) if isinstance(inp, torch.Tensor) else inp) prev_size = torch.tensor(inp).size() input = input_res else: input = input.to(device) # pylint: disable=no-member return input + # ==============model structure related============== def is_leaf(module): """Judge whether a module has no child-modules. @@ -70,7 +71,8 @@ def is_leaf(module): children_cnt += 1 return True if children_cnt == 0 else False -def trace_gptq_target_blocks(module, module_types = [torch.nn.ModuleList]): + +def trace_gptq_target_blocks(module, module_types=[torch.nn.ModuleList]): """Search transformer stacked structures, which is critical in LLMs and GPTQ execution. Args: @@ -79,7 +81,7 @@ def trace_gptq_target_blocks(module, module_types = [torch.nn.ModuleList]): Returns: gptq_related_blocks = { - "embeddings": {}, # Dict embedding layers before transfromer stack module, + "embeddings": {}, # Dict embedding layers before transfromer stack module, "transformers_pre": {}, # TODO "transformers_name": string. LLMs' transformer stack module name , "transformers": torch.nn.ModuleList. 
LLMs' transformer stack module, @@ -88,10 +90,10 @@ def trace_gptq_target_blocks(module, module_types = [torch.nn.ModuleList]): """ gptq_related_blocks = { "embeddings": {}, - "transformers_pre": {}, # todo - "transformers_name": "", # None - "transformers": [], # None - "transformers_post": {}, # todo + "transformers_pre": {}, # todo + "transformers_name": "", # None + "transformers": [], # None + "transformers_post": {}, # todo } for n, m in module.named_modules(): if type(m) in module_types: @@ -103,50 +105,51 @@ def trace_gptq_target_blocks(module, module_types = [torch.nn.ModuleList]): gptq_related_blocks["embeddings"][n] = m return gptq_related_blocks -def find_layers(module, layers=[nn.Conv2d, nn.Conv1d, nn.Linear, transformers.Conv1D], name=''): + +def find_layers(module, layers=[nn.Conv2d, nn.Conv1d, nn.Linear, transformers.Conv1D], name=""): """Get all layers with target types.""" if type(module) in layers: return {name: module} else: # use string type to find name: - if type(module).__name__ in ['Linear']: + if type(module).__name__ in ["Linear"]: return {name: module} else: pass res = {} for name1, child in module.named_children(): - res.update(find_layers( - child, layers=layers, name=name + '.' + name1 if name != '' else name1 - )) + res.update(find_layers(child, layers=layers, name=name + "." + name1 if name != "" else name1)) return res -def find_layers_name(module, layers=[nn.Conv2d, nn.Conv1d, nn.Linear, transformers.Conv1D], name=''): + +def find_layers_name(module, layers=[nn.Conv2d, nn.Conv1d, nn.Linear, transformers.Conv1D], name=""): """Get all layers with target types.""" if type(module) in layers: return [name] res = [] for name1, child in module.named_children(): - res += find_layers_name(child, layers=layers, name = name + '.' + name1 if name != '' else name1) + res += find_layers_name(child, layers=layers, name=name + "." + name1 if name != "" else name1) return res + def log_quantizable_layers_per_transformer( - transformer_blocks, - layers=[nn.Conv2d, nn.Conv1d, nn.Linear, transformers.Conv1D] - ): + transformer_blocks, layers=[nn.Conv2d, nn.Conv1d, nn.Linear, transformers.Conv1D] +): """Print all layers which will be quantized in GPTQ algorithm.""" logger.info("* * Layer to be quantized * *") - for block_id in range(len(transformer_blocks['transformers'])): - transformer_block = transformer_blocks['transformers'][block_id] + for block_id in range(len(transformer_blocks["transformers"])): + transformer_block = transformer_blocks["transformers"][block_id] layers_for_this_tblock = find_layers_name(transformer_block) layer_names = [ - (transformer_blocks['transformers_name'] + "." + str(block_id) + '.' + layer_name) \ + (transformer_blocks["transformers_name"] + "." + str(block_id) + "." + layer_name) for layer_name in layers_for_this_tblock ] for name in layer_names: logger.info(name) -#===============quantization related============================ + +# ===============quantization related============================ def quantize(x, scale, zero, maxq): """Do quantization.""" if maxq < 0: @@ -154,31 +157,25 @@ def quantize(x, scale, zero, maxq): q = torch.clamp(torch.round(x / scale) + zero, 0, maxq) return scale * (q - zero) + class GPTQuantizer(object): """Main API for GPTQ algorithm. 
- Please refer to: + + Please refer to: GPTQ: Accurate Post-training Compression for Generative Pretrained Transformers url: https://arxiv.org/abs/2210.17323 """ - - def __init__( - self, - model, - weight_config={}, - dataloader=None, - nsamples = 128, - use_max_length = True, - device=None - ): + + def __init__(self, model, weight_config={}, dataloader=None, nsamples=128, use_max_length=True, device=None): """ Args: model: the fp32 model to quantize - weight_config (dict, optional): contains all info required by GPTQ. Defaults to {}. For example, + weight_config (dict, optional): contains all info required by GPTQ. Defaults to {}. For example, weight_config={ 'layer1': { - 'bits': 4, - 'group_size': 32, + 'bits': 4, + 'group_size': 32, 'sym': False, 'percdamp': .01, 'act_order': False @@ -191,7 +188,7 @@ def __init__( # model self.model = model self.use_cache = self.model.config.use_cache - self.gptq_related_blocks = trace_gptq_target_blocks(self.model) # get the transformer block list above + self.gptq_related_blocks = trace_gptq_target_blocks(self.model) # get the transformer block list above self.dtype = next(iter(self.model.parameters())).dtype log_quantizable_layers_per_transformer(self.gptq_related_blocks) @@ -221,7 +218,7 @@ def __init__( def prepare_dataloader(self): if self.use_max_length: - # (Recommend) only take sequence whose length exceeds model.seqlen, + # (Recommend) only take sequence whose length exceeds model.seqlen, # which perserves calibration's tokens are all valid # This is GPTQ official dataloader implementation self.obtain_first_n_samples_fulllength() @@ -231,11 +228,11 @@ def prepare_dataloader(self): try: # Since length is unified, we can allocate a continous space to store inputs self.inp = torch.zeros( - (len(self.dataloader), self.model.seqlen, self.model_hidden_size), - dtype=self.dtype, - device=self.device + (len(self.dataloader), self.model.seqlen, self.model_hidden_size), + dtype=self.dtype, + device=self.device, ) - self.cache = {'i': 0} + self.cache = {"i": 0} self.out = torch.zeros_like(self.inp) self.is_ready = True except: @@ -246,7 +243,7 @@ def prepare_dataloader(self): self.obtain_first_n_samples() try: self.inp = [torch.zeros(1) for _ in range(len(self.dataloader))] - self.cache = {'i': 0} + self.cache = {"i": 0} self.out = [torch.zeros(1) for _ in range(len(self.dataloader))] self.is_ready = True except: @@ -272,7 +269,7 @@ def obtain_first_n_samples(self, seed=0): # dict elif isinstance(batch, dict): try: - length = batch['input_ids'].shape[-1] + length = batch["input_ids"].shape[-1] except: logger.warning("Please make sure your dict'like data contains key of 'input_ids'.") continue @@ -283,7 +280,7 @@ def obtain_first_n_samples(self, seed=0): # may have to slice every sequence related data for key in batch.keys(): if isinstance(batch[key], torch.Tensor): - batch_final[key] = batch[key][:, i:j] # slice on sequence length dim + batch_final[key] = batch[key][:, i:j] # slice on sequence length dim else: batch_final[key] = batch[key] else: @@ -300,7 +297,7 @@ def obtain_first_n_samples(self, seed=0): if len(self.dataloader) < self.nsamples: logger.warning(f"Try to use {self.nsamples} data, but entire dataset size is {len(self.dataloader)}.") - + def obtain_first_n_samples_fulllength(self, seed=0): self.dataloader.clear() random.seed(seed) @@ -323,7 +320,7 @@ def obtain_first_n_samples_fulllength(self, seed=0): # dict elif isinstance(batch, dict): try: - length = batch['input_ids'].shape[-1] + length = batch["input_ids"].shape[-1] except: 
logger.warning("Please make sure your dict'like data contains key of 'input_ids'.") continue @@ -336,7 +333,7 @@ def obtain_first_n_samples_fulllength(self, seed=0): # may have to slice every sequence related data for key in batch.keys(): if isinstance(batch[key], torch.Tensor): - batch_final[key] = batch[key][:, i:j] # slice on sequence length dim with same position + batch_final[key] = batch[key][:, i:j] # slice on sequence length dim with same position else: batch_final[key] = batch[key] else: @@ -354,9 +351,11 @@ def obtain_first_n_samples_fulllength(self, seed=0): # not match max length, not include in target dataset continue self.dataloader.append(batch_final) - if len(self.dataloader) < self.nsamples: # pragma: no cover - logger.warning(f"Trying to allocate {self.nsamples} data with fixed length {unified_length}, \ - but only {len(self.dataloader)} samples satisfy your setting. You may choose smaller 'model.seqlen' value.") + if len(self.dataloader) < self.nsamples: # pragma: no cover + logger.warning( + f"Trying to allocate {self.nsamples} data with fixed length {unified_length}, \ + but only {len(self.dataloader)} samples satisfy your setting. You may choose smaller 'model.seqlen' value." + ) @torch.no_grad() def initialize_inp_buffersize(self): @@ -375,10 +374,11 @@ def forward(layer, hidden_states, **kwargs): embedding_layer = embedding_layer.to(self.device) # Step2: modify the first transformer block's forward function to obtain inputs for calibration - self.gptq_related_blocks['transformers'][0] = self.gptq_related_blocks['transformers'][0].to(self.device) - forward_cache = self.gptq_related_blocks['transformers'][0].forward - self.gptq_related_blocks['transformers'][0].forward = \ - partial(forward, self.gptq_related_blocks['transformers'][0]) + self.gptq_related_blocks["transformers"][0] = self.gptq_related_blocks["transformers"][0].to(self.device) + forward_cache = self.gptq_related_blocks["transformers"][0].forward + self.gptq_related_blocks["transformers"][0].forward = partial( + forward, self.gptq_related_blocks["transformers"][0] + ) # Step3: run forward to obtain calibration datasets logger.info("Collecting calibration inputs...") @@ -395,53 +395,53 @@ def forward(layer, hidden_states, **kwargs): break # Step 4: restore original forward function, relocate layers back to cpu. 
- self.gptq_related_blocks['transformers'][0].forward = forward_cache - self.gptq_related_blocks['transformers'][0] = self.gptq_related_blocks['transformers'][0].cpu() + self.gptq_related_blocks["transformers"][0].forward = forward_cache + self.gptq_related_blocks["transformers"][0] = self.gptq_related_blocks["transformers"][0].cpu() for embedding_name, embedding_layer in self.gptq_related_blocks["embeddings"].items(): embedding_layer.to(self.device) torch.cuda.empty_cache() - + def get_full_layer_name(self, sub_layer_name, block_idx): transformer_name = self.gptq_related_blocks["transformers_name"] return ".".join([transformer_name, str(block_idx), sub_layer_name]) def check_layer_config(self): """Copy arguments from weight_config to build-in attributes.""" - if 'wbits' in self.weight_config: + if "wbits" in self.weight_config: tmp_weight_config = {} for name, module in self.model.named_modules(): tmp_weight_config[name] = {} - tmp_weight_config[name]['wbits'] = self.weight_config.get('wbits', self.wbits_default) - tmp_weight_config[name]['group_size'] = self.weight_config.get('group_size', self.group_size_default) - tmp_weight_config[name]['block_size'] = self.weight_config.get('block_size', self.group_size_default) - tmp_weight_config[name]['percdamp'] = self.weight_config.get('pecdamp', self.percdamp_default) - tmp_weight_config[name]['sym'] = self.weight_config.get('sym', self.sym_default) - tmp_weight_config[name]['act_order'] = self.weight_config.get('act_order', self.act_order_default) - tmp_weight_config[name]['perchannel'] = self.weight_config.get('perchannel', self.perchannel_default) - tmp_weight_config[name]['mse'] = self.weight_config.get('mse', self.mse_default) + tmp_weight_config[name]["wbits"] = self.weight_config.get("wbits", self.wbits_default) + tmp_weight_config[name]["group_size"] = self.weight_config.get("group_size", self.group_size_default) + tmp_weight_config[name]["block_size"] = self.weight_config.get("block_size", self.group_size_default) + tmp_weight_config[name]["percdamp"] = self.weight_config.get("pecdamp", self.percdamp_default) + tmp_weight_config[name]["sym"] = self.weight_config.get("sym", self.sym_default) + tmp_weight_config[name]["act_order"] = self.weight_config.get("act_order", self.act_order_default) + tmp_weight_config[name]["perchannel"] = self.weight_config.get("perchannel", self.perchannel_default) + tmp_weight_config[name]["mse"] = self.weight_config.get("mse", self.mse_default) self.weight_config = tmp_weight_config else: for layer_name, config in self.weight_config.items(): - self.weight_config[layer_name]['wbits'] = config.get('wbits', self.wbits_default) - self.weight_config[layer_name]['group_size'] = config.get('group_size', self.group_size_default) - self.weight_config[layer_name]['block_size'] = config.get('block_size', self.group_size_default) - self.weight_config[layer_name]['percdamp'] = config.get('pecdamp', self.percdamp_default) - self.weight_config[layer_name]['sym'] = config.get('sym', self.sym_default) - self.weight_config[layer_name]['act_order'] = config.get('act_order', self.act_order_default) - self.weight_config[layer_name]['perchannel'] = config.get('perchannel', self.perchannel_default) - self.weight_config[layer_name]['mse'] = config.get('mse', self.mse_default) + self.weight_config[layer_name]["wbits"] = config.get("wbits", self.wbits_default) + self.weight_config[layer_name]["group_size"] = config.get("group_size", self.group_size_default) + self.weight_config[layer_name]["block_size"] = config.get("block_size", 
self.group_size_default) + self.weight_config[layer_name]["percdamp"] = config.get("pecdamp", self.percdamp_default) + self.weight_config[layer_name]["sym"] = config.get("sym", self.sym_default) + self.weight_config[layer_name]["act_order"] = config.get("act_order", self.act_order_default) + self.weight_config[layer_name]["perchannel"] = config.get("perchannel", self.perchannel_default) + self.weight_config[layer_name]["mse"] = config.get("mse", self.mse_default) def get_layer_config(self, layer_name): """Obtain config for one layer, since GPTQ supports layer-wise config.""" # First try the exact name matching, if cannot find, use re to search. For example, can support ".*" in op_name config = None config = self.weight_config.get(layer_name, None) - if config != None: + if config is not None: return config else: for k, v in self.weight_config.items(): regex = re.compile(k) - if len(regex.findall(layer_name)) != None: + if len(regex.findall(layer_name)) is not None: config = v return config else: @@ -451,11 +451,12 @@ def get_layer_config(self, layer_name): @torch.no_grad() def pre_quantization(self): """Prepare input calibration data and other attributes which are critical for gptq execution.""" + # critical: hooker function which collects inputs def forward(layer, hidden_states, **kwargs): # inputs[inputs_info['idx']] = input_ids # TODO solve the problem of batchsize!=1 - self.inp[self.cache['i']] = hidden_states - self.cache['i'] += 1 + self.inp[self.cache["i"]] = hidden_states + self.cache["i"] += 1 for arg in kwargs: # TODO: investigate include parameters if self.use_max_length: @@ -466,7 +467,7 @@ def forward(layer, hidden_states, **kwargs): else: # each outputs can be different shape, hence also use list to store if isinstance(kwargs[arg], torch.Tensor) or arg == "alibi": - if self.cache.get(arg, None) == None: + if self.cache.get(arg, None) is None: self.cache[arg] = [] self.cache[arg].append(kwargs[arg]) continue @@ -477,10 +478,11 @@ def forward(layer, hidden_states, **kwargs): embedding_layer = embedding_layer.to(self.device) # Step2: modify the first transformer block's forward function to obtain inputs for calibration - self.gptq_related_blocks['transformers'][0] = self.gptq_related_blocks['transformers'][0].to(self.device) - forward_cache = self.gptq_related_blocks['transformers'][0].forward - self.gptq_related_blocks['transformers'][0].forward = \ - partial(forward, self.gptq_related_blocks['transformers'][0]) + self.gptq_related_blocks["transformers"][0] = self.gptq_related_blocks["transformers"][0].to(self.device) + forward_cache = self.gptq_related_blocks["transformers"][0].forward + self.gptq_related_blocks["transformers"][0].forward = partial( + forward, self.gptq_related_blocks["transformers"][0] + ) # Step3: run forward to obtain calibration datasets logger.info("Collecting calibration inputs...") @@ -502,13 +504,13 @@ def forward(layer, hidden_states, **kwargs): logger.info("Done.") # Step 4: restore original forward function, relocate layers back to cpu. 
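Editor's note: get_layer_config first tries an exact name match in weight_config, then falls back to interpreting each key as a regular expression, so entries such as ".*mlp.*" can cover whole families of layers. The diff's fallback test, len(regex.findall(layer_name)) is not None, is always true; a stricter sketch of the intended lookup (config values are hypothetical) checks for a non-empty match instead:

import re

weight_config = {
    "transformer.h.0.attn.c_attn": {"wbits": 4, "group_size": 128},
    r".*mlp.*": {"wbits": 8, "group_size": -1},   # regex-style entry
}

def get_layer_config(layer_name):
    config = weight_config.get(layer_name)
    if config is not None:                        # exact match first
        return config
    for pattern, cfg in weight_config.items():
        if re.findall(pattern, layer_name):       # non-empty match, not "is not None"
            return cfg
    return None

print(get_layer_config("transformer.h.3.mlp.c_fc"))   # resolved by the regex entry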
- self.gptq_related_blocks['transformers'][0].forward = forward_cache - self.gptq_related_blocks['transformers'][0] = self.gptq_related_blocks['transformers'][0].cpu() + self.gptq_related_blocks["transformers"][0].forward = forward_cache + self.gptq_related_blocks["transformers"][0] = self.gptq_related_blocks["transformers"][0].cpu() for embedding_name, embedding_layer in self.gptq_related_blocks["embeddings"].items(): embedding_layer.to(self.device) torch.cuda.empty_cache() # end - logger.info('GPTQ quantization prepared.') + logger.info("GPTQ quantization prepared.") def gather_single_batch_from_dict(self, data_dict, idx): single_batch = {} @@ -524,10 +526,10 @@ def execute_quantization(self, means=None, stds=None): self.pre_quantization() # Step2: run gptq quantization in a transformer block-wise manner. gptq_config = {} - tblock_length = len(self.gptq_related_blocks['transformers']) + tblock_length = len(self.gptq_related_blocks["transformers"]) for block_idx in range(tblock_length): logger.info(f"Quantizing layer {block_idx + 1} / {tblock_length}..") - transformer_block = self.gptq_related_blocks['transformers'][block_idx].to(self.device) + transformer_block = self.gptq_related_blocks["transformers"][block_idx].to(self.device) # Step2.1: obtain all layers (Linear, Conv2d, etc) in the block which can be quantized. sub_layers = find_layers(transformer_block) sub_layers_to_quant = {} @@ -535,7 +537,7 @@ def execute_quantization(self, means=None, stds=None): # filter sub_layers with included layer_names in self.weight_config full_layer_name = self.get_full_layer_name(layer_name, block_idx) # if self.weight_config.get(full_layer_name, None) == None: - if self.get_layer_config(full_layer_name) == None: + if self.get_layer_config(full_layer_name) is None: logger.warning(f"{full_layer_name} can be quantized " + "but excluded from quantization configs.") else: sub_layers_to_quant[layer_name] = layer_obj @@ -548,26 +550,27 @@ def execute_quantization(self, means=None, stds=None): # weight_config_this_layer = self.weight_config.get( # self.get_full_layer_name(layer_name, block_idx), None # ) - weight_config_this_layer = self.get_layer_config( - self.get_full_layer_name(layer_name, block_idx) - ) + weight_config_this_layer = self.get_layer_config(self.get_full_layer_name(layer_name, block_idx)) gptq_for_this_block[layer_name] = GPTQ(sub_layers[layer_name]) - #gptq_for_this_block[layer_name].quantizer = Quantizer() + # gptq_for_this_block[layer_name].quantizer = Quantizer() gptq_for_this_block[layer_name].quantizer.configure( - weight_config_this_layer['wbits'], - weight_config_this_layer['perchannel'], - weight_config_this_layer['sym'], - weight_config_this_layer['mse'], + weight_config_this_layer["wbits"], + weight_config_this_layer["perchannel"], + weight_config_this_layer["sym"], + weight_config_this_layer["mse"], ) + # Step 2.3: modify forward functions to hook inputs data (used in gptq execution) def add_batch(_name): def tmp(_, inp, out): gptq_for_this_block[_name].add_batch(inp[0].data, out.data) # noqa: F821 + return tmp - handles = [] # register handles which add inputs and outputs to gptq object + + handles = [] # register handles which add inputs and outputs to gptq object for layer_name in sub_layers: handles.append(sub_layers[layer_name].register_forward_hook(add_batch(layer_name))) - idx = self.cache.pop('i') + idx = self.cache.pop("i") for j in range(len(self.dataloader)): if self.use_max_length: # self.inp[j] shape: [seq_len, hidden_size] @@ -576,7 +579,7 @@ def tmp(_, inp, out): 
# self.inp[j] shape: [1, seq_len, hidden_size] (batchsize is 1 by default) cache_batch = self.gather_single_batch_from_dict(self.cache, j) self.out[j] = transformer_block(self.inp[j], **cache_batch)[0] - self.cache['i'] = idx + self.cache["i"] = idx for h in handles: h.remove() # Step 2.4: everything is prepared, so start quantization! @@ -584,26 +587,25 @@ def tmp(_, inp, out): # weight_config_this_layer = self.weight_config.get( # self.get_full_layer_name(layer_name, block_idx), None # ) - weight_config_this_layer = self.get_layer_config( - self.get_full_layer_name(layer_name, block_idx) - ) + weight_config_this_layer = self.get_layer_config(self.get_full_layer_name(layer_name, block_idx)) logger.info(f"Quantizing layer {layer_name}") scale, zp = gptq_for_this_block[layer_name].fasterquant( - blocksize = weight_config_this_layer['block_size'], - percdamp = weight_config_this_layer['percdamp'], - groupsize = weight_config_this_layer['group_size'], - act_order = weight_config_this_layer['act_order'], + blocksize=weight_config_this_layer["block_size"], + percdamp=weight_config_this_layer["percdamp"], + groupsize=weight_config_this_layer["group_size"], + act_order=weight_config_this_layer["act_order"], ) - gptq_config[self.get_full_layer_name(layer_name, block_idx)] = {'scale': scale} - if not weight_config_this_layer['sym']: - gptq_config[self.get_full_layer_name(layer_name, block_idx)]['zero'] = zp - if weight_config_this_layer['act_order']: # save perm for restoring the weights - gptq_config[self.get_full_layer_name(layer_name, block_idx)]['perm'] = \ - gptq_for_this_block[layer_name].perm + gptq_config[self.get_full_layer_name(layer_name, block_idx)] = {"scale": scale} + if not weight_config_this_layer["sym"]: + gptq_config[self.get_full_layer_name(layer_name, block_idx)]["zero"] = zp + if weight_config_this_layer["act_order"]: # save perm for restoring the weights + gptq_config[self.get_full_layer_name(layer_name, block_idx)]["perm"] = gptq_for_this_block[ + layer_name + ].perm gptq_for_this_block[layer_name].free() - + # Step 2.5: replace output data with quantized weights - idx = self.cache.pop('i') + idx = self.cache.pop("i") for j in range(len(self.dataloader)): if self.use_max_length: # self.inp[j] shape: [seq_len, hidden_size] @@ -612,14 +614,14 @@ def tmp(_, inp, out): # self.inp[j] shape: [1, seq_len, hidden_size] (batchsize is 1 by default) cache_batch = self.gather_single_batch_from_dict(self.cache, j) self.out[j] = transformer_block(self.inp[j], **cache_batch)[0] - self.cache['i'] = idx - self.gptq_related_blocks['transformers'][block_idx] = transformer_block.cpu() + self.cache["i"] = idx + self.gptq_related_blocks["transformers"][block_idx] = transformer_block.cpu() del gptq_for_this_block torch.cuda.empty_cache() # iteratively replace the input with output, thus layerwise quantization can continue. 
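Editor's note: the control flow of execute_quantization is: for each transformer block, hook every quantizable sub-layer to feed add_batch, replay the cached calibration inputs, run fasterquant layer by layer, replay the inputs once more through the now-quantized block, and finally swap inp/out so the next block is calibrated on quantized activations. A runnable miniature of that loop, with GPTQ's Hessian-based update replaced by plain round-to-nearest:

import torch
import torch.nn as nn

def rtn_int8(w):
    # Simple stand-in for the per-layer fasterquant update.
    scale = w.abs().max() / 127
    return (w / scale).round().clamp(-128, 127) * scale

blocks = nn.ModuleList([nn.Linear(8, 8) for _ in range(2)])
inps = [torch.randn(4, 8) for _ in range(3)]      # cached calibration inputs for block 0
outs = [None] * len(inps)

for block in blocks:
    stats = []                                     # stands in for add_batch's Hessian updates
    handle = block.register_forward_hook(lambda m, i, o: stats.append(i[0]))
    for j, x in enumerate(inps):
        outs[j] = block(x)                         # collect stats on the fp32 block
    handle.remove()
    block.weight.data = rtn_int8(block.weight.data)
    for j, x in enumerate(inps):
        outs[j] = block(x)                         # recompute outputs with quantized weights
    inps, outs = outs, inps                        # next block sees quantized activations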
self.inp, self.out = self.out, self.inp - logger.info('------------------------------') - + logger.info("------------------------------") + logger.info("Quantization done") self.model.config.use_cache = self.use_cache @@ -629,11 +631,13 @@ def tmp(_, inp, out): gptq_config[k][m] = n.tolist() return self.model, gptq_config + class GPTQ: """ - Please refer to: + Please refer to: GPTQ: Accurate Post-training Compression for Generative Pretrained Transformers (https://arxiv.org/abs/2210.17323) """ + def __init__(self, layer): self.layer = layer self.device = self.layer.weight.device @@ -642,12 +646,12 @@ def __init__(self, layer): W = W.flatten(1) if isinstance(self.layer, transformers.Conv1D): W = W.t() - self.rows = W.shape[0] # output channels - self.columns = W.shape[1] # input channels + self.rows = W.shape[0] # output channels + self.columns = W.shape[1] # input channels self.H = torch.zeros((self.columns, self.columns), device=self.device) self.nsamples = 0 self.quantizer = Quantizer() - self.perm = None # act_order choice + self.perm = None # act_order choice def add_batch(self, inp, out): # if DEBUG: @@ -676,9 +680,9 @@ def add_batch(self, inp, out): # inp = inp.float() inp = math.sqrt(2 / self.nsamples) * inp.float() # self.H += 2 / self.nsamples * inp.matmul(inp.t()) - self.H += inp.matmul(inp.t()) # H = X*X, which should be a sysm matrix + self.H += inp.matmul(inp.t()) # H = X*X, which should be a sysm matrix - def fasterquant(self, blocksize=128, percdamp=.01, groupsize=-1, act_order=False): + def fasterquant(self, blocksize=128, percdamp=0.01, groupsize=-1, act_order=False): W = self.layer.weight.data.clone() if isinstance(self.layer, nn.Conv2d): W = W.flatten(1) @@ -695,7 +699,7 @@ def fasterquant(self, blocksize=128, percdamp=.01, groupsize=-1, act_order=False del self.H dead = torch.diag(H) == 0 H[dead, dead] = 1 - W[:, dead] = 0 # such channel makes no contribution to quantization computation + W[:, dead] = 0 # such channel makes no contribution to quantization computation # rearrange considering the diag's value if act_order: @@ -709,7 +713,7 @@ def fasterquant(self, blocksize=128, percdamp=.01, groupsize=-1, act_order=False damp = percdamp * torch.mean(torch.diag(H)) diag = torch.arange(self.columns, device=self.device) - H[diag, diag] += damp # add a average value of + H[diag, diag] += damp # add a average value of H = torch.linalg.cholesky(H) H = torch.cholesky_inverse(H) H = torch.linalg.cholesky(H, upper=True) @@ -728,21 +732,19 @@ def fasterquant(self, blocksize=128, percdamp=.01, groupsize=-1, act_order=False Losses1 = torch.zeros_like(W1) Hinv1 = Hinv[i1:i2, i1:i2] - for i in range(count): # within a block, channel wise + for i in range(count): # within a block, channel wise w = W1[:, i] d = Hinv1[i, i] if groupsize != -1: if (i1 + i) % groupsize == 0: - self.quantizer.find_params(W[:, (i1 + i):(i1 + i + groupsize)], weight=True) + self.quantizer.find_params(W[:, (i1 + i) : (i1 + i + groupsize)], weight=True) scale.append(self.quantizer.scale) zero.append(self.quantizer.zero) - q = quantize( - w.unsqueeze(1), self.quantizer.scale, self.quantizer.zero, self.quantizer.maxq - ).flatten() + q = quantize(w.unsqueeze(1), self.quantizer.scale, self.quantizer.zero, self.quantizer.maxq).flatten() Q1[:, i] = q - Losses1[:, i] = (w - q) ** 2 / d ** 2 + Losses1[:, i] = (w - q) ** 2 / d**2 err1 = (w - q) / d W1[:, i:] -= err1.unsqueeze(1).matmul(Hinv1[i, i:].unsqueeze(0)) @@ -759,10 +761,10 @@ def fasterquant(self, blocksize=128, percdamp=.01, groupsize=-1, act_order=False # 
logger.info(f"{torch.sum((self.layer(self.inp1) - self.out1) ** 2)}") # logger.info(f"{torch.sum(Losses)}") - if self.device != torch.device('cpu'): + if self.device != torch.device("cpu"): torch.cuda.synchronize() - logger.info(f'time {(time.time() - tick)}') - logger.info(f'error {torch.sum(Losses).item()}') + logger.info(f"time {(time.time() - tick)}") + logger.info(f"error {torch.sum(Losses).item()}") if act_order: invperm = torch.argsort(perm) @@ -790,29 +792,24 @@ def free(self): self.Trace = None torch.cuda.empty_cache() -class Quantizer(nn.Module): +class Quantizer(nn.Module): def __init__(self, shape=1): super(Quantizer, self).__init__() - self.register_buffer('maxq', torch.tensor(0)) - self.register_buffer('scale', torch.zeros(shape)) - self.register_buffer('zero', torch.zeros(shape)) - - def configure( - self, - bits, perchannel=False, sym=True, - mse=False, norm=2.4, grid=100, maxshrink=.8, - trits=False - ): - self.maxq = torch.tensor(2 ** bits - 1) + self.register_buffer("maxq", torch.tensor(0)) + self.register_buffer("scale", torch.zeros(shape)) + self.register_buffer("zero", torch.zeros(shape)) + + def configure(self, bits, perchannel=False, sym=True, mse=False, norm=2.4, grid=100, maxshrink=0.8, trits=False): + self.maxq = torch.tensor(2**bits - 1) self.perchannel = perchannel self.sym = sym self.mse = mse self.norm = norm self.grid = grid - self.maxshrink = maxshrink + self.maxshrink = maxshrink if trits: - self.maxq = torch.tensor(-1) + self.maxq = torch.tensor(-1) def find_params(self, x, weight=False): dev = x.device @@ -847,19 +844,19 @@ def find_params(self, x, weight=False): xmax[tmp] = +1 if self.maxq < 0: - self.scale = xmax - self.zero = xmin + self.scale = xmax + self.zero = xmin else: - self.scale = (xmax - xmin) / self.maxq - if self.sym: - self.zero = torch.full_like(self.scale, (self.maxq + 1) / 2) - else: - self.zero = torch.round(-xmin / self.scale) + self.scale = (xmax - xmin) / self.maxq + if self.sym: + self.zero = torch.full_like(self.scale, (self.maxq + 1) / 2) + else: + self.zero = torch.round(-xmin / self.scale) if self.mse: - best = torch.full([x.shape[0]], float('inf'), device=dev) + best = torch.full([x.shape[0]], float("inf"), device=dev) for i in range(int(self.maxshrink * self.grid)): - p = 1 - i / self.grid + p = 1 - i / self.grid xmin1 = p * xmin xmax1 = p * xmax scale1 = (xmax1 - xmin1) / self.maxq @@ -892,7 +889,7 @@ def find_params(self, x, weight=False): self.zero = self.zero.reshape((1, -1, 1, 1)) if len(shape) == 3: self.scale = self.scale.reshape((1, 1, -1)) - self.zero = self.zero.reshape((1, 1, -1)) + self.zero = self.zero.reshape((1, 1, -1)) if len(shape) == 2: self.scale = self.scale.unsqueeze(0) self.zero = self.zero.unsqueeze(0) diff --git a/neural_compressor/adaptor/torch_utils/hawq_metric.py b/neural_compressor/adaptor/torch_utils/hawq_metric.py index 8295fc99cc2..e4862ff43b3 100644 --- a/neural_compressor/adaptor/torch_utils/hawq_metric.py +++ b/neural_compressor/adaptor/torch_utils/hawq_metric.py @@ -20,17 +20,20 @@ torch = LazyImport("torch") import copy +import logging + import numpy as np import torch.nn -from torch.quantization.quantize_fx import fuse_fx import torch.nn as nn -import logging +from torch.quantization.quantize_fx import fuse_fx logger = logging.getLogger(__name__) -from typing import Dict, List, Optional, Any, Union, Callable, Set +from typing import Any, Callable, Dict, List, Optional, Set, Union + import torch import tqdm + class Node_collector: """Define Collector based on hook, which is used to record 
the intermediate result.""" @@ -64,7 +67,7 @@ def __init__(self, model, dataloader, q_model, criterion=None): self.unfused_model = model.model self.q_model = q_model tmp_model = model.model - if 'graph' in (str(dir(tmp_model))): # check the attribute and it's length + if "graph" in (str(dir(tmp_model))): # check the attribute and it's length logger.info("This is aready fused model") self.model = model.model else: @@ -77,9 +80,9 @@ def __init__(self, model, dataloader, q_model, criterion=None): self.index = 0 self.device = self.get_device(self.model) self.criterion = criterion - self._batch_size=dataloader.batch_size - self.params= [p for p in self.model.parameters() if p.requires_grad] - if self.criterion == None: + self._batch_size = dataloader.batch_size + self.params = [p for p in self.model.parameters() if p.requires_grad] + if self.criterion is None: self.criterion = torch.nn.CrossEntropyLoss().to(self.device) ##TODO need to set in config self.criterion = self.criterion.to(self.device) self.weight_to_op, self.op_list = self.get_fused_mapping() @@ -95,7 +98,7 @@ def is_fused_module(self, module): (bool): is fused or not """ op_type = str(type(module)) - if 'fused' in op_type: + if "fused" in op_type: return True else: return False @@ -119,10 +122,9 @@ def mse_metric_gap(self, fp32_tensor, dequantize_tensor): dequantize_max = np.max(dequantize_tensor) dequantize_min = np.min(dequantize_tensor) fp32_tensor = (fp32_tensor - fp32_min) / (fp32_max - fp32_min) - dequantize_tensor = (dequantize_tensor - dequantize_min) / \ - (dequantize_max - dequantize_min) + dequantize_tensor = (dequantize_tensor - dequantize_min) / (dequantize_max - dequantize_min) diff_tensor = fp32_tensor - dequantize_tensor - euclidean_dist = np.sum(diff_tensor ** 2) + euclidean_dist = np.sum(diff_tensor**2) return euclidean_dist / fp32_tensor.size def get_fused_mapping(self): @@ -139,7 +141,6 @@ def get_fused_mapping(self): if self.is_fused_module(child): for name, _ in child.named_children(): if op_name + "." + name + ".weight" in weights_info: ##TODO check if this is right - weight_to_op[op_name + "." 
+ name + ".weight"] = self._mapping_module_to_op(op_name) break else: @@ -173,7 +174,7 @@ def act_grad_hook(model, grad_input, grad_output): def _get_enable_act_grad_hook(self, name): def enable_act_grad_hook(model, inputs, outputs): input = inputs[0] - if input.requires_grad is False: # + if input.requires_grad is False: # input.requires_grad = True self.layer_acts[name] = input @@ -217,8 +218,9 @@ def reset_act_gradient_and_hooks(self): def get_params(self): """Get weight names and parameters.""" - weight_names = [n for n, p in self.model.named_parameters() if - p.requires_grad] ##remove bias and "bias" not in n + weight_names = [ + n for n, p in self.model.named_parameters() if p.requires_grad + ] ##remove bias and "bias" not in n params = [p for n, p in self.model.named_parameters() if p.requires_grad] ##keep bias self.weight_names = weight_names self.params = params @@ -236,7 +238,7 @@ def _forward_backward(self, model, data, create_graph=False, return_w_grad=True) if return_w_grad: gradients = [] for n, p in self.model.named_parameters(): - if p.grad != None and n in self.weight_names: + if p.grad is not None and n in self.weight_names: gradient = p.grad gradients.append(gradient + 0.0) ## add 0 to create a copy model.zero_grad() @@ -256,6 +258,7 @@ def _sample_rademacher_like_params(self): def sample(parameter): r = torch.randint_like(parameter, high=2, device=self.device) return r.masked_fill_(r == 0, -1) + return [sample(p) for p in self.params] def _sample_normal_like_params(self): @@ -264,7 +267,7 @@ def _sample_normal_like_params(self): def get_vtHv_weight(self, params, num_samples): """Get vtHv weight.""" v = self._sample_rademacher(params) - H_v=self._sample_normal_like_params() + H_v = self._sample_normal_like_params() cnt = 0 for step, data in enumerate(self.dataloader): batch_size = data[0].shape[0] @@ -276,7 +279,7 @@ def get_vtHv_weight(self, params, num_samples): break if cnt > 0: H_v = [item / cnt for item in H_v] - v_t_H_v = torch.stack([torch.sum(h_v * v_t)/h_v.size().numel() for (h_v, v_t) in zip(H_v, v)]) + v_t_H_v = torch.stack([torch.sum(h_v * v_t) / h_v.size().numel() for (h_v, v_t) in zip(H_v, v)]) return v_t_H_v def get_weight_traces(self, num_samples): @@ -296,7 +299,7 @@ def get_weight_traces(self, num_samples): layer_traces_estimate = torch.mean(torch.stack(layer_traces_per_iter), dim=0) model_trace = torch.sum(layer_traces_estimate) diff_ratio = abs(model_trace - prev_avg_model_trace) / (prev_avg_model_trace + self.eps) - logger.info("diff_ratio:"+str(diff_ratio)+"|"+str(self.tolerance)) + logger.info("diff_ratio:" + str(diff_ratio) + "|" + str(self.tolerance)) if diff_ratio < self.tolerance: ##TODO magic number and iter>10 logger.info("End of hessian computation!") break @@ -340,11 +343,11 @@ def get_act_traces(self, num_samples): prev_avg_model_trace = 0 act_traces_sums = None for i in range(bs): ##force the bs to be one - input = data[0][i:i + 1] - target = data[1][i:i + 1] + input = data[0][i : i + 1] + target = data[1][i : i + 1] self._forward_backward(self.unfused_model, (input, target), create_graph=True, return_w_grad=False) acts = [self.layer_acts[key] for key in self.layer_acts.keys()] - if act_traces_sums == None: + if act_traces_sums is None: act_traces_sums = [0] * len(acts) acts_grad = [self.layer_acts_grads[key] for key in self.layer_acts.keys()] ##same order with acts vt_H_v_sum_per_act = [0] * len(acts) @@ -355,13 +358,13 @@ def get_act_traces(self, num_samples): H_v = torch.autograd.grad(acts_grad, acts, v, only_inputs=True, 
retain_graph=True) vt_H_v = [torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)] - vt_H_v_sum_per_act = [vt_H_v_sum_per_act[index] + vt_H_v[index] for index, item in - enumerate(vt_H_v_sum_per_act)] + vt_H_v_sum_per_act = [ + vt_H_v_sum_per_act[index] + vt_H_v[index] for index, item in enumerate(vt_H_v_sum_per_act) + ] vt_H_v_mean_per_act = [item / (iter + 1) for item in vt_H_v_sum_per_act] current_model_act_trace = torch.mean(torch.stack(vt_H_v_mean_per_act)) - diff_ratio = abs(current_model_act_trace - prev_model_act_trace) / ( - prev_model_act_trace + self.eps) + diff_ratio = abs(current_model_act_trace - prev_model_act_trace) / (prev_model_act_trace + self.eps) if diff_ratio < self.tolerance and iter > 10: ##TODO magic number break # if iter == 50: ##TODO for debug @@ -461,17 +464,17 @@ def get_act_gap(self, fp32_model, q_model): act_gap[fp_i] = np.sum(activation_qnt_error) / activation_qnt_error.size return act_gap, mse_gap - def get_avg_traces(self, enable_act=True,num_sample=0): + def get_avg_traces(self, enable_act=True, num_sample=0): """Estimates average hessian trace for each parameter.""" - if num_sample==0: - num_samp=self._batch_size + if num_sample == 0: + num_samp = self._batch_size else: - num_samp=num_sample + num_samp = num_sample assert num_samp > 0 - logger.info("num_samp:"+str(num_samp)) + logger.info("num_samp:" + str(num_samp)) traces = {} weight_traces = self.get_weight_traces(num_samp) - traces['weight'] = weight_traces + traces["weight"] = weight_traces act_trace = {} if enable_act: act_gap, mse_gap = self.get_act_gap(self.model, self.q_model) @@ -479,14 +482,15 @@ def get_avg_traces(self, enable_act=True,num_sample=0): for i, j in zip(act_traces, mse_gap): # currently use mse to analysis act_trace[i] = float(act_traces[i]) + float(mse_gap[j]) # Tensor->float - traces['activation'] = act_traces + traces["activation"] = act_traces return traces ##copy from torch.quantization._numeric_suite def _find_match( - str_list: Union[Dict[str, Any], List[str]], key_str: str, - postfix: str, + str_list: Union[Dict[str, Any], List[str]], + key_str: str, + postfix: str, ) -> Optional[str]: split_str = key_str.split(".") if split_str[-1] == postfix: @@ -517,9 +521,7 @@ def _find_match( ##copy form torch.quantization._numeric_suite -def compare_weights( - float_dict: Dict[str, Any], quantized_dict: Dict[str, Any] -) -> Dict[str, Dict[str, torch.Tensor]]: +def compare_weights(float_dict: Dict[str, Any], quantized_dict: Dict[str, Any]) -> Dict[str, Dict[str, torch.Tensor]]: r"""Compare the weights of the float module with its corresponding quantized module. 
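Editor's note: get_vtHv_weight and get_weight_traces estimate each parameter's Hessian trace with Hutchinson's method: draw Rademacher vectors v, form the Hessian-vector product Hv by differentiating the gradients a second time, and average the inner product v*Hv, since E[v^T H v] = tr(H). A self-contained sketch on a tiny model (data and loss are made up):

import torch
import torch.nn as nn

model = nn.Linear(4, 2)
x, y = torch.randn(8, 4), torch.randint(0, 2, (8,))
loss = nn.functional.cross_entropy(model(x), y)

params = [p for p in model.parameters() if p.requires_grad]
grads = torch.autograd.grad(loss, params, create_graph=True)

def rademacher_like(p):
    r = torch.randint_like(p, high=2)      # values in {0, 1}
    return r.masked_fill_(r == 0, -1)      # map 0 -> -1, giving {-1, +1}

n_samples, traces = 64, [0.0 for _ in params]
for _ in range(n_samples):
    v = [rademacher_like(p) for p in params]
    Hv = torch.autograd.grad(grads, params, grad_outputs=v, retain_graph=True)
    traces = [t + (hv * vi).sum() / n_samples for t, hv, vi in zip(traces, Hv, v)]
# traces[i] now approximates the Hessian trace restricted to parameter i.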
Returns a dict with key corresponding to module names and each entry being @@ -576,13 +578,9 @@ def compare_weights( if float_weight_ih_key in float_dict and float_weight_hh_key in float_dict: weight_dict[key] = {} weight_dict[key]["float"] = float_dict[float_weight_ih_key] - weight_dict[key]["quantized"] = ( - quantized_dict[key].__getstate__()[0][4][0].__getstate__()[0][0] - ) + weight_dict[key]["quantized"] = quantized_dict[key].__getstate__()[0][4][0].__getstate__()[0][0] weight_dict[key]["float"] = float_dict[float_weight_hh_key] - weight_dict[key]["quantized"] = ( - quantized_dict[key].__getstate__()[0][4][1].__getstate__()[0][0] - ) + weight_dict[key]["quantized"] = quantized_dict[key].__getstate__()[0][4][1].__getstate__()[0][0] return weight_dict @@ -594,8 +592,8 @@ def hawq_top(fp32_model, q_model, dataloader, criterion, enable_act): orig_eval = False fp32_model.eval() ht = HessianTrace(fp32_model, dataloader=dataloader, q_model=q_model) - traces = ht.get_avg_traces(enable_act,num_sample=0) - op_to_traces = traces['weight'] + traces = ht.get_avg_traces(enable_act, num_sample=0) + op_to_traces = traces["weight"] q_model_state_dict = {} for key in q_model.state_dict().keys(): length = len("_model.") @@ -604,19 +602,19 @@ def hawq_top(fp32_model, q_model, dataloader, criterion, enable_act): weight_quant_loss = compare_weights(ht.model.state_dict(), q_model_state_dict) pertur_lst = {} for key in weight_quant_loss: - op_float_tensor = weight_quant_loss[key]['float'] - op_qnt_tensor = weight_quant_loss[key]['quantized'].dequantize() - diff_l2 = (torch.norm(op_float_tensor - op_qnt_tensor, p=2) ** 2) + op_float_tensor = weight_quant_loss[key]["float"] + op_qnt_tensor = weight_quant_loss[key]["quantized"].dequantize() + diff_l2 = torch.norm(op_float_tensor - op_qnt_tensor, p=2) ** 2 pertur_lst[key] = diff_l2 if enable_act: - act_to_traces = traces['activation'] + act_to_traces = traces["activation"] for trace_i, pertur_i, act_i in zip(op_to_traces.keys(), pertur_lst.keys(), act_to_traces.keys()): # Formula:Omig=Trace*L2+act_trace op_to_traces[trace_i] = pertur_lst[pertur_i] * op_to_traces[trace_i] + act_to_traces[act_i] else: for trace_i, pertur_i in zip(op_to_traces.keys(), pertur_lst.keys()): op_to_traces[trace_i] = op_to_traces[trace_i] # Formula:Omig=Trace*L2 - if orig_eval == False: + if orig_eval is False: fp32_model.train() return op_to_traces diff --git a/neural_compressor/adaptor/torch_utils/layer_wise_quant/modified_pickle.py b/neural_compressor/adaptor/torch_utils/layer_wise_quant/modified_pickle.py index d59e7fe6ca5..e7a013e8721 100644 --- a/neural_compressor/adaptor/torch_utils/layer_wise_quant/modified_pickle.py +++ b/neural_compressor/adaptor/torch_utils/layer_wise_quant/modified_pickle.py @@ -14,7 +14,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Create portable serialized representations of Python objects. See module copyreg for a mechanism for registering custom picklers. 
@@ -37,27 +36,26 @@ __version__ format_version compatible_formats - """ -from types import FunctionType -from copyreg import dispatch_table -from copyreg import _extension_registry, _inverted_registry, _extension_cache -from itertools import islice -from functools import partial +import codecs +import io +import re import sys -from sys import maxsize +from copyreg import _extension_cache, _extension_registry, _inverted_registry, dispatch_table +from functools import partial +from itertools import islice from struct import pack, unpack -import re -import io -import codecs +from sys import maxsize +from types import FunctionType + import _compat_pickle -__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler", - "Unpickler", "dump", "dumps", "load", "loads"] +__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler", "Unpickler", "dump", "dumps", "load", "loads"] try: from _pickle import PickleBuffer + __all__.append("PickleBuffer") _HAVE_PICKLE_BUFFER = True except ImportError: @@ -68,16 +66,17 @@ bytes_types = (bytes, bytearray) # These are purely informational; no code uses these. -format_version = "4.0" # File format version we write -compatible_formats = ["1.0", # Original protocol 0 - "1.1", # Protocol 0 with INST added - "1.2", # Original protocol 1 - "1.3", # Protocol 1 with BINFLOAT added - "2.0", # Protocol 2 - "3.0", # Protocol 3 - "4.0", # Protocol 4 - "5.0", # Protocol 5 - ] # Old format versions we can read +format_version = "4.0" # File format version we write +compatible_formats = [ + "1.0", # Original protocol 0 + "1.1", # Protocol 0 with INST added + "1.2", # Original protocol 1 + "1.3", # Protocol 1 with BINFLOAT added + "2.0", # Protocol 2 + "3.0", # Protocol 3 + "4.0", # Protocol 4 + "5.0", # Protocol 5 +] # Old format versions we can read # This is the highest protocol number we know how to read. HIGHEST_PROTOCOL = 5 @@ -87,17 +86,20 @@ # includes it. DEFAULT_PROTOCOL = 4 + class PickleError(Exception): """A common base class for the other pickling exceptions.""" + pass + class PicklingError(PickleError): """This exception is raised when an unpicklable object is passed to the - dump() method. + dump() method.""" - """ pass + class UnpicklingError(PickleError): """This exception is raised when there is a problem unpickling an object, such as a security violation. @@ -105,16 +107,18 @@ class UnpicklingError(PickleError): Note that other exceptions may also be raised during unpickling, including (but not necessarily limited to) AttributeError, EOFError, ImportError, and IndexError. - """ + pass + # An instance of _Stop is raised by Unpickler.load_stop() in response to # the STOP opcode, passing the object that is the result of unpickling. 
class _Stop(Exception): def __init__(self, value): self.value = value + # Jython has PyStringMap; it's a dict subclass with string keys try: from org.python.core import PyStringMap @@ -214,8 +218,7 @@ def __init__(self, value): __all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)]) -class _Framer: # pragma: no cover - +class _Framer: # pragma: no cover _FRAME_SIZE_MIN = 4 _FRAME_SIZE_TARGET = 64 * 1024 @@ -277,8 +280,7 @@ def write_large_bytes(self, header, payload): write(payload) -class _Unframer: # pragma: no cover - +class _Unframer: # pragma: no cover def __init__(self, file_read, file_readline, file_tell=None): self.file_read = file_read self.file_readline = file_readline @@ -293,8 +295,7 @@ def readinto(self, buf): buf[:] = self.file_read(n) return n if n < len(buf): - raise UnpicklingError( - "pickle exhausted before end of frame") + raise UnpicklingError("pickle exhausted before end of frame") return n else: n = len(buf) @@ -308,8 +309,7 @@ def read(self, n): self.current_frame = None return self.file_read(n) if len(data) < n: - raise UnpicklingError( - "pickle exhausted before end of frame") + raise UnpicklingError("pickle exhausted before end of frame") return data else: return self.file_read(n) @@ -320,55 +320,52 @@ def readline(self): if not data: self.current_frame = None return self.file_readline() - if data[-1] != b'\n'[0]: - raise UnpicklingError( - "pickle exhausted before end of frame") + if data[-1] != b"\n"[0]: + raise UnpicklingError("pickle exhausted before end of frame") return data else: return self.file_readline() def load_frame(self, frame_size): - if self.current_frame and self.current_frame.read() != b'': - raise UnpicklingError( - "beginning of a new frame before end of current frame") + if self.current_frame and self.current_frame.read() != b"": + raise UnpicklingError("beginning of a new frame before end of current frame") self.current_frame = io.BytesIO(self.file_read(frame_size)) # Tools used for pickling. -def _getattribute(obj, name): # pragma: no cover - for subpath in name.split('.'): - if subpath == '': - raise AttributeError("Can't get local attribute {!r} on {!r}" - .format(name, obj)) + +def _getattribute(obj, name): # pragma: no cover + for subpath in name.split("."): + if subpath == "": + raise AttributeError("Can't get local attribute {!r} on {!r}".format(name, obj)) try: parent = obj obj = getattr(obj, subpath) except AttributeError: - raise AttributeError("Can't get attribute {!r} on {!r}" - .format(name, obj)) from None + raise AttributeError("Can't get attribute {!r} on {!r}".format(name, obj)) from None return obj, parent -def whichmodule(obj, name): # pragma: no cover + +def whichmodule(obj, name): # pragma: no cover """Find the module an object belong to.""" - module_name = getattr(obj, '__module__', None) + module_name = getattr(obj, "__module__", None) if module_name is not None: return module_name # Protect the iteration by using a list copy of sys.modules against dynamic # modules that trigger imports of other modules upon calls to getattr. 
for module_name, module in sys.modules.copy().items(): - if (module_name == '__main__' - or module_name == '__mp_main__' # bpo-42406 - or module is None): + if module_name == "__main__" or module_name == "__mp_main__" or module is None: # bpo-42406 continue try: if _getattribute(module, name)[0] is obj: return module_name except AttributeError: pass - return '__main__' + return "__main__" + -def encode_long(x): # pragma: no cover +def encode_long(x): # pragma: no cover r"""Encode a long to a two's complement little-endian binary string. Note that 0 is a special case, returning an empty string, to save a byte in the LONG1 pickling context. @@ -390,15 +387,16 @@ def encode_long(x): # pragma: no cover >>> """ if x == 0: - return b'' + return b"" nbytes = (x.bit_length() >> 3) + 1 - result = x.to_bytes(nbytes, byteorder='little', signed=True) + result = x.to_bytes(nbytes, byteorder="little", signed=True) if x < 0 and nbytes > 1: - if result[-1] == 0xff and (result[-2] & 0x80) != 0: + if result[-1] == 0xFF and (result[-2] & 0x80) != 0: result = result[:-1] return result -def decode_long(data): # pragma: no cover + +def decode_long(data): # pragma: no cover r"""Decode a long from a two's complement little-endian binary string. >>> decode_long(b'') @@ -416,15 +414,14 @@ def decode_long(data): # pragma: no cover >>> decode_long(b"\x7f") 127 """ - return int.from_bytes(data, byteorder='little', signed=True) + return int.from_bytes(data, byteorder="little", signed=True) # Pickling machinery -class _Pickler: # pragma: no cover - def __init__(self, file, protocol=None, *, fix_imports=True, - buffer_callback=None): +class _Pickler: # pragma: no cover + def __init__(self, file, protocol=None, *, fix_imports=True, buffer_callback=None): """This takes a binary file for writing a pickle data stream. The optional *protocol* argument tells the pickler to use the @@ -495,8 +492,7 @@ def dump(self, obj): # Check whether Pickler was initialized correctly. This is # only needed to mimic the behavior of _pickle.Pickler.dump(). if not hasattr(self, "_file_write"): - raise PicklingError("Pickler.__init__() was not called by " - "%s.__init__()" % (self.__class__.__name__,)) + raise PicklingError("Pickler.__init__() was not called by " "%s.__init__()" % (self.__class__.__name__,)) if self.proto >= 2: self.write(PROTO + pack("= 4: @@ -537,7 +533,7 @@ def put(self, idx): else: return LONG_BINPUT + pack("= 2 and func_name == "__newobj_ex__": cls, args, kwargs = args if not hasattr(cls, "__new__"): - raise PicklingError("args[0] from {} args has no __new__" - .format(func_name)) + raise PicklingError("args[0] from {} args has no __new__".format(func_name)) if obj is not None and cls is not obj.__class__: - raise PicklingError("args[0] from {} args has the wrong class" - .format(func_name)) + raise PicklingError("args[0] from {} args has the wrong class".format(func_name)) if self.proto >= 4: save(cls) save(args) @@ -695,11 +685,9 @@ def save_reduce(self, func, args, state=None, listitems=None, # Python 2.2). 
cls = args[0] if not hasattr(cls, "__new__"): - raise PicklingError( - "args[0] from __newobj__ args has no __new__") + raise PicklingError("args[0] from __newobj__ args has no __new__") if obj is not None and cls is not obj.__class__: - raise PicklingError( - "args[0] from __newobj__ args has the wrong class") + raise PicklingError("args[0] from __newobj__ args has the wrong class") args = args[1:] save(cls) save(args) @@ -756,6 +744,7 @@ def save_reduce(self, func, args, state=None, listitems=None, def save_none(self, obj): self.write(NONE) + dispatch[type(None)] = save_none def save_bool(self, obj): @@ -763,6 +752,7 @@ def save_bool(self, obj): self.write(NEWTRUE if obj else NEWFALSE) else: self.write(TRUE if obj else FALSE) + dispatch[bool] = save_bool def save_long(self, obj): @@ -772,14 +762,14 @@ def save_long(self, obj): # case. # First one- and two-byte unsigned ints: if obj >= 0: - if obj <= 0xff: + if obj <= 0xFF: self.write(BININT1 + pack("= 2: @@ -790,37 +780,39 @@ def save_long(self, obj): else: self.write(LONG4 + pack("d', obj)) + self.write(BINFLOAT + pack(">d", obj)) else: - self.write(FLOAT + repr(obj).encode("ascii") + b'\n') + self.write(FLOAT + repr(obj).encode("ascii") + b"\n") + dispatch[float] = save_float def save_bytes(self, obj): if self.proto < 3: - if not obj: # bytes object is empty + if not obj: # bytes object is empty self.save_reduce(bytes, (), obj=obj) else: - self.save_reduce(codecs.encode, - (str(obj, 'latin1'), 'latin1'), obj=obj) + self.save_reduce(codecs.encode, (str(obj, "latin1"), "latin1"), obj=obj) return n = len(obj) - if n <= 0xff: + if n <= 0xFF: self.write(SHORT_BINBYTES + pack(" 0xffffffff and self.proto >= 4: + elif n > 0xFFFFFFFF and self.proto >= 4: self._write_large_bytes(BINBYTES8 + pack("= self.framer._FRAME_SIZE_TARGET: self._write_large_bytes(BINBYTES + pack("= 5") + raise PicklingError("PickleBuffer can only pickled with " "protocol >= 5") with obj.raw() as m: if not m.contiguous: - raise PicklingError("PickleBuffer can not be pickled when " - "pointing to a non-contiguous buffer") + raise PicklingError("PickleBuffer can not be pickled when " "pointing to a non-contiguous buffer") in_band = True if self._buffer_callback is not None: in_band = bool(self._buffer_callback(obj)) @@ -866,11 +858,11 @@ def save_picklebuffer(self, obj): def save_str(self, obj): if self.bin: - encoded = obj.encode('utf-8', 'surrogatepass') + encoded = obj.encode("utf-8", "surrogatepass") n = len(encoded) - if n <= 0xff and self.proto >= 4: + if n <= 0xFF and self.proto >= 4: self.write(SHORT_BINUNICODE + pack(" 0xffffffff and self.proto >= 4: + elif n > 0xFFFFFFFF and self.proto >= 4: self._write_large_bytes(BINUNICODE8 + pack("= self.framer._FRAME_SIZE_TARGET: self._write_large_bytes(BINUNICODE + pack("= 2: code = _extension_registry.get((module_name, name)) if code: assert code > 0 - if code <= 0xff: + if code <= 0xFF: write(EXT1 + pack("= 3. 
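The save_long and save_bytes hunks above pick a pickle opcode from the magnitude or length of the value: non-negative ints up to 0xFF are written as BININT1 with a single payload byte, ints up to 0xFFFF as BININT2, and so on. Since this vendored module mirrors the standard library's pure-Python pickler, the effect can be observed with stdlib pickle and pickletools; the sample values below are arbitrary:

    import pickle
    import pickletools

    for value in (200, 60_000, 10**20):
        data = pickle.dumps(value, protocol=4)
        print(value, "->", len(data), "bytes")
        pickletools.dis(data)  # the disassembly shows which integer opcode was chosen

The 0xFF and 0xFFFF thresholds in the hunks are exactly the one- and two-byte unsigned ranges, which is why the smaller opcodes apply only there.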
@@ -1115,8 +1105,7 @@ def save_global(self, obj, name=None): elif parent is not module: self.save_reduce(getattr, (parent, lastname)) elif self.proto >= 3: - write(GLOBAL + bytes(module_name, "utf-8") + b'\n' + - bytes(name, "utf-8") + b'\n') + write(GLOBAL + bytes(module_name, "utf-8") + b"\n" + bytes(name, "utf-8") + b"\n") else: if self.fix_imports: r_name_mapping = _compat_pickle.REVERSE_NAME_MAPPING @@ -1126,12 +1115,11 @@ def save_global(self, obj, name=None): elif module_name in r_import_mapping: module_name = r_import_mapping[module_name] try: - write(GLOBAL + bytes(module_name, "ascii") + b'\n' + - bytes(name, "ascii") + b'\n') + write(GLOBAL + bytes(module_name, "ascii") + b"\n" + bytes(name, "ascii") + b"\n") except UnicodeEncodeError: raise PicklingError( - "can't pickle global identifier '%s.%s' using " - "pickle protocol %i" % (module, name, self.proto)) from None + "can't pickle global identifier '%s.%s' using " "pickle protocol %i" % (module, name, self.proto) + ) from None self.memoize(obj) @@ -1151,10 +1139,8 @@ def save_type(self, obj): # Unpickling machinery -class _Unpickler: # pragma: no cover - - def __init__(self, file, *, fix_imports=True, - encoding="ASCII", errors="strict", buffers=None): +class _Unpickler: # pragma: no cover + def __init__(self, file, *, fix_imports=True, encoding="ASCII", errors="strict", buffers=None): """This takes a binary file for reading a pickle data stream. The protocol version of the pickle is detected automatically, so @@ -1210,8 +1196,9 @@ def load(self, tensor_name=None): # only needed to mimic the behavior of _pickle.Unpickler.dump(). if not hasattr(self, "_file_read"): - raise UnpicklingError("Unpickler.__init__() was not called by " - "%s.__init__()" % (self.__class__.__name__,)) + raise UnpicklingError( + "Unpickler.__init__() was not called by " "%s.__init__()" % (self.__class__.__name__,) + ) self.tensor_name = tensor_name self._unframer = _Unframer(self._file_read, self._file_readline) self.read = self._unframer.read @@ -1250,39 +1237,45 @@ def load_proto(self): if not 0 <= proto <= HIGHEST_PROTOCOL: raise ValueError("unsupported pickle protocol: %d" % proto) self.proto = proto + dispatch[PROTO[0]] = load_proto def load_frame(self): - frame_size, = unpack(' sys.maxsize: raise ValueError("frame size > sys.maxsize: %d" % frame_size) self._unframer.load_frame(frame_size) + dispatch[FRAME[0]] = load_frame def load_persid(self): try: pid = self.readline()[:-1].decode("ascii") except UnicodeDecodeError: - raise UnpicklingError( - "persistent IDs in protocol 0 must be ASCII strings") + raise UnpicklingError("persistent IDs in protocol 0 must be ASCII strings") self.append(self.persistent_load(pid)) + dispatch[PERSID[0]] = load_persid def load_binpersid(self): pid = self.stack.pop() self.append(self.persistent_load(pid)) + dispatch[BINPERSID[0]] = load_binpersid def load_none(self): self.append(None) + dispatch[NONE[0]] = load_none def load_false(self): self.append(False) + dispatch[NEWFALSE[0]] = load_false def load_true(self): self.append(True) + dispatch[NEWTRUE[0]] = load_true def load_int(self): @@ -1294,48 +1287,57 @@ def load_int(self): else: val = int(data, 0) self.append(val) + dispatch[INT[0]] = load_int def load_binint(self): - self.append(unpack('d', self.read(8))[0]) + self.append(unpack(">d", self.read(8))[0]) + dispatch[BINFLOAT[0]] = load_binfloat def _decode_string(self, value): @@ -1350,77 +1352,80 @@ def _decode_string(self, value): def load_string(self): data = self.readline()[:-1] # Strip outermost quotes - if 
len(data) >= 2 and data[0] == data[-1] and data[0] in b'"\'': + if len(data) >= 2 and data[0] == data[-1] and data[0] in b"\"'": data = data[1:-1] else: raise UnpicklingError("the STRING opcode argument must be quoted") self.append(self._decode_string(codecs.escape_decode(data)[0])) + dispatch[STRING[0]] = load_string def load_binstring(self): # Deprecated BINSTRING uses signed 32-bit length - len, = unpack(' maxsize: - raise UnpicklingError("BINBYTES exceeds system's maximum size " - "of %d bytes" % maxsize) + raise UnpicklingError("BINBYTES exceeds system's maximum size " "of %d bytes" % maxsize) self.append(self.read(len)) + dispatch[BINBYTES[0]] = load_binbytes def load_unicode(self): - self.append(str(self.readline()[:-1], 'raw-unicode-escape')) + self.append(str(self.readline()[:-1], "raw-unicode-escape")) + dispatch[UNICODE[0]] = load_unicode def load_binunicode(self): - len, = unpack(' maxsize: - raise UnpicklingError("BINUNICODE exceeds system's maximum size " - "of %d bytes" % maxsize) - self.append(str(self.read(len), 'utf-8', 'surrogatepass')) + raise UnpicklingError("BINUNICODE exceeds system's maximum size " "of %d bytes" % maxsize) + self.append(str(self.read(len), "utf-8", "surrogatepass")) + dispatch[BINUNICODE[0]] = load_binunicode def load_binunicode8(self): - len, = unpack(' maxsize: - raise UnpicklingError("BINUNICODE8 exceeds system's maximum size " - "of %d bytes" % maxsize) - self.append(str(self.read(len), 'utf-8', 'surrogatepass')) + raise UnpicklingError("BINUNICODE8 exceeds system's maximum size " "of %d bytes" % maxsize) + self.append(str(self.read(len), "utf-8", "surrogatepass")) + dispatch[BINUNICODE8[0]] = load_binunicode8 def load_binbytes8(self): - len, = unpack(' maxsize: - raise UnpicklingError("BINBYTES8 exceeds system's maximum size " - "of %d bytes" % maxsize) + raise UnpicklingError("BINBYTES8 exceeds system's maximum size " "of %d bytes" % maxsize) self.append(self.read(len)) + dispatch[BINBYTES8[0]] = load_binbytes8 def load_bytearray8(self): - len, = unpack(' maxsize: - raise UnpicklingError("BYTEARRAY8 exceeds system's maximum size " - "of %d bytes" % maxsize) + raise UnpicklingError("BYTEARRAY8 exceeds system's maximum size " "of %d bytes" % maxsize) b = bytearray(len) self.readinto(b) self.append(b) + dispatch[BYTEARRAY8[0]] = load_bytearray8 def load_next_buffer(self): if self._buffers is None: - raise UnpicklingError("pickle stream refers to out-of-band data " - "but no *buffers* argument was given") + raise UnpicklingError("pickle stream refers to out-of-band data " "but no *buffers* argument was given") try: buf = next(self._buffers) except StopIteration: raise UnpicklingError("not enough out-of-band buffers") self.append(buf) + dispatch[NEXT_BUFFER[0]] = load_next_buffer def load_readonly_buffer(self): @@ -1428,72 +1433,86 @@ def load_readonly_buffer(self): with memoryview(buf) as m: if not m.readonly: self.stack[-1] = m.toreadonly() + dispatch[READONLY_BUFFER[0]] = load_readonly_buffer def load_short_binstring(self): len = self.read(1)[0] data = self.read(len) self.append(self._decode_string(data)) + dispatch[SHORT_BINSTRING[0]] = load_short_binstring def load_short_binbytes(self): len = self.read(1)[0] self.append(self.read(len)) + dispatch[SHORT_BINBYTES[0]] = load_short_binbytes def load_short_binunicode(self): len = self.read(1)[0] - self.append(str(self.read(len), 'utf-8', 'surrogatepass')) + self.append(str(self.read(len), "utf-8", "surrogatepass")) + dispatch[SHORT_BINUNICODE[0]] = load_short_binunicode def load_tuple(self): items 
= self.pop_mark() self.append(tuple(items)) + dispatch[TUPLE[0]] = load_tuple def load_empty_tuple(self): self.append(()) + dispatch[EMPTY_TUPLE[0]] = load_empty_tuple def load_tuple1(self): self.stack[-1] = (self.stack[-1],) + dispatch[TUPLE1[0]] = load_tuple1 def load_tuple2(self): self.stack[-2:] = [(self.stack[-2], self.stack[-1])] + dispatch[TUPLE2[0]] = load_tuple2 def load_tuple3(self): self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])] + dispatch[TUPLE3[0]] = load_tuple3 def load_empty_list(self): self.append([]) + dispatch[EMPTY_LIST[0]] = load_empty_list def load_empty_dictionary(self): self.append({}) + dispatch[EMPTY_DICT[0]] = load_empty_dictionary def load_empty_set(self): self.append(set()) + dispatch[EMPTY_SET[0]] = load_empty_set def load_frozenset(self): items = self.pop_mark() self.append(frozenset(items)) + dispatch[FROZENSET[0]] = load_frozenset def load_list(self): items = self.pop_mark() self.append(items) + dispatch[LIST[0]] = load_list def load_dict(self): items = self.pop_mark() - d = {items[i]: items[i+1] - for i in range(0, len(items), 2)} + d = {items[i]: items[i + 1] for i in range(0, len(items), 2)} self.append(d) + dispatch[DICT[0]] = load_dict # INST and OBJ differ only in how they get a class object. It's not @@ -1502,13 +1521,11 @@ def load_dict(self): # klass is the class to instantiate, and k points to the topmost mark # object, following which are the arguments for klass.__init__. def _instantiate(self, klass, args): - if (args or not isinstance(klass, type) or - hasattr(klass, "__getinitargs__")): + if args or not isinstance(klass, type) or hasattr(klass, "__getinitargs__"): try: value = klass(*args) except TypeError as err: - raise TypeError("in constructor for %s: %s" % - (klass.__name__, str(err)), sys.exc_info()[2]) + raise TypeError("in constructor for %s: %s" % (klass.__name__, str(err)), sys.exc_info()[2]) else: value = klass.__new__(klass) self.append(value) @@ -1518,6 +1535,7 @@ def load_inst(self): name = self.readline()[:-1].decode("ascii") klass = self.find_class(module, name) self._instantiate(klass, self.pop_mark()) + dispatch[INST[0]] = load_inst def load_obj(self): @@ -1525,6 +1543,7 @@ def load_obj(self): args = self.pop_mark() cls = args.pop(0) self._instantiate(cls, args) + dispatch[OBJ[0]] = load_obj def load_newobj(self): @@ -1532,6 +1551,7 @@ def load_newobj(self): cls = self.stack.pop() obj = cls.__new__(cls, *args) self.append(obj) + dispatch[NEWOBJ[0]] = load_newobj def load_newobj_ex(self): @@ -1540,6 +1560,7 @@ def load_newobj_ex(self): cls = self.stack.pop() obj = cls.__new__(cls, *args, **kwargs) self.append(obj) + dispatch[NEWOBJ_EX[0]] = load_newobj_ex def load_global(self): @@ -1547,6 +1568,7 @@ def load_global(self): name = self.readline()[:-1].decode("utf-8") klass = self.find_class(module, name) self.append(klass) + dispatch[GLOBAL[0]] = load_global def load_stack_global(self): @@ -1555,21 +1577,25 @@ def load_stack_global(self): if type(name) is not str or type(module) is not str: raise UnpicklingError("STACK_GLOBAL requires str") self.append(self.find_class(module, name)) + dispatch[STACK_GLOBAL[0]] = load_stack_global def load_ext1(self): code = self.read(1)[0] self.get_extension(code) + dispatch[EXT1[0]] = load_ext1 def load_ext2(self): - code, = unpack(' maxsize: raise ValueError("negative LONG_BINPUT argument") self.memo[i] = self.stack[-1] + dispatch[LONG_BINPUT[0]] = load_long_binput def load_memoize(self): memo = self.memo memo[len(memo)] = self.stack[-1] + dispatch[MEMOIZE[0]] = 
load_memoize def load_append(self): @@ -1675,6 +1712,7 @@ def load_append(self): value = stack.pop() list = stack[-1] list.append(value) + dispatch[APPEND[0]] = load_append def load_appends(self): @@ -1693,6 +1731,7 @@ def load_appends(self): append = list_obj.append for item in items: append(item) + dispatch[APPENDS[0]] = load_appends def load_setitem(self): @@ -1701,6 +1740,7 @@ def load_setitem(self): key = stack.pop() dict = stack[-1] dict[key] = value + dispatch[SETITEM[0]] = load_setitem def load_setitems(self): @@ -1708,6 +1748,7 @@ def load_setitems(self): dict = self.stack[-1] for i in range(0, len(items), 2): dict[items[i]] = items[i + 1] + dispatch[SETITEMS[0]] = load_setitems def load_additems(self): @@ -1719,6 +1760,7 @@ def load_additems(self): add = set_obj.add for item in items: add(item) + dispatch[ADDITEMS[0]] = load_additems def load_build(self): @@ -1743,69 +1785,68 @@ def load_build(self): if slotstate: for k, v in slotstate.items(): setattr(inst, k, v) + dispatch[BUILD[0]] = load_build def load_mark(self): self.metastack.append(self.stack) self.stack = [] self.append = self.stack.append + dispatch[MARK[0]] = load_mark def load_stop(self): value = self.stack.pop() raise _Stop(value) + dispatch[STOP[0]] = load_stop # Shorthands -def _dump(obj, file, protocol=None, *, fix_imports=True, buffer_callback=None): # pragma: no cover - _Pickler(file, protocol, fix_imports=fix_imports, - buffer_callback=buffer_callback).dump(obj) -def _dumps(obj, protocol=None, *, fix_imports=True, buffer_callback=None): # pragma: no cover +def _dump(obj, file, protocol=None, *, fix_imports=True, buffer_callback=None): # pragma: no cover + _Pickler(file, protocol, fix_imports=fix_imports, buffer_callback=buffer_callback).dump(obj) + + +def _dumps(obj, protocol=None, *, fix_imports=True, buffer_callback=None): # pragma: no cover f = io.BytesIO() - _Pickler(f, protocol, fix_imports=fix_imports, - buffer_callback=buffer_callback).dump(obj) + _Pickler(f, protocol, fix_imports=fix_imports, buffer_callback=buffer_callback).dump(obj) res = f.getvalue() assert isinstance(res, bytes_types) return res -def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict", - buffers=None): # pragma: no cover - return _Unpickler(file, fix_imports=fix_imports, buffers=buffers, - encoding=encoding, errors=errors).load() -def _loads(s, *, fix_imports=True, encoding="ASCII", errors="strict", - buffers=None): # pragma: no cover +def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict", buffers=None): # pragma: no cover + return _Unpickler(file, fix_imports=fix_imports, buffers=buffers, encoding=encoding, errors=errors).load() + + +def _loads(s, *, fix_imports=True, encoding="ASCII", errors="strict", buffers=None): # pragma: no cover if isinstance(s, str): raise TypeError("Can't load pickle from unicode string") file = io.BytesIO(s) - return _Unpickler(file, fix_imports=fix_imports, buffers=buffers, - encoding=encoding, errors=errors).load() + return _Unpickler(file, fix_imports=fix_imports, buffers=buffers, encoding=encoding, errors=errors).load() + # Use the faster _pickle if possible Pickler, Unpickler = _Pickler, _Unpickler dump, dumps, load, loads = _dump, _dumps, _load, _loads + # Doctest -def _test(): # pragma: no cover +def _test(): # pragma: no cover import doctest + return doctest.testmod() + if __name__ == "__main__": import argparse - parser = argparse.ArgumentParser( - description='display contents of the pickle files') - parser.add_argument( - 'pickle_file', 
type=argparse.FileType('br'), - nargs='*', help='the pickle file') - parser.add_argument( - '-t', '--test', action='store_true', - help='run self-test suite') - parser.add_argument( - '-v', action='store_true', - help='run verbosely; only affects self-test run') + + parser = argparse.ArgumentParser(description="display contents of the pickle files") + parser.add_argument("pickle_file", type=argparse.FileType("br"), nargs="*", help="the pickle file") + parser.add_argument("-t", "--test", action="store_true", help="run self-test suite") + parser.add_argument("-v", action="store_true", help="run verbosely; only affects self-test run") args = parser.parse_args() if args.test: _test() @@ -1814,6 +1855,7 @@ def _test(): # pragma: no cover parser.print_help() else: import pprint + for f in args.pickle_file: obj = load(f) pprint.pprint(obj) diff --git a/neural_compressor/adaptor/torch_utils/layer_wise_quant/quantize.py b/neural_compressor/adaptor/torch_utils/layer_wise_quant/quantize.py index 837d9fbef3c..1746ad82140 100644 --- a/neural_compressor/adaptor/torch_utils/layer_wise_quant/quantize.py +++ b/neural_compressor/adaptor/torch_utils/layer_wise_quant/quantize.py @@ -17,23 +17,30 @@ """Layer wise quantization.""" import os import shutil -from copy import deepcopy -from tqdm import tqdm from collections import UserDict +from copy import deepcopy -from .utils import torch, logger -from ..model_wrapper import QDQLayer -from torch.quantization import prepare, convert from accelerate.utils import set_module_tensor_to_device -from .utils import (_get_path, get_named_children, update_module, - load_tensor, register_weight_hooks, - load_tensor_from_shard, clean_module_weight) +from torch.quantization import convert, prepare +from tqdm import tqdm from neural_compressor.config import default_workspace +from ..model_wrapper import QDQLayer from ..smooth_quant import TorchSmoothQuant +from .utils import ( + _get_path, + clean_module_weight, + get_named_children, + load_tensor, + load_tensor_from_shard, + logger, + register_weight_hooks, + torch, + update_module, +) -TMP_DIR = os.path.join(default_workspace, 'layer_wise_quant_tmp_dir') +TMP_DIR = os.path.join(default_workspace, "layer_wise_quant_tmp_dir") def mk_tmp_dir(): @@ -44,24 +51,22 @@ def del_tmp_dir(): shutil.rmtree(TMP_DIR) -def forward_wrapper(model, input, device='cpu'): +def forward_wrapper(model, input, device="cpu"): if isinstance(input, dict) or isinstance(input, UserDict): - if device == 'cpu': + if device == "cpu": output = model(**input) else: # pragma: no cover for inp in input.keys(): - input[inp] = input[inp].to(device) \ - if isinstance(input[inp], torch.Tensor) else input[inp] + input[inp] = input[inp].to(device) if isinstance(input[inp], torch.Tensor) else input[inp] output = model(**input) elif isinstance(input, list) or isinstance(input, tuple): # pragma: no cover - if device == 'cpu': + if device == "cpu": output = model(*input) else: # pragma: no cover - input = [inp.to(device) if isinstance(inp, torch.Tensor) - else inp for inp in input] # pylint: disable=E1133 + input = [inp.to(device) if isinstance(inp, torch.Tensor) else inp for inp in input] # pylint: disable=E1133 output = model(*input) else: # pragma: no cover - if device == 'cpu' or not isinstance(input, torch.Tensor): + if device == "cpu" or not isinstance(input, torch.Tensor): output = model(input) else: # pragma: no cover input = input.to(device) # pylint: disable=no-member @@ -71,10 +76,21 @@ def forward_wrapper(model, input, device='cpu'): class LayerWiseQuant: 
"""Layer wise quantization. + Layer-by-layer quantize the model, in order to save memomery. """ - def __init__(self, q_model, pretrained_model_name_or_path, op_cfgs, calib_data, - smooth_quant=False, output_dir=None, device='cpu', alpha=0.5): + + def __init__( + self, + q_model, + pretrained_model_name_or_path, + op_cfgs, + calib_data, + smooth_quant=False, + output_dir=None, + device="cpu", + alpha=0.5, + ): """Init LayerWiseQuant.""" # self.q_model = load_shell(pretrained_model_name_or_path, cls) self.q_model = q_model @@ -92,8 +108,7 @@ def __init__(self, q_model, pretrained_model_name_or_path, op_cfgs, calib_data, self.smooth_quant = smooth_quant self.alpha = float(alpha) - assert self.alpha > 0 and self.alpha < 1, \ - f'alpha should be in range (0, 1), but got {alpha}' + assert self.alpha > 0 and self.alpha < 1, f"alpha should be in range (0, 1), but got {alpha}" if smooth_quant: self.init_sq() @@ -101,12 +116,13 @@ def init_sq(self): handles = register_weight_hooks(self.fp32_model, self.path) # traced_model = torch.jit.trace(self.fp32_model, torch.randint(0, 100, (1, 3))) - op_types = ['Linear'] + op_types = ["Linear"] sq = TorchSmoothQuant(self.fp32_model, self.calib_data) - sq._check_need_calibration(self.alpha, percentile=99.999, op_types=['Linear', 'Conv2d'], - scales_per_op=False, calib_iter=100) + sq._check_need_calibration( + self.alpha, percentile=99.999, op_types=["Linear", "Conv2d"], scales_per_op=False, calib_iter=100 + ) absorb_to_layer = sq._get_all_layer_names() - assert absorb_to_layer is not None, "if you are using huggingface model," + assert absorb_to_layer is not None, "if you are using huggingface model," "you could set torchscript to True when loading the model or set the return_dict to False" self.absorb_layers = [] self.scale_weight_layers = [] @@ -122,7 +138,7 @@ def init_sq(self): self.absorb_layers.append(k) self.scale_weight_layers += v else: - logger.warning('Cannot trace the model, smooth quant may be not used.') + logger.warning("Cannot trace the model, smooth quant may be not used.") self._remove_hooks(handles) def quantize(self, clean_weight=True): @@ -138,9 +154,9 @@ def quantize(self, clean_weight=True): def _layer_wise_quantize(self, calib_data): for idx, (name, module) in enumerate(self.modules): - qconfig = self.op_cfgs.module_name_qconfigs.get(name + '.module') + qconfig = self.op_cfgs.module_name_qconfigs.get(name + ".module") # if qconfig: - if module.__class__.__name__ in ['Linear']: + if module.__class__.__name__ in ["Linear"]: module = QDQLayer(module) self.modules[idx] = (name, module) update_module(self.q_model, name, module) @@ -155,11 +171,11 @@ def _layer_wise_quantize(self, calib_data): self.q_model(calib_data) else: try: - pbar = tqdm(enumerate(calib_data), total=len(calib_data), desc='layer_wise quant') + pbar = tqdm(enumerate(calib_data), total=len(calib_data), desc="layer_wise quant") for idx, input in pbar: forward_wrapper(self.q_model, input, self.device) except Exception: # pragma: no cover - pbar = tqdm(enumerate(calib_data), total=len(calib_data), desc='layer_wise quant') + pbar = tqdm(enumerate(calib_data), total=len(calib_data), desc="layer_wise quant") for idx, (input, label) in pbar: forward_wrapper(self.q_model, input, self.device) self._remove_hooks() @@ -170,11 +186,11 @@ def _save(self, path=None, clean_weight=True): # pragma: no cover for name, module in self.modules: self._load_state_dict(name, TMP_DIR) new_module = convert(module, inplace=False) - torch.save(new_module, os.path.join(path, f'{name}.pt')) + 
torch.save(new_module, os.path.join(path, f"{name}.pt")) del new_module if clean_weight: clean_module_weight(module) - torch.save(self.fp32_model, os.path.join(path, 'model_arch.pt')) + torch.save(self.fp32_model, os.path.join(path, "model_arch.pt")) def _convert(self, clean_weight=False): for name, module in self.modules: @@ -186,50 +202,51 @@ def _convert(self, clean_weight=False): def _regist_hooks(self): def forward_pre_hook(name): def load_value(param_name): - if 'lm_head' in param_name and getattr(self.q_model.config, "tie_word_embeddings", True): + if "lm_head" in param_name and getattr(self.q_model.config, "tie_word_embeddings", True): input_embeddings = self.q_model.get_input_embeddings() for name, module in self.modules: if module == input_embeddings: - param_name = name + '.' + param_name.split('.')[-1] + param_name = name + "." + param_name.split(".")[-1] prefix = self.q_model.base_model_prefix - if 'pytorch_model.bin.index.json' in os.listdir(self.path): + if "pytorch_model.bin.index.json" in os.listdir(self.path): value = load_tensor_from_shard(self.path, param_name, prefix) else: - value = load_tensor(os.path.join(self.path, 'pytorch_model.bin'), param_name, prefix) + value = load_tensor(os.path.join(self.path, "pytorch_model.bin"), param_name, prefix) return value def hook(module, input): - file_path = os.path.join(TMP_DIR, f'{name}.pt') + file_path = os.path.join(TMP_DIR, f"{name}.pt") if os.path.exists(file_path): self._load_state_dict(name, TMP_DIR) else: if isinstance(module, QDQLayer): for n, _ in module.module.named_parameters(): - value = load_value(name + '.' + n) - set_module_tensor_to_device(self.q_model, name + '.module.' + n, self.device, value) + value = load_value(name + "." + n) + set_module_tensor_to_device(self.q_model, name + ".module." + n, self.device, value) if self.smooth_quant: self._adjust_parameters(module, name, input[0]) prepare(module, inplace=True) else: for n, p in module.named_parameters(): - param_name = name + '.' + n + param_name = name + "." 
+ n value = load_value(param_name) # from hf transformers.modeling_utils._load_state_dict_into_meta_model set_module_tensor_to_device(self.q_model, param_name, self.device, value) + return hook - + def forward_hook(name): def hook(module, input, output): - file_path = os.path.join(TMP_DIR, f'{name}.pt') + file_path = os.path.join(TMP_DIR, f"{name}.pt") if os.path.exists(TMP_DIR): torch.save(module.state_dict(), file_path) clean_module_weight(module) + return hook for name, module in self.modules: self._handle[name] = [module.register_forward_pre_hook(forward_pre_hook(name))] self._handle[name] += [module.register_forward_hook(forward_hook(name))] - def _remove_hooks(self, handles=None): if handles is None: @@ -255,9 +272,9 @@ def _adjust_parameters(self, module, name, input): self.quantized_layers[name] = 1.0 / scale if name in self.scale_weight_layers: module.module.weight = torch.nn.Parameter(weights * scale) - - def _load_state_dict(self, module_name, weight_path): - file_path = os.path.join(weight_path, f'{module_name}.pt') + + def _load_state_dict(self, module_name, weight_path): + file_path = os.path.join(weight_path, f"{module_name}.pt") state_dict = torch.load(file_path) for n, p in state_dict.items(): - set_module_tensor_to_device(self.q_model, f'{module_name}.{n}', self.device, p) + set_module_tensor_to_device(self.q_model, f"{module_name}.{n}", self.device, p) diff --git a/neural_compressor/adaptor/torch_utils/layer_wise_quant/torch_load.py b/neural_compressor/adaptor/torch_utils/layer_wise_quant/torch_load.py index a1f9c0b3548..293e1aee040 100644 --- a/neural_compressor/adaptor/torch_utils/layer_wise_quant/torch_load.py +++ b/neural_compressor/adaptor/torch_utils/layer_wise_quant/torch_load.py @@ -19,52 +19,50 @@ import io import os import warnings -from typing import Any, BinaryIO, Callable, Dict, Optional, Union, IO - -from .utils import torch -from neural_compressor.adaptor.torch_utils.layer_wise_quant import modified_pickle as pickle +from typing import IO, Any, BinaryIO, Callable, Dict, Optional, Union from torch.serialization import ( - _get_restore_location, StorageType, + _get_restore_location, + _is_torchscript_zip, + _is_zipfile, _maybe_decode_ascii, _open_file_like, _open_zipfile_reader, - _is_torchscript_zip, - _is_zipfile ) +from neural_compressor.adaptor.torch_utils.layer_wise_quant import modified_pickle as pickle + +from .utils import torch + FILE_LIKE = Union[str, os.PathLike, BinaryIO, IO[bytes]] -MAP_LOCATION = Optional[Union[Callable[[torch.Tensor, str], torch.Tensor], - torch.device, str, Dict[str, str]]] +MAP_LOCATION = Optional[Union[Callable[[torch.Tensor, str], torch.Tensor], torch.device, str, Dict[str, str]]] -def _load(zip_file, tensor_name, prefix, map_location, pickle_module, - pickle_file='data.pkl', **pickle_load_args): +def _load(zip_file, tensor_name, prefix, map_location, pickle_module, pickle_file="data.pkl", **pickle_load_args): restore_location = _get_restore_location(map_location) loaded_storages = {} from packaging.version import Version - torch_version = torch.__version__.split('+')[0] + + torch_version = torch.__version__.split("+")[0] version = Version(torch_version) def load_tensor(dtype, numel, key, location): - name = f'data/{key}' + name = f"data/{key}" - if version.release < Version("2.0.0").release: # pragma: no cover + if version.release < Version("2.0.0").release: # pragma: no cover storage = zip_file.get_storage_from_record(name, numel, torch.UntypedStorage).storage().untyped() - typed_storage = torch.storage.TypedStorage( - 
wrap_storage=restore_location(storage, location), - dtype=dtype) + typed_storage = torch.storage.TypedStorage(wrap_storage=restore_location(storage, location), dtype=dtype) loaded_storages[key] = typed_storage else: - storage = zip_file.get_storage_from_record(name, numel, torch.UntypedStorage)\ - ._typed_storage()._untyped_storage + storage = ( + zip_file.get_storage_from_record(name, numel, torch.UntypedStorage)._typed_storage()._untyped_storage + ) typed_storage = torch.storage.TypedStorage( - wrap_storage=restore_location(storage, location), - dtype=dtype, - _internal=True) + wrap_storage=restore_location(storage, location), dtype=dtype, _internal=True + ) if typed_storage._data_ptr() != 0: loaded_storages[key] = typed_storage @@ -93,16 +91,14 @@ def load_tensor(dtype, numel, key, location): # return typed_storage - load_module_mapping: Dict[str, str] = { - 'torch.tensor': 'torch._tensor' - } + load_module_mapping: Dict[str, str] = {"torch.tensor": "torch._tensor"} # Need to subclass Unpickler instead of directly monkey-patching the find_class method # because it's marked readonly in pickle. # The type: ignore is because mypy can't statically determine the type of this class. class UnpicklerWrapper(pickle_module.Unpickler): # type: ignore[name-defined] def find_class(self, mod_name, name): - if type(name) is str and 'Storage' in name: + if type(name) is str and "Storage" in name: try: return StorageType(name) except KeyError: # pragma: no cover @@ -115,10 +111,11 @@ def persistent_load(self, saved_id): typename = _maybe_decode_ascii(saved_id[0]) data = saved_id[1:] - assert typename == 'storage', \ - f"Unknown typename for persistent_load, expected 'storage' but got '{typename}'" + assert ( + typename == "storage" + ), f"Unknown typename for persistent_load, expected 'storage' but got '{typename}'" storage_type, key, location, numel = data - if storage_type is torch.UntypedStorage: # pragma: no cover + if storage_type is torch.UntypedStorage: # pragma: no cover dtype = torch.uint8 else: dtype = storage_type.dtype @@ -128,9 +125,9 @@ def persistent_load(self, saved_id): else: name_list = [self.tensor_name] if prefix: - no_prefix_name = self.tensor_name.split('.') + no_prefix_name = self.tensor_name.split(".") no_prefix_name.remove(prefix) - no_prefix_name = '.'.join(no_prefix_name) + no_prefix_name = ".".join(no_prefix_name) name_list.append(no_prefix_name) if self.tensor_name and self.metastack[-1][-2] not in name_list: # typed_storage = None @@ -143,6 +140,7 @@ def persistent_load(self, saved_id): typed_storage = load_tensor(dtype, nbytes, key, _maybe_decode_ascii(location)) return typed_storage + # Load the data (which may in turn use `persistent_load` to load tensors) data_file = io.BytesIO(zip_file.get_record(pickle_file)) @@ -162,14 +160,14 @@ def load( pickle_module: Any = None, *, weights_only: bool = False, - **pickle_load_args: Any + **pickle_load_args: Any, ) -> Any: # Reference: https://github.com/pytorch/pytorch/issues/54354 # The first line of this docstring overrides the one Sphinx generates for the # documentation. We need it so that Sphinx doesn't leak `pickle`s path from # the build environment (e.g. 
` None: self.module = module self.dequant = torch.ao.quantization.DeQuantStub() self.input_scale = input_scale - + def forward(self, X): if self.input_scale is not None: X = torch.mul(X, self.input_scale) @@ -86,10 +90,12 @@ def forward(self, X): def _wrap_lwq_layer(model, lwq_layers, op_cfgs): + from torch.quantization import convert, prepare + from .layer_wise_quant.utils import get_module, update_module - from torch.quantization import prepare, convert + for name, input_scale in lwq_layers.items(): - qconifg = op_cfgs.module_name_qconfigs.get(name + '.module') + qconifg = op_cfgs.module_name_qconfigs.get(name + ".module") module = get_module(model, name) new_model = QDQLayer(module, input_scale) new_model.qconfig = qconifg @@ -102,15 +108,15 @@ def _wrap_lwq_layer(model, lwq_layers, op_cfgs): class SQLinearWrapper(torch.nn.Module): def __init__(self, module, input_scale, input_minmax, alpha=0.5, dtype=torch.quint8): super().__init__() - self.register_buffer('input_scale', input_scale) + self.register_buffer("input_scale", input_scale) self.alpha = alpha self.dtype = dtype # calculate and only save scale, zero_point to avoid memory usage self.scale, self.zero_point = self._calculate_qparams(input_scale, input_minmax, dtype) - self.add_module('sq_linear', module) + self.add_module("sq_linear", module) self._update_sq_linear() self.ipex = False # a flag used for ipex inference - + @property def weight(self): return self.sq_linear.weight @@ -141,6 +147,7 @@ def _calculate_qparams(self, input_scale, input_minmax, dtype=torch.quint8): def _get_weight_scale(self): # get weight scale and zero_point from torch.ao.quantization.observer import default_per_channel_weight_observer + obs = default_per_channel_weight_observer() obs(self.sq_linear.weight) scale, _ = obs.calculate_qparams() @@ -160,12 +167,13 @@ def _recover_sq_linear(self): def _wrapper_sq_linear(tmp_model, input_scale_dict): - """Help function to generate a fake SmoothQuant model for loading weights""" + """Help function to generate a fake SmoothQuant model for loading weights.""" + class SQLinearWrapper(torch.nn.Module): def __init__(self, module, input_scale): super().__init__() - self.register_buffer('input_scale', input_scale) - self.add_module('sq_linear', module) + self.register_buffer("input_scale", input_scale) + self.add_module("sq_linear", module) def forward(self, X): X = torch.mul(X, self.input_scale) @@ -174,6 +182,7 @@ def forward(self, X): module_name_list = input_scale_dict.keys() from .smooth_quant import get_module, set_module + for name in module_name_list: module = get_module(tmp_model, name) input_scale = input_scale_dict[name] @@ -183,8 +192,9 @@ def forward(self, X): def _wrapper_qdq_linear(tmp_model, module_name_list=[]): - """Help function to generate a fake QDQ model for loading weights""" + """Help function to generate a fake QDQ model for loading weights.""" from .smooth_quant import get_module, set_module + for name in module_name_list: module = get_module(tmp_model, name) new_module = QDQLinear(module) @@ -193,14 +203,26 @@ def _wrapper_qdq_linear(tmp_model, module_name_list=[]): class WeightOnlyLinear(torch.nn.Module): - def __init__(self, in_features, out_features, bits, groupsize, - dtype='int', zp=False, bias=False, scale_dtype=torch.float32, - compression_dtype=torch.int32, compression_dim=1, - gptq_perm=False, device='cpu'): + def __init__( + self, + in_features, + out_features, + bits, + groupsize, + dtype="int", + zp=False, + bias=False, + scale_dtype=torch.float32, + 
compression_dtype=torch.int32, + compression_dim=1, + gptq_perm=False, + device="cpu", + ): super().__init__() self.dtype = dtype - if 'int' not in self.dtype: # for nf4, fp4 + if "int" not in self.dtype: # for nf4, fp4 from neural_compressor.adaptor.torch_utils.weight_only import FLOAT_MAPPING, INT_MAPPING + float_list = FLOAT_MAPPING[self.dtype] int_list = INT_MAPPING[self.dtype] self.int2float_mapping = {} @@ -212,78 +234,76 @@ def __init__(self, in_features, out_features, bits, groupsize, self.bits = bits self.groupsize = groupsize if groupsize != -1 else in_features self.compression_dim = compression_dim - assert compression_dtype in [torch.int8, torch.int16, torch.int32, torch.int64], \ - "Only support torch.int8|16|32|64 as compressed dtype." + assert compression_dtype in [ + torch.int8, + torch.int16, + torch.int32, + torch.int64, + ], "Only support torch.int8|16|32|64 as compressed dtype." dtype_bits_mapping = {torch.int8: 8, torch.int16: 16, torch.int32: 32, torch.int64: 64} self.compress_bits = dtype_bits_mapping[compression_dtype] self.n_pack = self.compress_bits // self.bits self.compressed_dtype = compression_dtype self.float_type = scale_dtype # K is input channel, N is output channel - assert compression_dim in [0, 1], "Only support 0 or 1 as compression dimension, " +\ - "0 is output channel, 1 is input channel." + assert compression_dim in [0, 1], ( + "Only support 0 or 1 as compression dimension, " + "0 is output channel, 1 is input channel." + ) self.register_buffer( - 'scale', + "scale", torch.zeros( - (out_features, math.ceil(in_features / self.groupsize)), + (out_features, math.ceil(in_features / self.groupsize)), dtype=self.float_type, - ).to(device) + ).to(device), ) if compression_dim == 1: self.register_buffer( - 'packed_weight', + "packed_weight", torch.zeros( - (out_features, math.ceil(in_features / self.n_pack)), + (out_features, math.ceil(in_features / self.n_pack)), dtype=self.compressed_dtype, - ).to(device) + ).to(device), ) if zp: self.register_buffer( - 'packed_zp', + "packed_zp", torch.zeros( - (self.out_features, math.ceil(self.in_features / self.groupsize / self.n_pack)), + (self.out_features, math.ceil(self.in_features / self.groupsize / self.n_pack)), dtype=self.compressed_dtype, - ).to(device) + ).to(device), ) else: self.register_buffer( - 'packed_weight', + "packed_weight", torch.zeros( - (math.ceil(out_features / self.n_pack), in_features), + (math.ceil(out_features / self.n_pack), in_features), dtype=self.compressed_dtype, - ).to(device) + ).to(device), ) if zp: self.register_buffer( - 'packed_zp', + "packed_zp", torch.zeros( - ( - math.ceil(self.out_features / self.n_pack), - math.ceil(self.in_features / self.groupsize) - ), + (math.ceil(self.out_features / self.n_pack), math.ceil(self.in_features / self.groupsize)), dtype=self.compressed_dtype, - ).to(device) + ).to(device), ) if bias: - self.register_buffer( - 'bias', torch.zeros(self.out_features, dtype=self.float_type).to(device) - ) + self.register_buffer("bias", torch.zeros(self.out_features, dtype=self.float_type).to(device)) else: self.bias = None if gptq_perm: - self.register_buffer( - 'gptq_perm', torch.zeros(in_features, dtype=torch.int32).to(device) - ) + self.register_buffer("gptq_perm", torch.zeros(in_features, dtype=torch.int32).to(device)) else: self.gptq_perm = None def pack(self, int_weight, scale, zp, bias, gptq_perm=None): int_weight = int_weight.to(self.device) if bias is not None: - assert hasattr(self, 'bias'), "bias is not set when initializing." 
+ assert hasattr(self, "bias"), "bias is not set when initializing." self.bias = bias.type(self.float_type).to(self.device) if gptq_perm is not None: - assert hasattr(self, 'gptq_perm'), "gptq_perm is not set when initializing." + assert hasattr(self, "gptq_perm"), "gptq_perm is not set when initializing." self.gptq_perm = gptq_perm.type(torch.int32).to(self.device) assert scale.shape == self.scale.shape, "Scale shape is mismatched." self.scale = scale.type(self.float_type).to(self.device) @@ -299,7 +319,7 @@ def pack(self, int_weight, scale, zp, bias, gptq_perm=None): for j in range(target_shape[1]): start = self.n_pack * j end = self.n_pack * (j + 1) - tmp = int_weight[:, start: end].type(self.compressed_dtype) + tmp = int_weight[:, start:end].type(self.compressed_dtype) for e in range(tmp.shape[1]): tmp[:, e] &= mask tmp[:, e] = tmp[:, e] << (self.bits * e) @@ -312,12 +332,12 @@ def pack(self, int_weight, scale, zp, bias, gptq_perm=None): if self.compression_dim == 0: zp = zp.T self.packed_zp = self.packed_zp.T - assert hasattr(self, 'packed_zp'), "zp is not set when initializing." + assert hasattr(self, "packed_zp"), "zp is not set when initializing." target_shape = self.packed_zp.shape for j in range(target_shape[1]): start = self.n_pack * j end = self.n_pack * (j + 1) - tmp = zp[:, start: end].type(self.compressed_dtype) + tmp = zp[:, start:end].type(self.compressed_dtype) for e in range(tmp.shape[1]): tmp[:, e] &= mask tmp[:, e] = tmp[:, e] << (self.bits * e) @@ -329,7 +349,7 @@ def recover(self): logger.debug(f"Recovering {self} weight") device = self.scale.device mask = torch.tensor(2**self.bits - 1, dtype=self.compressed_dtype).to(device) - if hasattr(self, 'packed_zp'): + if hasattr(self, "packed_zp"): weight_dtype = torch.uint8 else: weight_dtype = torch.int8 @@ -350,18 +370,18 @@ def recover(self): tmp = tmp << (self.compress_bits - self.bits * (e + 1)) tmp = tmp >> self.compress_bits - self.bits if weight_dtype == torch.uint8: - tmp &= mask # remove sign bit + tmp &= mask # remove sign bit weight[:, index] = tmp.type(weight_dtype) if self.compression_dim == 0: weight = weight.T - if 'int' not in self.dtype: + if "int" not in self.dtype: new_weight = torch.zeros(self.out_features, self.in_features).to(device) for k, v in self.int2float_mapping.items(): new_weight += torch.where(weight == k, v, 0) weight = new_weight # unpack zero_point - if hasattr(self, 'packed_zp'): - zp_dtype = self.compressed_dtype # to avoid overflow when weight-zp + if hasattr(self, "packed_zp"): + zp_dtype = self.compressed_dtype # to avoid overflow when weight-zp zp = torch.zeros(self.scale.shape, dtype=zp_dtype).to(device) packed_zp = self.packed_zp if self.compression_dim == 0: @@ -382,9 +402,9 @@ def recover(self): if self.compression_dim == 0: zp = zp.T # recover fp32 weight with int_weight, scale, and zero_point - left_element = self.in_features % self.groupsize + left_element = self.in_features % self.groupsize if left_element != 0: - split_index = self.in_features // self.groupsize * self.groupsize + split_index = self.in_features // self.groupsize * self.groupsize weight1 = weight[:, :-split_index].reshape(-1, self.groupsize) scale1 = self.scale[:, :-1].reshape(-1, 1) zp1 = zp[:, :-1].reshape(-1, 1) @@ -392,7 +412,7 @@ def recover(self): weight2 = weight[:, -split_index:] scale2 = self.scale[:, -1:] zp2 = zp[:, -1].reshape(-1, 1) - weight2 = ((weight2 - zp2) * scale2) + weight2 = (weight2 - zp2) * scale2 fp32_weight = torch.cat((weight1, weight2), dim=1) else: weight = weight.reshape(-1, 
self.groupsize) @@ -401,15 +421,15 @@ def recover(self): fp32_weight = ((weight - zp) * scale).reshape(self.out_features, -1) else: # recover fp32 weight with int_weight, scale - left_element = self.in_features % self.groupsize + left_element = self.in_features % self.groupsize if left_element != 0: - split_index = self.in_features // self.groupsize * self.groupsize + split_index = self.in_features // self.groupsize * self.groupsize weight1 = weight[:, :split_index].reshape(-1, self.groupsize) scale1 = self.scale[:, :-1].reshape(-1, 1) weight1 = (weight1 * scale1).reshape(self.out_features, -1) weight2 = weight[:, split_index:] scale2 = self.scale[:, -1:] - weight2 = (weight2 * scale2) + weight2 = weight2 * scale2 fp32_weight = torch.cat((weight1, weight2), dim=1) else: weight = weight.reshape(-1, self.groupsize) @@ -422,7 +442,7 @@ def recover(self): def forward(self, input): if level == DEBUG: - if not hasattr(self, 'weight'): + if not hasattr(self, "weight"): self.weight = self.recover() input = input.type(self.weight.dtype) logger.debug(f"Calculating {self}") @@ -433,20 +453,17 @@ def forward(self, input): return F.linear(input, weight, self.bias) def extra_repr(self) -> str: - return 'in_features={}, out_features={}, bits={}, group_size={}, bias={}'.format( + return "in_features={}, out_features={}, bits={}, group_size={}, bias={}".format( self.in_features, self.out_features, self.bits, self.groupsize, self.bias is not None ) class FakeAffineTensorQuantFunction(Function): - """Fake version of affine quantization - """ + """Fake version of affine quantization.""" @staticmethod def forward(ctx, inputs, num_bits=4, group_size=1024, scheme="asym"): - """ - - As it will be only applied on activation with per tensor granularity, broadcast is not needed. + """As it will be only applied on activation with per tensor granularity, broadcast is not needed. Args: ctx: Pytorch convention. 
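The WeightOnlyLinear pack()/recover() hunks above store low-bit integer weights by shifting several values into one wider integer word and OR-ing them together, then shifting and masking to get them back out. A minimal, framework-free sketch of the same shift-and-mask idea, here for unsigned 4-bit values and a single 32-bit word (variable names and data are illustrative):

    bits, compress_bits = 4, 32        # int4 values packed into an int32 word
    n_pack = compress_bits // bits     # 8 values per word, like self.n_pack above
    mask = (1 << bits) - 1             # 0b1111

    vals = [3, 7, 0, 15, 9, 1, 4, 12]  # toy unsigned int4 weights

    # pack: shift each value into its slot and OR the slots together
    word = 0
    for e, v in enumerate(vals):
        word |= (v & mask) << (bits * e)

    # recover: shift each slot back down and mask off the neighbouring values
    unpacked = [(word >> (bits * e)) & mask for e in range(n_pack)]
    assert unpacked == vals

The real recover() in the hunk additionally handles signed values (shifting left and then arithmetically right to sign-extend), per-group scales and zero points, and packing along either the input or the output channel dimension.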
@@ -474,18 +491,14 @@ def backward(ctx, grad_outputs): class TEQLinearFakeQuant(torch.nn.Module): - """ - wrapper quantization linear - """ + """Wrapper quantization linear.""" def __init__(self, orig_layer, alpha=None, num_bits=4, group_size=-1, scheme="asym"): - """ - A forward hook to linear module + """A forward hook to linear module :param orig_layer: the original module :param alpha: trainable alpha/scale :param num_bits: quantization level - :param group_size: for fine-grained quantization - """ + :param group_size: for fine-grained quantization.""" super(TEQLinearFakeQuant, self).__init__() self.orig_layer = orig_layer self.alpha = alpha @@ -501,8 +514,7 @@ def forward(self, x): x = x / alpha.view(shape) weight = self.orig_layer.weight weight = weight * alpha.unsqueeze(dim=0) - weight_q = FakeAffineTensorQuantFunction().apply(weight, self.num_bits, - self.group_size, self.scheme) + weight_q = FakeAffineTensorQuantFunction().apply(weight, self.num_bits, self.group_size, self.scheme) return F.linear(x, weight_q, self.orig_layer.bias) @@ -510,16 +522,14 @@ class MulLinear(torch.nn.Module): """Linear wrapper to apply scale to input.""" def __init__(self, module, input_scale=None): - """ - A forward hook to save input max of a module + """A forward hook to save input max of a module :param module: the linear module - :param input_scale: scale for input - """ + :param input_scale: scale for input.""" super().__init__() if input_scale is None: input_scale = torch.empty(module.in_features) - self.register_buffer('input_scale', input_scale) - self.add_module('linear', module) + self.register_buffer("input_scale", input_scale) + self.add_module("linear", module) @property def weight(self): @@ -545,4 +555,3 @@ def _recover_linear(self): scale = self.input_scale.view(1, self.input_scale.shape[0]) with torch.no_grad(): self.linear.weight *= scale - diff --git a/neural_compressor/adaptor/torch_utils/pattern_detector.py b/neural_compressor/adaptor/torch_utils/pattern_detector.py index f04c8f4a7e9..211f1f0c151 100644 --- a/neural_compressor/adaptor/torch_utils/pattern_detector.py +++ b/neural_compressor/adaptor/torch_utils/pattern_detector.py @@ -17,6 +17,7 @@ """Block detector for Transformer-based model.""" from ...utils.utility import LazyImport + torch = LazyImport("torch") from typing import Dict, List, Union @@ -25,13 +26,13 @@ BLOCK_PATTERNS = [ # [['OP_TYPE1', NUM_OPS], ['OP_TYPE2', NUM_OPS], ...] - [['Linear', 4], ['Linear', 4]], # TODO add model name - [['Linear', 2], ['Linear', 2]], # TODO add model name - [['Conv1D', 2], ['Conv1D', 2]], # GPT-2 - [['Linear', 4], ['Linear', 3]], # Llama - [['Linear', 4], ['Linear', 2]], # T5-Encoder, OPT - [['Linear', 4], ['Linear', 1], ['Linear', 1]], # Bert - [['Linear', 4], ['Linear', 4], ['Linear', 2]], # T5-Decoder + [["Linear", 4], ["Linear", 4]], # TODO add model name + [["Linear", 2], ["Linear", 2]], # TODO add model name + [["Conv1D", 2], ["Conv1D", 2]], # GPT-2 + [["Linear", 4], ["Linear", 3]], # Llama + [["Linear", 4], ["Linear", 2]], # T5-Encoder, OPT + [["Linear", 4], ["Linear", 1], ["Linear", 1]], # Bert + [["Linear", 4], ["Linear", 4], ["Linear", 2]], # T5-Decoder ] @@ -50,8 +51,7 @@ def __init__(self, model: torch.nn.Module, pattern_lst: List[List[Union[str, int self.pos_info = None def detect_block(self) -> Dict[str, List[List[str]]]: - """ - Traverse the model definition and return the attention blocks and ffn blocks. + """Traverse the model definition and return the attention blocks and ffn blocks. 
Returns: @@ -76,7 +76,7 @@ def detect_block(self) -> Dict[str, List[List[str]]]: return blocks @staticmethod - def traverse_model(model, prefix="", depth=1, result=None, key = 0): + def traverse_model(model, prefix="", depth=1, result=None, key=0): """Traverse the pytorch model according to its hierarchical structure. Args: @@ -86,22 +86,23 @@ def traverse_model(model, prefix="", depth=1, result=None, key = 0): result: depth and its included ops. Defaults to {0: {}}. key: current root key. Defaults to 0. """ - module_lst =list(model.named_children()) + module_lst = list(model.named_children()) if len(module_lst) == 0: # layer name: 'encoder.layer.7.attention.self.query' # model repr: Linear(in_features=768, out_features=768, bias=True) # class name: 'Linear' result[key] = (prefix, model, model.__class__.__name__) for i, (name, sub_module) in enumerate(module_lst, 1): - indent = " "*depth - new_name = prefix + '.' + name if prefix != "" else name + indent = " " * depth + new_name = prefix + "." + name if prefix != "" else name model_type = sub_module.__class__.__name__ - logger.debug( f"Depth: [{depth}]" + indent + f"[{model_type}]{ new_name}") + logger.debug(f"Depth: [{depth}]" + indent + f"[{model_type}]{ new_name}") sub_key = (depth, i, model_type) if sub_key not in result[key]: result[key][sub_key] = dict() - TransformerBasedModelBlockPatternDetector.traverse_model(sub_module, prefix=new_name, \ - depth=depth+1, result=result[key], key = sub_key) + TransformerBasedModelBlockPatternDetector.traverse_model( + sub_module, prefix=new_name, depth=depth + 1, result=result[key], key=sub_key + ) @staticmethod def _search_pattern(pos_info: Dict, pattern: List[List[Union[str, int]]]) -> List[List[str]]: @@ -150,6 +151,7 @@ def _search_pattern(pos_info: Dict, pattern: List[List[Union[str, int]]]) -> Lis def _group_block(detect_result): """Collect attention and ffn blocks from detect result.""" import itertools + ffn_block_lst = [] attention_block_lst = [] for block_lst, pattern in detect_result: diff --git a/neural_compressor/adaptor/torch_utils/smooth_quant.py b/neural_compressor/adaptor/torch_utils/smooth_quant.py index 08d3be8d0cc..0a722302d94 100644 --- a/neural_compressor/adaptor/torch_utils/smooth_quant.py +++ b/neural_compressor/adaptor/torch_utils/smooth_quant.py @@ -18,22 +18,25 @@ import copy import json + try: from neural_compressor.utils.utility import LazyImport - torch = LazyImport('torch') + + torch = LazyImport("torch") from ...utils import logger except: - import torch import logging + + import torch + logger = logging.getLogger() from collections import UserDict, defaultdict -def move_input_to_device(input, device=torch.device('cpu')): +def move_input_to_device(input, device=torch.device("cpu")): if isinstance(input, dict) or isinstance(input, UserDict): for inp in input.keys(): - input[inp] = input[inp].to(device) \ - if isinstance(input[inp], torch.Tensor) else input[inp] + input[inp] = input[inp].to(device) if isinstance(input[inp], torch.Tensor) else input[inp] elif isinstance(input, list) or isinstance(input, tuple): input_res, prev_size = [], None @@ -46,8 +49,7 @@ def move_input_to_device(input, device=torch.device('cpu')): if torch.tensor(inp).size == prev_size: input_res.append(inp) else: - input_res.append(inp.to(device) \ - if isinstance(inp, torch.Tensor) else inp) + input_res.append(inp.to(device) if isinstance(inp, torch.Tensor) else inp) prev_size = torch.tensor(inp).size() input = input_res else: @@ -56,7 +58,7 @@ def move_input_to_device(input, 
device=torch.device('cpu')): ##TODO potential bug, data type -def forward_wrapper(model, input, device=torch.device('cpu')): +def forward_wrapper(model, input, device=torch.device("cpu")): try: input = move_input_to_device(input, device) except: @@ -97,24 +99,24 @@ def model_forward_per_sample(model, sample, device): return output -def quant_dequant_w(m, num_bits=8, scheme='sym'): +def quant_dequant_w(m, num_bits=8, scheme="sym"): eps = torch.finfo(torch.float32).eps if isinstance(m, torch.nn.Linear): x = m.weight tmp = torch.zeros(torch.max(x, dim=1).values.size()) - if scheme == 'sym': - q_min, q_max = -2. ** (num_bits - 1), 2. ** (num_bits - 1) - 1. + if scheme == "sym": + q_min, q_max = -(2.0 ** (num_bits - 1)), 2.0 ** (num_bits - 1) - 1.0 x_max = torch.max(torch.abs(x), dim=1).values scale = x_max / (float(q_max - q_min) / 2) else: - q_min, q_max = 0, 2. ** num_bits - 1. + q_min, q_max = 0, 2.0**num_bits - 1.0 x_max = torch.maximum(torch.max(x, dim=1).values, tmp) x_min = torch.minimum(torch.min(x, dim=1).values, tmp) - scale = (x_max - x_min) / (2 ** num_bits - 1) + scale = (x_max - x_min) / (2**num_bits - 1) scale = torch.clip(scale, min=eps) - if scheme == 'sym': + if scheme == "sym": bias = 0 else: bias = torch.round(0 - (torch.min(x, dim=1).values) / scale) @@ -128,17 +130,17 @@ def quant_dequant_w(m, num_bits=8, scheme='sym'): x = torch.permute(x, (0, 2, 3, 1)) x = x.reshape(-1, x.shape[-1]) tmp = torch.zeros(torch.max(x, dim=0).values.size()) - if scheme == 'sym': - q_min, q_max = -2. ** (num_bits - 1), 2. ** (num_bits - 1) - 1. + if scheme == "sym": + q_min, q_max = -(2.0 ** (num_bits - 1)), 2.0 ** (num_bits - 1) - 1.0 x_max = torch.max(torch.abs(x), dim=0).values scale = x_max / (2 ** (num_bits - 1) - 1) else: - q_min, q_max = 0, 2. ** num_bits - 1. + q_min, q_max = 0, 2.0**num_bits - 1.0 x_max = torch.maximum(torch.max(x, dim=0).values, tmp) x_min = torch.minimum(torch.min(x, dim=0).values, tmp) - scale = (x_max - x_min) / (2 ** num_bits - 1) + scale = (x_max - x_min) / (2**num_bits - 1) scale = torch.clip(scale, min=eps) - if scheme == 'sym': + if scheme == "sym": bias = 0 else: bias = torch.round(0 - (torch.min(x, dim=0).values) / scale) @@ -157,14 +159,14 @@ def quant_dequant_w(m, num_bits=8, scheme='sym'): def quant_dequant_x(x, min_x=None, max_x=None, num_bits=8): eps = torch.finfo(torch.float32).eps - q_min, q_max = 0, 2. ** num_bits - 1. - if max_x == None or min_x == None: + q_min, q_max = 0, 2.0**num_bits - 1.0 + if max_x is None or min_x is None: max_x = torch.max(x) min_x = torch.min(x) else: max_x = torch.max(max_x) min_x = torch.min(min_x) - scale = (max_x - min_x) / (2 ** num_bits - 1) + scale = (max_x - min_x) / (2**num_bits - 1) scale = torch.clip(scale, min=eps) bias = torch.round((0 - min_x) / scale) q_x = torch.round(x / scale + bias) @@ -173,13 +175,13 @@ def quant_dequant_x(x, min_x=None, max_x=None, num_bits=8): def get_module(model, key): - """Get module from model by key name + """Get module from model by key name. Args: model (torch.nn.Module): original model key (str): module name to be replaced """ - attrs = key.split('.') + attrs = key.split(".") module = model for attr in attrs: try: @@ -191,14 +193,14 @@ def get_module(model, key): def set_module(model, key, new_module): - """Set new module into model by key name + """Set new module into model by key name. 
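As a standalone illustration of the asymmetric fake quant-dequant round trip that quant_dequant_x performs, using the scale and zero-point formulas visible above (the final clamp/dequantize step falls outside the hunk, so it is filled in here with the standard formulation):

import torch


def fake_quant_dequant_asym(x, num_bits=8):
    eps = torch.finfo(torch.float32).eps
    q_min, q_max = 0.0, 2.0**num_bits - 1.0
    scale = torch.clip((x.max() - x.min()) / (2**num_bits - 1), min=eps)
    bias = torch.round((0 - x.min()) / scale)                  # zero point
    q_x = torch.clamp(torch.round(x / scale + bias), q_min, q_max)
    return scale * (q_x - bias)                                # dequantized approximation of x


x = torch.randn(4, 8)
print((x - fake_quant_dequant_asym(x)).abs().max())            # error bounded by roughly scale / 2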
Args: model (torch.nn.Module): original model key (str): module name to be replaced new_module (torch.nn.Module): new module to be inserted """ - attrs = key.split('.') + attrs = key.split(".") module = model for attr in attrs[:-1]: try: @@ -223,7 +225,6 @@ def cal_scale(input_max, weights, alpha, scale_type="orig"): return scale - class WrapperLayer(torch.nn.Module): def __init__(self, layer, input_min, input_max, save_q_input=False): super(WrapperLayer, self).__init__() @@ -250,11 +251,11 @@ def update_scale(self, input_scale, weight_scale): ##TODO better tradeoff performance and memory, currently it's too slow def q_dq_forward(self, x, input_scale, weight_scale): layer_copy = copy.deepcopy(self.orig_layer) - if weight_scale != None: + if weight_scale is not None: layer_copy.weight *= weight_scale q_dq_weight = quant_dequant_w(layer_copy) layer_copy.weight.data.copy_(q_dq_weight) - if input_scale == None: + if input_scale is None: x = quant_dequant_x(x, self.input_min, self.input_max) else: x = input_scale * x @@ -276,12 +277,12 @@ def forward(self, x): class TorchSmoothQuant: - """ - Fake input channel quantization, for more details please refer to + """Fake input channel quantization, for more details please refer to [1] SmoothQuant: Accurate and Efficient Post-Training Quantization for Large Language Models [2] SPIQ: Data-Free Per-Channel Static Input Quantization Currently, we only handle the layers whose smooth scale could be absorbed, we will support other layers later. + We only support inplace mode which means the model weights will be changed, you can call recover function to recover the weights if needed """ @@ -309,32 +310,28 @@ def __init__(self, model, dataloader, example_inputs=None, q_func=None, traced_m self.input_mins = {} self.input_maxes_abs = {} self.traced_model = traced_model - if self.traced_model == None: + if self.traced_model is None: self.traced_model = self.model self.weight_scale_info = {} self.absorb_scales_info = {} self.insert_mul = False self.allow_absorb = True self.record_max_info = False - self.max_value_info = {} # to record max values for alpha tune + self.max_value_info = {} # to record max values for alpha tune self.self_absorb_layers = {} self.absorb_to_layer = {} self.adjust_alpha_space = False def _get_device(self): - """ - Get the model device - :return:Model device - """ + """Get the model device + :return:Model device.""" for _, p in self.model.named_parameters(): return p.data.device, p.data.dtype def _save_input_pc_hook(self, name, percentile=100): - """ - A forward hook to save input max of a module + """A forward hook to save input max of a module :param name: the module name - :return: A hook function - """ + :return: A hook function.""" def save_input_hook(module, inputs, outputs): if name not in self.input_maxes.keys(): @@ -399,12 +396,9 @@ def _add_min_max_observer(self, modules, percentile=100): hook_handle = modules[key].register_forward_hook(hook_func) self.hook_handles.append(hook_handle) - def _remove_observer(self): - """ - remove the observer from the model - :return: - """ + """Remove the observer from the model + :return:""" for hook_handle in self.hook_handles: hook_handle.remove() @@ -429,12 +423,10 @@ def _calibrate(self, absorb_to_layer, calib_iter, percentile, save_input_output= return self.input_maxes_abs def _dump_min_max(self, calib_iter=100): - """ - Dump min max per channel information, the min max value will be saved in input_maxes attribute + """Dump min max per channel information, the min max value will be saved 
in input_maxes attribute :param calibration_method: only support min_max currently :param calib_iter: Sample size for calibration - :return: - """ + :return:""" if self.q_func: self.q_func(self.model) else: @@ -450,13 +442,10 @@ def _dump_min_max(self, calib_iter=100): self.input_mins[key] = torch.min(min_val, dim=0)[0] self.input_maxes_abs[key] = torch.max(torch.stack(self.input_maxes_abs[key], dim=0), dim=0)[0] - def _reshape_in_channel_to_last(self, layer_name): - """ - Move the input channel to the last dim + """Move the input channel to the last dim :param layer_name: Layer name - :return: The reshaped weight - """ + :return: The reshaped weight.""" layer = get_module(self.model, layer_name) if layer.__class__.__name__ == "WrapperLayer": layer = layer.orig_layer @@ -468,12 +457,10 @@ def _reshape_in_channel_to_last(self, layer_name): return weight def _reshape_scale_for_weight(self, layer, scale): - """ - reshape the scale for weight input channel, depthwise output channel + """Reshape the scale for weight input channel, depthwise output channel :param layer: torch module :param scale: orig scale - :return: reshaped scale - """ + :return: reshaped scale.""" if hasattr(layer, "orig_layer"): layer = layer.orig_layer if isinstance(layer, torch.nn.Conv2d) and layer.groups > 1: ##only depthwise conv could hit here @@ -488,9 +475,9 @@ def _reshape_scale_for_weight(self, layer, scale): return scale def _reshape_scale_for_input(self, layer, scale): - """ - reshape the scale for input feature in channel + """Reshape the scale for input feature in channel :param layer: + :param scale: :return: """ @@ -505,17 +492,16 @@ def _reshape_scale_for_input(self, layer, scale): return scale def _scale_layer_weight(self, layer_name, scale, alpha=0.5, input_minmax=None): ##input channel - """ - Scale the layer weights at input channel, depthwise conv output channel + """Scale the layer weights at input channel, depthwise conv output channel :param layer_name: The layer name :param scale: The scale to be multiplied :param alpha: alpha for SQLinearWrapper :param input_minmax: input_minmax for SQLinearWrapper - :return: - """ + :return:""" layer = get_module(self.model, layer_name) if self.insert_mul: from .model_wrapper import SQLinearWrapper + layer = get_module(self.model, layer_name) if isinstance(layer, SQLinearWrapper): layer._recover_sq_linear() @@ -529,33 +515,32 @@ def _scale_layer_weight(self, layer_name, scale, alpha=0.5, input_minmax=None): return scale def _absorb_scales(self, layer_name, scale): ##output channel - """ - Absorb the scale to the layer at output channel + """Absorb the scale to the layer at output channel :param layer_name: The module name :param scale: The scale to be absorbed :param alpha_key: The alpha passed to SQLinearWrapper - :return: - """ + :return:""" if self.insert_mul or not self.allow_absorb: return # absorb is updated in SQLinearWrapper in def _scale_layer_weight ##if self.allow absorb layer = get_module(self.model, layer_name) - if layer.__class__.__name__ == 'WrapperLayer': + if layer.__class__.__name__ == "WrapperLayer": layer = layer.orig_layer - if isinstance(layer, torch.nn.BatchNorm2d) or isinstance(layer, torch.nn.GroupNorm) or \ - isinstance(layer, torch.nn.InstanceNorm2d): + if ( + isinstance(layer, torch.nn.BatchNorm2d) + or isinstance(layer, torch.nn.GroupNorm) + or isinstance(layer, torch.nn.InstanceNorm2d) + ): if layer.affine: layer.weight *= scale layer.bias *= scale else: layer.affine = True weight = torch.ones(layer.num_features, device=self.device, 
dtype=self.dtype) * scale - layer.weight = torch.nn.Parameter( - weight, requires_grad=False) + layer.weight = torch.nn.Parameter(weight, requires_grad=False) bias = torch.zeros(layer.num_features, device=self.device, dtype=self.dtype) - layer.bias = torch.nn.Parameter(bias, requires_grad=False - ) + layer.bias = torch.nn.Parameter(bias, requires_grad=False) elif isinstance(layer, torch.nn.LayerNorm): if layer.elementwise_affine: layer.weight *= scale @@ -563,45 +548,42 @@ def _absorb_scales(self, layer_name, scale): ##output channel else: layer.elementwise_affine = True weight = torch.ones(layer.num_features, device=self.device, dtype=self.dtype) * scale - layer.weight = torch.nn.Parameter( - torch.ones(weight, requires_grad=False)) + layer.weight = torch.nn.Parameter(torch.ones(weight, requires_grad=False)) bias = torch.zeros(layer.num_features, device=self.device, dtype=self.dtype) - layer.bias = torch.nn.Parameter( - bias, requires_grad=False) + layer.bias = torch.nn.Parameter(bias, requires_grad=False) elif isinstance(layer, torch.nn.Conv2d): ##the order could not be changed - if hasattr(layer, "bias") and (layer.bias != None): + if hasattr(layer, "bias") and (layer.bias is not None): layer.bias *= scale scale = scale.view(scale.shape[0], 1, 1, 1) layer.weight *= scale elif isinstance(layer, torch.nn.Linear): - if hasattr(layer, "bias") and (layer.bias != None): + if hasattr(layer, "bias") and (layer.bias is not None): layer.bias *= scale scale = scale.view(scale.shape[0], 1) layer.weight *= scale - elif layer.__class__.__name__ == "LlamaRMSNorm" \ - or layer.__class__.__name__ == "T5LayerNorm": ##quite tricky + elif layer.__class__.__name__ == "LlamaRMSNorm" or layer.__class__.__name__ == "T5LayerNorm": ##quite tricky layer.weight *= scale else: - logger.warning(f"found unsupported layer {type(layer)}, try to multiply scale to " - f"weight and bias directly, this may introduce accuracy issue, please have a check ") - if hasattr(layer, "weight") and layer.weight != None: + logger.warning( + f"found unsupported layer {type(layer)}, try to multiply scale to " + f"weight and bias directly, this may introduce accuracy issue, please have a check " + ) + if hasattr(layer, "weight") and layer.weight is not None: layer.weight *= scale - if hasattr(layer, "bias") and layer.bias != None: + if hasattr(layer, "bias") and layer.bias is not None: layer.bias *= scale def _cal_scales(self, absorb_to_layer, input_maxes, alpha=0.5, tuning=False): - """ - cal the adjsut scales - :param absorb_to_layer: A dict mapping absorb layer to smooth quantized layer - :param input_maxes: The channel-wise input max info for layers - :param alpha: Alpha value to balance the quantization difficulty of activation and weight, a float of a dict - :return: - """ + """Cal the adjsut scales + :param absorb_to_layer: A dict mapping absorb layer to smooth quantized layer + :param input_maxes: The channel-wise input max info for layers + :param alpha: Alpha value to balance the quantization difficulty of activation and weight, a float of a dict + :return:""" absorb_to_input_maxes = {} for key in absorb_to_layer.keys(): layer_name = absorb_to_layer[key][0] @@ -623,17 +605,17 @@ def _cal_scales(self, absorb_to_layer, input_maxes, alpha=0.5, tuning=False): for layer_name in layer_names: weight = self._reshape_in_channel_to_last(layer_name) weights.append(weight) - + weight_max_per_channel = torch.max(torch.abs(torch.cat(weights, dim=0)), dim=0)[0] if self.record_max_info and not tuning: # the input of layers with same absorb 
layer is the same. input_minmax = [self.input_mins[layer_names[0]], self.input_maxes[layer_names[0]]] self.max_value_info[key] = {} - self.max_value_info[key]['alpha'] = alpha_tmp - self.max_value_info[key]['input_minmax'] = input_minmax - self.max_value_info[key]['weight_max'] = weight_max_per_channel - self.max_value_info[key]['absorbed_layer'] = layer_names - continue + self.max_value_info[key]["alpha"] = alpha_tmp + self.max_value_info[key]["input_minmax"] = input_minmax + self.max_value_info[key]["weight_max"] = weight_max_per_channel + self.max_value_info[key]["absorbed_layer"] = layer_names + continue scale = cal_scale(input_max, weights, alpha_tmp) absorb_scales_info[key] = 1.0 / scale @@ -645,18 +627,14 @@ def _cal_scales(self, absorb_to_layer, input_maxes, alpha=0.5, tuning=False): return absorb_scales_info, weight_scales_info def _adjust_parameters(self, absorb_to_layer, input_maxes, alpha=0.5, tuning=False): - """ - adjust the weights and biases + """Adjust the weights and biases :param absorb_to_layer: A dict mapping absorb layer to smooth quantized layer :param input_maxes: The channel-wise input max info for layers :param alpha: Alpha value to balance the quantization difficulty of activation and weight, a float of a dict - :return: - """ - absorb_scales_info, weight_scales_info = self._cal_scales( - absorb_to_layer, input_maxes, alpha, tuning - ) + :return:""" + absorb_scales_info, weight_scales_info = self._cal_scales(absorb_to_layer, input_maxes, alpha, tuning) if not absorb_scales_info or not weight_scales_info: - return weight_scales_info, absorb_scales_info + return weight_scales_info, absorb_scales_info for index, key in enumerate(absorb_to_layer.keys()): if isinstance(alpha, float): alpha_tmp = alpha @@ -667,13 +645,10 @@ def _adjust_parameters(self, absorb_to_layer, input_maxes, alpha=0.5, tuning=Fal layer_names = absorb_to_layer[key] for layer_name in layer_names: input_minmax = [self.input_mins[layer_names[0]], self.input_maxes[layer_names[0]]] - self._scale_layer_weight( - layer_name, weight_scales_info[layer_name], alpha_tmp, input_minmax - ) + self._scale_layer_weight(layer_name, weight_scales_info[layer_name], alpha_tmp, input_minmax) return weight_scales_info, absorb_scales_info - def _check_need_calibration(self, alpha, percentile, op_types, - scales_per_op, calib_iter): + def _check_need_calibration(self, alpha, percentile, op_types, scales_per_op, calib_iter): """ check need calibration or not :param alpha: current alpha @@ -693,8 +668,12 @@ def _check_need_calibration(self, alpha, percentile, op_types, self.calib_iter = calib_iter return need_calib - if self.percentile == percentile and self.op_types == op_types \ - and self.scales_per_op == scales_per_op and self.calib_iter == calib_iter: + if ( + self.percentile == percentile + and self.op_types == op_types + and self.scales_per_op == scales_per_op + and self.calib_iter == calib_iter + ): if isinstance(alpha, float): need_calib = False elif self.alpha == "auto": @@ -713,8 +692,7 @@ def _get_auto_loss(self, output, output_q, loss_type="abs", loss_alpha=1.0): :param output_q: Quant output for one layer :param loss_type: The type of loss :param loss_alpha: Loss alpha i for mean scale error - :return: A tensor of the loss - """ + :return: A tensor of the loss.""" if len(output.shape) <= 2: max_value = torch.max(torch.abs(output)) else: @@ -727,16 +705,13 @@ def _get_auto_loss(self, output, output_q, loss_type="abs", loss_alpha=1.0): loss = torch.sum(torch.log(1.0 + torch.abs(output - output_q) / 
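The per-channel scale that _cal_scales hands to cal_scale follows the SmoothQuant paper [1]; cal_scale's body is not part of this hunk, so the sketch below shows the paper formula rather than the exact implementation:

import torch


def smoothquant_scale(input_max, weight_max, alpha=0.5, eps=1e-5):
    # s_j = max|X_j|**alpha / max|W_j|**(1 - alpha): channels with large activation
    # outliers get a larger s_j, shifting quantization difficulty onto the weights.
    return torch.clamp(input_max, min=eps) ** alpha / torch.clamp(weight_max, min=eps) ** (1 - alpha)


input_max = torch.tensor([10.0, 0.5])    # per-input-channel activation |max| from calibration
weight_max = torch.tensor([0.2, 0.3])    # per-input-channel weight |max|
s = smoothquant_scale(input_max, weight_max)
print(s)  # activations are divided by s (absorbed upstream), weights multiplied by s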
torch.abs(output))) return loss elif loss_type == "abs": - return torch.sum( - torch.pow(torch.abs(output - output_q), - 0.5)) + return torch.sum(torch.pow(torch.abs(output - output_q), 0.5)) else: return torch.sum((output - output_q) ** 2) def _get_sq_layer_names(self): """Get the all the hook sq layer - :return: All the sq layer names - """ + :return: All the sq layer names.""" ##TODO this may not fit for folding=False module_names = [] for key in self.absorb_to_layer: @@ -752,18 +727,19 @@ def _get_all_hook_module_names(self): def _qdq_model_wrapper_for_auto(self, save_q_input=False): """Wrapper all the module with qdq - :return: - """ + :return:""" module_names = self._get_all_hook_module_names() self.to_unwrap_module_names = module_names for name in module_names: module = get_module(self.model, name) - set_module(self.model, name, WrapperLayer(module, self.input_mins[name], - self.input_maxes[name], - save_q_input=save_q_input)) + set_module( + self.model, + name, + WrapperLayer(module, self.input_mins[name], self.input_maxes[name], save_q_input=save_q_input), + ) def _qdq_model_unwrapper_for_auto(self): - module_names = self.to_unwrap_module_names + module_names = self.to_unwrap_module_names for name in module_names: module = get_module(self.model, name) # print(name, flush=True) @@ -772,7 +748,7 @@ def _qdq_model_unwrapper_for_auto(self): def _change_qdq_for_auto(self, enable=True): module_names = self._get_all_hook_module_names() for name in module_names: - name = name.split('.orig_layer')[0] + name = name.split(".orig_layer")[0] module = get_module(self.model, name) if enable: module.enable_quant() @@ -801,8 +777,9 @@ def _get_one_sample_auto_loss(self, input, alpha_space, orig_best_alpha, input_m fp32_output[name] = module.output module.output = None self._change_qdq_for_auto(enable=True) - absorb_input_scales, weight_scales = self._cal_scales(self.absorb_to_layer, input_maxes, \ - orig_best_alpha, tuning=True) + absorb_input_scales, weight_scales = self._cal_scales( + self.absorb_to_layer, input_maxes, orig_best_alpha, tuning=True + ) self._update_scales_for_auto(absorb_input_scales, weight_scales) forward_wrapper(self.model, input, self.device) ##save quant_input loss_alphas = {} @@ -854,13 +831,13 @@ def dict_to_list(dic): best_alpha[ln_name] = float(res[0][0]) - elif cur_shared_criterion == 'min' or cur_shared_criterion == 'max': + elif cur_shared_criterion == "min" or cur_shared_criterion == "max": tmp_best_alpha = [] for layer_name in layer_names: res = dict_to_list(loss_alphas[layer_name]) res.sort(key=lambda x: x[1]) tmp_best_alpha.append(float(res[0][0])) - if cur_shared_criterion == 'min': + if cur_shared_criterion == "min": best_alpha[ln_name] = min(tmp_best_alpha) else: best_alpha[ln_name] = max(tmp_best_alpha) @@ -869,11 +846,11 @@ def dict_to_list(dic): raise NotImplementedError return best_alpha + def _auto_tune_alpha_new( + self, input_maxes, auto_calib_iter=32, alpha_min=0.3, alpha_max=0.7, alpha_step=0.05, shared_criterion="min" + ): + """Perform alpha-tuning to obtain layer-wise optimal alpha values and adjust parameters accordingly. - def _auto_tune_alpha_new(self, input_maxes, auto_calib_iter=32, alpha_min=0.3, alpha_max=0.7, alpha_step=0.05, - shared_criterion='min'): - """ - Perform alpha-tuning to obtain layer-wise optimal alpha values and adjust parameters accordingly. 
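A small illustration of how shared_criterion reduces per-layer loss curves to a single alpha for layers that share one absorb parent (layer names here are invented):

# per-layer losses keyed by alpha, as accumulated during auto-tuning
loss_alphas = {
    "q_proj": {"0.40": 1.2, "0.45": 0.9, "0.50": 1.0},
    "k_proj": {"0.40": 0.7, "0.45": 1.1, "0.50": 1.3},
}
per_layer_best = [float(min(curve, key=curve.get)) for curve in loss_alphas.values()]
print(per_layer_best)        # [0.45, 0.4]
print(min(per_layer_best))   # shared_criterion="min" -> 0.4
print(max(per_layer_best))   # shared_criterion="max" -> 0.45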
This function takes quantization of the former layers into consideration when qdq one layer Also, it reduces the memory usage at the cost of increasingtuning time TODO may have compatibility issue when setting folding=True @@ -887,8 +864,13 @@ def _auto_tune_alpha_new(self, input_maxes, auto_calib_iter=32, alpha_min=0.3, a """ logger.info("start sq auto tuning") alpha_scale = 100 - alpha_space = list(range(round(alpha_min * alpha_scale), round((alpha_max + alpha_step) * alpha_scale), - round(alpha_step * alpha_scale))) + alpha_space = list( + range( + round(alpha_min * alpha_scale), + round((alpha_max + alpha_step) * alpha_scale), + round(alpha_step * alpha_scale), + ) + ) alpha_space = [alpha / alpha_scale for alpha in alpha_space] ##wrapper new module self._qdq_model_wrapper_for_auto(save_q_input=True) @@ -896,8 +878,9 @@ def _auto_tune_alpha_new(self, input_maxes, auto_calib_iter=32, alpha_min=0.3, a default_alpha = alpha_space[len(alpha_space) // 2] if 0.5 in alpha_space: default_alpha = 0.5 - absorb_input_scales, weight_scales = self._cal_scales(self.absorb_to_layer, input_maxes, - default_alpha, tuning=True) + absorb_input_scales, weight_scales = self._cal_scales( + self.absorb_to_layer, input_maxes, default_alpha, tuning=True + ) self._update_scales_for_auto(absorb_input_scales, weight_scales) loss_alphas = {} cnt = 0 @@ -926,7 +909,7 @@ def _auto_tune_alpha_new(self, input_maxes, auto_calib_iter=32, alpha_min=0.3, a cur_loss = loss_alphas[key] for alpha_key in cur_loss.keys(): cur_loss[alpha_key] += loss_tmp[key][alpha_key] - if isinstance(input, list): + if isinstance(input, list): input = move_input_to_device(input, self.device) for inp in input: cnt += inp.shape[0] @@ -937,10 +920,11 @@ def _auto_tune_alpha_new(self, input_maxes, auto_calib_iter=32, alpha_min=0.3, a best_alphas = self._get_best_alpha(self.absorb_to_layer, loss_alphas, shared_criterion) for key in best_alphas.keys(): logger.info(f"{cnt // multiply_factor},{key}:{best_alphas[key]}") - absorb_input_scales, weight_scales = self._cal_scales(self.absorb_to_layer, input_maxes, - best_alphas, tuning=True) + absorb_input_scales, weight_scales = self._cal_scales( + self.absorb_to_layer, input_maxes, best_alphas, tuning=True + ) self._update_scales_for_auto(absorb_input_scales, weight_scales) - loss_alphas = {}##TODO check need to remove this one + loss_alphas = {} ##TODO check need to remove this one if cnt >= auto_calib_iter: break @@ -951,12 +935,17 @@ def _auto_tune_alpha_new(self, input_maxes, auto_calib_iter=32, alpha_min=0.3, a logger.info("auto tuning done") return best_alphas - - def transform(self, alpha=0.5, folding=False, percentile=100, op_types=['Linear', 'Conv2d'], - scales_per_op=False, calib_iter=100, - auto_alpha_args={'alpha_min': 0.0, 'alpha_max': 1.0, 'alpha_step': 0.1, 'shared_criterion': 'mean'}): - """ - The main entry of smooth quant + def transform( + self, + alpha=0.5, + folding=False, + percentile=100, + op_types=["Linear", "Conv2d"], + scales_per_op=False, + calib_iter=100, + auto_alpha_args={"alpha_min": 0.0, "alpha_max": 1.0, "alpha_step": 0.1, "shared_criterion": "mean"}, + ): + """The main entry of smooth quant :param alpha: Alpha value to balance the quantization difficulty of activation and weight, please refer to the paper for more details :param folding: whether insert mul(False) or just allow foldable layers(True) for SmoothQuant @@ -965,8 +954,7 @@ def transform(self, alpha=0.5, folding=False, percentile=100, op_types=['Linear' :param scales_per_op: Not supported now :param 
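A quick check of the integer-scaled alpha grid built above; multiplying by alpha_scale keeps the range() endpoints exact and avoids floating-point drift across the tuning grid:

alpha_min, alpha_max, alpha_step, alpha_scale = 0.3, 0.7, 0.05, 100
grid = [a / alpha_scale for a in range(round(alpha_min * alpha_scale),
                                       round((alpha_max + alpha_step) * alpha_scale),
                                       round(alpha_step * alpha_scale))]
print(grid)  # [0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7]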
calib_iter: Data size for calibration :return: A FP32 model with the same architecture as the orig model but with different weight which will be - benefit to quantization - """ + benefit to quantization.""" if not isinstance(self.model, torch.nn.Module): logger.warning("smooth quant is ignored since the model is not a torch module") return self.model @@ -979,6 +967,7 @@ def transform(self, alpha=0.5, folding=False, percentile=100, op_types=['Linear' if isinstance(alpha, float) and (alpha < 0 or alpha > 1): logger.warning("reset alpah to in range [0.0, 1.0]") import numpy + alpha = numpy.clip(alpha, 0.0, 1.0) self.recover() @@ -999,8 +988,9 @@ def transform(self, alpha=0.5, folding=False, percentile=100, op_types=['Linear' logger.debug(f"self_absorb_layers:{self.self_absorb_layers}") if self.allow_absorb: self.absorb_to_layer, no_absorb_layers = self._trace( - op_types) ##TODO we need to insert mul layer for no_absorb_layers later - if self.absorb_to_layer == None and no_absorb_layers == None: + op_types + ) ##TODO we need to insert mul layer for no_absorb_layers later + if self.absorb_to_layer is None and no_absorb_layers is None: return self.model # remove self.self_absorb_layers if it exists in self.absorb_to_layer @@ -1010,10 +1000,9 @@ def transform(self, alpha=0.5, folding=False, percentile=100, op_types=['Linear' self.self_absorb_layers.pop(i) self.absorb_to_layer.update(self.self_absorb_layers) - if self.absorb_to_layer == None and no_absorb_layers == None: + if self.absorb_to_layer is None and no_absorb_layers is None: logger.warning("sorry, could not trace the model, smooth quant is ignored") - logger.warning("if you are using huggingface model," - "you could set torchscript to True ") + logger.warning("if you are using huggingface model," "you could set torchscript to True ") return self.model save_input_output = False if alpha == "auto" else True # if alpha == "auto": @@ -1021,21 +1010,22 @@ def transform(self, alpha=0.5, folding=False, percentile=100, op_types=['Linear' input_maxes_abs = self._calibrate(self.absorb_to_layer, calib_iter, percentile, save_input_output) - # Check if input_maxes match self.absorb_to_layer + # Check if input_maxes match self.absorb_to_layer # (due to self._get_all_layer_names use layer tree instead of forward_path) if not folding: diff_modules = set(self.absorb_to_layer.keys()).difference(input_maxes_abs.keys()) for d in diff_modules: del self.absorb_to_layer[d] - if alpha == 'auto': - self.alpha_per_layer = self._auto_tune_alpha_new(input_maxes_abs, auto_calib_iter=32, - **auto_alpha_args) ##save the alpha + if alpha == "auto": + self.alpha_per_layer = self._auto_tune_alpha_new( + input_maxes_abs, auto_calib_iter=32, **auto_alpha_args + ) ##save the alpha - if alpha == 'auto': + if alpha == "auto": alpha = self.alpha_per_layer example_inputs = self._get_example_input() - if example_inputs != None: + if example_inputs is not None: out_pre_sq = model_forward_per_sample(self.model, example_inputs, self.device) if self.record_max_info: @@ -1044,18 +1034,20 @@ def transform(self, alpha=0.5, folding=False, percentile=100, op_types=['Linear' self.model._smoothquant_optimized = False return self.model - self.weight_scale_info, self.absorb_scales_info = self._adjust_parameters(self.absorb_to_layer, - input_maxes_abs, alpha) - + self.weight_scale_info, self.absorb_scales_info = self._adjust_parameters( + self.absorb_to_layer, input_maxes_abs, alpha + ) + self.model._smoothquant_optimized = True - if example_inputs != None: + if example_inputs is not None: # 
Check mathematical equivelancy out_post_sq = model_forward_per_sample(self.model, example_inputs, self.device) if not self.output_is_equal(out_post_sq, out_pre_sq): logger.warning( "Mathematical equivelancy of Smoothquant is not preserved. " - "Please kindly report this issue to https://github.com/intel/neural-compressor.") + "Please kindly report this issue to https://github.com/intel/neural-compressor." + ) else: logger.warning(" Could not get example input, equivelancy check is skipped") @@ -1072,15 +1064,15 @@ def output_is_equal(self, out1, out2, atol=1e-04): return torch.all(torch.isclose(out1, out2, atol=atol)) return False except: - logger.warning("Automatically check failed, Please check equivelancy manually " - "between out_pre_sq and out_post_sq if necessary.") + logger.warning( + "Automatically check failed, Please check equivelancy manually " + "between out_pre_sq and out_post_sq if necessary." + ) return True def recover(self): - """ - recover the model weights - :return: - """ + """Recover the model weights + :return:""" with torch.no_grad(): for key in self.weight_scale_info: self._scale_layer_weight(key, 1.0 / self.weight_scale_info[key]) @@ -1089,9 +1081,9 @@ def recover(self): self.weight_scale_info = {} ##clear the data self.absorb_scales_info = {} - def _get_all_layer_names(self, op_types=['Linear']): - """ - Try the model to find the layers which can be smooth quantized. + def _get_all_layer_names(self, op_types=["Linear"]): + """Try the model to find the layers which can be smooth quantized. + :param op_types: The op types to be smooth quantized :return: self_absorb_layer: A dict, absorb layer name (itself): layers to be smooth quantized @@ -1106,7 +1098,7 @@ def _get_all_layer_names(self, op_types=['Linear']): key_list.sort() duplicate_list = [] for i, k1 in enumerate(key_list): - for k2 in key_list[i+1:]: + for k2 in key_list[i + 1 :]: if k1 in k2: duplicate_list.append(k1) for i in duplicate_list: @@ -1114,7 +1106,7 @@ def _get_all_layer_names(self, op_types=['Linear']): return self_absorb_layer def _get_example_input(self): - if self.dataloader == None and self.example_inputs == None: + if self.dataloader is None and self.example_inputs is None: return None if self.example_inputs is None: try: @@ -1129,8 +1121,8 @@ def _get_example_input(self): return self.example_inputs def _trace(self, op_types, skip_unsupported_layers=True): - """ - Try the model to find the layers which can be smooth quantized. + """Try the model to find the layers which can be smooth quantized. 
+ :param op_types: The op types to be smooth quantized :return: absorb_to_layer: A dict, absorb layer name:layers to be smooth quantized @@ -1139,16 +1131,17 @@ def _trace(self, op_types, skip_unsupported_layers=True): tg = GraphTrace() self._get_example_input() absorb_to_layer, no_absorb_layers = tg.get_absorb_to_layer( - self.traced_model, self.example_inputs, op_types, - skip_unsupported_layers=skip_unsupported_layers + self.traced_model, self.example_inputs, op_types, skip_unsupported_layers=skip_unsupported_layers ) if not skip_unsupported_layers: return absorb_to_layer - if absorb_to_layer == None and no_absorb_layers == None: + if absorb_to_layer is None and no_absorb_layers is None: logger.warning("sorry, could not trace the model, smooth quant is skipped") - logger.warning("if you are using huggingface model," - "you could set torchscript to True " - "when loading the model or set the return_dict to False") + logger.warning( + "if you are using huggingface model," + "you could set torchscript to True " + "when loading the model or set the return_dict to False" + ) elif absorb_to_layer == {}: logger.warning("could not find any layer to be absorbed") else: @@ -1157,12 +1150,13 @@ def _trace(self, op_types, skip_unsupported_layers=True): to_absorb_cnt += len(item) logger.info( f" {to_absorb_cnt} out of {to_absorb_cnt + len(no_absorb_layers)} " - f"layers could be absorbed in smooth quant") + f"layers could be absorbed in smooth quant" + ) return absorb_to_layer, no_absorb_layers def get_parent(node, all_parents=False): - if node.inputs() == None: + if node.inputs() is None: return None elif len(list(node.inputs())) == 0: return None @@ -1173,8 +1167,7 @@ def get_parent(node, all_parents=False): class GraphTrace: - """ - """ + """""" def __init__(self): self.supported_torch_module_to_aten = { @@ -1187,21 +1180,22 @@ def __init__(self): "InstanceNorm2d": "aten::instance_norm", "LlamaRMSNorm": "aten::mul", "T5LayerNorm": "aten::mul", - "LPLayerNorm": "aten::layer_norm" ##mpt_chat + "LPLayerNorm": "aten::layer_norm", ##mpt_chat } ##TODO potential bug, need to check only have one bug ##TODO, must statisfy af(x)=f(ax),current skip layer may be incomplete - self.skip_ops_to_find_absorb = ["aten::to", - "aten::relu", - "aten::leaky_relu", - "aten::hardtanh" - ] - - self.could_absorb_layers = ["aten::layer_norm", "aten::batch_norm", "aten::linear", "aten::_convolution", - "aten::group_norm", - "aten::instance_norm", - "aten::mul"] ##TODO,suppport more norm + self.skip_ops_to_find_absorb = ["aten::to", "aten::relu", "aten::leaky_relu", "aten::hardtanh"] + + self.could_absorb_layers = [ + "aten::layer_norm", + "aten::batch_norm", + "aten::linear", + "aten::_convolution", + "aten::group_norm", + "aten::instance_norm", + "aten::mul", + ] ##TODO,suppport more norm def trace(self, model, dummy_input): traced_model = None @@ -1212,7 +1206,7 @@ def trace(self, model, dummy_input): orig_device = model.device.type else: orig_device = "cpu" - if orig_device != "cpu" and orig_device != 'meta': + if orig_device != "cpu" and orig_device != "meta": model = model.to("cpu") dummy_input = move_input_to_device(dummy_input, "cpu") if isinstance(dummy_input, dict) or isinstance(dummy_input, UserDict): @@ -1237,7 +1231,7 @@ def trace(self, model, dummy_input): model = model.to(orig_device) return traced_model - def get_nodes(self, traced_model, op_types=['Linear']): + def get_nodes(self, traced_model, op_types=["Linear"]): if isinstance(op_types, str): op_types = [op_types] nodes = [] @@ -1258,31 +1252,29 @@ def 
get_prev_absorb_layer(self, nodes): parent = get_parent(parent) continue if parent.kind() in self.could_absorb_layers: - parent_out_kinds = [] for val_user in list(parent.outputs())[0].uses(): next_node = val_user.user parent_out_kinds.append(next_node.kind()) parent_out_kinds = set(parent_out_kinds) - parent_out_kinds.discard('aten::size') + parent_out_kinds.discard("aten::size") if parent_out_kinds == parent_out_kinds.intersection(self.could_absorb_layers): prev_absorb_layer.append(parent) elif parent_out_kinds.intersection(self.skip_ops_to_find_absorb): res = self.skip_op_absorb_helper(parent) prev_absorb_layer.append(parent) if res else prev_absorb_layer.append(None) - else: # When parent to multiple ops, sq transformation could be wrong. + else: # When parent to multiple ops, sq transformation could be wrong. prev_absorb_layer.append(None) else: prev_absorb_layer.append(None) break return prev_absorb_layer - def skip_op_absorb_helper(self, parent_node): for val_user in list(parent_node.outputs())[0].uses(): next_node = val_user.user - if next_node.kind() == 'aten::size': + if next_node.kind() == "aten::size": continue elif next_node.kind() in self.could_absorb_layers: continue @@ -1305,16 +1297,15 @@ def mapping_torch_module_to_aten(self, op_types): return res def _check_valid_conv(self, module): - """ - remove group conv except depthwise conv + """Remove group conv except depthwise conv :param module: + :return: """ if not isinstance(module, torch.nn.Conv2d): return True if module.groups > 1: - if module.in_channels == module.out_channels and \ - module.groups == module.in_channels: + if module.in_channels == module.out_channels and module.groups == module.in_channels: return True else: return False @@ -1322,7 +1313,7 @@ def _check_valid_conv(self, module): def get_absorb_to_layer(self, model, example_input, op_types, skip_unsupported_layers=True): traced_model = self.trace(model, example_input) - if traced_model == None: + if traced_model is None: return None, None aten_op_types = self.mapping_torch_module_to_aten(op_types) @@ -1332,13 +1323,12 @@ def get_absorb_to_layer(self, model, example_input, op_types, skip_unsupported_l absorb_to_layer = {} no_absorb_layers = [] for index, absorb in enumerate(nodes_prev_absorb): - if absorb == None: - no_absorb_layers.append( - '.'.join(nodes[index].scopeName().split('/')[-1].split('.')[1:])) + if absorb is None: + no_absorb_layers.append(".".join(nodes[index].scopeName().split("/")[-1].split(".")[1:])) continue node = nodes[index] - layer_name = '.'.join(node.scopeName().split('/')[-1].split('.')[1:]) - absorb_name = '.'.join(absorb.scopeName().split('/')[-1].split('.')[1:]) + layer_name = ".".join(node.scopeName().split("/")[-1].split(".")[1:]) + absorb_name = ".".join(absorb.scopeName().split("/")[-1].split(".")[1:]) if layer_name == "" or absorb_name == "": continue if absorb_name in absorb_to_layer.keys(): @@ -1371,35 +1361,43 @@ def remove_unsupported_layers(self, model, absorb_to_layer, no_absorb_layers): def update_sq_scale(ipex_config_path, smoothquant_scale_info): - """update ipex_config.json with smoothquant scale info generated by our algorithm. + """Update ipex_config.json with smoothquant scale info generated by our algorithm. + Args: ipex_config_path (str): a path to temporary ipex_config.json file. smoothquant_scale_info (dict): a dict contains smoothquant scale info. 
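The condition above is effectively a subset test: the traced parent may absorb the scale only when every consumer of its output is itself absorbable (aten::size consumers are discarded first). A tiny stand-alone check of that logic:

could_absorb = {"aten::layer_norm", "aten::linear", "aten::mul"}

consumers = {"aten::linear", "aten::size"}
consumers.discard("aten::size")
print(consumers == consumers & could_absorb)   # True -> scale can be folded into the parent

consumers = {"aten::linear", "aten::softmax"}  # one non-absorbable consumer blocks folding
print(consumers == consumers & could_absorb)   # False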
""" - with open(ipex_config_path, 'r') as f: + with open(ipex_config_path, "r") as f: ipex_config = json.load(f) for module_name, v in ipex_config.items(): - if 'q_op_infos' in v and v['q_op_infos']: - for op_num, v1 in v['q_op_infos'].items(): + if "q_op_infos" in v and v["q_op_infos"]: + for op_num, v1 in v["q_op_infos"].items(): # update alpha data instead of updating weight scale - op_name = v1['fqn'] # fqn always exists even it's empty. + op_name = v1["fqn"] # fqn always exists even it's empty. if op_name in smoothquant_scale_info: # observers were overridden by the fallback step, setting it back. - v1['activation_observer'] = {'name': 'SmoothQuantActivationObserver', - 'smooth_quant_enabled': False, 'dtype': 'torch.quint8', - 'qscheme': 'torch.per_tensor_affine', 'reduce_range': False, - 'quant_min': 0, 'quant_max': 255, - 'alpha': smoothquant_scale_info[op_name]['alpha'] - } - v1['weight_observer'] = {'name': 'SmoothQuantWeightObserver', - 'smooth_quant_enabled': False, 'dtype': 'torch.qint8', - 'qscheme': 'torch.per_channel_symmetric', 'reduce_range': False, - 'quant_min': -128, 'quant_max': 127, - 'alpha': smoothquant_scale_info[op_name]['alpha'] # only update alpha - } + v1["activation_observer"] = { + "name": "SmoothQuantActivationObserver", + "smooth_quant_enabled": False, + "dtype": "torch.quint8", + "qscheme": "torch.per_tensor_affine", + "reduce_range": False, + "quant_min": 0, + "quant_max": 255, + "alpha": smoothquant_scale_info[op_name]["alpha"], + } + v1["weight_observer"] = { + "name": "SmoothQuantWeightObserver", + "smooth_quant_enabled": False, + "dtype": "torch.qint8", + "qscheme": "torch.per_channel_symmetric", + "reduce_range": False, + "quant_min": -128, + "quant_max": 127, + "alpha": smoothquant_scale_info[op_name]["alpha"], # only update alpha + } f.close() # overwrite ipex_config_path - with open(ipex_config_path, 'w') as f1: + with open(ipex_config_path, "w") as f1: json.dump(ipex_config, f1, indent=4) f1.close() - diff --git a/neural_compressor/adaptor/torch_utils/symbolic_trace.py b/neural_compressor/adaptor/torch_utils/symbolic_trace.py index b95e7e2452e..d5b4aaf51f3 100644 --- a/neural_compressor/adaptor/torch_utils/symbolic_trace.py +++ b/neural_compressor/adaptor/torch_utils/symbolic_trace.py @@ -16,12 +16,10 @@ # limitations under the License. """Symbolic Trace for Torch Utils.""" import torch -from neural_compressor.adaptor.pytorch import ( - PyTorch_FXAdaptor, - get_torch_version, -) from packaging.version import Version +from neural_compressor.adaptor.pytorch import PyTorch_FXAdaptor, get_torch_version + version = get_torch_version() @@ -36,14 +34,15 @@ def trace_and_fuse_sub_graph(model, prefix, is_qat): Returns: model (object). """ - from torch.quantization.quantize_fx import _fuse_fx import torch.quantization.quantization_mappings as tqqm + from torch.quantization.quantize_fx import _fuse_fx + fx_white_list = tqqm.get_default_qconfig_propagation_list() for name, module in model.named_children(): # FX QAT cannot fallback nn.Dropout from train mode to eval if type(module) == torch.nn.Dropout: # pragma: no cover continue - op_name = prefix + '.' + name if prefix != '' else name + op_name = prefix + "." 
+ name if prefix != "" else name if type(module) in fx_white_list: module = torch.quantization.QuantWrapper(module) if PyTorch_FXAdaptor._check_dynamic_control(module): @@ -74,5 +73,5 @@ def symbolic_trace(model, is_qat=False): try: traced_model = torch.fx.symbolic_trace(model) except: - traced_model = trace_and_fuse_sub_graph(model, prefix='', is_qat=is_qat) + traced_model = trace_and_fuse_sub_graph(model, prefix="", is_qat=is_qat) return traced_model diff --git a/neural_compressor/adaptor/torch_utils/teq.py b/neural_compressor/adaptor/torch_utils/teq.py index 3176929776e..d22302839b2 100644 --- a/neural_compressor/adaptor/torch_utils/teq.py +++ b/neural_compressor/adaptor/torch_utils/teq.py @@ -19,40 +19,34 @@ try: from neural_compressor.utils.utility import LazyImport - torch = LazyImport('torch') + torch = LazyImport("torch") from ...utils import logger -except: # pragma: no cover - import torch +except: # pragma: no cover import logging + + import torch + logger = logging.getLogger() +import transformers + +from .model_wrapper import MulLinear, TEQLinearFakeQuant from .smooth_quant import GraphTrace, get_module, set_module from .weight_only import quant_weight -from .model_wrapper import TEQLinearFakeQuant, MulLinear -import transformers class TEQuantizer: - """ - Weight-only quantization, Trainable Equivalent Transformation (TEQ): linear wrapper to apply scale to input - """ + """Weight-only quantization, Trainable Equivalent Transformation (TEQ): linear wrapper to apply scale to input.""" - def __init__( - self, - model, - weight_config={}, - absorb_to_layer={}, - extra_config={}, - example_inputs=None - ): + def __init__(self, model, weight_config={}, absorb_to_layer={}, extra_config={}, example_inputs=None): """ :param model: the model for quantization :param weight_config (dict, optional): contains all info required by GPTQ. Defaults to {}. - :param example_inputs: inputs for trace + :param example_inputs: inputs for trace """ self.model = model self.weight_config = weight_config - self.folding = extra_config.get('folding', True) + self.folding = extra_config.get("folding", True) self.example_inputs = example_inputs self.device, self.dtype = self._get_device() self.model.eval() @@ -60,31 +54,26 @@ def __init__( self.absorb_to_layer = absorb_to_layer def _get_device(self): - """ - Get the model device - :return:Model device - """ + """Get the model device + :return:Model device.""" for _, p in self.model.named_parameters(): return p.data.device, p.data.dtype def add_tuning_scale(self, sqrt_w_init=False): - """ - The main entry of smooth quant + """The main entry of smooth quant to the paper for more details - :param sqrt_w_init: use sqrt weight to init - """ + :param sqrt_w_init: use sqrt weight to init.""" # freeze model. 
for n, p in self.model.named_parameters(): p.requires_grad = False for layer_norm in self.absorb_to_layer: - layer_0_name = self.absorb_to_layer[layer_norm][0] module = get_module(self.model, layer_0_name) - if sqrt_w_init: # pragma: no cover + if sqrt_w_init: # pragma: no cover weights = [] for layer_name in self.absorb_to_layer[layer_norm]: module = get_module(self.model, layer_name) @@ -102,7 +91,7 @@ def add_tuning_scale(self, sqrt_w_init=False): self.trained_alphas[layer_norm] = alpha for layer_name in self.absorb_to_layer[layer_norm]: - if self.weight_config.get(layer_name) is None: # pragma: no cover + if self.weight_config.get(layer_name) is None: # pragma: no cover logger.info(f"layer {layer_name} not in weight config, skip.") continue num_bits = self.weight_config[layer_name]["bits"] @@ -110,13 +99,14 @@ def add_tuning_scale(self, sqrt_w_init=False): scheme = self.weight_config[layer_name]["scheme"] module = get_module(self.model, layer_name) - wrapper_module = TEQLinearFakeQuant(orig_layer=module, alpha=alpha, - num_bits=num_bits, group_size=group_size, scheme=scheme) + wrapper_module = TEQLinearFakeQuant( + orig_layer=module, alpha=alpha, num_bits=num_bits, group_size=group_size, scheme=scheme + ) set_module(self.model, layer_name, wrapper_module) for n, m in self.model.named_modules(): if isinstance(m, torch.nn.Linear) and "orig_layer" not in n: - if self.weight_config.get(n) is None: # pragma: no cover + if self.weight_config.get(n) is None: # pragma: no cover logger.info(f"out of absorbed layer {n} not in weight config, skip.") continue num_bits = self.weight_config[layer_name]["bits"] @@ -125,20 +115,19 @@ def add_tuning_scale(self, sqrt_w_init=False): alpha = torch.nn.Parameter(torch.ones(m.weight.shape[1], device=self.device)) alpha.requires_grad_(False) - wrapper_module = TEQLinearFakeQuant(orig_layer=m, alpha=alpha, - num_bits=num_bits, group_size=group_size, scheme=scheme) + wrapper_module = TEQLinearFakeQuant( + orig_layer=m, alpha=alpha, num_bits=num_bits, group_size=group_size, scheme=scheme + ) set_module(self.model, n, wrapper_module) @torch.no_grad() def _absorb_scales(self, layer, scale, layer_name=""): - """ - Absorb the scale to the layer at output channel + """Absorb the scale to the layer at output channel :param layer: The module :param scale: The scale to be absorbed - :param layer_name: The layer name - """ + :param layer_name: The layer name.""" # for insert mul - if not self.folding: # pragma: no cover + if not self.folding: # pragma: no cover if isinstance(layer, MulLinear): set_module(self.model, layer_name, layer.linear) ##recover else: @@ -147,66 +136,63 @@ def _absorb_scales(self, layer, scale, layer_name=""): self.weight_config[layer_name + ".linear"] = self.weight_config[layer_name] return - if isinstance(layer, torch.nn.BatchNorm2d) or isinstance(layer, torch.nn.GroupNorm) or \ - isinstance(layer, torch.nn.InstanceNorm2d): - if layer.affine: # pragma: no cover + if ( + isinstance(layer, torch.nn.BatchNorm2d) + or isinstance(layer, torch.nn.GroupNorm) + or isinstance(layer, torch.nn.InstanceNorm2d) + ): + if layer.affine: # pragma: no cover layer.weight *= scale layer.bias *= scale - else: # pragma: no cover + else: # pragma: no cover layer.affine = True weight = torch.ones(layer.num_features, device=self.device, dtype=self.dtype) * scale - layer.weight = torch.nn.Parameter( - weight, requires_grad=False) + layer.weight = torch.nn.Parameter(weight, requires_grad=False) bias = torch.zeros(layer.num_features, device=self.device, dtype=self.dtype) - 
layer.bias = torch.nn.Parameter(bias, requires_grad=False - ) + layer.bias = torch.nn.Parameter(bias, requires_grad=False) elif isinstance(layer, torch.nn.LayerNorm): if layer.elementwise_affine: layer.weight *= scale layer.bias *= scale - else: # pragma: no cover + else: # pragma: no cover layer.elementwise_affine = True weight = torch.ones(layer.num_features, device=self.device, dtype=self.dtype) * scale - layer.weight = torch.nn.Parameter( - torch.ones(weight, requires_grad=False)) + layer.weight = torch.nn.Parameter(torch.ones(weight, requires_grad=False)) bias = torch.zeros(layer.num_features, device=self.device, dtype=self.dtype) - layer.bias = torch.nn.Parameter( - bias, requires_grad=False) + layer.bias = torch.nn.Parameter(bias, requires_grad=False) - elif isinstance(layer, torch.nn.Conv2d): # pragma: no cover + elif isinstance(layer, torch.nn.Conv2d): # pragma: no cover ## the order could not be changed - if hasattr(layer, "bias") and (layer.bias != None): + if hasattr(layer, "bias") and (layer.bias is not None): layer.bias *= scale scale = scale.view(scale.shape[0], 1, 1, 1) layer.weight *= scale - elif isinstance(layer, torch.nn.Linear): # pragma: no cover - if hasattr(layer, "bias") and (layer.bias != None): + elif isinstance(layer, torch.nn.Linear): # pragma: no cover + if hasattr(layer, "bias") and (layer.bias is not None): layer.bias *= scale scale = scale.view(scale.shape[0], 1) layer.weight *= scale - - elif layer.__class__.__name__ == "LlamaRMSNorm" \ - or layer.__class__.__name__ == "T5LayerNorm": ##quite tricky + elif layer.__class__.__name__ == "LlamaRMSNorm" or layer.__class__.__name__ == "T5LayerNorm": ##quite tricky layer.weight *= scale - else: # pragma: no cover - logger.info(f"found unsupported layer {type(layer)}, try to multiply scale to " - f"weight and bias directly, this may introduce accuracy issue, please have a check ") - if hasattr(layer, "weight") and layer.weight != None: + else: # pragma: no cover + logger.info( + f"found unsupported layer {type(layer)}, try to multiply scale to " + f"weight and bias directly, this may introduce accuracy issue, please have a check " + ) + if hasattr(layer, "weight") and layer.weight is not None: layer.weight *= scale - if hasattr(layer, "bias") and layer.bias != None: + if hasattr(layer, "bias") and layer.bias is not None: layer.bias *= scale @torch.no_grad() def _scale_layer_weight(self, layer, scale): ##input channel - """ - Scale the layer weights at input channel, depthwise conv output channel + """Scale the layer weights at input channel, depthwise conv output channel :param layer_name: The layer name :param scale: The scale to be multiplied - :return: - """ + :return:""" if layer.__class__.__name__ == "MulLinear": layer = layer.linear @@ -219,9 +205,7 @@ def _scale_layer_weight(self, layer, scale): ##input channel @torch.no_grad() def transform(self): - """ - apply alpha/scale - """ + """Apply alpha/scale.""" for ln_name, layer_names in self.absorb_to_layer.items(): module = get_module(self.model, ln_name) scale = self.trained_alphas[ln_name] @@ -241,22 +225,30 @@ def transform(self): if isinstance(m, TEQLinearFakeQuant): set_module(self.model, n, m.orig_layer) - def train(self, dataloader, train_steps=1000, lr=1e-3, warmup_ratio=0.05, - gradient_accumulation_steps=1, logging_steps=10, - betas=[0.9, 0.9], weight_decay=0, lr_scheduler_type="linear"): - """ - train function - """ + def train( + self, + dataloader, + train_steps=1000, + lr=1e-3, + warmup_ratio=0.05, + gradient_accumulation_steps=1, + 
logging_steps=10, + betas=[0.9, 0.9], + weight_decay=0, + lr_scheduler_type="linear", + ): + """Train function.""" trained_alphas_list = [] for item in self.trained_alphas.items(): trained_alphas_list.append(item[1]) optimizer = torch.optim.Adam(trained_alphas_list, lr=lr, weight_decay=weight_decay, betas=betas) - lr_scheduler = transformers.get_scheduler( # pylint: disable=E1111 - name=lr_scheduler_type, - optimizer=optimizer, - num_warmup_steps=int(train_steps * warmup_ratio) // gradient_accumulation_steps, - num_training_steps=train_steps // gradient_accumulation_steps) + lr_scheduler = transformers.get_scheduler( # pylint: disable=E1111 + name=lr_scheduler_type, + optimizer=optimizer, + num_warmup_steps=int(train_steps * warmup_ratio) // gradient_accumulation_steps, + num_training_steps=train_steps // gradient_accumulation_steps, + ) logger.info("start training") self.model.train() @@ -283,7 +275,7 @@ def train(self, dataloader, train_steps=1000, lr=1e-3, warmup_ratio=0.05, optimizer.zero_grad() lr_scheduler.step() - if global_steps >= train_steps: # pragma: no cover + if global_steps >= train_steps: # pragma: no cover break logger.info("finish training") @@ -292,21 +284,17 @@ def train(self, dataloader, train_steps=1000, lr=1e-3, warmup_ratio=0.05, @torch.no_grad() def quantize(self): - """ - quantization - """ + """quantization.""" for n, m in self.model.named_modules(): - if self.weight_config.get(n) is None: # pragma: no cover + if self.weight_config.get(n) is None: # pragma: no cover logger.info(f"quantize layer {n} not in weight config, skip.") continue num_bits = self.weight_config[n]["bits"] group_size = self.weight_config[n]["group_size"] scheme = self.weight_config[n]["scheme"] - if isinstance(m, torch.nn.Linear): # pragma: no cover - m.weight.data.copy_( - quant_weight(m.weight, num_bits=num_bits, - group_size=group_size, scheme=scheme)) + if isinstance(m, torch.nn.Linear): # pragma: no cover + m.weight.data.copy_(quant_weight(m.weight, num_bits=num_bits, group_size=group_size, scheme=scheme)) def save(self, save_scale_file="", save_state_dict_file=""): """ @@ -314,8 +302,8 @@ def save(self, save_scale_file="", save_state_dict_file=""): :param save_scale_file: save alpha/scale with torch.save :param save_state_dict_file: save model state_dict """ - if save_scale_file: # pragma: no cover + if save_scale_file: # pragma: no cover torch.save(self.trained_alphas, save_scale_file) - if save_state_dict_file: # pragma: no cover + if save_state_dict_file: # pragma: no cover torch.save(self.model.state_dict(), save_state_dict_file) diff --git a/neural_compressor/adaptor/torch_utils/util.py b/neural_compressor/adaptor/torch_utils/util.py index 465b8599a42..ab8887e1cf6 100644 --- a/neural_compressor/adaptor/torch_utils/util.py +++ b/neural_compressor/adaptor/torch_utils/util.py @@ -16,18 +16,21 @@ # limitations under the License. """Util Class and Functions.""" import copy -import re import json -import numpy as np +import re from collections import UserDict -from packaging.version import Version from functools import partial + +import numpy as np +from packaging.version import Version + from ...utils import logger -from ...utils.utility import LazyImport, CpuInfo +from ...utils.utility import CpuInfo, LazyImport tqdm = LazyImport("tqdm") torch = LazyImport("torch") + def get_embedding_contiguous(model): """This is a helper function for nn.Embedding, and it will get input contiguous. 
@@ -37,6 +40,7 @@ def get_embedding_contiguous(model): Returns: None """ + def contiguous_hook(module, input): embeddings = input[0].contiguous() modified_input = (embeddings, *input[1:]) @@ -44,7 +48,7 @@ def contiguous_hook(module, input): for child in model.modules(): child_type = child.__class__.__name__ - if child_type == 'Embedding': + if child_type == "Embedding": child.register_forward_pre_hook(contiguous_hook) @@ -58,7 +62,7 @@ def is_fused_module(module): (bool): is fused or not """ op_type = str(type(module)) - if 'fused' in op_type: + if "fused" in op_type: return True else: return False @@ -78,16 +82,10 @@ def collate_torch_preds(results): results = zip(*results) collate_results = [] for output in results: - output = [ - batch.numpy() if isinstance(batch, torch.Tensor) else batch - for batch in output - ] + output = [batch.numpy() if isinstance(batch, torch.Tensor) else batch for batch in output] collate_results.append(np.concatenate(output)) elif isinstance(batch, torch.Tensor): - results = [ - batch.numpy() if isinstance(batch, torch.Tensor) else batch - for batch in results - ] + results = [batch.numpy() if isinstance(batch, torch.Tensor) else batch for batch in results] collate_results = np.concatenate(results) return collate_results @@ -122,37 +120,38 @@ def append_attr(fx_model, model, fx_white_list=[]): """ fx_attr = dir(fx_model) org_attr = dir(model) - ignore_match_patterns = [r"_", r"quant", r"dequant", r"weight", - r"bias", r'activation_post_process'] - ignore_search_patterns = [r"_scale_", r"_zero_point_", - r'_activation_post_process_'] + ignore_match_patterns = [r"_", r"quant", r"dequant", r"weight", r"bias", r"activation_post_process"] + ignore_search_patterns = [r"_scale_", r"_zero_point_", r"_activation_post_process_"] add_special_patterns = [r"_forward_hooks", r"_forward_pre_hooks", r"_backward_hooks"] attr_names = [] - if hasattr(fx_model, 'module') and hasattr(fx_model.module, 'weight'): + if hasattr(fx_model, "module") and hasattr(fx_model.module, "weight"): if not isinstance(fx_model.module.weight, torch.Tensor): fx_model.weight = fx_model.module.weight() else: fx_model.weight = fx_model.module.weight for i in org_attr: - if type(model) in fx_white_list and type(model) != torch.nn.Sequential \ - and any([re.search(p, i) for p in add_special_patterns]): + if ( + type(model) in fx_white_list + and type(model) != torch.nn.Sequential + and any([re.search(p, i) for p in add_special_patterns]) + ): continue - if any([re.search(p, i) for p in add_special_patterns]) \ - or (i not in fx_attr \ - and not any([re.match(p, i) for p in ignore_match_patterns]) \ - and not any([re.search(p, i) for p in ignore_search_patterns])) : + if any([re.search(p, i) for p in add_special_patterns]) or ( + i not in fx_attr + and not any([re.match(p, i) for p in ignore_match_patterns]) + and not any([re.search(p, i) for p in ignore_search_patterns]) + ): attr_names.append(i) for name in attr_names: attr = getattr(model, name, None) - if isinstance(attr, torch.nn.Module) or \ - isinstance(attr, torch.quantization.qconfig.QConfig): + if isinstance(attr, torch.nn.Module) or isinstance(attr, torch.quantization.qconfig.QConfig): continue setattr(fx_model, name, attr) return fx_model -def generate_activation_observer(scheme, algorithm): # pragma: no cover +def generate_activation_observer(scheme, algorithm): # pragma: no cover """This is a helper method to generate an activation observer. 
Args: @@ -163,23 +162,23 @@ def generate_activation_observer(scheme, algorithm): # pragma: no cover An observer. """ kl_activation_observer = { - 'name': 'HistogramObserver', - 'bins': 2048, - 'upsample_rate': 128, - 'dtype': 'torch.quint8', - 'qscheme': 'torch.per_tensor_affine', - 'reduce_range': False, - 'quant_min': 0, - 'quant_max': 255 - } + "name": "HistogramObserver", + "bins": 2048, + "upsample_rate": 128, + "dtype": "torch.quint8", + "qscheme": "torch.per_tensor_affine", + "reduce_range": False, + "quant_min": 0, + "quant_max": 255, + } minmax_activation_observer = { - "name": "MinMaxObserver", - "dtype": "torch.quint8", - "qscheme": "torch.per_tensor_affine", - "reduce_range": False, - "quant_min": 0, - "quant_max": 255 - } + "name": "MinMaxObserver", + "dtype": "torch.quint8", + "qscheme": "torch.per_tensor_affine", + "reduce_range": False, + "quant_min": 0, + "quant_max": 255, + } REDUCE_RANGE = False if CpuInfo().vnni else True if REDUCE_RANGE: minmax_activation_observer["reduce_range"] = REDUCE_RANGE @@ -198,7 +197,8 @@ def generate_activation_observer(scheme, algorithm): # pragma: no cover if algorithm == "minmax": return minmax_activation_observer -def check_cfg_and_qconfig(tune_cfg, cfgs, op_infos_from_cfgs, output_tensor_ids_op_name): # pragma: no cover + +def check_cfg_and_qconfig(tune_cfg, cfgs, op_infos_from_cfgs, output_tensor_ids_op_name): # pragma: no cover """Check configs and quantization configs. Args: @@ -215,56 +215,58 @@ def check_cfg_and_qconfig(tune_cfg, cfgs, op_infos_from_cfgs, output_tensor_ids_ for i, name in enumerate(op_name[0]): # to int8 ipex_op_cfg = op_infos_from_cfgs[name] - input_tensor_infos = ipex_op_cfg['input_tensor_infos'] + input_tensor_infos = ipex_op_cfg["input_tensor_infos"] for index, input_tensor_info in enumerate(input_tensor_infos): - if 'force_dtype' not in input_tensor_info.keys(): + if "force_dtype" not in input_tensor_info.keys(): continue - if input_tensor_info['force_dtype'] == 'torch.qint8' or \ - input_tensor_info['force_dtype'] == 'torch.quint8': + if ( + input_tensor_info["force_dtype"] == "torch.qint8" + or input_tensor_info["force_dtype"] == "torch.quint8" + ): # int8 -> int8 - if inc_op_cfg['weight']['dtype'] == 'int8': - inc_scheme = inc_op_cfg['activation']['scheme'] - inc_algorithm = inc_op_cfg['activation']['algorithm'] - ipex_op_cfg['input_tensor_infos'] = input_tensor_infos - activation_observer = generate_activation_observer(inc_scheme, - inc_algorithm) - if inc_scheme == 'sym': - input_tensor_infos[index]['force_dtype'] = 'torch.qint8' - if inc_scheme == 'asym': - input_tensor_infos[index]['force_dtype'] = 'torch.quint8' - ipex_op_cfg['activation_observer'] = activation_observer + if inc_op_cfg["weight"]["dtype"] == "int8": + inc_scheme = inc_op_cfg["activation"]["scheme"] + inc_algorithm = inc_op_cfg["activation"]["algorithm"] + ipex_op_cfg["input_tensor_infos"] = input_tensor_infos + activation_observer = generate_activation_observer(inc_scheme, inc_algorithm) + if inc_scheme == "sym": + input_tensor_infos[index]["force_dtype"] = "torch.qint8" + if inc_scheme == "asym": + input_tensor_infos[index]["force_dtype"] = "torch.quint8" + ipex_op_cfg["activation_observer"] = activation_observer # int8 -> fp32 else: - input_tensor_infos[index]['force_dtype'] = 'torch.float32' + input_tensor_infos[index]["force_dtype"] = "torch.float32" # modify pre_op output inf_dtype if i == 0: - input_tensor_id = input_tensor_info['id'] - input_tensor_dtype = input_tensor_info['force_dtype'] + input_tensor_id = 
input_tensor_info["id"] + input_tensor_dtype = input_tensor_info["force_dtype"] if input_tensor_id in output_tensor_ids_op_name.keys(): pre_op_name = output_tensor_ids_op_name[input_tensor_id] pre_op_module = pre_op_name[0][0] pre_op_state = pre_op_name[0][1] pre_op_index = pre_op_name[0][2] pre_op_infos = cfgs[pre_op_module][pre_op_state][pre_op_index] - pre_op_output_infos = pre_op_infos['output_tensor_infos'] + pre_op_output_infos = pre_op_infos["output_tensor_infos"] for index, pre_op_output in enumerate(pre_op_output_infos): - if pre_op_output['id'] == input_tensor_id: - pre_op_output_infos[index]['inf_dtype'] = input_tensor_dtype + if pre_op_output["id"] == input_tensor_id: + pre_op_output_infos[index]["inf_dtype"] = input_tensor_dtype else: pass - pre_op_infos['output_tensor_infos'] = pre_op_output_infos + pre_op_infos["output_tensor_infos"] = pre_op_output_infos cfgs[pre_op_module][pre_op_state][pre_op_index] = pre_op_infos else: pass cfgs[name[0]][name[1]][name[2]] = ipex_op_cfg return cfgs -def paser_cfgs(cfgs): # pragma: no cover + +def paser_cfgs(cfgs): # pragma: no cover """Parse configs. Args: cfgs (dict): the input configs. - + Returns: ops_name (list): list of op names. @@ -276,7 +278,7 @@ def paser_cfgs(cfgs): # pragma: no cover layer_output_infos_ids = [] op_infos_from_cfgs = {} # record input_tensor_id and op_name - #{"0": [(" ", "q_op_infos", "0"), (" ", "q_op_infos", "1")]} + # {"0": [(" ", "q_op_infos", "0"), (" ", "q_op_infos", "1")]} input_tensor_ids_op_name = {} output_tensor_ids_op_name = {} for module_key in cfgs.keys(): @@ -285,7 +287,7 @@ def paser_cfgs(cfgs): # pragma: no cover for index, op_info in enumerate(cfgs[module_key][state]): name = (module_key, state, index) ops_name.append(name) - layer_output_infos_ids.append(op_info['id']) + layer_output_infos_ids.append(op_info["id"]) op_infos_from_cfgs[name] = op_info continue for op_cfg_id in cfgs[module_key][state].keys(): @@ -294,32 +296,32 @@ def paser_cfgs(cfgs): # pragma: no cover if name not in ops_name: ops_name.append(name) else: - assert False, \ - "Please check IPEX int8 configure json whether have the same name ops" + assert False, "Please check IPEX int8 configure json whether have the same name ops" op_infos_from_cfgs[name] = op_info - input_tensors = op_info['input_tensor_infos'] + input_tensors = op_info["input_tensor_infos"] for input_tensor in input_tensors: - if 'id' not in input_tensor.keys(): + if "id" not in input_tensor.keys(): continue else: - input_tensor_id = input_tensor['id'] + input_tensor_id = input_tensor["id"] if input_tensor_id not in input_tensor_ids_op_name.keys(): input_tensor_ids_op_name[input_tensor_id] = [name] else: input_tensor_ids_op_name[input_tensor_id].append(name) - output_tensors = op_info['output_tensor_infos'] + output_tensors = op_info["output_tensor_infos"] for output_tensor in output_tensors: - if 'id' not in output_tensor.keys(): + if "id" not in output_tensor.keys(): continue else: - output_tensor_id = output_tensor['id'] + output_tensor_id = output_tensor["id"] if output_tensor_id not in output_tensor_ids_op_name.keys(): output_tensor_ids_op_name[output_tensor_id] = [name] else: output_tensor_ids_op_name[output_tensor_id].append(name) return ops_name, op_infos_from_cfgs, input_tensor_ids_op_name, output_tensor_ids_op_name -def get_quantizable_ops_from_cfgs(ops_name, op_infos_from_cfgs, input_tensor_ids_op_name): # pragma: no cover + +def get_quantizable_ops_from_cfgs(ops_name, op_infos_from_cfgs, input_tensor_ids_op_name): # pragma: no cover """Get 
quantizable ops from configs, combine fused ops as one op. Args: @@ -336,41 +338,40 @@ def get_quantizable_ops_from_cfgs(ops_name, op_infos_from_cfgs, input_tensor_ids start = True if name in seen_ops: continue - elif name[1] not in ['q_op_infos']: + elif name[1] not in ["q_op_infos"]: continue else: # judge fuse ops the first op op_info = op_infos_from_cfgs[name] - output_tensors = op_info['output_tensor_infos'] - input_tensors = op_info['input_tensor_infos'] + output_tensors = op_info["output_tensor_infos"] + input_tensors = op_info["input_tensor_infos"] for input_tensor in input_tensors: - if 'inf_dtype' not in input_tensor.keys(): + if "inf_dtype" not in input_tensor.keys(): continue - if input_tensor['inf_dtype'] == torch.float32: + if input_tensor["inf_dtype"] == torch.float32: pre_op_name = input_tensor_ids_op_name[input_tensor["id"]] - if pre_op_name[1] in ['q_op_infos']: + if pre_op_name[1] in ["q_op_infos"]: print(pre_op_name, "is not the fuse ops first op.") start = False continue if not start: continue # add quantizable ops, include op and fuse ops. - q_ops, stack = [],[(name,[])] + q_ops, stack = [], [(name, [])] while stack: cur_name, cur = stack.pop() seen_ops.append(cur_name) - if cur_name[1] not in ['q_op_infos']: + if cur_name[1] not in ["q_op_infos"]: q_ops.append(cur) break op_info = op_infos_from_cfgs[cur_name] - output_tensors = op_info['output_tensor_infos'] + output_tensors = op_info["output_tensor_infos"] for output_tensor in output_tensors: - if output_tensor['inf_dtype'] == 'torch.qint8' or \ - output_tensor['inf_dtype'] == 'torch.quint8': + if output_tensor["inf_dtype"] == "torch.qint8" or output_tensor["inf_dtype"] == "torch.quint8": q_ops.append(cur + [cur_name]) break try: - next_op_names = input_tensor_ids_op_name[output_tensor['id']] + next_op_names = input_tensor_ids_op_name[output_tensor["id"]] for next_op_name in next_op_names: stack.append((next_op_name, cur + [cur_name])) except: @@ -381,38 +382,36 @@ def get_quantizable_ops_from_cfgs(ops_name, op_infos_from_cfgs, input_tensor_ids quantizable_ops.append(q_op) return quantizable_ops + def update_sq_scale(ipex_config_path, smoothquant_scale_info): - """update ipex_config.json with smoothquant scale info generated by our algorithm. + """Update ipex_config.json with smoothquant scale info generated by our algorithm. Args: ipex_config_path (str): a path to temporary ipex_config.json file. smoothquant_scale_info (dict): a dict contains smoothquant scale info. """ - with open(ipex_config_path, 'r') as f: + with open(ipex_config_path, "r") as f: ipex_config = json.load(f) for module_name, v in ipex_config.items(): - if 'q_op_infos' in v and v['q_op_infos']: - for op_num, v1 in v['q_op_infos'].items(): + if "q_op_infos" in v and v["q_op_infos"]: + for op_num, v1 in v["q_op_infos"].items(): # update alpha data instead of updating weight scale - op_name = v1['fqn'] # fqn always exists even it's empty. + op_name = v1["fqn"] # fqn always exists even it's empty. 
if op_name in smoothquant_scale_info: - input_scale_for_mul = \ - smoothquant_scale_info[op_name]['input_scale_for_mul'].tolist() - input_scale_after_mul = \ - smoothquant_scale_info[op_name]['input_scale_after_mul'].tolist() - input_zero_point_after_mul = \ - smoothquant_scale_info[op_name]['input_zero_point_after_mul'].tolist() - weight_scale_for_mul = \ - (1 / smoothquant_scale_info[op_name]['input_scale_for_mul']).tolist() - weight_scale_after_mul = \ - smoothquant_scale_info[op_name]['weight_scale_after_mul'].tolist() - v1['input_tensor_infos'][0]['smooth_quant_scaling_factor'] = input_scale_for_mul - v1['input_tensor_infos'][0]['scale'] = input_scale_after_mul - v1['input_tensor_infos'][0]['zero_point'] = input_zero_point_after_mul - v1['weight_tensor_infos'][0]['smooth_quant_scaling_factor'] = weight_scale_for_mul - v1['weight_tensor_infos'][0]['scale'] = weight_scale_after_mul + input_scale_for_mul = smoothquant_scale_info[op_name]["input_scale_for_mul"].tolist() + input_scale_after_mul = smoothquant_scale_info[op_name]["input_scale_after_mul"].tolist() + input_zero_point_after_mul = smoothquant_scale_info[op_name][ + "input_zero_point_after_mul" + ].tolist() + weight_scale_for_mul = (1 / smoothquant_scale_info[op_name]["input_scale_for_mul"]).tolist() + weight_scale_after_mul = smoothquant_scale_info[op_name]["weight_scale_after_mul"].tolist() + v1["input_tensor_infos"][0]["smooth_quant_scaling_factor"] = input_scale_for_mul + v1["input_tensor_infos"][0]["scale"] = input_scale_after_mul + v1["input_tensor_infos"][0]["zero_point"] = input_zero_point_after_mul + v1["weight_tensor_infos"][0]["smooth_quant_scaling_factor"] = weight_scale_for_mul + v1["weight_tensor_infos"][0]["scale"] = weight_scale_after_mul # # observers were overridden by the fallback step, setting it back. 
- v1['activation_observer'] = { + v1["activation_observer"] = { "name": "SmoothQuantActivationObserver", "smooth_quant_enabled": True, "dtype": "torch.quint8", @@ -420,7 +419,7 @@ def update_sq_scale(ipex_config_path, smoothquant_scale_info): "reduce_range": False, "quant_min": 0, "quant_max": 255, - "alpha": smoothquant_scale_info[op_name]['alpha'], + "alpha": smoothquant_scale_info[op_name]["alpha"], "act_observer": { "name": "HistogramObserver", "bins": 2048, @@ -429,7 +428,7 @@ def update_sq_scale(ipex_config_path, smoothquant_scale_info): "qscheme": "torch.per_tensor_affine", "reduce_range": False, "quant_min": 0, - "quant_max": 255 + "quant_max": 255, }, "act_ic_observer": { "name": "PerChannelMinMaxObserver", @@ -438,10 +437,10 @@ def update_sq_scale(ipex_config_path, smoothquant_scale_info): "qscheme": "torch.per_channel_affine", "reduce_range": False, "quant_min": 0, - "quant_max": 255 - } + "quant_max": 255, + }, } - v1['weight_observer'] = { + v1["weight_observer"] = { "name": "SmoothQuantWeightObserver", "smooth_quant_enabled": True, "dtype": "torch.qint8", @@ -449,7 +448,7 @@ def update_sq_scale(ipex_config_path, smoothquant_scale_info): "reduce_range": False, "quant_min": -128, "quant_max": 127, - "alpha": smoothquant_scale_info[op_name]['alpha'], + "alpha": smoothquant_scale_info[op_name]["alpha"], "wei_observer": { "name": "PerChannelMinMaxObserver", "ch_axis": 0, @@ -457,7 +456,7 @@ def update_sq_scale(ipex_config_path, smoothquant_scale_info): "qscheme": "torch.per_channel_symmetric", "reduce_range": False, "quant_min": -128, - "quant_max": 127 + "quant_max": 127, }, "wei_ic_observer": { "name": "PerChannelMinMaxObserver", @@ -466,15 +465,16 @@ def update_sq_scale(ipex_config_path, smoothquant_scale_info): "qscheme": "torch.per_channel_affine", "reduce_range": False, "quant_min": -128, - "quant_max": 127 - } + "quant_max": 127, + }, } f.close() # overwrite ipex_config_path - with open(ipex_config_path, 'w') as f1: - json.dump(ipex_config, f1, indent = 4) + with open(ipex_config_path, "w") as f1: + json.dump(ipex_config, f1, indent=4) f1.close() + def auto_copy(module): # pragma: no cover """Get an IPEX prepared model and return a fp32 model. @@ -485,35 +485,42 @@ def auto_copy(module): # pragma: no cover fp32 model. 
""" from intel_extension_for_pytorch.quantization._quantization_state import AutoQuantizationStateModuleDict + def _nn_sequential_patched_forward(cls, x): for module in cls: if not isinstance(module, AutoQuantizationStateModuleDict): x = module(x) return x + new_module = copy.deepcopy(module) - if hasattr(new_module, '_qconf_summary'): + if hasattr(new_module, "_qconf_summary"): del new_module._qconf_summary - if hasattr(new_module, '_fqn_to_auto_quant_state_map'): + if hasattr(new_module, "_fqn_to_auto_quant_state_map"): del new_module._fqn_to_auto_quant_state_map - if hasattr(new_module, 'q_config'): + if hasattr(new_module, "q_config"): del new_module.q_config + def convert_to_dispatch_proxy(x): if isinstance(x, torch.Tensor): return x.as_subclass(CopyTensorProxy) # type: ignore[arg-type] else: return x + global_disable_torch_function_override = False + class CopyTensorProxy(torch.Tensor): @classmethod def __torch_function__(cls, func, types, args=(), kwargs=None): nonlocal global_disable_torch_function_override if ( # global override means disable the override here - global_disable_torch_function_override or + global_disable_torch_function_override + or # to prevent printing things from going into an infinite loop - func == torch.Tensor.__repr__ or + func == torch.Tensor.__repr__ + or # we don't need to override getters in this framework - func.__name__ == '__get__' + func.__name__ == "__get__" ): return super().__torch_function__(func, types, args, kwargs) kwargs = kwargs if kwargs else {} @@ -525,17 +532,21 @@ def __torch_function__(cls, func, types, args=(), kwargs=None): ) assert output is not NotImplemented return output + def __repr__(self): - return f'CopyTensorProxy({super().__repr__()})' + return f"CopyTensorProxy({super().__repr__()})" + cur_module = None - module_stack : List[torch.nn.Module] = [] # pylint: disable=E0602 # noqa: F821 + module_stack: List[torch.nn.Module] = [] # pylint: disable=E0602 # noqa: F821 assert len(module.__class__.__bases__) == 1 + class CopyDispatchModule(module.__class__.__bases__[0]): def __call__(self, *args, **kwargs): new_args = torch.fx.node.map_aggregate(args, convert_to_dispatch_proxy) new_kwargs = torch.fx.node.map_aggregate(kwargs, convert_to_dispatch_proxy) orig_module_call = torch.nn.Module.__call__ orig_nn_sequential_forward = torch.nn.Sequential.forward + def _patched_module_call(self, *args, **kwargs): nonlocal cur_module old_module = cur_module @@ -549,22 +560,27 @@ def _patched_module_call(self, *args, **kwargs): finally: module_stack.pop() cur_module = old_module + torch.nn.Module.__call__ = _patched_module_call torch.nn.Sequential.forward = _nn_sequential_patched_forward # type: ignore[assignment] try: output = super().__call__(*new_args, **new_kwargs) + def unwrap_proxy(a): if isinstance(a, CopyTensorProxy): a.__class__ = torch.Tensor # type: ignore[assignment] return a + output = torch.fx.node.map_aggregate(output, unwrap_proxy) return output finally: torch.nn.Module.__call__ = orig_module_call torch.nn.Sequential.forward = orig_nn_sequential_forward # type: ignore[assignment] + new_module.__class__ = CopyDispatchModule return new_module + def fetch_module(model, op_name): """Get module with a given op name. @@ -576,7 +592,7 @@ def fetch_module(model, op_name): module (object). 
""" module = model - name_list = op_name.split('.') + name_list = op_name.split(".") for name in name_list: if hasattr(module, name): module = getattr(module, name) @@ -584,6 +600,7 @@ def fetch_module(model, op_name): module = module return module + def set_module(model, op_name, new_module): """Set module with a given op name. @@ -596,7 +613,7 @@ def set_module(model, op_name, new_module): module (object). """ module = model - name_list = op_name.split('.') + name_list = op_name.split(".") for name in name_list[:-1]: if hasattr(module, name): module = getattr(module, name) @@ -605,6 +622,7 @@ def set_module(model, op_name, new_module): setattr(module, name_list[-1], new_module) return module + def simple_inference(model, input): """Record model output tensor. @@ -627,6 +645,7 @@ def simple_inference(model, input): output = model(input) return output + def get_example_input(dataloader, i=1): """Get the example input. @@ -652,8 +671,9 @@ def get_example_input(dataloader, i=1): return example_inp -def get_fallback_order(adaptor, fp32_model, dataloader, tune_cfg, - confidence_batches, fallback=False, requantize_cfgs=None): +def get_fallback_order( + adaptor, fp32_model, dataloader, tune_cfg, confidence_batches, fallback=False, requantize_cfgs=None +): """Get the fall back order for strategy. Args: @@ -681,7 +701,10 @@ def get_fallback_order(adaptor, fp32_model, dataloader, tune_cfg, order_dict[name] = order_dict.get(name, 0) + len(order_dict) - i return ordered_ops + op_cfg_mapping = {} + + def get_mse_order_per_fp32(adaptor, model, example_inp, tune_cfg): """This is a helper method to check the mse influence to last module after QDQ(quant/dequant). @@ -694,20 +717,22 @@ def get_mse_order_per_fp32(adaptor, model, example_inp, tune_cfg): fallback_order (dict/list): The fallback order for strategy. 
""" inner_output = None + def output_hook(self, input, output): nonlocal inner_output inner_output = output return output op_type_dict = {} - for k, v in tune_cfg['op'].keys(): + for k, v in tune_cfg["op"].keys(): op_type_dict[k] = v - from ..pytorch import _cfg_to_qconfig, _cfgs_to_fx_cfgs, PyTorch_FXAdaptor + from ..pytorch import PyTorch_FXAdaptor, _cfg_to_qconfig, _cfgs_to_fx_cfgs + op_cfgs = _cfg_to_qconfig(tune_cfg, tune_cfg["approach"]) # insert hook to get output tesnor from last module last_module_name = list(op_cfgs.keys())[-1] - module = fetch_module(model, last_module_name) # get last module + module = fetch_module(model, last_module_name) # get last module module.register_forward_hook(output_hook) # record fp32 model output tensor at first output_fp32 = simple_inference(model, example_inp) @@ -715,7 +740,7 @@ def output_hook(self, input, output): fx_op_cfgs = {} fallback_order = {} - logger.info('Evaluate the sensitivity for each int8 operation') + logger.info("Evaluate the sensitivity for each int8 operation") for op_name, qconfig in tqdm(op_cfgs.items()): if op_name == "bf16_ops_list": continue @@ -729,29 +754,30 @@ def output_hook(self, input, output): op_cfgs[op_name] = None fx_op_cfgs = _cfgs_to_fx_cfgs(op_cfgs, tune_cfg["approach"]) op_cfgs[op_name] = qconfig - from torch.quantization.quantize_fx import prepare_fx,convert_fx + from torch.quantization.quantize_fx import convert_fx, prepare_fx + # do quantization if adaptor.sub_module_list is None: if adaptor.version.release >= Version("1.13.0").release: # pragma: no cover tmp_model = prepare_fx(tmp_model, fx_op_cfgs, example_inp) else: - tmp_model = prepare_fx(tmp_model, fx_op_cfgs,) + tmp_model = prepare_fx( + tmp_model, + fx_op_cfgs, + ) else: - PyTorch_FXAdaptor.prepare_sub_graph(adaptor.sub_module_list, fx_op_cfgs, \ - tmp_model, prefix='') + PyTorch_FXAdaptor.prepare_sub_graph(adaptor.sub_module_list, fx_op_cfgs, tmp_model, prefix="") simple_inference(tmp_model, example_inp) if adaptor.sub_module_list is None: tmp_model = convert_fx(tmp_model) else: - PyTorch_FXAdaptor.convert_sub_graph(adaptor.sub_module_list, \ - tmp_model, prefix='') + PyTorch_FXAdaptor.convert_sub_graph(adaptor.sub_module_list, tmp_model, prefix="") # insert hook to get output tesnor from last module - module = fetch_module(tmp_model, list(op_cfgs.keys())[-1]) # get last module + module = fetch_module(tmp_model, list(op_cfgs.keys())[-1]) # get last module module.register_forward_hook(output_hook) output_qdq = simple_inference(tmp_model, example_inp) - inner_output_int8 = inner_output.dequantize() if \ - inner_output.dtype == torch.quint8 else inner_output + inner_output_int8 = inner_output.dequantize() if inner_output.dtype == torch.quint8 else inner_output mse_val = (inner_output_fp32 - inner_output_int8).pow(2).sum() fallback_order[(op_name, op_type_dict[op_name])] = mse_val @@ -769,52 +795,53 @@ def output_hook(self, input, output): for op_name in ordered_ops: if min_mse <= fallback_order[op_name] <= (max_mse - min_mse) * 0.1 + min_mse: double_check_list.append(op_name) - - check_num = min(len(ordered_ops)//10 + 1, 5) + + check_num = min(len(ordered_ops) // 10 + 1, 5) double_check_list = ordered_ops[:check_num] logger.debug(f"double check list: {double_check_list}") worst_op_name = ordered_ops[-1] - op_cfgs[worst_op_name[0]] = None # fallback worst module first + op_cfgs[worst_op_name[0]] = None # fallback worst module first new_fallback_order = {} - logger.info('Evaluate the sensitivity gradient for selected operations') + 
logger.info("Evaluate the sensitivity gradient for selected operations") for op_name, op_type in tqdm(double_check_list): tmp_model = copy.deepcopy(model) qconfig = op_cfgs[op_name] op_cfgs[op_name] = None fx_op_cfgs = _cfgs_to_fx_cfgs(op_cfgs, tune_cfg["approach"]) op_cfgs[op_name] = qconfig - from torch.quantization.quantize_fx import prepare_fx,convert_fx + from torch.quantization.quantize_fx import convert_fx, prepare_fx + # do quantization if adaptor.sub_module_list is None: if adaptor.version.release >= Version("1.13.0").release: # pragma: no cover tmp_model = prepare_fx(tmp_model, fx_op_cfgs, example_inp) else: - tmp_model = prepare_fx(tmp_model, fx_op_cfgs,) + tmp_model = prepare_fx( + tmp_model, + fx_op_cfgs, + ) else: - PyTorch_FXAdaptor.prepare_sub_graph(adaptor.sub_module_list, fx_op_cfgs, \ - tmp_model, prefix='') + PyTorch_FXAdaptor.prepare_sub_graph(adaptor.sub_module_list, fx_op_cfgs, tmp_model, prefix="") simple_inference(tmp_model, example_inp) if adaptor.sub_module_list is None: tmp_model = convert_fx(tmp_model) else: - PyTorch_FXAdaptor.convert_sub_graph(adaptor.sub_module_list, \ - tmp_model, prefix='') + PyTorch_FXAdaptor.convert_sub_graph(adaptor.sub_module_list, tmp_model, prefix="") # insert hook to get output tesnor from last module - module = fetch_module(tmp_model, last_module_name) # get last module + module = fetch_module(tmp_model, last_module_name) # get last module module.register_forward_hook(output_hook) output_qdq = simple_inference(tmp_model, example_inp) - inner_output_int8 = inner_output.dequantize() if \ - inner_output.dtype == torch.quint8 else inner_output + inner_output_int8 = inner_output.dequantize() if inner_output.dtype == torch.quint8 else inner_output mse_val = (inner_output_fp32 - inner_output_int8).pow(2).sum() new_fallback_order[(op_name, op_type_dict[op_name])] = mse_val - ordered_ops = sorted(new_fallback_order.keys(), key=lambda key: new_fallback_order[key], \ - reverse=False) + ordered_ops = sorted(new_fallback_order.keys(), key=lambda key: new_fallback_order[key], reverse=False) return ordered_ops + def get_mse_order_per_int8(adaptor, fp32_model, example_input, tune_cfg): """This is a helper method to check the mse influence to last module after QDQ(quant/dequant). @@ -822,25 +849,27 @@ def get_mse_order_per_int8(adaptor, fp32_model, example_input, tune_cfg): model (torch.fx.GraphModule/torch.nn.Module): A torch model. example_inp (object): example inputs. tune_cfg (dict): dictionary of quantization configuration. - + Returns: fallback_order (dict/list): The fallback order for strategy. 
""" inner_output = None + def output_hook(self, input, output): nonlocal inner_output inner_output = output return output op_type_dict = {} - for k, v in tune_cfg['op'].keys(): + for k, v in tune_cfg["op"].keys(): op_type_dict[k] = v example_inp = example_input from ..pytorch import _cfg_to_qconfig + op_cfgs = _cfg_to_qconfig(tune_cfg, tune_cfg["approach"]) - module = fetch_module(fp32_model, list(op_cfgs.keys())[-1]) # get last module + module = fetch_module(fp32_model, list(op_cfgs.keys())[-1]) # get last module # insert hook to get output tesnor from last module module.register_forward_hook(output_hook) # record fp32 model output tensor at first @@ -848,39 +877,41 @@ def output_hook(self, input, output): inner_output_fp32 = inner_output quant_list = [] - for k, v in tune_cfg['op'].items(): - if k[1] in ['LayerNorm', 'Dropout', 'InstanceNorm3d']: + for k, v in tune_cfg["op"].items(): + if k[1] in ["LayerNorm", "Dropout", "InstanceNorm3d"]: continue - if v['weight']['dtype'] == 'fp32': + if v["weight"]["dtype"] == "fp32": quant_list.append(k) fallback_order = {} - logger.info('Evaluate the sensitivity for each fp32 operation') + logger.info("Evaluate the sensitivity for each fp32 operation") for op_name, op_type in tqdm(quant_list): if op_name in op_cfg_mapping: tmp_model = copy.deepcopy(fp32_model) - from ..pytorch import _cfg_to_qconfig, _cfgs_to_fx_cfgs, PyTorch_FXAdaptor + from ..pytorch import PyTorch_FXAdaptor, _cfg_to_qconfig, _cfgs_to_fx_cfgs + op_cfgs[op_name] = op_cfg_mapping[op_name] fx_op_cfgs = _cfgs_to_fx_cfgs(op_cfgs, tune_cfg["approach"]) - from torch.quantization.quantize_fx import prepare_fx,convert_fx + from torch.quantization.quantize_fx import convert_fx, prepare_fx + # do quantization if adaptor.sub_module_list is None: if adaptor.version.release >= Version("1.13.0").release: # pragma: no cover tmp_model = prepare_fx(tmp_model, fx_op_cfgs, example_inp) else: - tmp_model = prepare_fx(tmp_model, fx_op_cfgs,) + tmp_model = prepare_fx( + tmp_model, + fx_op_cfgs, + ) else: - PyTorch_FXAdaptor.prepare_sub_graph(adaptor.sub_module_list, fx_op_cfgs, \ - tmp_model, prefix='') + PyTorch_FXAdaptor.prepare_sub_graph(adaptor.sub_module_list, fx_op_cfgs, tmp_model, prefix="") simple_inference(tmp_model, example_inp) if adaptor.sub_module_list is None: tmp_model = convert_fx(tmp_model) else: - PyTorch_FXAdaptor.convert_sub_graph(adaptor.sub_module_list, \ - tmp_model, prefix='') - + PyTorch_FXAdaptor.convert_sub_graph(adaptor.sub_module_list, tmp_model, prefix="") # record int8 model output tensor - module = fetch_module(tmp_model, list(op_cfgs.keys())[-1]) # get last module + module = fetch_module(tmp_model, list(op_cfgs.keys())[-1]) # get last module module.register_forward_hook(output_hook) output_qdq = simple_inference(tmp_model, example_inp) inner_output_int8 = inner_output @@ -892,17 +923,18 @@ def output_hook(self, input, output): mse_val = (inner_output_fp32 - inner_output_int8).pow(2).sum() fallback_order[(op_name, op_type_dict[op_name])] = mse_val # re-insert fp32 module into model - ordered_ops = sorted(fallback_order.keys(), key=lambda key: fallback_order[key], \ - reverse=False) + ordered_ops = sorted(fallback_order.keys(), key=lambda key: fallback_order[key], reverse=False) return ordered_ops + def get_torch_version(): """Get torch version.""" from packaging.version import Version + try: - torch_version = torch.__version__.split('+')[0] + torch_version = torch.__version__.split("+")[0] except ValueError as e: # pragma: no cover - assert False, 'Got an unknown version 
of torch: {}'.format(e) + assert False, "Got an unknown version of torch: {}".format(e) version = Version(torch_version) return version @@ -910,36 +942,41 @@ def get_torch_version(): def match_datatype_pattern(datatype, pattern=None): """Check the datatype pattern.""" import re + if not pattern: pattern = r"(uint|int)([1-8])" match = re.match(pattern, datatype) return match - + + def _get_signed_and_bits(datatype): """Parse sign and bits from datatype.""" - unsigned = datatype[0] == 'u' + unsigned = datatype[0] == "u" if unsigned: num_bits = int(datatype[4:]) else: num_bits = int(datatype[3:]) return unsigned, num_bits + def calculate_quant_min_max(unsigned, num_bits): """Calculate the qmin and qmax according to the datatype.""" # TODO handle reduce range quant_min, quant_max = None, None if unsigned: - quant_min, quant_max =0.0 , 2.0**(num_bits) - 1.0 + quant_min, quant_max = 0.0, 2.0 ** (num_bits) - 1.0 else: - quant_min, quant_max = -1 * 2.0**(num_bits - 1), 2.0**(num_bits - 1) - 1 + quant_min, quant_max = -1 * 2.0 ** (num_bits - 1), 2.0 ** (num_bits - 1) - 1 return quant_min, quant_max + def get_depth(d) -> int: """Query the depth of the dict.""" if isinstance(d, dict): return 1 + max(get_depth(v) for v in d.values()) return 0 + def get_dict_at_depth(d, target_depth, result, depth=0): """Get all sub-dicts that are at a specified depth in a nested dict.""" if depth == target_depth: @@ -947,7 +984,8 @@ def get_dict_at_depth(d, target_depth, result, depth=0): return elif depth < target_depth and isinstance(d, dict): for k, v in d.items(): - get_dict_at_depth(v, target_depth, result, depth=depth+1) + get_dict_at_depth(v, target_depth, result, depth=depth + 1) + def get_element_under_depth(d, ops_lst): """Get all values in a nested dict.""" @@ -957,6 +995,7 @@ def get_element_under_depth(d, ops_lst): else: ops_lst.append(d) + def get_op_type_by_name(op_name, quantizable_ops): """Get op type by op name.""" for pair in quantizable_ops: @@ -964,8 +1003,9 @@ def get_op_type_by_name(op_name, quantizable_ops): return pair[1] return None + def collect_weight_info(model, q_config): - """collect weight info from q_config for dumping into qconfig.json + """Collect weight info from q_config for dumping into qconfig.json. 
qconfig.json example: ``` @@ -984,36 +1024,38 @@ def collect_weight_info(model, q_config): q_config (_type_): quantization configue """ weight_info = {} - from neural_compressor.utils.logger import level, DEBUG - for op, config in q_config['op'].items(): + from neural_compressor.utils.logger import DEBUG, level + + for op, config in q_config["op"].items(): op_name, op_type = op - if config['weight']['dtype'] == 'fp32': - weight_info[op_name] = {'dtype': 'fp32'} + if config["weight"]["dtype"] == "fp32": + weight_info[op_name] = {"dtype": "fp32"} else: # fetch module type for MulLinear module = fetch_module(model, op_name) if level == DEBUG: weight_info[op_name] = { - 'dtype': config['weight']['dtype'], - 'bits': config['weight']['bits'], - 'group_size': config['weight']['group_size'], - 'scheme': config['weight']['scheme'], - 'module_type': str(type(module)).split('\'')[1], - 'algorithm': config['weight']['algorithm'] + "dtype": config["weight"]["dtype"], + "bits": config["weight"]["bits"], + "group_size": config["weight"]["group_size"], + "scheme": config["weight"]["scheme"], + "module_type": str(type(module)).split("'")[1], + "algorithm": config["weight"]["algorithm"], } else: weight_info[op_name] = { - 'dtype': config['weight']['dtype'], - 'bits': config['weight']['bits'], - 'group_size': config['weight']['group_size'], - 'scheme': config['weight']['scheme'], - 'module_type': str(type(module)).split('\'')[1], + "dtype": config["weight"]["dtype"], + "bits": config["weight"]["bits"], + "group_size": config["weight"]["group_size"], + "scheme": config["weight"]["scheme"], + "module_type": str(type(module)).split("'")[1], } return weight_info -def get_module_input_output(model, module_hook_config={}, dataloader=None, iters=-1, - calib_func=None, input_func=None, output_func=None): +def get_module_input_output( + model, module_hook_config={}, dataloader=None, iters=-1, calib_func=None, input_func=None, output_func=None +): """A help function to get input and output tensor of modules in module_name_list. Args: @@ -1031,40 +1073,43 @@ def get_module_input_output(model, module_hook_config={}, dataloader=None, iters output_func: preprocess output for less memory usage Returns: - total_values: recorded input_values, output_values. - for example: - {'fc1': + total_values: recorded input_values, output_values. 
+ for example: + {'fc1': {'input': [], 'output': []}, } - """ from collections import defaultdict + total_values = defaultdict(defaultdict) + def _save_input_output_hook(name, record_input=False, record_output=False): """ A forward hook to save input and output values of a module param name: the module name return: A hook function """ + def _hook(module, inputs, outputs): if record_input: input = inputs[0] if input_func is not None: input = input_func(input) - if name in total_values and 'input' in total_values[name]: - total_values[name]['input'].append(input) + if name in total_values and "input" in total_values[name]: + total_values[name]["input"].append(input) else: - total_values[name]['input'] = [input] + total_values[name]["input"] = [input] if record_output: output = outputs[0] if isinstance(outputs, tuple) else outputs if output_func is not None: output = output_func(output) if input_func is not None: input = input_func(input) - if name in total_values and 'output' in total_values[name]: - total_values[name]['output'].append(output) + if name in total_values and "output" in total_values[name]: + total_values[name]["output"].append(output) else: - total_values[name]['output'] = [output] + total_values[name]["output"] = [output] + return _hook hook_list = [] @@ -1073,24 +1118,24 @@ def _hook(module, inputs, outputs): require_list = module_hook_config[name] logger.debug(f"required hooks {name}: {require_list}") _hook = _save_input_output_hook( - name, - record_input='input' in require_list, - record_output='output' in require_list, + name, + record_input="input" in require_list, + record_output="output" in require_list, ) require_list = module_hook_config[name] - hook_list.append( - module.register_forward_hook(_hook)) + hook_list.append(module.register_forward_hook(_hook)) if calib_func: calib_func(model) else: from .smooth_quant import model_forward + model_forward(model, dataloader, iters, device=next(model.parameters()).device) for h in hook_list: h.remove() return total_values -def get_absorb_layers(model, example_inputs, supported_layers=['Linear'], folding=False): +def get_absorb_layers(model, example_inputs, supported_layers=["Linear"], folding=False): """Get absorb_to_layer and no_absorb_layer. Args: @@ -1105,18 +1150,17 @@ def get_absorb_layers(model, example_inputs, supported_layers=['Linear'], foldin """ # get modules that can be absorbed. from .smooth_quant import GraphTrace + tg = GraphTrace() - absorb_to_layer, no_absorb_layers = tg.get_absorb_to_layer( - model, example_inputs, supported_layers - ) + absorb_to_layer, no_absorb_layers = tg.get_absorb_to_layer(model, example_inputs, supported_layers) if absorb_to_layer is None or absorb_to_layer == {}: absorb_to_layer = {} - logger.warning('No absorb layer is detected.') + logger.warning("No absorb layer is detected.") # if no_absorb_layers is None, jit trace failed. # collect all linears for next step if no_absorb_layers is None: no_absorb_layers = [] - op_types = ['Linear'] + op_types = ["Linear"] for name, module in model.named_modules(): for op_type in op_types: if op_type == str(module.__class__.__name__): @@ -1125,7 +1169,7 @@ def get_absorb_layers(model, example_inputs, supported_layers=['Linear'], foldin def get_block_prefix(model): - """get prefix and number of blockes + """Get prefix and number of blockes. 
Args: model (torch.nn.Module): input model @@ -1134,7 +1178,7 @@ def get_block_prefix(model): block_prefix(str): block_list name in model block_num(int): number of block in block_list """ - module_types=[torch.nn.ModuleList] + module_types = [torch.nn.ModuleList] for n, m in model.named_modules(): if type(m) in module_types: block_prefix = n @@ -1146,7 +1190,7 @@ def get_block_prefix(model): def calibration(model, dataloader=None, n_samples=128, calib_func=None): - """ Calibration with dataloader or calib_func + """Calibration with dataloader or calib_func. Args: model (torch.nn.Module): input model @@ -1159,18 +1203,24 @@ def calibration(model, dataloader=None, n_samples=128, calib_func=None): calib_func(model) else: import math + from .smooth_quant import model_forward + batch_size = dataloader.batch_size iters = int(math.ceil(n_samples / batch_size)) if n_samples % batch_size != 0: - logger.info("calibration samples increase from {} to {} due to batch_size is {}".format( - n_samples, iters*batch_size, batch_size, - )) + logger.info( + "calibration samples increase from {} to {} due to batch_size is {}".format( + n_samples, + iters * batch_size, + batch_size, + ) + ) model_forward(model, dataloader, iters, next(model.parameters()).device) def get_hidden_states(model, dataloader=None, n_samples=128, calib_func=None): - """get the input args and kwargs of first block. + """Get the input args and kwargs of first block. Args: model (torch.nn.Module): input model @@ -1188,6 +1238,7 @@ def get_hidden_states(model, dataloader=None, n_samples=128, calib_func=None): # Step 1: replace block_forward to collect block inputs and avoid entire inference total_block_args = [] total_block_kwargs = [] + def forward(layer, *args, **kwargs): # update total_hidden_states, total_block_kwargs, per batch total_block_args.append(list(args)) @@ -1202,12 +1253,14 @@ def forward(layer, *args, **kwargs): # Step 2: replace model_forward to avoid ValueError model_forward_cache = model.forward + def model_forward(model, *args, **kwargs): nonlocal model_forward_cache try: model_forward_cache(*args, **kwargs) except ValueError: pass + model.forward = partial(model_forward, model) # Step 3: execute calibration diff --git a/neural_compressor/adaptor/torch_utils/weight_only.py b/neural_compressor/adaptor/torch_utils/weight_only.py index 1974e629e61..58d7d7749e4 100644 --- a/neural_compressor/adaptor/torch_utils/weight_only.py +++ b/neural_compressor/adaptor/torch_utils/weight_only.py @@ -18,21 +18,36 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from copy import deepcopy import math +from copy import deepcopy from typing import OrderedDict -from .util import set_module + from ...utils import logger from ...utils.utility import LazyImport +from .util import set_module tqdm = LazyImport("tqdm") torch = LazyImport("torch") -NF4 = [-1.0, -0.6961928009986877, -0.5250730514526367, -0.39491748809814453, -0.28444138169288635, - -0.18477343022823334, -0.09105003625154495, 0.0, 0.07958029955625534, 0.16093020141124725, - 0.24611230194568634, 0.33791524171829224, 0.44070982933044434, 0.5626170039176941, - 0.7229568362236023, 1.0] +NF4 = [ + -1.0, + -0.6961928009986877, + -0.5250730514526367, + -0.39491748809814453, + -0.28444138169288635, + -0.18477343022823334, + -0.09105003625154495, + 0.0, + 0.07958029955625534, + 0.16093020141124725, + 0.24611230194568634, + 0.33791524171829224, + 0.44070982933044434, + 0.5626170039176941, + 0.7229568362236023, + 1.0, +] FP4_BNB = [-12.0, -8.0, -6.0, -4.0, -3.0, -2.0, -0.0625, 0, 0.0625, 2.0, 3.0, 4.0, 6.0, 8.0, 12.0] FP4_E2M1 = [-6.0, -4.0, -3.0, -2.0, -1.5, -1.0, -0.0625, 0, 0.0625, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0] @@ -43,11 +58,11 @@ FP4_BNB_BIT = [-5, -6, -3, -4, -1, -2, -7, 0, 1, 6, 7, 4, 5, 2, 3] FP4_E2M1_BIT = [-1, -2, -3, -4, -5, -6, -7, 0, 1, 2, 3, 4, 5, 6, 7] -FLOAT_MAPPING = {'nf4': NF4, 'fp4': FP4_BNB, 'fp4_e2m1_bnb': FP4_BNB, 'fp4_e2m1': FP4_E2M1} -INT_MAPPING = {'nf4': NF4_BIT, 'fp4': FP4_BNB_BIT, 'fp4_e2m1_bnb': FP4_BNB_BIT, - 'fp4_e2m1': FP4_E2M1_BIT} +FLOAT_MAPPING = {"nf4": NF4, "fp4": FP4_BNB, "fp4_e2m1_bnb": FP4_BNB, "fp4_e2m1": FP4_E2M1} +INT_MAPPING = {"nf4": NF4_BIT, "fp4": FP4_BNB_BIT, "fp4_e2m1_bnb": FP4_BNB_BIT, "fp4_e2m1": FP4_E2M1_BIT} -def quantize_4bit(tensor, quantile=1.0, data_type='nf4', return_int=False): + +def quantize_4bit(tensor, quantile=1.0, data_type="nf4", return_int=False): """Quantize tensor to NF4/FP4 data type. 
Args: @@ -66,16 +81,16 @@ def quantize_4bit(tensor, quantile=1.0, data_type='nf4', return_int=False): scale = tensor.max(1)[0] * quantile / max(allow_data) scale.unsqueeze_(dim=-1) tensor = tensor / scale - mid_data = [(allow_data[i] + allow_data[i+1])/2 for i in range(len(allow_data)-1)] + mid_data = [(allow_data[i] + allow_data[i + 1]) / 2 for i in range(len(allow_data) - 1)] q_tensor = torch.zeros_like(tensor) for i in range(len(allow_data)): data = allow_data_bit[i] if return_int else allow_data[i] if i == 0: q_tensor += torch.where(tensor <= mid_data[i], data, 0) elif i == len(allow_data) - 1: - q_tensor += torch.where(tensor > mid_data[i-1], data, 0) + q_tensor += torch.where(tensor > mid_data[i - 1], data, 0) else: - q_tensor += torch.where((mid_data[i-1] < tensor) & (tensor <= mid_data[i]), data, 0) + q_tensor += torch.where((mid_data[i - 1] < tensor) & (tensor <= mid_data[i]), data, 0) if return_int: return q_tensor.type(torch.int8), scale.type(torch.float), None return q_tensor * scale @@ -94,7 +109,7 @@ def qdq_weight_asym(weight, num_bits=4, quantile=1.0, return_int=False): Returns: output: qdq weight """ - maxq = torch.tensor(2 ** num_bits - 1) + maxq = torch.tensor(2**num_bits - 1) zeros = torch.zeros(weight.shape[0], device=weight.device) wmin = torch.minimum(weight.min(1)[0], zeros) wmax = torch.maximum(weight.max(1)[0], zeros) @@ -133,7 +148,7 @@ def qdq_weight_sym(weight, num_bits=4, quantile=1.0, return_int=False, full_rang """ # assert num_bits > 1, "symmetric scheme only supports num_bits > 1" maxq = torch.tensor(2 ** (num_bits - 1) - 1).to(weight.device) - minq = torch.tensor(-2 ** (num_bits - 1)).to(weight.device) + minq = torch.tensor(-(2 ** (num_bits - 1))).to(weight.device) if num_bits == 1: # pragma: no cover maxq = torch.tensor(2 ** (num_bits - 1)) minq = torch.tensor(2 ** (num_bits - 1) - 1) @@ -142,13 +157,13 @@ def qdq_weight_sym(weight, num_bits=4, quantile=1.0, return_int=False, full_rang flip_flag = torch.abs(max_val) > torch.abs(min_val) wmax = torch.max(torch.abs(max_val), torch.abs(min_val)) wmax = wmax * quantile - tmp = (wmax == 0) + tmp = wmax == 0 wmax[tmp] = +1 if full_range: # use -8, 8 to make sure amax is not changed after fake quant scale = wmax / (-minq) tmp = scale * flip_flag.int() - scale -= 2*tmp # set negetive scale with flip_flag + scale -= 2 * tmp # set negetive scale with flip_flag else: scale = wmax / maxq scale.unsqueeze_(dim=-1) @@ -158,8 +173,7 @@ def qdq_weight_sym(weight, num_bits=4, quantile=1.0, return_int=False, full_rang return scale * q -def qdq_weight_actor(weight, num_bits, scheme, quantile=1.0, data_type='int', - return_int=False, full_range=False): +def qdq_weight_actor(weight, num_bits, scheme, quantile=1.0, data_type="int", return_int=False, full_range=False): """Quant and dequant tensor per channel. 
Args: @@ -175,17 +189,17 @@ def qdq_weight_actor(weight, num_bits, scheme, quantile=1.0, data_type='int', output: qdq weight """ assert num_bits > 0, "num_bits should be larger than 0" - if 'int' not in data_type and num_bits == 4: - return quantize_4bit(weight, quantile=quantile, data_type=data_type, - return_int=return_int) + if "int" not in data_type and num_bits == 4: + return quantize_4bit(weight, quantile=quantile, data_type=data_type, return_int=return_int) if scheme == "sym": return qdq_weight_sym(weight, num_bits, quantile, return_int, full_range) else: return qdq_weight_asym(weight, num_bits, quantile, return_int) -def quant_weight(weight, num_bits=4, group_size=-1, scheme="asym", quantile=1.0, - data_type='int', return_int=False, full_range=False): +def quant_weight( + weight, num_bits=4, group_size=-1, scheme="asym", quantile=1.0, data_type="int", return_int=False, full_range=False +): """Quant and dequant tensor with group size. Args: @@ -205,17 +219,28 @@ def quant_weight(weight, num_bits=4, group_size=-1, scheme="asym", quantile=1.0, if num_bits <= 0: return weight if group_size == -1 or weight.shape[1] < group_size: - return qdq_weight_actor(weight, num_bits, scheme=scheme, quantile=quantile, - return_int=return_int, full_range=full_range, - data_type=data_type,) + return qdq_weight_actor( + weight, + num_bits, + scheme=scheme, + quantile=quantile, + return_int=return_int, + full_range=full_range, + data_type=data_type, + ) orig_shape = weight.shape if weight.shape[1] % group_size == 0: weight = weight.reshape(-1, group_size) if return_int: weight, scale, zp = qdq_weight_actor( - weight, num_bits, scheme=scheme, quantile=quantile, - return_int=True, full_range=full_range, data_type=data_type, + weight, + num_bits, + scheme=scheme, + quantile=quantile, + return_int=True, + full_range=full_range, + data_type=data_type, ) weight = weight.reshape(orig_shape) scale = scale.reshape(orig_shape[0], -1) @@ -224,8 +249,7 @@ def quant_weight(weight, num_bits=4, group_size=-1, scheme="asym", quantile=1.0, return weight, scale, zp else: weight = qdq_weight_actor( - weight, num_bits, scheme=scheme, data_type=data_type, - quantile=quantile, full_range=full_range + weight, num_bits, scheme=scheme, data_type=data_type, quantile=quantile, full_range=full_range ) return weight.reshape(orig_shape) else: @@ -234,23 +258,32 @@ def quant_weight(weight, num_bits=4, group_size=-1, scheme="asym", quantile=1.0, weight1 = weight1.reshape(-1, group_size) if return_int: weight1, scale1, zp1 = qdq_weight_actor( - weight1, num_bits, scheme=scheme, data_type=data_type, - quantile=quantile, return_int=True, full_range=full_range + weight1, + num_bits, + scheme=scheme, + data_type=data_type, + quantile=quantile, + return_int=True, + full_range=full_range, ) scale1 = scale1.reshape(orig_shape[0], -1) if zp1 is not None: zp1 = zp1.reshape(orig_shape[0], -1) else: weight1 = qdq_weight_actor( - weight1, num_bits, scheme=scheme, quantile=quantile, - data_type=data_type, full_range=full_range + weight1, num_bits, scheme=scheme, quantile=quantile, data_type=data_type, full_range=full_range ) weight1 = weight1.reshape(orig_shape[0], split_index) weight2 = weight[:, split_index:] if return_int: weight2, scale2, zp2 = qdq_weight_actor( - weight2, num_bits, scheme=scheme, data_type=data_type, - quantile=quantile, return_int=True, full_range=full_range, + weight2, + num_bits, + scheme=scheme, + data_type=data_type, + quantile=quantile, + return_int=True, + full_range=full_range, ) weight = torch.cat([weight1, weight2], 
dim=1) scale = torch.cat([scale1, scale2], dim=1) @@ -261,15 +294,13 @@ def quant_weight(weight, num_bits=4, group_size=-1, scheme="asym", quantile=1.0, return weight, scale, zp else: weight2 = qdq_weight_actor( - weight2, num_bits, scheme=scheme, data_type=data_type, - quantile=quantile, full_range=full_range + weight2, num_bits, scheme=scheme, data_type=data_type, quantile=quantile, full_range=full_range ) weight = torch.cat([weight1, weight2], dim=1) return weight -def search_clip(m, num_bits=4, group_size=32, scheme='asym', - data_type='int', sym_full_range=False): +def search_clip(m, num_bits=4, group_size=32, scheme="asym", data_type="int", sym_full_range=False): """Search best clip range of each linears in current block. Args: @@ -279,24 +310,23 @@ def search_clip(m, num_bits=4, group_size=32, scheme='asym', scheme (str, optional): sym or asym. data_type (str, optional): select from int, nf4, fp4. Defaults to int. sym_full_range (bool, optional): Choose sym range whether use -2**(bits-1). - + Returns: best_clip_ratio (float): best percentile of clip - """ org_weight = m.weight.data logger.info("Searching the best clip range with RTN algorithm") - best_error = float('inf') + best_error = float("inf") best_clip_ratio = None n_grid = 200 max_shrink = 0.2 history = [] for i_s in range(int(max_shrink * n_grid)): - ratio = (1 - i_s / n_grid) # 1, 0.805-1.0 + ratio = 1 - i_s / n_grid # 1, 0.805-1.0 cur_weight = quant_weight( m.weight.data, - num_bits=num_bits, - group_size=group_size, + num_bits=num_bits, + group_size=group_size, scheme=scheme, data_type=data_type, full_range=sym_full_range, @@ -312,10 +342,20 @@ def search_clip(m, num_bits=4, group_size=32, scheme='asym', logger.debug("The best clip ratio is {}".format(best_clip_ratio)) return best_clip_ratio -def rtn_quantize(model, num_bits=4, group_size=32, scheme="asym", - quantile=1.0, weight_config={}, - return_int=False, data_type='int', - sym_full_range=False, mse_range=False, **kwargs): + +def rtn_quantize( + model, + num_bits=4, + group_size=32, + scheme="asym", + quantile=1.0, + weight_config={}, + return_int=False, + data_type="int", + sym_full_range=False, + mse_range=False, + **kwargs, +): """Quant the model with round to nearst method. Args: @@ -326,12 +366,12 @@ def rtn_quantize(model, num_bits=4, group_size=32, scheme="asym", quantile (float, optional): percentile of clip. Defaults to 1.0. data_type (str, optional): select from int, nf4, fp4. Defaults to int. weight_config (dict, optional): specific layer wise configirations. Defaults to {}. - For example, + For example, weight_config={ 'fc2': { - 'bits': 4, - 'group_size': 32, + 'bits': 4, + 'group_size': 32, 'scheme': 'sym' 'gptq_perm': [1, 1, ...] 
# for gptq perm } @@ -347,27 +387,29 @@ def rtn_quantize(model, num_bits=4, group_size=32, scheme="asym", model: fake quantized torch module """ assert isinstance(model, torch.nn.Module), "only support torch module" - supported_layers = ['Linear'] + supported_layers = ["Linear"] if return_int: compression_dtype = kwargs.get("compression_dtype", torch.int32) compression_dim = kwargs.get("compression_dim", 1) scale_dtype = kwargs.get("scale_dtype", torch.float32) - device = kwargs.get("device", 'cpu') + device = kwargs.get("device", "cpu") for name, m in model.named_modules(): if m.__class__.__name__ not in supported_layers: continue if name in weight_config: # pragma: no cover - num_bits = weight_config[name]['bits'] - group_size = weight_config[name]['group_size'] - scheme = weight_config[name]['scheme'] - quantile = weight_config[name].get('quantile', 1.0) + num_bits = weight_config[name]["bits"] + group_size = weight_config[name]["group_size"] + scheme = weight_config[name]["scheme"] + quantile = weight_config[name].get("quantile", 1.0) logger.debug(f"RTN quantized module:{name, m}") - #import pdb; pdb.set_trace() - log_msg = f"RTN quantization config: num_bits={num_bits}, group_size={group_size}, " + \ - f"scheme={scheme}, quantile={quantile}" - if data_type != 'int': + # import pdb; pdb.set_trace() + log_msg = ( + f"RTN quantization config: num_bits={num_bits}, group_size={group_size}, " + + f"scheme={scheme}, quantile={quantile}" + ) + if data_type != "int": log_msg += f", dtype={data_type}" - elif scheme == 'sym': # nf4/fp4 is always [-7,7] + elif scheme == "sym": # nf4/fp4 is always [-7,7] log_msg += f", sym_full_range={sym_full_range}" logger.debug(log_msg) if num_bits <= 0: @@ -378,37 +420,55 @@ def rtn_quantize(model, num_bits=4, group_size=32, scheme="asym", quantile = search_clip(m, num_bits, group_size, scheme, data_type, sym_full_range) if return_int: from .model_wrapper import WeightOnlyLinear + int_weight, scale, zp = quant_weight( - weight, num_bits, group_size, scheme, quantile, - data_type=data_type, return_int=True, full_range=sym_full_range, + weight, + num_bits, + group_size, + scheme, + quantile, + data_type=data_type, + return_int=True, + full_range=sym_full_range, ) new_module = WeightOnlyLinear( - m.in_features, m.out_features, num_bits, group_size, + m.in_features, + m.out_features, + num_bits, + group_size, dtype=data_type, - zp=zp is not None, bias=m.bias is not None, - compression_dtype=compression_dtype, - compression_dim=compression_dim, - scale_dtype=scale_dtype, + zp=zp is not None, + bias=m.bias is not None, + compression_dtype=compression_dtype, + compression_dim=compression_dim, + scale_dtype=scale_dtype, device=device, ) new_module.pack(int_weight, scale, zp, m.bias) - if name == '': + if name == "": return new_module else: set_module(model, name, new_module) else: q_weight = quant_weight( - weight, num_bits, group_size, scheme, quantile, - data_type=data_type, full_range=sym_full_range, + weight, + num_bits, + group_size, + scheme, + quantile, + data_type=data_type, + full_range=sym_full_range, ) m.weight.data.copy_(q_weight) return model -def gptq_quantize(model, weight_config={}, dataloader=None, nsamples=128, use_max_length = True, device=None): - """Run weight-only quantization with """ + +def gptq_quantize(model, weight_config={}, dataloader=None, nsamples=128, use_max_length=True, device=None): + """Run weight-only quantization with.""" # TODO: unify weight_config keys, add docstring, and support default config assert isinstance(model, 
torch.nn.Module), "only support torch module" from .gptq import GPTQuantizer + gptq_quantizer = GPTQuantizer(model, weight_config, dataloader, nsamples, use_max_length, device) fp32_modified_model, gptq_config = gptq_quantizer.execute_quantization() logger.info("GPTQ quantizing done.") @@ -416,23 +476,36 @@ def gptq_quantize(model, weight_config={}, dataloader=None, nsamples=128, use_ma @torch.no_grad() -def awq_quantize(model, bits=4, group_size=32, scheme='asym', weight_config={}, - example_inputs=None, dataloader=None, n_samples=128, calib_func=None, - auto_scale=True, mse_range=True, folding=False, return_int=False, - sym_full_range=False, data_type='int'): +def awq_quantize( + model, + bits=4, + group_size=32, + scheme="asym", + weight_config={}, + example_inputs=None, + dataloader=None, + n_samples=128, + calib_func=None, + auto_scale=True, + mse_range=True, + folding=False, + return_int=False, + sym_full_range=False, + data_type="int", +): """Quant the model with Activation-aware Weight quantization(AWQ) method. Args: model (torch.nn.Module): torch model. example_inputs: example_inputs. weight_config (dict, optional): contains all info required by AWQ. Defaults to {}. - For example, + For example, weight_config={ 'fc2': { - # 'absorb_layer': 'fc1', - 'bits': 4, - 'group_size': 32, + # 'absorb_layer': 'fc1', + 'bits': 4, + 'group_size': 32, 'scheme': 'sym' } } @@ -456,17 +529,18 @@ def awq_quantize(model, bits=4, group_size=32, scheme='asym', weight_config={}, model: fake quantized model """ from .awq import ActAwareWeightQuant + assert isinstance(model, torch.nn.Module), "only support torch module" awq = ActAwareWeightQuant( - model, - example_inputs=example_inputs, - calib_func=calib_func, - dataloader=dataloader, + model, + example_inputs=example_inputs, + calib_func=calib_func, + dataloader=dataloader, n_samples=n_samples, - bits=bits, - group_size=group_size, - scheme=scheme, - sym_full_range=sym_full_range, + bits=bits, + group_size=group_size, + scheme=scheme, + sym_full_range=sym_full_range, weight_config=weight_config, data_type=data_type, ) @@ -479,24 +553,26 @@ def awq_quantize(model, bits=4, group_size=32, scheme='asym', weight_config={}, return qdq_model -def teq_quantize(model, weight_config={}, absorb_to_layer={}, extra_config={}, - dataloader= None, calib_func=None, example_inputs=None): - """Run weight-only quantization with """ +def teq_quantize( + model, weight_config={}, absorb_to_layer={}, extra_config={}, dataloader=None, calib_func=None, example_inputs=None +): + """Run weight-only quantization with.""" assert isinstance(model, torch.nn.Module), "only support torch module" logger.info("TEQ quantizing start.") if example_inputs is None: - if dataloader is None: # pragma: no cover + if dataloader is None: # pragma: no cover assert False, "Please provide dataloader or example_inputs for TEQ algorithm." try: for idx, (input, label) in enumerate(dataloader): example_inputs = input break - except: # pragma: no cover + except: # pragma: no cover for idx, input in enumerate(dataloader): example_inputs = input break from .teq import TEQuantizer + teq_quantizer = TEQuantizer(model, weight_config, absorb_to_layer, extra_config, example_inputs) # 1. wrapper tuning scale to model @@ -504,10 +580,10 @@ def teq_quantize(model, weight_config={}, absorb_to_layer={}, extra_config={}, # 2. 
tuning # custom train function, there calls calib_func - if calib_func: # pragma: no cover + if calib_func: # pragma: no cover calib_func(teq_quantizer.model) else: - if dataloader is None: # pragma: no cover + if dataloader is None: # pragma: no cover assert False, "Please provide dataloader to train." teq_quantizer.train(dataloader) @@ -517,7 +593,7 @@ def teq_quantize(model, weight_config={}, absorb_to_layer={}, extra_config={}, # 4. get quantized model teq_quantizer.quantize() - #quantization_data = gptq_quantizer.execute_quantization() + # quantization_data = gptq_quantizer.execute_quantization() logger.info("TEQ quantizing done.") return teq_quantizer.model @@ -539,18 +615,18 @@ def quant_weight_w_scale(weight, scale, zp, group_size=-1): if zp is not None: zp = zp.to(device) if group_size == -1: - return torch.round(weight/scale) if zp is None else torch.round(weight/scale + zp) + return torch.round(weight / scale) if zp is None else torch.round(weight / scale + zp) int_weight = torch.zeros(weight.shape).to(device) leng = weight.shape[1] // group_size tail_flag = False if weight.shape[1] % group_size == 0 else True for i in range(leng): - int_weight_tmp = weight[:, i*group_size: (i+1)*group_size] / scale[:, i].unsqueeze(1) + int_weight_tmp = weight[:, i * group_size : (i + 1) * group_size] / scale[:, i].unsqueeze(1) if zp is not None: int_weight_tmp += zp[:, i].unsqueeze(1) - int_weight[:, i*group_size: (i+1)*group_size] = torch.round(int_weight_tmp) + int_weight[:, i * group_size : (i + 1) * group_size] = torch.round(int_weight_tmp) if tail_flag: - int_weight_tmp = weight[:, leng*group_size:] / scale[:, -1].unsqueeze(1) + int_weight_tmp = weight[:, leng * group_size :] / scale[:, -1].unsqueeze(1) if zp is not None: int_weight_tmp += zp[:, -1].unsqueeze(1) - int_weight[:, leng*group_size:] = torch.round(int_weight_tmp) + int_weight[:, leng * group_size :] = torch.round(int_weight_tmp) return int_weight diff --git a/neural_compressor/algorithm/__init__.py b/neural_compressor/algorithm/__init__.py index 38dc2941459..3b329282edb 100644 --- a/neural_compressor/algorithm/__init__.py +++ b/neural_compressor/algorithm/__init__.py @@ -14,7 +14,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Fetch all files and init all algorithms.""" from .algorithm import ALGORITHMS, Algorithm, AlgorithmScheduler, algorithm_registry @@ -24,9 +23,8 @@ modules = glob.glob(join(dirname(__file__), "*.py")) for f in modules: - if isfile(f) and not f.startswith('__') and not f.endswith('__init__.py'): + if isfile(f) and not f.startswith("__") and not f.endswith("__init__.py"): __import__(basename(f)[:-3], globals(), locals(), level=1) __all__ = ["ALGORITHMS", "Algorithm", "AlgorithmScheduler", "algorithm_registry"] - diff --git a/neural_compressor/algorithm/algorithm.py b/neural_compressor/algorithm/algorithm.py index 2751cc82dba..d28a38afdf4 100644 --- a/neural_compressor/algorithm/algorithm.py +++ b/neural_compressor/algorithm/algorithm.py @@ -14,15 +14,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
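# A minimal, runnable sketch of the group-wise rounding that quant_weight_w_scale above
# performs; the tensor shapes and the +/-7 int4 range used for the scale are illustrative
# assumptions, not values taken from this patch.
import torch

weight = torch.randn(8, 64)                      # (out_features, in_features)
group_size = 32
ngroups = weight.shape[1] // group_size
# one scale per output row and per column group
scale = weight.abs().reshape(8, ngroups, group_size).amax(dim=-1) / 7.0
int_weight = torch.zeros_like(weight)
for i in range(ngroups):
    cols = slice(i * group_size, (i + 1) * group_size)
    # round(weight / scale) per group; a zero point, if present, is added before rounding
    int_weight[:, cols] = torch.round(weight[:, cols] / scale[:, i].unsqueeze(1))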
- """Register algorithms.""" from abc import abstractmethod + from neural_compressor.utils.create_obj_from_config import get_algorithm # {location: {algorithm_type: cls}} registry_algorithms = {} + def algorithm_registry(algorithm_type, location): """Decorate and register all Algorithm subclasses. @@ -34,16 +35,19 @@ def algorithm_registry(algorithm_type, location): Returns: cls: The class of register. """ + def decorator_algorithm(cls): if location in registry_algorithms and algorithm_type in registry_algorithms[location]: - raise ValueError('Cannot have two algorithms with the same name') + raise ValueError("Cannot have two algorithms with the same name") if location not in registry_algorithms: registry_algorithms[location] = {} registry_algorithms[location][algorithm_type] = cls() return cls + return decorator_algorithm + class ALGORITHMS(object): """Build a dict for registered algorithms.""" @@ -66,19 +70,19 @@ def __getitem__(self, algorithm_type): assert result, "algorithm type only support {}".format(self.support_algorithms()) return result - @classmethod def support_algorithms(self): """Get all algorithms. - Returns: + Returns: Set: A set of all algorithms. """ supported_algos = set([self.algorithms[key] for key in self.algorithms]) return supported_algos + class AlgorithmScheduler(object): - """control the Algorithm in different phase.""" + """Control the Algorithm in different phase.""" def __init__(self, conf): """Initialize AlgorithmScheduler. @@ -92,7 +96,7 @@ def __init__(self, conf): self._dataloader = None self._adaptor = None self._calib_iter = None - + def append_algorithm(self, location, algorithm): """Append algorithm to list of executed algorithms. @@ -102,7 +106,7 @@ def append_algorithm(self, location, algorithm): """ self._exec_algorithms[location] = self._exec_algorithms.get(location, []) self._exec_algorithms[location].append(algorithm) - + def reset_exec_algorithms(self): """Reset the list of executed algorithms.""" self._exec_algorithms = {} @@ -113,18 +117,14 @@ def __call__(self, location): Returns: model: The framework model. 
""" - assert self._q_model, 'set q_model for algorithm' + assert self._q_model, "set q_model for algorithm" if len(self._exec_algorithms.get(location, [])) == 0: return self._q_model - assert self._origin_model, 'set origin model for algorithm' - assert self._adaptor, 'set adaptor for algorithm' - assert self._calib_iter, 'set calibration iteration for algorithm' + assert self._origin_model, "set origin model for algorithm" + assert self._adaptor, "set adaptor for algorithm" + assert self._calib_iter, "set calibration iteration for algorithm" for algo in self._exec_algorithms.get(location, []): - self._q_model = algo(self._origin_model, - self._q_model, \ - self._adaptor, \ - self._dataloader, \ - self._calib_iter) + self._q_model = algo(self._origin_model, self._q_model, self._adaptor, self._dataloader, self._calib_iter) return self._q_model @property @@ -217,6 +217,7 @@ def calib_iter(self, calib_iter): """ self._calib_iter = calib_iter + class Algorithm(object): """The base class of algorithm.""" @@ -228,4 +229,3 @@ def __call__(self, *args, **kwargs): NotImplementedError: NotImplementedError """ raise NotImplementedError - diff --git a/neural_compressor/algorithm/fast_bias_correction.py b/neural_compressor/algorithm/fast_bias_correction.py index c97cebaf308..5b131a17238 100644 --- a/neural_compressor/algorithm/fast_bias_correction.py +++ b/neural_compressor/algorithm/fast_bias_correction.py @@ -14,14 +14,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Build FastBiasCorrection algorithm class.""" import numpy as np -from .algorithm import Algorithm, algorithm_registry + from ..utils import logger +from .algorithm import Algorithm, algorithm_registry -@algorithm_registry(algorithm_type='fast_bias_correction', location='post_quantization') + +@algorithm_registry(algorithm_type="fast_bias_correction", location="post_quantization") class FastBiasCorrection(Algorithm): """FastBiasCorrection algorithm class.""" @@ -52,21 +53,33 @@ def __call__(self, origin_model, q_model, adaptor, dataloader, iterations): # (TODO) assume int8 model also use fp32 op list # in adaptor fp32 op will be mapped to corresponding int8 op graph_info = origin_model.graph_info - op_list = [op_name for op_name, op_type in graph_info.items() if 'conv' in op_type.lower()] - iteration_list = list(range(1, iterations+1)) - fp32_data = adaptor.inspect_tensor(origin_model.graph_def, dataloader, \ - op_list=op_list, iteration_list=iteration_list, \ - inspect_type='all', save_to_disk=False, save_path='', - quantization_cfg=self.quantization_cfg) - q_data = adaptor.inspect_tensor(q_model.graph_def, dataloader, \ - op_list=op_list, iteration_list=iteration_list, \ - inspect_type='all', save_to_disk=False, save_path='', - quantization_cfg=self.quantization_cfg) - - fp32_weights = fp32_data['weight'] - q_weights = q_data['weight'] - fp32_activations_list = fp32_data['activation'] - q_activations_list = q_data['activation'] + op_list = [op_name for op_name, op_type in graph_info.items() if "conv" in op_type.lower()] + iteration_list = list(range(1, iterations + 1)) + fp32_data = adaptor.inspect_tensor( + origin_model.graph_def, + dataloader, + op_list=op_list, + iteration_list=iteration_list, + inspect_type="all", + save_to_disk=False, + save_path="", + quantization_cfg=self.quantization_cfg, + ) + q_data = adaptor.inspect_tensor( + q_model.graph_def, + dataloader, + op_list=op_list, + 
iteration_list=iteration_list, + inspect_type="all", + save_to_disk=False, + save_path="", + quantization_cfg=self.quantization_cfg, + ) + + fp32_weights = fp32_data["weight"] + q_weights = q_data["weight"] + fp32_activations_list = fp32_data["activation"] + q_activations_list = q_data["activation"] def take_out_array(value_dict): value_list = [] @@ -83,19 +96,17 @@ def take_out_array(value_dict): if isinstance(name, tuple): name = name[0] if name in fp32_activations: - fp32_activations[name] = np.concatenate(\ - (fp32_activations[name], take_out_array(value))) + fp32_activations[name] = np.concatenate((fp32_activations[name], take_out_array(value))) else: fp32_activations[name] = take_out_array(value) - q_activations ={} + q_activations = {} for i, _ in enumerate(iteration_list): for name, value in q_activations_list[i].items(): if isinstance(name, tuple): name = name[0] if name in q_activations: - q_activations[name] = np.concatenate((\ - q_activations[name], take_out_array(value))) + q_activations[name] = np.concatenate((q_activations[name], take_out_array(value))) else: q_activations[name] = take_out_array(value) tensor_dict = {} @@ -103,13 +114,13 @@ def take_out_array(value_dict): for fp32_op in op_list: # (TODO) assume adaptor will map the fp32_op to q_op, so directly assign here q_op = fp32_op - #(TODO) assume fp32 op output and weight all mapped from the first node name + # (TODO) assume fp32 op output and weight all mapped from the first node name # fp32 op and quantized op should all have bias if fp32_op not in fp32_weights or not len(fp32_weights[fp32_op]) == 2: continue - fp32_weight, fp32_weight_name = None, '' - fp32_bias, fp32_bias_name = None, '' + fp32_weight, fp32_weight_name = None, "" + fp32_bias, fp32_bias_name = None, "" for name, value in fp32_weights[fp32_op].items(): if len(value.shape) > 1: fp32_weight = value @@ -118,8 +129,8 @@ def take_out_array(value_dict): fp32_bias = value fp32_bias_name = name - q_weight, q_weight_name = None, '' - q_bias, q_bias_name = None, '' + q_weight, q_weight_name = None, "" + q_bias, q_bias_name = None, "" for name, value in q_weights[q_op].items(): if len(value.shape) > 1: q_weight = value @@ -127,7 +138,7 @@ def take_out_array(value_dict): if len(value.shape) == 1: q_bias = value q_bias_name = name - + # (TODO) assume use conv output first tensor fp32_output = fp32_activations[fp32_op] q_output = q_activations[q_op] @@ -141,8 +152,7 @@ def take_out_array(value_dict): bias_shift = bias_shift.reshape(bias_shift.shape[0], -1) bias_shift = np.mean(bias_shift, axis=1) - tensor_dict[q_bias_name] = fp32_bias + bias_shift - + tensor_dict[q_bias_name] = fp32_bias + bias_shift if len(tensor_dict) > 0: adaptor.set_tensor(q_model, tensor_dict) diff --git a/neural_compressor/algorithm/smooth_quant.py b/neural_compressor/algorithm/smooth_quant.py index 48620b6522b..faffca4c2e7 100644 --- a/neural_compressor/algorithm/smooth_quant.py +++ b/neural_compressor/algorithm/smooth_quant.py @@ -14,15 +14,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
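# A minimal sketch of the per-channel bias correction applied above: the quantized op's
# bias is shifted by the mean channel-wise gap between fp32 and quantized activations.
# Shapes and the exact reduction are illustrative assumptions.
import numpy as np

fp32_output = np.random.rand(16, 8, 14, 14)   # fp32 conv activations, (N, C, H, W)
q_output = np.random.rand(16, 8, 14, 14)      # quantized conv activations
fp32_bias = np.random.rand(8)

gap = (fp32_output - q_output).transpose(1, 0, 2, 3).reshape(8, -1)
bias_shift = np.mean(gap, axis=1)             # one shift per output channel
corrected_bias = fp32_bias + bias_shift       # written back through adaptor.set_tensor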
- """Build SmoothQuant algorithm class.""" import numpy as np -from .algorithm import Algorithm, algorithm_registry + from ..utils import logger +from .algorithm import Algorithm, algorithm_registry -@algorithm_registry(algorithm_type='smooth_quant', location='pre_quantization') +@algorithm_registry(algorithm_type="smooth_quant", location="pre_quantization") class SmoothQuant(Algorithm): """Fake input channel quantization. @@ -72,14 +72,14 @@ def __call__(self, origin_model, q_model, adaptor, dataloader, calib_iter): model: A modified onnx model """ kwargs = {} ##different backends may have different default values - if self.op_types != None: + if self.op_types is not None: kwargs["op_types"] = self.op_types - if self.percentile != None: - kwargs['percentile'] = self.percentile - if self.scales_per_op != None: - kwargs['scales_per_op'] = self.scales_per_op - kwargs['folding'] = self.folding - kwargs['record_max_info'] = True + if self.percentile is not None: + kwargs["percentile"] = self.percentile + if self.scales_per_op is not None: + kwargs["scales_per_op"] = self.scales_per_op + kwargs["folding"] = self.folding + kwargs["record_max_info"] = True q_model = adaptor.smooth_quant( origin_model, dataloader, diff --git a/neural_compressor/algorithm/weight_correction.py b/neural_compressor/algorithm/weight_correction.py index 905756b7616..5f100bcfd0d 100644 --- a/neural_compressor/algorithm/weight_correction.py +++ b/neural_compressor/algorithm/weight_correction.py @@ -14,13 +14,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Build FastBiasCorrection algorithm class.""" import numpy as np + from .algorithm import Algorithm, algorithm_registry -@algorithm_registry(algorithm_type='weight_correction', location='post_quantization') + +@algorithm_registry(algorithm_type="weight_correction", location="post_quantization") class WeightCorrection(Algorithm): """FastBiasCorrection algorithm class. 
@@ -60,33 +61,43 @@ def __call__(self, origin_model, q_model, adaptor, dataloader, iterations): # (TODO) assume int8 model also use fp32 op list # in adaptor fp32 op will be mapped to corresponding int8 op graph_info = origin_model.graph_info - op_list = [op_name for op_name, op_type in graph_info.items() if 'conv' in op_type.lower()] + op_list = [op_name for op_name, op_type in graph_info.items() if "conv" in op_type.lower()] - #(TODO) assume the weight format should be(oc, ic, h, w) + # (TODO) assume the weight format should be(oc, ic, h, w) cap = adaptor.query_fw_capability(origin_model) - quantize_cfg = {'op': cap['opwise']} - fp32_data = adaptor.inspect_tensor(origin_model, dataloader, op_list=op_list, - iteration_list=list(range(1, iterations+1)), - inspect_type='weight', quantization_cfg = quantize_cfg) - q_data = adaptor.inspect_tensor(q_model, dataloader, op_list=op_list, - iteration_list=list(range(1, iterations+1)), - inspect_type='weight', quantization_cfg = quantize_cfg) - - fp32_weights = fp32_data['weight'] - q_weights = q_data['weight'] + quantize_cfg = {"op": cap["opwise"]} + fp32_data = adaptor.inspect_tensor( + origin_model, + dataloader, + op_list=op_list, + iteration_list=list(range(1, iterations + 1)), + inspect_type="weight", + quantization_cfg=quantize_cfg, + ) + q_data = adaptor.inspect_tensor( + q_model, + dataloader, + op_list=op_list, + iteration_list=list(range(1, iterations + 1)), + inspect_type="weight", + quantization_cfg=quantize_cfg, + ) + + fp32_weights = fp32_data["weight"] + q_weights = q_data["weight"] tensor_dict = {} # for fp32_op, q_op in node_mapping.items(): for fp32_op in op_list: # (TODO) assume adaptor will map the fp32_op to q_op, so directly assign here q_op = fp32_op - #(TODO) assume fp32 op output and weight all mapped from the first node name + # (TODO) assume fp32 op output and weight all mapped from the first node name # fp32 op and quantized op should all have bias - if fp32_op not in fp32_weights or not len(fp32_weights[fp32_op]) >= 1: + if fp32_op not in fp32_weights or not len(fp32_weights[fp32_op]) >= 1: continue - fp32_weight, fp32_weight_name = None, '' - fp32_bias, fp32_bias_name = None, '' + fp32_weight, fp32_weight_name = None, "" + fp32_bias, fp32_bias_name = None, "" for name, value in fp32_weights[fp32_op].items(): if len(value.shape) > 1: fp32_weight = value @@ -95,8 +106,8 @@ def __call__(self, origin_model, q_model, adaptor, dataloader, iterations): fp32_bias = value fp32_bias_name = name - q_weight, q_weight_name = None, '' - q_bias, q_bias_name = None, '' + q_weight, q_weight_name = None, "" + q_bias, q_bias_name = None, "" for name, value in q_weights[q_op].items(): if len(value.shape) > 1: q_weight = value @@ -116,19 +127,18 @@ def __call__(self, origin_model, q_model, adaptor, dataloader, iterations): t_q_weight = np.transpose(q_weight, transpose_shape) t_q_weight = t_q_weight.reshape(t_q_weight.shape[0], -1) - channel_variance = np.std(t_fp32_weight, axis=1) / \ - (np.std(t_q_weight, axis=1) + self.eps) + channel_variance = np.std(t_fp32_weight, axis=1) / (np.std(t_q_weight, axis=1) + self.eps) broad_shape = np.ones(len(fp32_weight.shape), dtype=np.int32) broad_shape[self.channel_axis] = len(channel_variance) channel_variance = channel_variance.reshape(broad_shape) variance_q_weight = q_weight * channel_variance variance_q_weight = np.transpose(variance_q_weight, transpose_shape) - variance_q_weight = variance_q_weight.reshape(\ - variance_q_weight.shape[0], -1) + variance_q_weight = 
variance_q_weight.reshape(variance_q_weight.shape[0], -1) - channel_mean = np.mean(t_fp32_weight, axis=self.channel_axis) - \ - np.mean(variance_q_weight, axis=self.channel_axis) + channel_mean = np.mean(t_fp32_weight, axis=self.channel_axis) - np.mean( + variance_q_weight, axis=self.channel_axis + ) channel_mean = channel_mean.reshape(broad_shape) tensor_dict[q_weight_name] = channel_variance * fp32_weight + channel_mean diff --git a/neural_compressor/benchmark.py b/neural_compressor/benchmark.py index cb6a041736f..fdc136d5172 100644 --- a/neural_compressor/benchmark.py +++ b/neural_compressor/benchmark.py @@ -28,19 +28,17 @@ from neural_compressor.profiling.parser.factory import ParserFactory from neural_compressor.profiling.profiler.factory import ProfilerFactory + from .adaptor import FRAMEWORKS -from .config import BenchmarkConfig -from .config import options +from .config import BenchmarkConfig, options from .data import check_dataloader from .model import BaseModel, Model from .objective import MultiObjective from .profiling.parser.parser import ProfilingParser from .profiling.profiler.profiler import Profiler -from .utils import alias_param, logger, OPTIONS -from .utils.neural_insights_utils import register_neural_insights_workload, \ - update_neural_insights_workload -from .utils.utility import GLOBAL_STATE, MODE, print_table, dump_table -from .utils.utility import Statistics +from .utils import OPTIONS, alias_param, logger +from .utils.neural_insights_utils import register_neural_insights_workload, update_neural_insights_workload +from .utils.utility import GLOBAL_STATE, MODE, Statistics, dump_table, print_table def set_env_var(env_var, value, overwrite_existing=False): @@ -60,15 +58,14 @@ def set_all_env_var(conf, overwrite_existing=False): Neural Compressor only uses physical cores """ cpu_counts = psutil.cpu_count(logical=False) - assert isinstance(conf, BenchmarkConfig), \ - 'input has to be a Config object' + assert isinstance(conf, BenchmarkConfig), "input has to be a Config object" if conf.cores_per_instance is not None: - assert conf.cores_per_instance * conf.num_of_instance <= cpu_counts, \ - 'num_of_instance * cores_per_instance should <= cpu physical cores' + assert ( + conf.cores_per_instance * conf.num_of_instance <= cpu_counts + ), "num_of_instance * cores_per_instance should <= cpu physical cores" else: - assert conf.num_of_instance <= cpu_counts, \ - 'num_of_instance should <= cpu counts' + assert conf.num_of_instance <= cpu_counts, "num_of_instance should <= cpu counts" conf.cores_per_instance = int(cpu_counts / conf.num_of_instance) for var, value in dict(conf).items(): set_env_var(var.upper(), value, overwrite_existing) @@ -79,9 +76,9 @@ def get_architecture(): p1 = subprocess.Popen("lscpu", stdout=subprocess.PIPE, stderr=subprocess.STDOUT) p2 = subprocess.Popen(["grep", "Architecture"], stdin=p1.stdout, stdout=subprocess.PIPE) p3 = subprocess.Popen(["cut", "-d", ":", "-f2"], stdin=p2.stdout, stdout=subprocess.PIPE) - res=None - for line in iter(p3.stdout.readline, b''): - res=line.decode("utf-8").strip() + res = None + for line in iter(p3.stdout.readline, b""): + res = line.decode("utf-8").strip() return res @@ -91,40 +88,40 @@ def get_threads_per_core(): p2 = subprocess.Popen(["grep", "Thread(s) per core"], stdin=p1.stdout, stdout=subprocess.PIPE) p3 = subprocess.Popen(["cut", "-d", ":", "-f2"], stdin=p2.stdout, stdout=subprocess.PIPE) res = None - for line in iter(p3.stdout.readline, b''): - res=line.decode("utf-8").strip() + for line in 
iter(p3.stdout.readline, b""): + res = line.decode("utf-8").strip() return res def get_threads(): """Get the list of threads.""" - p1 = subprocess.Popen(["cat","/proc/cpuinfo"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + p1 = subprocess.Popen(["cat", "/proc/cpuinfo"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) p2 = subprocess.Popen(["grep", "processor"], stdin=p1.stdout, stdout=subprocess.PIPE) p3 = subprocess.Popen(["cut", "-d", ":", "-f2"], stdin=p2.stdout, stdout=subprocess.PIPE) res = [] - for line in iter(p3.stdout.readline, b''): + for line in iter(p3.stdout.readline, b""): res.append(line.decode("utf-8").strip()) return res def get_physical_ids(): """Get the list of sockets.""" - p1 = subprocess.Popen(["cat","/proc/cpuinfo"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + p1 = subprocess.Popen(["cat", "/proc/cpuinfo"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) p2 = subprocess.Popen(["grep", "physical id"], stdin=p1.stdout, stdout=subprocess.PIPE) p3 = subprocess.Popen(["cut", "-d", ":", "-f2"], stdin=p2.stdout, stdout=subprocess.PIPE) res = [] - for line in iter(p3.stdout.readline, b''): + for line in iter(p3.stdout.readline, b""): res.append(line.decode("utf-8").strip()) return res def get_core_ids(): """Get the ids list of the cores.""" - p1 = subprocess.Popen(["cat","/proc/cpuinfo"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + p1 = subprocess.Popen(["cat", "/proc/cpuinfo"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) p2 = subprocess.Popen(["grep", "core id"], stdin=p1.stdout, stdout=subprocess.PIPE) p3 = subprocess.Popen(["cut", "-d", ":", "-f2"], stdin=p2.stdout, stdout=subprocess.PIPE) res = [] - for line in iter(p3.stdout.readline, b''): + for line in iter(p3.stdout.readline, b""): res.append(line.decode("utf-8").strip()) return res @@ -156,45 +153,40 @@ def run_instance(model, conf, b_dataloader=None, b_func=None): results = {} if b_func is None: GLOBAL_STATE.STATE = MODE.BENCHMARK - framework_specific_info = {'device': conf.device, - 'approach': None, - 'random_seed': options.random_seed, - 'backend': conf.backend if conf.backend is not None else 'default', - 'format': 'default'} + framework_specific_info = { + "device": conf.device, + "approach": None, + "random_seed": options.random_seed, + "backend": conf.backend if conf.backend is not None else "default", + "format": "default", + } framework = conf.framework.lower() - if 'tensorflow' in framework: - framework_specific_info.update({"inputs": conf.inputs, \ - "outputs": conf.outputs, \ - "recipes": {}, \ - 'workspace_path': options.workspace}) - if framework == 'keras': - framework_specific_info.update({'workspace_path': options.workspace}) - if framework == 'mxnet': + if "tensorflow" in framework: + framework_specific_info.update( + {"inputs": conf.inputs, "outputs": conf.outputs, "recipes": {}, "workspace_path": options.workspace} + ) + if framework == "keras": + framework_specific_info.update({"workspace_path": options.workspace}) + if framework == "mxnet": framework_specific_info.update({"b_dataloader": b_dataloader}) - if 'onnx' in framework: + if "onnx" in framework: framework_specific_info.update( - {'workspace_path': options.workspace, \ - 'graph_optimization': OPTIONS[framework].graph_optimization}) - if framework == 'pytorch_ipex' or framework == 'pytorch' or framework == 'pytorch_fx': - framework_specific_info.update({"workspace_path": options.workspace, - "q_dataloader": None}) + {"workspace_path": options.workspace, "graph_optimization": 
OPTIONS[framework].graph_optimization} + ) + if framework == "pytorch_ipex" or framework == "pytorch" or framework == "pytorch_fx": + framework_specific_info.update({"workspace_path": options.workspace, "q_dataloader": None}) - assert isinstance(model, BaseModel), 'need set neural_compressor Model for quantization....' + assert isinstance(model, BaseModel), "need set neural_compressor Model for quantization...." adaptor = FRAMEWORKS[framework](framework_specific_info) assert b_dataloader is not None, "dataloader should not be None" from neural_compressor.utils.create_obj_from_config import create_eval_func - b_func = create_eval_func(conf.framework, - b_dataloader, - adaptor, - None, - iteration=conf.iteration) - objectives = MultiObjective(["performance"], - {'relative': 0.1}, - is_measure=True) + b_func = create_eval_func(conf.framework, b_dataloader, adaptor, None, iteration=conf.iteration) + + objectives = MultiObjective(["performance"], {"relative": 0.1}, is_measure=True) val = objectives.evaluate(b_func, model) # measurer contain info not only performance(eg, memory, model_size) @@ -217,7 +209,7 @@ def run_instance(model, conf, b_dataloader=None, b_func=None): logger.debug("Iteration {} result {}:".format(i, res)) logger.info("Batch size = {}".format(batch_size)) logger.info("Latency: {:.3f} ms".format(latency * 1000)) - logger.info("Throughput: {:.3f} images/sec".format(1. / latency)) + logger.info("Throughput: {:.3f} images/sec".format(1.0 / latency)) return results else: b_func(model.model) @@ -229,36 +221,38 @@ def generate_prefix(core_list): Args: core_list: a list of core indexes bound with specific instances """ - if sys.platform in ['linux'] and os.system('numactl --show >/dev/null 2>&1') == 0: - return 'OMP_NUM_THREADS={} numactl --localalloc --physcpubind={}'.format(\ - len(core_list), ','.join(core_list.astype(str))) - elif sys.platform in ['win32']: # pragma: no cover + if sys.platform in ["linux"] and os.system("numactl --show >/dev/null 2>&1") == 0: + return "OMP_NUM_THREADS={} numactl --localalloc --physcpubind={}".format( + len(core_list), ",".join(core_list.astype(str)) + ) + elif sys.platform in ["win32"]: # pragma: no cover # (TODO) should we move the hw_info from ux? 
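# A minimal sketch of the command prefix the Linux branch of generate_prefix above emits
# for one benchmark instance; the core indices and instance layout are illustrative.
import numpy as np

cores_per_instance, instance_idx = 4, 1
core_list = np.arange(0, cores_per_instance) + instance_idx * cores_per_instance
prefix = "OMP_NUM_THREADS={} numactl --localalloc --physcpubind={}".format(
    len(core_list), ",".join(core_list.astype(str))
)
# prefix == "OMP_NUM_THREADS=4 numactl --localalloc --physcpubind=4,5,6,7"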
from neural_compressor.utils.utility import get_number_of_sockets + num_of_socket = int(get_number_of_sockets()) - cores_per_instance = int(os.environ.get('CORES_PER_INSTANCE')) + cores_per_instance = int(os.environ.get("CORES_PER_INSTANCE")) cores_per_socket = int(psutil.cpu_count(logical=False)) / num_of_socket socket_id = int(core_list[0] // cores_per_socket) # cores per socket should integral multiple of cores per instance, else not bind core if cores_per_socket % cores_per_instance == 0: from functools import reduce - hex_core = hex(reduce(lambda x, y : x | y, [1 << p for p in core_list])) - return 'start /b /WAIT /node {} /affinity {} CMD /c'.format(socket_id, hex_core) + + hex_core = hex(reduce(lambda x, y: x | y, [1 << p for p in core_list])) + return "start /b /WAIT /node {} /affinity {} CMD /c".format(socket_id, hex_core) else: - return '' + return "" def call_one(cmd, log_file): """Execute one command for one instance in one thread and dump the log (for Windows).""" - proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - shell=True) # nosec + proc = subprocess.Popen( + cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True + ) # nosec with open(log_file, "w", 1, encoding="utf-8") as log_file: log_file.write(f"[ COMMAND ] {cmd} \n") for line in proc.stdout: decoded_line = line.decode("utf-8", errors="ignore").strip() - logger.info(decoded_line) # redirect to terminal + logger.info(decoded_line) # redirect to terminal log_file.write(decoded_line + "\n") @@ -268,49 +262,49 @@ def config_instance(raw_cmd): Args: raw_cmd: raw command used for benchmark """ - multi_instance_cmd = '' - num_of_instance = int(os.environ.get('NUM_OF_INSTANCE')) - cores_per_instance = int(os.environ.get('CORES_PER_INSTANCE')) + multi_instance_cmd = "" + num_of_instance = int(os.environ.get("NUM_OF_INSTANCE")) + cores_per_instance = int(os.environ.get("CORES_PER_INSTANCE")) logger.info("num of instance: {}".format(num_of_instance)) logger.info("cores per instance: {}".format(cores_per_instance)) - if (sys.platform in ['linux'] and get_architecture() == 'aarch64' and int(get_threads_per_core()) > 1): - raise OSError('Currently no support on ARM with hyperthreads') - elif sys.platform in ['linux']: + if sys.platform in ["linux"] and get_architecture() == "aarch64" and int(get_threads_per_core()) > 1: + raise OSError("Currently no support on ARM with hyperthreads") + elif sys.platform in ["linux"]: bounded_threads = get_bounded_threads(get_core_ids(), get_threads(), get_physical_ids()) for i in range(0, num_of_instance): - if sys.platform in ['linux'] and get_architecture() == 'x86_64': + if sys.platform in ["linux"] and get_architecture() == "x86_64": core_list_idx = np.arange(0, cores_per_instance) + i * cores_per_instance core_list = np.array(bounded_threads)[core_list_idx] else: core_list = np.arange(0, cores_per_instance) + i * cores_per_instance # bind cores only allowed in linux/mac os with numactl enabled prefix = generate_prefix(core_list) - instance_cmd = '{} {}'.format(prefix, raw_cmd) - if sys.platform in ['linux']: - instance_log = '{}_{}_{}.log'.format(num_of_instance, cores_per_instance, i) - multi_instance_cmd += '{} 2>&1|tee {} & \\\n'.format( - instance_cmd, instance_log) + instance_cmd = "{} {}".format(prefix, raw_cmd) + if sys.platform in ["linux"]: + instance_log = "{}_{}_{}.log".format(num_of_instance, cores_per_instance, i) + multi_instance_cmd += "{} 2>&1|tee {} & \\\n".format(instance_cmd, 
instance_log) else: # pragma: no cover - multi_instance_cmd += '{} \n'.format(instance_cmd) + multi_instance_cmd += "{} \n".format(instance_cmd) - multi_instance_cmd += 'wait' if sys.platform in ['linux'] else '' + multi_instance_cmd += "wait" if sys.platform in ["linux"] else "" logger.info("Running command is\n{}".format(multi_instance_cmd)) # each instance will execute single instance - set_env_var('NC_ENV_CONF', True, overwrite_existing=True) - if sys.platform in ['linux']: - p = subprocess.Popen(multi_instance_cmd, preexec_fn=os.setsid, shell=True) # nosec - elif sys.platform in ['win32']: # pragma: no cover + set_env_var("NC_ENV_CONF", True, overwrite_existing=True) + if sys.platform in ["linux"]: + p = subprocess.Popen(multi_instance_cmd, preexec_fn=os.setsid, shell=True) # nosec + elif sys.platform in ["win32"]: # pragma: no cover cmd_list = multi_instance_cmd.split("\n")[:-1] threads = [] for idx, cmd in enumerate(cmd_list): # wrap each execution of windows bat file in one thread # write the log to the log file of the corresponding instance - logger.info('Will dump to {}_{}_{}.log'.format(num_of_instance, cores_per_instance, idx)) - threads.append(Thread(target=call_one, args=(cmd, - '{}_{}_{}.log'.format(num_of_instance, cores_per_instance, idx)))) + logger.info("Will dump to {}_{}_{}.log".format(num_of_instance, cores_per_instance, idx)) + threads.append( + Thread(target=call_one, args=(cmd, "{}_{}_{}.log".format(num_of_instance, cores_per_instance, idx))) + ) for command_thread in threads: command_thread.start() logger.info("Worker threads start") @@ -327,13 +321,13 @@ def config_instance(raw_cmd): def summary_benchmark(): """Get the summary of the benchmark.""" - if sys.platform in ['linux']: - num_of_instance = int(os.environ.get('NUM_OF_INSTANCE')) - cores_per_instance = int(os.environ.get('CORES_PER_INSTANCE')) + if sys.platform in ["linux"]: + num_of_instance = int(os.environ.get("NUM_OF_INSTANCE")) + cores_per_instance = int(os.environ.get("CORES_PER_INSTANCE")) latency_l = [] throughput_l = [] for i in range(0, num_of_instance): - log = '{}_{}_{}.log'.format(num_of_instance, cores_per_instance, i) + log = "{}_{}_{}.log".format(num_of_instance, cores_per_instance, i) with open(log, "r") as f: for line in f: latency = re.search(r"[L,l]atency:\s+(\d+(\.\d+)?)", line) @@ -341,18 +335,18 @@ def summary_benchmark(): throughput = re.search(r"[T,t]hroughput:\s+(\d+(\.\d+)?)", line) throughput_l.append(float(throughput.group(1))) if throughput and throughput.group(1) else None if throughput_l and latency_l: - assert len(latency_l)==len(throughput_l)==num_of_instance, \ - "Multiple instance benchmark failed with some instance!" + assert ( + len(latency_l) == len(throughput_l) == num_of_instance + ), "Multiple instance benchmark failed with some instance!" 
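# A minimal sketch of the aggregation rule summary_benchmark applies above, shown on toy
# per-instance numbers (values are illustrative).
latency_l = [12.5, 13.1, 12.8]      # per-instance latency in ms, parsed from the logs
throughput_l = [80.0, 76.3, 78.1]   # per-instance throughput in samples/sec

latency_avg = (sum(latency_l) / len(latency_l)) / 1000   # "Latency average [second/sample]"
throughput_sum = sum(throughput_l)                       # "Throughput sum [samples/second]"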
output_data = [ - ["Latency average [second/sample]", "{:.6f}".format((sum(latency_l)/len(latency_l))/1000)], - ["Throughput sum [samples/second]", "{:.3f}".format(sum(throughput_l))] + ["Latency average [second/sample]", "{:.6f}".format((sum(latency_l) / len(latency_l)) / 1000)], + ["Throughput sum [samples/second]", "{:.3f}".format(sum(throughput_l))], ] logger.info("********************************************") Statistics( - output_data, - header='Multiple Instance Benchmark Summary', - field_names=["Items", "Result"]).print_stat() + output_data, header="Multiple Instance Benchmark Summary", field_names=["Items", "Result"] + ).print_stat() else: # (TODO) should add summary after win32 benchmark has log pass @@ -480,7 +474,7 @@ def benchmark_with_raw_cmd(raw_cmd, conf=None): if conf is not None: if conf.backend == "ipex": import intel_extension_for_pytorch - assert sys.platform in ['linux', 'win32'], 'only support platform windows and linux...' + assert sys.platform in ["linux", "win32"], "only support platform windows and linux..." # disable multi-instance for running bechmark on GPU device set_all_env_var(conf) @@ -488,7 +482,7 @@ def benchmark_with_raw_cmd(raw_cmd, conf=None): summary_benchmark() -@alias_param("conf", param_alias='config') +@alias_param("conf", param_alias="config") def fit(model, conf, b_dataloader=None, b_func=None): """Benchmark the model performance with the configure. @@ -517,13 +511,13 @@ def fit(model, conf, b_dataloader=None, b_func=None): if b_dataloader is not None: check_dataloader(b_dataloader) - assert sys.platform in ['linux', 'win32'], 'only support platform windows and linux...' + assert sys.platform in ["linux", "win32"], "only support platform windows and linux..." # disable multi-instance for running benchmark on GPU device set_all_env_var(conf) - if conf.device == 'gpu': - set_env_var('NC_ENV_CONF', True, overwrite_existing=True) + if conf.device == "gpu": + set_env_var("NC_ENV_CONF", True, overwrite_existing=True) - if conf.diagnosis and os.environ.get('NC_ENV_CONF', None) in [None, 'False']: + if conf.diagnosis and os.environ.get("NC_ENV_CONF", None) in [None, "False"]: logger.info("Start to run Profiling") ni_workload_id = register_neural_insights_workload( workload_location=os.path.abspath(os.path.abspath(options.workspace)), @@ -539,7 +533,7 @@ def fit(model, conf, b_dataloader=None, b_func=None): update_neural_insights_workload(ni_workload_id, "failure") logger.info("Start to run Benchmark.") - if os.environ.get('NC_ENV_CONF') == 'True': + if os.environ.get("NC_ENV_CONF") == "True": return run_instance(model=wrapped_model, conf=conf, b_dataloader=b_dataloader, b_func=b_func) - raw_cmd = sys.executable + ' ' + ' '.join(sys.argv) + raw_cmd = sys.executable + " " + " ".join(sys.argv) benchmark_with_raw_cmd(raw_cmd) diff --git a/neural_compressor/compression/__init__.py b/neural_compressor/compression/__init__.py index 71e6a06cd29..b9b85c89c7e 100644 --- a/neural_compressor/compression/__init__.py +++ b/neural_compressor/compression/__init__.py @@ -15,4 +15,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
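# A hedged usage sketch of the benchmark entry point reformatted above; the model, the
# dataloader, and the exact BenchmarkConfig arguments are assumptions for illustration.
from neural_compressor.config import BenchmarkConfig
from neural_compressor.benchmark import fit

conf = BenchmarkConfig(cores_per_instance=4, num_of_instance=2)  # multi-instance CPU run
fit(model=user_model, conf=conf, b_dataloader=user_dataloader)   # user_model / user_dataloader assumed to exist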
-from .callbacks import QuantizationAwareTrainingCallbacks, DistillationCallbacks, PruningCallbacks \ No newline at end of file +from .callbacks import QuantizationAwareTrainingCallbacks, DistillationCallbacks, PruningCallbacks diff --git a/neural_compressor/compression/callbacks.py b/neural_compressor/compression/callbacks.py index 94398b1c97f..03eace3f0f7 100644 --- a/neural_compressor/compression/callbacks.py +++ b/neural_compressor/compression/callbacks.py @@ -14,25 +14,24 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """This is a module for Component class. The Component class will be inherited by the class 'QuantizationAwareTrainingCallbacks', 'PruningCallbacks' and 'DistillationCallbacks'. """ -from .distillation.criterions import Criterions -from ..utils import logger -from ..utils.utility import LazyImport from ..model import BaseModel, Model from ..model.model import MODELS -from .pruner.utils import process_config, parse_to_prune, get_sparsity_ratio -from .pruner.utils import parse_to_prune_tf, get_sparsity_ratio_tf -from .pruner.pruners import get_pruner, PRUNERS +from ..utils import logger +from ..utils.utility import LazyImport +from .distillation.criterions import Criterions +from .pruner.pruners import PRUNERS, get_pruner +from .pruner.utils import get_sparsity_ratio, get_sparsity_ratio_tf, parse_to_prune, parse_to_prune_tf, process_config + +LazyImport("torch.nn") +torch = LazyImport("torch") +tf = LazyImport("tensorflow") -LazyImport('torch.nn') -torch = LazyImport('torch') -tf = LazyImport('tensorflow') class BaseCallbacks(object): """This is base class of Neural Compressor Callbacks. @@ -58,63 +57,62 @@ def __init__(self, conf=None, model=None): self.model = model self.adaptor = None self.hooks = { - 'on_train_begin': self.on_train_begin, - 'on_train_end': self.on_train_end, - 'on_epoch_begin': self.on_epoch_begin, - 'on_epoch_end': self.on_epoch_end, - 'on_step_begin': self.on_step_begin, - 'on_step_end': self.on_step_end, - 'on_after_compute_loss': self.on_after_compute_loss, - 'on_before_optimizer_step': self.on_before_optimizer_step, - 'on_after_optimizer_step': self.on_after_optimizer_step, - 'on_before_eval': self.on_before_eval, - 'on_after_eval': self.on_after_eval + "on_train_begin": self.on_train_begin, + "on_train_end": self.on_train_end, + "on_epoch_begin": self.on_epoch_begin, + "on_epoch_end": self.on_epoch_end, + "on_step_begin": self.on_step_begin, + "on_step_end": self.on_step_end, + "on_after_compute_loss": self.on_after_compute_loss, + "on_before_optimizer_step": self.on_before_optimizer_step, + "on_after_optimizer_step": self.on_after_optimizer_step, + "on_before_eval": self.on_before_eval, + "on_after_eval": self.on_after_eval, } self.hooks_dict = { - 'on_train_begin': [], - 'on_train_end': [], - 'on_epoch_begin': [], - 'on_epoch_end': [], - 'on_step_begin': [], - 'on_step_end': [], - 'on_after_compute_loss': [], - 'on_before_optimizer_step': [], - 'on_after_optimizer_step': [], - 'on_before_eval': [], - 'on_after_eval': [] + "on_train_begin": [], + "on_train_end": [], + "on_epoch_begin": [], + "on_epoch_end": [], + "on_step_begin": [], + "on_step_end": [], + "on_after_compute_loss": [], + "on_before_optimizer_step": [], + "on_after_optimizer_step": [], + "on_before_eval": [], + "on_after_eval": [], } def on_train_begin(self, dataloader=None): """Be called before the beginning of training.""" - for 
on_train_begin_hook in self.hooks_dict['on_train_begin']: + for on_train_begin_hook in self.hooks_dict["on_train_begin"]: on_train_begin_hook(dataloader) def on_train_end(self): """Be called after the end of training.""" - for on_train_end_hook in self.hooks_dict['on_train_end']: + for on_train_end_hook in self.hooks_dict["on_train_end"]: on_train_end_hook() def on_epoch_begin(self, epoch): """Be called on the beginning of epochs.""" - for on_epoch_begin_hook in self.hooks_dict['on_epoch_begin']: + for on_epoch_begin_hook in self.hooks_dict["on_epoch_begin"]: on_epoch_begin_hook(epoch) def on_step_begin(self, batch_id): """Be called on the beginning of batches.""" - if len(self.hooks_dict['on_step_begin']) > 0: + if len(self.hooks_dict["on_step_begin"]) > 0: res_list = [] - for on_step_begin_hook in self.hooks_dict['on_step_begin']: + for on_step_begin_hook in self.hooks_dict["on_step_begin"]: res_list.append(on_step_begin_hook(batch_id)) return res_list else: return None - def on_after_compute_loss(self, input, student_output, \ - student_loss, teacher_output=None): + def on_after_compute_loss(self, input, student_output, student_loss, teacher_output=None): """Be called on the end of loss computation.""" - if len(self.hooks_dict['on_after_compute_loss']) > 0: + if len(self.hooks_dict["on_after_compute_loss"]) > 0: loss = student_loss - for on_after_compute_loss_hook in self.hooks_dict['on_after_compute_loss']: + for on_after_compute_loss_hook in self.hooks_dict["on_after_compute_loss"]: loss = on_after_compute_loss_hook(input, student_output, loss, teacher_output) return loss else: @@ -122,29 +120,29 @@ def on_after_compute_loss(self, input, student_output, \ def on_before_optimizer_step(self): """Be called before optimizer step.""" - for on_before_optimizer_step_hook in self.hooks_dict['on_before_optimizer_step']: + for on_before_optimizer_step_hook in self.hooks_dict["on_before_optimizer_step"]: on_before_optimizer_step_hook() def on_after_optimizer_step(self): """Be called after optimizer step.""" - for on_after_optimizer_step_hook in self.hooks_dict['on_after_optimizer_step']: + for on_after_optimizer_step_hook in self.hooks_dict["on_after_optimizer_step"]: on_after_optimizer_step_hook() def on_before_eval(self): """Be called before evaluation.""" - for on_before_eval_hook in self.hooks_dict['on_before_eval']: + for on_before_eval_hook in self.hooks_dict["on_before_eval"]: on_before_eval_hook() def on_after_eval(self): """Be called after evaluation.""" - for on_after_eval_hook in self.hooks_dict['on_after_eval']: + for on_after_eval_hook in self.hooks_dict["on_after_eval"]: on_after_eval_hook() def on_step_end(self): """Be called on the end of batches.""" - if len(self.hooks_dict['on_step_end']) > 0: + if len(self.hooks_dict["on_step_end"]) > 0: res_list = [] - for on_step_end_hook in self.hooks_dict['on_step_end']: + for on_step_end_hook in self.hooks_dict["on_step_end"]: res_list.append(on_step_end_hook()) return res_list else: @@ -154,7 +152,7 @@ def on_epoch_end(self): """Be called on the end of epochs.""" res_list = [] - for on_epoch_end_hook in self.hooks_dict['on_epoch_end']: + for on_epoch_end_hook in self.hooks_dict["on_epoch_end"]: res_list.append(on_epoch_end_hook()) return res_list @@ -193,8 +191,8 @@ def __init__(self, conf=None, model=None, adaptor=None): model: Model to be quantized in this object. It should be neural compressor model. 
""" super(QuantizationAwareTrainingCallbacks, self).__init__(conf=conf, model=model) - self.register_hook('on_train_begin', adaptor._pre_hook_for_qat) - self.register_hook('on_train_end', adaptor._post_hook_for_qat) + self.register_hook("on_train_begin", adaptor._pre_hook_for_qat) + self.register_hook("on_train_end", adaptor._post_hook_for_qat) def __repr__(self): """Represent this class.""" @@ -222,16 +220,16 @@ def __init__(self, conf=None, model=None): def on_train_end(self): """Be called after the end of training.""" - for on_train_end_hook in self.hooks_dict['on_train_end']: + for on_train_end_hook in self.hooks_dict["on_train_end"]: on_train_end_hook() - if self.conf.framework == 'pytorch' and isinstance(self.model.model, torch.nn.Module): + if self.conf.framework == "pytorch" and isinstance(self.model.model, torch.nn.Module): get_sparsity_ratio(self.pruners, self.model) - elif self.conf.framework == 'keras' and isinstance(self.model.model, tf.keras.Model): + elif self.conf.framework == "keras" and isinstance(self.model.model, tf.keras.Model): get_sparsity_ratio_tf(self.pruners, self.model) def __repr__(self): """Return the class's string representation.""" - return 'Pruning Callbacks' + return "Pruning Callbacks" def generate_hooks(self): """Register hooks for pruning.""" @@ -242,14 +240,15 @@ def generate_hooks(self): def _generate_pruners(self): """Obtain Pruner objects.""" - if self.conf.framework == 'pytorch' and isinstance(self.model.model, torch.nn.Module): + if self.conf.framework == "pytorch" and isinstance(self.model.model, torch.nn.Module): # model auto slim related from .pruner.model_slim.pattern_analyzer import SelfMHASearcher + for info in self.pruners_info: - if 'mha' in info['pattern']: + if "mha" in info["pattern"]: # head pruning pa_obj = SelfMHASearcher(self.model.model) - modules, _ = pa_obj.search(split_qkv_ffn = False) + modules, _ = pa_obj.search(split_qkv_ffn=False) modules = pa_obj.obtain_mha_module(modules) modules = pa_obj.from_layer_name_to_object(modules) if len(modules) == 0: @@ -262,11 +261,12 @@ def _generate_pruners(self): logger.warning("one pruner hooks no layers, please have a check") self.pruners.append(get_pruner(info, modules)) - info['modules'] = [key for key in modules.keys()] - info['len_of_modules'] = len(info['modules']) + info["modules"] = [key for key in modules.keys()] + info["len_of_modules"] = len(info["modules"]) logger.info(info) - elif self.conf.framework == 'keras' and isinstance(self.model.model, tf.keras.Model): + elif self.conf.framework == "keras" and isinstance(self.model.model, tf.keras.Model): from tensorflow.python.ops.numpy_ops import np_config + np_config.enable_numpy_behavior() for info in self.pruners_info: # original pruning types, e.g NxM or N:M @@ -274,12 +274,12 @@ def _generate_pruners(self): if modules == {}: logger.warning("one pruner hooks no layers, please have a check") - self.pruners.append(get_pruner(info, modules, 'keras')) - info['modules'] = [key for key in modules.keys()] - info['len_of_modules'] = len(info['modules']) + self.pruners.append(get_pruner(info, modules, "keras")) + info["modules"] = [key for key in modules.keys()] + info["len_of_modules"] = len(info["modules"]) logger.info(info) else: - assert False, 'now only support {}'.format(PRUNERS.keys()) + assert False, "now only support {}".format(PRUNERS.keys()) class DistillationCallbacks(BaseCallbacks): @@ -320,11 +320,8 @@ def __init__(self, conf=None, model=None): def _parse_model_class(self, model): """Parse model class for getting 
framework.""" - from neural_compressor.model.tensorflow_model import ( - TensorflowBaseModel, - TensorflowModel, - TensorflowQATModel, - ) + from neural_compressor.model.tensorflow_model import TensorflowBaseModel, TensorflowModel, TensorflowQATModel + if isinstance(model, TensorflowQATModel): return type(model) if isinstance(model, TensorflowBaseModel): @@ -333,7 +330,7 @@ def _parse_model_class(self, model): def _on_step_begin(self, batch_id): """Operations called on the beginning of batches.""" - if self.criterion is not None and hasattr(self.criterion, 'clear_features'): + if self.criterion is not None and hasattr(self.criterion, "clear_features"): self.criterion.clear_features() def _on_after_compute_loss(self, input, student_output, student_loss, teacher_output=None): @@ -350,13 +347,10 @@ def _on_after_compute_loss(self, input, student_output, student_loss, teacher_ou """ if self.criterion is None: self.create_criterion() - assert self.criterion, \ - 'criterion must be set in yaml config file.' + assert self.criterion, "criterion must be set in yaml config file." if teacher_output is None: - assert self.teacher_model, 'teacher_model must be set.' - teacher_output = self.criterion.teacher_model_forward( - input, teacher_model=self.teacher_model._model - ) + assert self.teacher_model, "teacher_model must be set." + teacher_output = self.criterion.teacher_model_forward(input, teacher_model=self.teacher_model._model) return self.criterion.loss_cal_sloss(student_output, teacher_output, student_loss) def init_train_cfg(self): @@ -364,25 +358,29 @@ def init_train_cfg(self): if self._train_cfg is None: # train section of distillation section in yaml file should be configured. self._train_cfg = self.conf.criterion - assert self._train_cfg, "train field of distillation section in yaml file must " \ - "be configured for distillation if train_func is NOT set." + assert self._train_cfg, ( + "train field of distillation section in yaml file must " + "be configured for distillation if train_func is NOT set." + ) def create_criterion(self): """Create the criterion for training.""" self.init_train_cfg() if self.criterion is None: - assert self._train_cfg.config is not None, \ - "criterion part in train field of distillation section in yaml file " \ + assert self._train_cfg.config is not None, ( + "criterion part in train field of distillation section in yaml file " "must be configured for distillation if criterion is NOT set." + ) criterion_cfg = self._train_cfg.config - assert len(criterion_cfg) == 1, "There must be exactly one loss in " \ - "criterion part, instead got {} loss.".format(len(criterion_cfg)) + assert ( + len(criterion_cfg) == 1 + ), "There must be exactly one loss in " "criterion part, instead got {} loss.".format(len(criterion_cfg)) loss = [i for i in criterion_cfg.keys()][0] loss_cfg = criterion_cfg[loss] criterion_builder = Criterions(self.framework)[loss](loss_cfg) criterion_tuple = criterion_builder() if self.teacher_model and self.student_model: - if self.framework == 'tensorflow': # new, for tf + if self.framework == "tensorflow": # new, for tf teacher_model = self.teacher_model._model student_model = self.student_model._model else: # for pytorch and other frameworks @@ -402,8 +400,8 @@ def generate_hooks(self): Register necessary hooks for distillation pipeline. 
""" if not self.hooks_registered: - self.register_hook('on_step_begin', self._on_step_begin) - self.register_hook('on_after_compute_loss', self._on_after_compute_loss) + self.register_hook("on_step_begin", self._on_step_begin) + self.register_hook("on_after_compute_loss", self._on_after_compute_loss) self.hooks_registered = True @property @@ -451,7 +449,6 @@ def teacher_model(self, user_model): Another corner case is slim model of tensorflow, be careful of the name of model configured in yaml file, make sure the name is in supported slim model list. - """ if not isinstance(user_model, BaseModel): logger.warning("Force convert framework model to neural_compressor model.") @@ -479,4 +476,4 @@ def train_cfg(self): def __repr__(self): """Class representation.""" - return 'Distillation Callbacks' + return "Distillation Callbacks" diff --git a/neural_compressor/compression/distillation/__init__.py b/neural_compressor/compression/distillation/__init__.py index 2f6ce778f97..f40a19949ce 100644 --- a/neural_compressor/compression/distillation/__init__.py +++ b/neural_compressor/compression/distillation/__init__.py @@ -1,4 +1,4 @@ -"""distillation init.""" +"""Distillation init.""" # !/usr/bin/env python # -*- coding: utf-8 -*- # @@ -14,4 +14,4 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file +# limitations under the License. diff --git a/neural_compressor/compression/distillation/criterions.py b/neural_compressor/compression/distillation/criterions.py index 2ae27cff594..7e4c2c4fb5b 100644 --- a/neural_compressor/compression/distillation/criterions.py +++ b/neural_compressor/compression/distillation/criterions.py @@ -14,7 +14,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Initialize critetion classes. 
Classes includes: @@ -23,14 +22,16 @@ """ from collections import Counter -from neural_compressor.utils.utility import LazyImport, singleton -from neural_compressor.utils import logger -from neural_compressor.adaptor.pytorch import pytorch_forward_wrapper import numpy as np -torch = LazyImport('torch') -tf = LazyImport('tensorflow') +from neural_compressor.adaptor.pytorch import pytorch_forward_wrapper +from neural_compressor.utils import logger +from neural_compressor.utils.utility import LazyImport, singleton + +torch = LazyImport("torch") +tf = LazyImport("tensorflow") + @singleton class TensorflowCriterions(object): @@ -41,6 +42,7 @@ def __init__(self): self.criterions = {} self.criterions.update(TENSORFLOW_CRITERIONS) + @singleton class PyTorchCriterions(object): """Record criterions in PyTorchCriterions class.""" @@ -50,17 +52,23 @@ def __init__(self): self.criterions = {} self.criterions.update(PYTORCH_CRITERIONS) -framework_criterions = {"tensorflow": TensorflowCriterions, - "pytorch": PyTorchCriterions, - "pytorch_fx": PyTorchCriterions} + +framework_criterions = { + "tensorflow": TensorflowCriterions, + "pytorch": PyTorchCriterions, + "pytorch_fx": PyTorchCriterions, +} # user/model specific criterions will be registered here TENSORFLOW_CRITERIONS = {} -PYTORCH_CRITERIONS= {} +PYTORCH_CRITERIONS = {} + +registry_criterions = { + "tensorflow": TENSORFLOW_CRITERIONS, + "pytorch": PYTORCH_CRITERIONS, + "pytorch_fx": PYTORCH_CRITERIONS, +} -registry_criterions = {"tensorflow": TENSORFLOW_CRITERIONS, - "pytorch": PYTORCH_CRITERIONS, - "pytorch_fx": PYTORCH_CRITERIONS} class Criterions(object): """Integrate criterions of different framework.""" @@ -71,8 +79,7 @@ def __init__(self, framework): Args: framework (string): framework name. """ - assert framework in ("tensorflow", "pytorch", "pytorch_fx"), \ - "framework support tensorflow pytorch" + assert framework in ("tensorflow", "pytorch", "pytorch_fx"), "framework support tensorflow pytorch" self.criterions = framework_criterions[framework]().criterions def __getitem__(self, criterion_type): @@ -84,8 +91,7 @@ def __getitem__(self, criterion_type): Returns: cls: criterion class. """ - assert criterion_type in self.criterions.keys(), "only support criterions in {}".\ - format(self.criterions.keys()) + assert criterion_type in self.criterions.keys(), "only support criterions in {}".format(self.criterions.keys()) return self.criterions[criterion_type] @@ -96,9 +102,10 @@ def register(self, name, criterion_cls): name (string): criterion name/type. criterion_cls (string): criterion class. """ - assert name not in self.criterions.keys(), 'registered criterion name already exists.' + assert name not in self.criterions.keys(), "registered criterion name already exists." self.criterions.update({name: criterion_cls}) + def criterion_registry(criterion_type, framework): """Use to register criterion classes in registry_criterions. 
@@ -112,17 +119,17 @@ def criterion_registry(criterion_type, framework): def decorator_criterion(cls): """Decorate criterion class to check framework and criterion name.""" - for fw in [fwk.strip() for fwk in framework.split(',')]: - assert fw in [ - "tensorflow", - "pytorch"], "The framework support tensorflow pytorch" + for fw in [fwk.strip() for fwk in framework.split(",")]: + assert fw in ["tensorflow", "pytorch"], "The framework support tensorflow pytorch" if criterion_type in registry_criterions[fw].keys(): - raise ValueError('Cannot have two criterions with the same name') + raise ValueError("Cannot have two criterions with the same name") registry_criterions[fw][criterion_type] = cls return cls + return decorator_criterion + class KnowledgeDistillationFramework(object): """Knowledge Distillation Framework.""" @@ -156,11 +163,13 @@ def teacher_model(self, model): """Setter of teacher model.""" self._teacher_model = model + class KnowledgeDistillationLoss(KnowledgeDistillationFramework): """Initialize the KnowledgeDistillationLoss class.""" - def __init__(self, temperature=1.0, loss_types=['CE', 'CE'], - loss_weights=[0.5, 0.5], student_model=None, teacher_model=None): + def __init__( + self, temperature=1.0, loss_types=["CE", "CE"], loss_weights=[0.5, 0.5], student_model=None, teacher_model=None + ): """Initialize Knowledge Distillation Loss class. Args: @@ -171,16 +180,16 @@ def __init__(self, temperature=1.0, loss_types=['CE', 'CE'], student_model (model, optional): student model. Defaults to None. teacher_model (model, optional): teacher model. Defaults to None. """ - super(KnowledgeDistillationLoss, self).__init__(student_model=student_model, - teacher_model=teacher_model) + super(KnowledgeDistillationLoss, self).__init__(student_model=student_model, teacher_model=teacher_model) self.teacher_outputs = None self.temperature = temperature self.loss_weights = loss_weights self.loss_types = loss_types self.teacher_student_loss = self.student_targets_loss = None - assert len(loss_weights) == len(loss_types) == 2, 'Wrong length for ' + \ - 'loss_weights or loss_types, should be 2.' - assert sum(loss_weights) == 1.0, 'Sum of loss_weights should be 1.0.' + assert len(loss_weights) == len(loss_types) == 2, ( + "Wrong length for " + "loss_weights or loss_types, should be 2." + ) + assert sum(loss_weights) == 1.0, "Sum of loss_weights should be 1.0." def teacher_model_forward(self, input, teacher_model=None): """Define parameters for teacher_model_forward function. @@ -192,8 +201,7 @@ def teacher_model_forward(self, input, teacher_model=None): Raises: NotImplementedError: NotImplementedError """ - raise NotImplementedError('Function teacher_model_forward ' - 'should be framework related.') + raise NotImplementedError("Function teacher_model_forward " "should be framework related.") def teacher_student_loss_cal(self, student_outputs, teacher_outputs): """Define parameters for teacher_student_loss_cal function. @@ -205,8 +213,7 @@ def teacher_student_loss_cal(self, student_outputs, teacher_outputs): Raises: NotImplementedError: NotImplementedError """ - raise NotImplementedError('Function teacher_student_loss_cal ' - 'should be framework related.') + raise NotImplementedError("Function teacher_student_loss_cal " "should be framework related.") def student_targets_loss_cal(self, student_outputs, targets): """Define parameters for student_targets_loss_cal function. 
@@ -218,8 +225,7 @@ def student_targets_loss_cal(self, student_outputs, targets): Raises: NotImplementedError: NotImplementedError """ - raise NotImplementedError('Function student_targets_loss_cal ' - 'should be framework related.') + raise NotImplementedError("Function student_targets_loss_cal " "should be framework related.") def loss_cal(self, student_outputs, targets): """Calculate loss of student model. @@ -253,12 +259,11 @@ def loss_cal_sloss(self, student_outputs, teacher_outputs, student_loss): student_out_ = student_outputs / self.temperature teacher_out_ = teacher_outputs / self.temperature distillation_loss = self.teacher_student_loss_cal(student_out_, teacher_out_) - distillation_loss *= self.temperature ** 2 + distillation_loss *= self.temperature**2 else: distillation_loss = 0 - self.loss = origin_loss * self.loss_weights[0] + \ - distillation_loss * self.loss_weights[1] + self.loss = origin_loss * self.loss_weights[0] + distillation_loss * self.loss_weights[1] return self.loss def __call__(self, student_outputs, targets): @@ -273,11 +278,13 @@ def __call__(self, student_outputs, targets): """ return self.loss_cal(student_outputs, targets) + class PyTorchKnowledgeDistillationLoss(KnowledgeDistillationLoss): """The PyTorchKnowledgeDistillationLoss class inherits from KnowledgeDistillationLoss.""" - def __init__(self, temperature=1.0, loss_types=['CE', 'CE'], - loss_weights=[0.5, 0.5], student_model=None, teacher_model=None): + def __init__( + self, temperature=1.0, loss_types=["CE", "CE"], loss_weights=[0.5, 0.5], student_model=None, teacher_model=None + ): """Initialize PyTorch Knowledge Distillation Loss class. Args: @@ -292,33 +299,37 @@ def __init__(self, temperature=1.0, loss_types=['CE', 'CE'], NotImplementedError: NotImplementedError NotImplementedError: NotImplementedError """ - super(PyTorchKnowledgeDistillationLoss, self).__init__(temperature=temperature, - loss_types=loss_types, - loss_weights=loss_weights, - student_model=student_model, - teacher_model=teacher_model) + super(PyTorchKnowledgeDistillationLoss, self).__init__( + temperature=temperature, + loss_types=loss_types, + loss_weights=loss_weights, + student_model=student_model, + teacher_model=teacher_model, + ) if self.student_targets_loss is None: - if self.loss_types[0] == 'CE': + if self.loss_types[0] == "CE": self.student_targets_loss = torch.nn.CrossEntropyLoss() - elif self.loss_types[0] == 'MSE': + elif self.loss_types[0] == "MSE": self.student_targets_loss = torch.nn.MSELoss() else: - raise NotImplementedError('Now we only support CrossEntropyLoss and MSELoss ' - 'for loss of student model output with respect to targets.') - logger.info('student_targets_loss: {}, {}'.format(self.loss_types[0], \ - self.loss_weights[0])) + raise NotImplementedError( + "Now we only support CrossEntropyLoss and MSELoss " + "for loss of student model output with respect to targets." 
+ ) + logger.info("student_targets_loss: {}, {}".format(self.loss_types[0], self.loss_weights[0])) if self.teacher_student_loss is None: - if self.loss_types[1] == 'CE': + if self.loss_types[1] == "CE": self.teacher_student_loss = self.SoftCrossEntropy - elif self.loss_types[1] == 'KL': + elif self.loss_types[1] == "KL": self.teacher_student_loss = self.KullbackLeiblerDivergence - elif self.loss_types[1] == 'MSE': + elif self.loss_types[1] == "MSE": self.teacher_student_loss = torch.nn.MSELoss() else: - raise NotImplementedError('Now we only support CrossEntropyLoss KL Divergence' - ' and MSELoss for loss of student model output with respect to teacher model ouput.') - logger.info('teacher_student_loss: {}, {}'.format(self.loss_types[1], \ - self.loss_weights[1])) + raise NotImplementedError( + "Now we only support CrossEntropyLoss KL Divergence" + " and MSELoss for loss of student model output with respect to teacher model ouput." + ) + logger.info("teacher_student_loss: {}, {}".format(self.loss_types[1], self.loss_weights[1])) def SoftCrossEntropy(self, logits, targets): """Return SoftCrossEntropy. @@ -332,7 +343,7 @@ def SoftCrossEntropy(self, logits, targets): """ log_prob = torch.nn.functional.log_softmax(logits, dim=-1) targets_prob = torch.nn.functional.softmax(targets, dim=-1) - return (- targets_prob * log_prob).sum(dim=-1).mean() + return (-targets_prob * log_prob).sum(dim=-1).mean() def KullbackLeiblerDivergence(self, logits, targets): """Return KullbackLeiblerDivergence. @@ -362,8 +373,9 @@ def teacher_model_forward(self, input, teacher_model=None, device=None): outputs = None if self.loss_weights[1] > 0: model = self.teacher_model if teacher_model is None else teacher_model - assert isinstance(model, torch.nn.Module), \ - 'Teacher model should be a torch Module instead of {}'.format(type(model)) + assert isinstance(model, torch.nn.Module), "Teacher model should be a torch Module instead of {}".format( + type(model) + ) model.eval() try: model_device = next(model.parameters()).device @@ -388,7 +400,7 @@ def teacher_student_loss_cal(self, student_outputs, teacher_outputs): Returns: tensor: loss """ - assert self.teacher_student_loss, 'teacher_student_loss not specified.' + assert self.teacher_student_loss, "teacher_student_loss not specified." return self.teacher_student_loss(student_outputs, teacher_outputs) def student_targets_loss_cal(self, student_outputs, targets): @@ -401,10 +413,11 @@ def student_targets_loss_cal(self, student_outputs, targets): Returns: tensor: loss """ - assert self.student_targets_loss, 'student_targets_loss not specified.' + assert self.student_targets_loss, "student_targets_loss not specified." return self.student_targets_loss(student_outputs, targets) -@criterion_registry('KnowledgeDistillationLoss', 'pytorch') + +@criterion_registry("KnowledgeDistillationLoss", "pytorch") class PyTorchKnowledgeDistillationLossWrapper(object): """PyTorchKnowledgeDistillationLossWrapper wraps PyTorchKnowledgeDistillationLoss.""" @@ -418,22 +431,22 @@ def __init__(self, param_dict): def _param_check(self): param_dict = self.param_dict - _params = ['temperature', 'loss_types', 'loss_weights'] - assert all(key in param_dict for key in _params),\ - 'Keys {} must be in input parameters.'.format(_params) - assert param_dict['temperature'] > 0.0,\ - 'Value of temperature must be positive.' - assert len(param_dict['loss_types']) == len(param_dict['loss_weights']),\ - 'Length of loss_types and loss_weights must be the same.' 
- assert all(type(param_dict[k]) in [list, tuple] \ - for k in ['loss_types', 'loss_weights']),\ - 'Type of loss_types and loss_weights must be list or tuple.' - assert all(any(isinstance(e, t) for t in [str, torch.nn.Module]) \ - for e in param_dict['loss_types']), \ - 'Type of loss_types element must be str or torch Module.' - assert all(0. <= e <= 1. for e in param_dict['loss_weights']) and \ - abs(sum(param_dict['loss_weights']) - 1.0) < 1e-9, \ - 'Element of loss_weights must be in interval [0, 1] and summed to 1.0.' + _params = ["temperature", "loss_types", "loss_weights"] + assert all(key in param_dict for key in _params), "Keys {} must be in input parameters.".format(_params) + assert param_dict["temperature"] > 0.0, "Value of temperature must be positive." + assert len(param_dict["loss_types"]) == len( + param_dict["loss_weights"] + ), "Length of loss_types and loss_weights must be the same." + assert all( + type(param_dict[k]) in [list, tuple] for k in ["loss_types", "loss_weights"] + ), "Type of loss_types and loss_weights must be list or tuple." + assert all( + any(isinstance(e, t) for t in [str, torch.nn.Module]) for e in param_dict["loss_types"] + ), "Type of loss_types element must be str or torch Module." + assert ( + all(0.0 <= e <= 1.0 for e in param_dict["loss_weights"]) + and abs(sum(param_dict["loss_weights"]) - 1.0) < 1e-9 + ), "Element of loss_weights must be in interval [0, 1] and summed to 1.0." new_dict = {} for k in _params: new_dict[k] = param_dict[k] @@ -448,11 +461,13 @@ def __call__(self, **kwargs): """ return PyTorchKnowledgeDistillationLoss, self._param_check() + class TensorflowKnowledgeDistillationLoss(KnowledgeDistillationLoss): """The TensorflowKnowledgeDistillationLoss class inherits from KnowledgeDistillationLoss.""" - def __init__(self, temperature=1.0, loss_types=['CE', 'CE'], - loss_weights=[0.5, 0.5], student_model=None, teacher_model=None): + def __init__( + self, temperature=1.0, loss_types=["CE", "CE"], loss_weights=[0.5, 0.5], student_model=None, teacher_model=None + ): """Initialize Tensorflow Knowledge Distillation Loss class. Args: @@ -467,29 +482,33 @@ def __init__(self, temperature=1.0, loss_types=['CE', 'CE'], NotImplementedError: NotImplementedError NotImplementedError: NotImplementedError """ - super(TensorflowKnowledgeDistillationLoss, self).__init__(temperature=temperature, - loss_types=loss_types, - loss_weights=loss_weights, - student_model=student_model, - teacher_model=teacher_model) + super(TensorflowKnowledgeDistillationLoss, self).__init__( + temperature=temperature, + loss_types=loss_types, + loss_weights=loss_weights, + student_model=student_model, + teacher_model=teacher_model, + ) if self.student_targets_loss is None: - if self.loss_types[0] == 'CE': + if self.loss_types[0] == "CE": self.student_targets_loss = tf.keras.losses.SparseCategoricalCrossentropy() else: - raise NotImplementedError('Now we only support CrossEntropyLoss ' - 'for loss of student model output with respect to targets.') - logger.info('student_targets_loss: {}, {}'.format(self.loss_types[0], \ - self.loss_weights[0])) + raise NotImplementedError( + "Now we only support CrossEntropyLoss " "for loss of student model output with respect to targets." 
+ ) + logger.info("student_targets_loss: {}, {}".format(self.loss_types[0], self.loss_weights[0])) if self.teacher_student_loss is None: - if self.loss_types[1] == 'CE': + if self.loss_types[1] == "CE": self.teacher_student_loss = self.SoftCrossEntropy - elif self.loss_types[1] == 'KL': + elif self.loss_types[1] == "KL": self.teacher_student_loss = tf.keras.losses.KLDivergence() else: - raise NotImplementedError('Now we only support CrossEntropyLoss' - ' for loss of student model output with respect to teacher model ouput.') - logger.info('teacher_student_loss: {}, {}'.format(self.loss_types[1], \ - self.loss_weights[1])) + raise NotImplementedError( + "Now we only support CrossEntropyLoss" + " for loss of student model output with respect to teacher model ouput." + ) + logger.info("teacher_student_loss: {}, {}".format(self.loss_types[1], self.loss_weights[1])) + def SoftCrossEntropy(self, targets, logits): """Return SoftCrossEntropy. @@ -502,7 +521,7 @@ def SoftCrossEntropy(self, targets, logits): """ log_prob = tf.math.log(logits) targets_prob = targets - return tf.math.reduce_mean(tf.math.reduce_sum(- targets_prob * log_prob, axis=-1), axis=-1) + return tf.math.reduce_mean(tf.math.reduce_sum(-targets_prob * log_prob, axis=-1), axis=-1) def teacher_model_forward(self, input, teacher_model=None): """Teacher model forward. @@ -537,7 +556,7 @@ def teacher_student_loss_cal(self, student_outputs, teacher_outputs): Returns: tensor: loss """ - assert self.teacher_student_loss, 'teacher_student_loss not specified.' + assert self.teacher_student_loss, "teacher_student_loss not specified." return self.teacher_student_loss(teacher_outputs, student_outputs) def student_targets_loss_cal(self, student_outputs, targets): @@ -550,7 +569,7 @@ def student_targets_loss_cal(self, student_outputs, targets): Returns: tensor: loss """ - assert self.student_targets_loss, 'student_targets_loss not specified.' + assert self.student_targets_loss, "student_targets_loss not specified." return self.student_targets_loss(targets, student_outputs) def __call__(self, student_outputs, targets): @@ -568,7 +587,8 @@ def __call__(self, student_outputs, targets): targets = tmp return self.loss_cal(student_outputs, targets) -@criterion_registry('KnowledgeDistillationLoss', 'tensorflow') + +@criterion_registry("KnowledgeDistillationLoss", "tensorflow") class TensorflowKnowledgeDistillationLossWrapper(object): """TensorflowKnowledgeDistillationLossWrapper wraps TensorflowKnowledgeDistillationLoss.""" @@ -582,22 +602,22 @@ def __init__(self, param_dict): def _param_check(self): param_dict = self.param_dict - _params = ['temperature', 'loss_types', 'loss_weights'] - assert all(key in param_dict for key in _params),\ - 'Keys {} must be in input parameters.'.format(_params) - assert param_dict['temperature'] > 0.0,\ - 'Value of temperature must be positive.' - assert len(param_dict['loss_types']) == len(param_dict['loss_weights']),\ - 'Length of loss_types and loss_weights must be the same.' - assert all(type(param_dict[k]) in [list, tuple] \ - for k in ['loss_types', 'loss_weights']),\ - 'Type of loss_types and loss_weights must be list or tuple.' - assert all(any(isinstance(e, t) for t in [str, tf.keras]) \ - for e in param_dict['loss_types']), \ - 'Type of loss_types element must be str or torch Module.' - assert all(0. <= e <= 1. for e in param_dict['loss_weights']) and \ - abs(sum(param_dict['loss_weights']) - 1.0) < 1e-9, \ - 'Element of loss_weights must be in interval [0, 1] and summed to 1.0.' 
+ _params = ["temperature", "loss_types", "loss_weights"] + assert all(key in param_dict for key in _params), "Keys {} must be in input parameters.".format(_params) + assert param_dict["temperature"] > 0.0, "Value of temperature must be positive." + assert len(param_dict["loss_types"]) == len( + param_dict["loss_weights"] + ), "Length of loss_types and loss_weights must be the same." + assert all( + type(param_dict[k]) in [list, tuple] for k in ["loss_types", "loss_weights"] + ), "Type of loss_types and loss_weights must be list or tuple." + assert all( + any(isinstance(e, t) for t in [str, tf.keras]) for e in param_dict["loss_types"] + ), "Type of loss_types element must be str or torch Module." + assert ( + all(0.0 <= e <= 1.0 for e in param_dict["loss_weights"]) + and abs(sum(param_dict["loss_weights"]) - 1.0) < 1e-9 + ), "Element of loss_weights must be in interval [0, 1] and summed to 1.0." new_dict = {} for k in _params: new_dict[k] = param_dict[k] @@ -612,11 +632,13 @@ def __call__(self, **kwargs): """ return TensorflowKnowledgeDistillationLoss, self._param_check() + class TensorflowKnowledgeDistillationLossExternal(KnowledgeDistillationLoss): """TensorflowKnowledgeDistillationLossExternal inherits from KnowledgeDistillationLoss.""" - def __init__(self, temperature=1.0, loss_types=['CE', 'CE'], - loss_weights=[0.5, 0.5], student_model=None, teacher_model=None): + def __init__( + self, temperature=1.0, loss_types=["CE", "CE"], loss_weights=[0.5, 0.5], student_model=None, teacher_model=None + ): """Initialize Tensorflow Knowledge Distillation Loss class. Args: @@ -632,29 +654,31 @@ def __init__(self, temperature=1.0, loss_types=['CE', 'CE'], NotImplementedError: NotImplementedError """ super(TensorflowKnowledgeDistillationLossExternal, self).__init__( - temperature=temperature, - loss_types=loss_types, - loss_weights=loss_weights, - student_model=student_model, - teacher_model=teacher_model) + temperature=temperature, + loss_types=loss_types, + loss_weights=loss_weights, + student_model=student_model, + teacher_model=teacher_model, + ) if self.student_targets_loss is None: - if self.loss_types[0] == 'CE': + if self.loss_types[0] == "CE": self.student_targets_loss = tf.keras.losses.CategoricalCrossentropy() else: - raise NotImplementedError('Now we only support CrossEntropyLoss ' - 'for loss of student model output with respect to targets.') - logger.info('student_targets_loss: {}, {}'.format(self.loss_types[0], \ - self.loss_weights[0])) + raise NotImplementedError( + "Now we only support CrossEntropyLoss " "for loss of student model output with respect to targets." + ) + logger.info("student_targets_loss: {}, {}".format(self.loss_types[0], self.loss_weights[0])) if self.teacher_student_loss is None: - if self.loss_types[1] == 'CE': + if self.loss_types[1] == "CE": self.teacher_student_loss = tf.keras.losses.CategoricalCrossentropy() - elif self.loss_types[1] == 'KL': + elif self.loss_types[1] == "KL": self.teacher_student_loss = tf.keras.losses.KLDivergence() else: - raise NotImplementedError('Now we only support CrossEntropyLoss' - ' for loss of student model output with respect to teacher model ouput.') - logger.info('teacher_student_loss: {}, {}'.format(self.loss_types[1], \ - self.loss_weights[1])) + raise NotImplementedError( + "Now we only support CrossEntropyLoss" + " for loss of student model output with respect to teacher model ouput." 
+ ) + logger.info("teacher_student_loss: {}, {}".format(self.loss_types[1], self.loss_weights[1])) def teacher_model_forward(self, input, teacher_model=None): """Teacher model forward. @@ -689,7 +713,7 @@ def teacher_student_loss_cal(self, student_outputs, teacher_outputs): Returns: tensor: loss """ - assert self.teacher_student_loss, 'teacher_student_loss not specified.' + assert self.teacher_student_loss, "teacher_student_loss not specified." return self.teacher_student_loss(teacher_outputs, student_outputs) def student_targets_loss_cal(self, student_outputs, targets): @@ -702,15 +726,22 @@ def student_targets_loss_cal(self, student_outputs, targets): Returns: tensor: loss """ - assert self.student_targets_loss, 'student_targets_loss not specified.' + assert self.student_targets_loss, "student_targets_loss not specified." return self.student_targets_loss(targets, student_outputs) class IntermediateLayersKnowledgeDistillationLoss(KnowledgeDistillationFramework): """The IntermediateLayersKnowledgeDistillationLoss class inherits from KnowledgeDistillationLoss.""" - def __init__(self, layer_mappings=[], loss_types=None, loss_weights=None, - add_origin_loss=False, student_model=None, teacher_model=None): + def __init__( + self, + layer_mappings=[], + loss_types=None, + loss_weights=None, + add_origin_loss=False, + student_model=None, + teacher_model=None, + ): """Initialize PyTorch Knowledge Distillation Loss class. Args: @@ -726,41 +757,42 @@ def __init__(self, layer_mappings=[], loss_types=None, loss_weights=None, NotImplementedError: NotImplementedError """ super(IntermediateLayersKnowledgeDistillationLoss, self).__init__( - student_model=student_model, - teacher_model=teacher_model - ) + student_model=student_model, teacher_model=teacher_model + ) self.student_features = {} self.teacher_features = {} self.layer_mappings = [] self.layer_output_process = [] for item in layer_mappings: - assert len(item) == 1 or len(item) == 2, 'Each item in layer_mappings ' + \ - 'should be a list or tuple containing 1 list or 2 lists, with format ' + \ - '[(layer_name, )] or [(student_layer_name, ), (teacher_layer_name, )], ' + \ - 'first one is the abbreviation for cases that student_layer_name and teacher_layer_name ' + \ - 'are the same. The length of tuples in the list could be either 1 like previous cases, ' + \ - 'or 2, like [(layer_name, layer_output_process)] or ' + \ - '[(student_layer_name, student_layer_output_process), ' + \ - '(teacher_layer_name, teacher_layer_output_process)].' + \ - 'For example, with 2 tuples of length 2, element looks like ' + \ - '[(\'student_model.layer1.attention\', \'1\'), (\'teacher_model.layer1.attention\', \'1\')], ' + \ - 'where \'student_model.layer1.attention\' and \'teacher_model.layer1.attention\' ' + \ - 'represent attention module on layer 1 of the student model and the ' + \ - 'teacher model respectively, two \'1\' represent the index to retrieve the ' + \ - 'desired output from the defined module\'s outputs, in this case, the above ' + \ - 'two module\'s outputs are lists, with desired output in index 1 of these ' + \ - 'lists, in cases of dict output, retrieving can be done by defining the ' + \ - 'corresponding key, in cases of module\'s output is the desired output, ' + \ - 'just adopt the format such as [(\'student_model.layer1.output' + \ - '.output\', ), (\'teacher_model.layer1.output\', )].' 
+ assert len(item) == 1 or len(item) == 2, ( + "Each item in layer_mappings " + + "should be a list or tuple containing 1 list or 2 lists, with format " + + "[(layer_name, )] or [(student_layer_name, ), (teacher_layer_name, )], " + + "first one is the abbreviation for cases that student_layer_name and teacher_layer_name " + + "are the same. The length of tuples in the list could be either 1 like previous cases, " + + "or 2, like [(layer_name, layer_output_process)] or " + + "[(student_layer_name, student_layer_output_process), " + + "(teacher_layer_name, teacher_layer_output_process)]." + + "For example, with 2 tuples of length 2, element looks like " + + "[('student_model.layer1.attention', '1'), ('teacher_model.layer1.attention', '1')], " + + "where 'student_model.layer1.attention' and 'teacher_model.layer1.attention' " + + "represent attention module on layer 1 of the student model and the " + + "teacher model respectively, two '1' represent the index to retrieve the " + + "desired output from the defined module's outputs, in this case, the above " + + "two module's outputs are lists, with desired output in index 1 of these " + + "lists, in cases of dict output, retrieving can be done by defining the " + + "corresponding key, in cases of module's output is the desired output, " + + "just adopt the format such as [('student_model.layer1.output" + + ".output', ), ('teacher_model.layer1.output', )]." + ) if len(item) == 1: item = [item[0], item[0]] for i in range(len(item)): if not isinstance(item[i], (list, tuple)): - item[i] = [item[i], ''] + item[i] = [item[i], ""] elif len(item[i]) == 1: - item[i] = [item[i][0], ''] + item[i] = [item[i][0], ""] else: assert len(item[i]) == 2, "Expect {} to be a tuple of length 1 or 2.".format(item[i]) self.layer_mappings.append((item[0][0], item[1][0])) @@ -769,18 +801,21 @@ def __init__(self, layer_mappings=[], loss_types=None, loss_weights=None, self.student_features[student_layer] = [] self.teacher_features[teacher_layer] = [] - self.loss_weights = [1.0 / len(layer_mappings)] * len(layer_mappings) \ - if (loss_weights is None or loss_weights == []) else loss_weights - self.loss_types = ['MSE'] * len(layer_mappings) \ - if (loss_types is None or loss_types == []) else loss_types + self.loss_weights = ( + [1.0 / len(layer_mappings)] * len(layer_mappings) + if (loss_weights is None or loss_weights == []) + else loss_weights + ) + self.loss_types = ["MSE"] * len(layer_mappings) if (loss_types is None or loss_types == []) else loss_types self.add_origin_loss = add_origin_loss self.loss_funcs = [] self.feature_matchers = None self.init_loss_funcs() - assert len(self.layer_mappings) == len(self.loss_weights) == len(self.loss_types), \ - f'Wrong length for layer_mappings:{self.layer_mappings}, ' + \ - f'loss_weights:{self.loss_weights} or loss_types:{self.loss_types}, ' + \ - 'all should be the same.' + assert len(self.layer_mappings) == len(self.loss_weights) == len(self.loss_types), ( + f"Wrong length for layer_mappings:{self.layer_mappings}, " + + f"loss_weights:{self.loss_weights} or loss_types:{self.loss_types}, " + + "all should be the same." + ) def init_loss_funcs(self): """Init loss funcs. 
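As a concrete reading of the layer_mappings contract spelled out in the assertion above (layer names here are illustrative only), the three accepted shapes look like this, together with the defaults applied when loss_types and loss_weights are omitted:

    layer_mappings = [
        # student and teacher share the layer name; the module output is used as-is
        [("encoder.layer1.output",)],
        # distinct student/teacher layer names, still using the raw module output
        [("student.layer1.attention",), ("teacher.layer1.attention",)],
        # distinct layer names plus an output_process: pick index "1" of each module's output list
        [("student.layer1.attention", "1"), ("teacher.layer1.attention", "1")],
    ]
    # Defaults when omitted: loss_types -> ["MSE"] per mapping,
    # loss_weights -> [1.0 / len(layer_mappings)] per mapping.
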
@@ -788,8 +823,7 @@ def init_loss_funcs(self): Raises: NotImplementedError: NotImplementedError """ - raise NotImplementedError('Function init_loss_funcs ' - 'should be framework related.') + raise NotImplementedError("Function init_loss_funcs " "should be framework related.") def init_feature_matcher(self, student_feature, teacher_feature): """Init feature matcher. @@ -797,8 +831,7 @@ def init_feature_matcher(self, student_feature, teacher_feature): Raises: NotImplementedError: NotImplementedError """ - raise NotImplementedError('Function init_feature_matcher ' - 'should be framework related.') + raise NotImplementedError("Function init_feature_matcher " "should be framework related.") def teacher_model_forward(self, input, teacher_model=None): """Teacher model forward. @@ -806,8 +839,7 @@ def teacher_model_forward(self, input, teacher_model=None): Raises: NotImplementedError: NotImplementedError """ - raise NotImplementedError('Function teacher_model_forward ' - 'should be framework related.') + raise NotImplementedError("Function teacher_model_forward " "should be framework related.") def loss_cal(self): """Calculate loss. @@ -815,7 +847,7 @@ def loss_cal(self): Raises: NotImplementedError: NotImplementedError """ - raise NotImplementedError('Function loss_cal should be framework related.') + raise NotImplementedError("Function loss_cal should be framework related.") def loss_cal_sloss(self, student_outputs, teacher_outputs, student_loss): """Calculate all losses between student model and teacher model. @@ -842,13 +874,18 @@ def __call__(self, student_outputs, targets): return 0 -class PyTorchIntermediateLayersKnowledgeDistillationLoss( - IntermediateLayersKnowledgeDistillationLoss - ): +class PyTorchIntermediateLayersKnowledgeDistillationLoss(IntermediateLayersKnowledgeDistillationLoss): """PyTorch Intermediate Layers Knowledge Distillation Loss.""" - def __init__(self, layer_mappings=[], loss_types=None, loss_weights=None, - add_origin_loss=False, student_model=None, teacher_model=None): + def __init__( + self, + layer_mappings=[], + loss_types=None, + loss_weights=None, + add_origin_loss=False, + student_model=None, + teacher_model=None, + ): """Initialize PyTorch Knowledge Distillation Loss class. 
Args: @@ -864,12 +901,13 @@ def __init__(self, layer_mappings=[], loss_types=None, loss_weights=None, NotImplementedError: NotImplementedError """ super(PyTorchIntermediateLayersKnowledgeDistillationLoss, self).__init__( - layer_mappings=layer_mappings, - loss_types=loss_types, - loss_weights=loss_weights, - add_origin_loss=add_origin_loss, - student_model=student_model, - teacher_model=teacher_model) + layer_mappings=layer_mappings, + loss_types=loss_types, + loss_weights=loss_weights, + add_origin_loss=add_origin_loss, + student_model=student_model, + teacher_model=teacher_model, + ) self.register_hooks_for_models() def register_hooks_for_models(self): @@ -879,33 +917,28 @@ def register_hooks_for_models(self): AttributeError: AttributeError """ from neural_compressor.experimental.common import torch_utils - def register_model_forward_hook(model, path, output_process='', student=False): + + def register_model_forward_hook(model, path, output_process="", student=False): module = model - if path != '': - nodes = path.split('.') + if path != "": + nodes = path.split(".") for node in nodes: try: module = module.__getattr__(node) except: - raise AttributeError('There is no path {} in the model.'.format(path)) - return module.register_forward_hook( - torch_utils.get_activation(path, output_process, student) - ) + raise AttributeError("There is no path {} in the model.".format(path)) + return module.register_forward_hook(torch_utils.get_activation(path, output_process, student)) - assert isinstance(self.student_model, torch.nn.Module) and \ - isinstance(self.teacher_model, torch.nn.Module), \ - 'Expect student_model and teacher_model to be an torch.nn.Module object, ' + \ - 'got student_model:{} and teacher_model:{}'.format( - type(self.student_model), type(self.teacher_model) - ) + assert isinstance(self.student_model, torch.nn.Module) and isinstance(self.teacher_model, torch.nn.Module), ( + "Expect student_model and teacher_model to be an torch.nn.Module object, " + + "got student_model:{} and teacher_model:{}".format(type(self.student_model), type(self.teacher_model)) + ) self.hook_handles = [] for idx in range(len(self.layer_mappings)): student_layer, teacher_layer = self.layer_mappings[idx] student_output_process, teacher_output_process = self.layer_output_process[idx] - st_handle = register_model_forward_hook(self.student_model, student_layer, - student_output_process, True) - te_handle = register_model_forward_hook(self.teacher_model, teacher_layer, - teacher_output_process) + st_handle = register_model_forward_hook(self.student_model, student_layer, student_output_process, True) + te_handle = register_model_forward_hook(self.teacher_model, teacher_layer, teacher_output_process) torch_utils.STUDENT_FEATURES = self.student_features torch_utils.TEACHER_FEATURES = self.teacher_features self.hook_handles.extend([st_handle, te_handle]) @@ -918,16 +951,18 @@ def remove_all_hooks(self): def init_loss_funcs(self): """Init loss funcs.""" for loss_type in self.loss_types: - if loss_type == 'MSE': + if loss_type == "MSE": loss_func = torch.nn.MSELoss() - elif loss_type == 'KL': + elif loss_type == "KL": loss_func = torch.nn.KLDivLoss() - elif loss_type == 'L1': + elif loss_type == "L1": loss_func = torch.nn.L1Loss() else: - raise NotImplementedError(f'Unsupported loss type {loss_type}, supported loss is ' \ - 'MSE for mean squared error, KL for Kullback-Leibler divergence and ' \ - 'L1 for L1 loss.') + raise NotImplementedError( + f"Unsupported loss type {loss_type}, supported loss is " + "MSE for 
mean squared error, KL for Kullback-Leibler divergence and " + "L1 for L1 loss." + ) self.loss_funcs.append(loss_func) def init_feature_matcher(self, student_feature, teacher_feature): @@ -940,12 +975,14 @@ def init_feature_matcher(self, student_feature, teacher_feature): Returns: pytorch_linear_feature_matcher """ + class pytorch_linear_feature_matcher(torch.nn.Module): def __init__(self, src_shape, dst_shape): super().__init__() shape_diff = [abs(i - j) for i, j in zip(dst_shape, src_shape)] - assert shape_diff.count(0) == len(shape_diff) - 1, 'Expect only one ' + \ - 'different dimension between student_feature and teacher_feature.' + assert shape_diff.count(0) == len(shape_diff) - 1, ( + "Expect only one " + "different dimension between student_feature and teacher_feature." + ) self.dim_idx = np.argmax(shape_diff) self.dense = torch.nn.Linear(src_shape[self.dim_idx], dst_shape[self.dim_idx]) @@ -957,19 +994,20 @@ def forward(self, input): output = torch.transpose(output, self.dim_idx, -1) return output - assert isinstance(student_feature, (torch.Tensor, np.ndarray)) and \ - isinstance(teacher_feature, (torch.Tensor, np.ndarray)), \ - 'Expect student_feature and teacher_feature to be torch.Tensor or np.ndarray ' + \ - 'objects, got student_feature a {st} object, teacher_feature a {tt} object.'.format( + assert isinstance(student_feature, (torch.Tensor, np.ndarray)) and isinstance( + teacher_feature, (torch.Tensor, np.ndarray) + ), ( + "Expect student_feature and teacher_feature to be torch.Tensor or np.ndarray " + + "objects, got student_feature a {st} object, teacher_feature a {tt} object.".format( st=type(student_feature), tt=type(teacher_feature) ) - assert len(student_feature.shape) == len(teacher_feature.shape), \ - 'Expect student_feature and teacher_feature to have the same length of shape, ' + \ - 'got student_feature of {}, teacher_feature of {}.'.format( - student_feature.shape, teacher_feature.shape - ) + ) + assert len(student_feature.shape) == len(teacher_feature.shape), ( + "Expect student_feature and teacher_feature to have the same length of shape, " + + "got student_feature of {}, teacher_feature of {}.".format(student_feature.shape, teacher_feature.shape) + ) if sum([abs(i - j) for i, j in zip(student_feature.shape, teacher_feature.shape)]) == 0: - return lambda x:x + return lambda x: x return pytorch_linear_feature_matcher(student_feature.shape, teacher_feature.shape) def teacher_model_forward(self, input, teacher_model=None, device=None): @@ -983,8 +1021,9 @@ def teacher_model_forward(self, input, teacher_model=None, device=None): NotImplementedError: NotImplementedError """ model = self.teacher_model if teacher_model is None else teacher_model - assert isinstance(model, torch.nn.Module), \ - 'Teacher model should be a torch Module instead of {}'.format(type(model)) + assert isinstance(model, torch.nn.Module), "Teacher model should be a torch Module instead of {}".format( + type(model) + ) model.eval() try: model_device = next(model.parameters()).device @@ -1029,28 +1068,32 @@ def loss_cal(self): student_layer, teacher_layer = self.layer_mappings[idx] student_feature = self.student_features[student_layer] teacher_feature = self.teacher_features[teacher_layer] - assert len(student_feature) == len(teacher_feature) and len(student_feature) > 0, \ - 'Lengths of student_feature and teacher_feature should be the same and larger than 0, ' + \ - 'instead of {} and {}, '.format(len(student_feature), len(teacher_feature)) + \ - 'please run student and teacher model forward 
properly before calculating the loss.' + assert len(student_feature) == len(teacher_feature) and len(student_feature) > 0, ( + "Lengths of student_feature and teacher_feature should be the same and larger than 0, " + + "instead of {} and {}, ".format(len(student_feature), len(teacher_feature)) + + "please run student and teacher model forward properly before calculating the loss." + ) + def device2feature_gen(features): devices_count = Counter([f.device for f in features]) - assert [1] * len(devices_count) == [_ for _ in devices_count.values()], \ - 'Currently only support 1 feature tensor per device, ' + \ - 'got {}.'.format(devices_count) - return {feat.device:feat for feat in features} + assert [1] * len(devices_count) == [ + _ for _ in devices_count.values() + ], "Currently only support 1 feature tensor per device, " + "got {}.".format(devices_count) + return {feat.device: feat for feat in features} student_feature = device2feature_gen(student_feature) teacher_feature = device2feature_gen(teacher_feature) - assert student_feature.keys() == teacher_feature.keys(), \ - 'Features from student model have different devices with that of ' + \ - 'teacher model, got student: {}, teacher: {}.'.format(student_feature.keys(), - teacher_feature.keys()) - output_device = torch.device('cuda:0') \ - if torch.device('cuda:0') in student_feature.keys() else torch.device('cpu') + assert student_feature.keys() == teacher_feature.keys(), ( + "Features from student model have different devices with that of " + + "teacher model, got student: {}, teacher: {}.".format(student_feature.keys(), teacher_feature.keys()) + ) + output_device = ( + torch.device("cuda:0") if torch.device("cuda:0") in student_feature.keys() else torch.device("cpu") + ) if init_feature_matchers: - feature_matcher = self.init_feature_matcher(student_feature[output_device], - teacher_feature[output_device]) + feature_matcher = self.init_feature_matcher( + student_feature[output_device], teacher_feature[output_device] + ) self.feature_matchers[student_layer] = feature_matcher tmp_loss = 0 @@ -1059,15 +1102,14 @@ def device2feature_gen(features): teacher_feature[device] = teacher_feature[device].to(output_device) stfeat, tefeat = student_feature[device], teacher_feature[device] stfeat = self.feature_matchers[student_layer](stfeat) - if self.loss_types[idx] == 'KL': - check_is_not_prob = \ - lambda x:(torch.abs(x.sum(dim=-1) - 1.0) > 0.2).any().item() + if self.loss_types[idx] == "KL": + check_is_not_prob = lambda x: (torch.abs(x.sum(dim=-1) - 1.0) > 0.2).any().item() if isinstance(self.feature_matchers[student_layer], torch.nn.Module): stfeat = torch.nn.LogSoftmax(dim=-1)(stfeat) else: if check_is_not_prob(stfeat): stfeat = torch.softmax(stfeat, dim=-1) - stfeat = torch.log(stfeat+1e-9) + stfeat = torch.log(stfeat + 1e-9) if check_is_not_prob(tefeat): tefeat = torch.softmax(tefeat, dim=-1) tmp_loss += self.loss_funcs[idx](stfeat, tefeat) * self.loss_weights[idx] @@ -1075,7 +1117,8 @@ def device2feature_gen(features): self.clear_features() return self.loss -@criterion_registry('IntermediateLayersKnowledgeDistillationLoss', 'pytorch') + +@criterion_registry("IntermediateLayersKnowledgeDistillationLoss", "pytorch") class PyTorchIntermediateLayersKnowledgeDistillationLossWrapper(object): """PyTorch Intermediate Layers Knowledge Distillation Loss Wrapper.""" @@ -1089,49 +1132,50 @@ def __init__(self, param_dict): def _param_check(self): param_dict = self.param_dict - _params = ['layer_mappings', 'loss_types', 'loss_weights', 'add_origin_loss'] - 
layer_mappings = param_dict['layer_mappings'] - if 'loss_types' not in param_dict or param_dict['loss_types'] == []: - param_dict['loss_types'] = ['MSE'] * len(layer_mappings) - if 'loss_weights' not in param_dict or param_dict['loss_weights'] == []: - param_dict['loss_weights'] = [1.0 / len(layer_mappings)] * len(layer_mappings) - if 'add_origin_loss' not in param_dict: - param_dict['add_origin_loss'] = False - assert 'layer_mappings' in param_dict, \ - 'Key layer_mappings must be in input parameters.' - assert all(type(param_dict[k]) in [list, tuple] \ - for k in ['layer_mappings', 'loss_types', 'loss_weights']), \ - 'Type of loss_types and loss_weights must be list or tuple.' - assert isinstance(param_dict['add_origin_loss'], bool), \ - 'Type of add_origin_loss should be bool.' - assert len(param_dict['layer_mappings']) == \ - len(param_dict['loss_types']) == len(param_dict['loss_weights']),\ - 'Length of layer_mappings, loss_types and loss_weights must be the same.' - assert all(type(it) in [list, tuple] and (len(it) == 1 or len(it) == 2) \ - for it in param_dict['layer_mappings']), \ - 'Each item in layer_mappings should be a list containing 1 tuple or 2 tuples, with format ' + \ - '[(layer_name, )] or [(student_layer_name, ), (teacher_layer_name, )], ' + \ - 'first one is the abbreviation for cases that student_layer_name and teacher_layer_name ' + \ - 'are the same. The length of tuples in the list could be either 1 like previous cases, ' + \ - 'or 2, like [(layer_name, layer_output_process)] or ' + \ - '[(student_layer_name, student_layer_output_process), ' + \ - '(teacher_layer_name, teacher_layer_output_process)].' + \ - 'For example, with 2 tuples of length 2, element looks like ' + \ - '[(\'student_model.layer1.attention\', \'1\'), (\'teacher_model.layer1.attention\', \'1\')], ' + \ - 'where \'student_model.layer1.attention\' and \'teacher_model.layer1.attention\' ' + \ - 'represent attention module on layer 1 of the student model and the ' + \ - 'teacher model respectively, two \'1\' represent the index to retrieve the ' + \ - 'desired output from the defined module\'s outputs, in this case, the above ' + \ - 'two module\'s outputs are lists, with desired output in index 1 of these ' + \ - 'lists, in cases of dict output, retrieving can be done by defining the ' + \ - 'corresponding key, in cases of module\'s output is the desired output, ' + \ - 'just adopt the format such as [(\'student_model.layer1.output' + \ - '.output\', ), (\'teacher_model.layer1.output\', )].' - assert all(any(isinstance(e, t) for t in [str, torch.nn.Module]) \ - for e in param_dict['loss_types']), \ - 'Type of loss_types element must be str or torch Module.' - assert all(0. <= e <= 1. for e in param_dict['loss_weights']), \ - 'Element of loss_weights must be in interval [0, 1].' + _params = ["layer_mappings", "loss_types", "loss_weights", "add_origin_loss"] + layer_mappings = param_dict["layer_mappings"] + if "loss_types" not in param_dict or param_dict["loss_types"] == []: + param_dict["loss_types"] = ["MSE"] * len(layer_mappings) + if "loss_weights" not in param_dict or param_dict["loss_weights"] == []: + param_dict["loss_weights"] = [1.0 / len(layer_mappings)] * len(layer_mappings) + if "add_origin_loss" not in param_dict: + param_dict["add_origin_loss"] = False + assert "layer_mappings" in param_dict, "Key layer_mappings must be in input parameters." 
+ assert all( + type(param_dict[k]) in [list, tuple] for k in ["layer_mappings", "loss_types", "loss_weights"] + ), "Type of loss_types and loss_weights must be list or tuple." + assert isinstance(param_dict["add_origin_loss"], bool), "Type of add_origin_loss should be bool." + assert ( + len(param_dict["layer_mappings"]) == len(param_dict["loss_types"]) == len(param_dict["loss_weights"]) + ), "Length of layer_mappings, loss_types and loss_weights must be the same." + assert all( + type(it) in [list, tuple] and (len(it) == 1 or len(it) == 2) for it in param_dict["layer_mappings"] + ), ( + "Each item in layer_mappings should be a list containing 1 tuple or 2 tuples, with format " + + "[(layer_name, )] or [(student_layer_name, ), (teacher_layer_name, )], " + + "first one is the abbreviation for cases that student_layer_name and teacher_layer_name " + + "are the same. The length of tuples in the list could be either 1 like previous cases, " + + "or 2, like [(layer_name, layer_output_process)] or " + + "[(student_layer_name, student_layer_output_process), " + + "(teacher_layer_name, teacher_layer_output_process)]." + + "For example, with 2 tuples of length 2, element looks like " + + "[('student_model.layer1.attention', '1'), ('teacher_model.layer1.attention', '1')], " + + "where 'student_model.layer1.attention' and 'teacher_model.layer1.attention' " + + "represent attention module on layer 1 of the student model and the " + + "teacher model respectively, two '1' represent the index to retrieve the " + + "desired output from the defined module's outputs, in this case, the above " + + "two module's outputs are lists, with desired output in index 1 of these " + + "lists, in cases of dict output, retrieving can be done by defining the " + + "corresponding key, in cases of module's output is the desired output, " + + "just adopt the format such as [('student_model.layer1.output" + + ".output', ), ('teacher_model.layer1.output', )]." + ) + assert all( + any(isinstance(e, t) for t in [str, torch.nn.Module]) for e in param_dict["loss_types"] + ), "Type of loss_types element must be str or torch Module." + assert all( + 0.0 <= e <= 1.0 for e in param_dict["loss_weights"] + ), "Element of loss_weights must be in interval [0, 1]." new_dict = {} for k in _params: new_dict[k] = param_dict[k] @@ -1150,8 +1194,16 @@ def __call__(self, **kwargs): class SelfKnowledgeDistillationLoss(KnowledgeDistillationFramework): """SelfKnowledge Distillation Loss.""" - def __init__(self, layer_mappings=[], loss_types=None, loss_weights=None, temperature=1.0,add_origin_loss=False, - student_model=None, teacher_model=None): + def __init__( + self, + layer_mappings=[], + loss_types=None, + loss_weights=None, + temperature=1.0, + add_origin_loss=False, + student_model=None, + teacher_model=None, + ): """Initialize SelfKnowledge Distillation Loss class. Args: @@ -1166,28 +1218,30 @@ def __init__(self, layer_mappings=[], loss_types=None, loss_weights=None, temper student_model (optional): student model. Defaults to None. teacher_model (optional): teacher model. Defaults to None. 
""" - super(SelfKnowledgeDistillationLoss, self).__init__(student_model=student_model, - teacher_model=teacher_model) + super(SelfKnowledgeDistillationLoss, self).__init__(student_model=student_model, teacher_model=teacher_model) self.temperature = temperature self.layer_mappings = [] for items in layer_mappings: for value in items: - assert len(value) == 2, 'Each item in layer_mappings ' + \ - 'should be a list or tuple of length 2, with format ' + \ - '[student_layer_name, teacher_layer_name].' + assert len(value) == 2, ( + "Each item in layer_mappings " + + "should be a list or tuple of length 2, with format " + + "[student_layer_name, teacher_layer_name]." + ) self.layer_mappings.append(items) - self.loss_weights = [1.0 / len(self.layer_mappings)] * len(self.layer_mappings) \ - if loss_weights is None else loss_weights - self.loss_types = ['CE'] * len(self.layer_mappings) \ - if loss_types is None else loss_types + self.loss_weights = ( + [1.0 / len(self.layer_mappings)] * len(self.layer_mappings) if loss_weights is None else loss_weights + ) + self.loss_types = ["CE"] * len(self.layer_mappings) if loss_types is None else loss_types self.add_origin_loss = add_origin_loss self.loss_funcs = [] self.init_loss_funcs() - assert len(self.layer_mappings) == len(self.loss_weights) == len(self.loss_types), \ - f'Wrong length for layer_mappings:{self.layer_mappings}, ' + \ - f'loss_weights:{self.loss_weights} or loss_types:{self.loss_types}, ' + \ - 'all should be the same.' + assert len(self.layer_mappings) == len(self.loss_weights) == len(self.loss_types), ( + f"Wrong length for layer_mappings:{self.layer_mappings}, " + + f"loss_weights:{self.loss_weights} or loss_types:{self.loss_types}, " + + "all should be the same." + ) def init_loss_funcs(self): """Init loss funcs. @@ -1195,8 +1249,7 @@ def init_loss_funcs(self): Raises: NotImplementedError: NotImplementedError """ - raise NotImplementedError('Function init_loss_funcs ' - 'should be framework related.') + raise NotImplementedError("Function init_loss_funcs " "should be framework related.") def teacher_model_forward(self, input, teacher_model=None): """Teacher model forward. @@ -1204,8 +1257,7 @@ def teacher_model_forward(self, input, teacher_model=None): Raises: NotImplementedError: NotImplementedError """ - raise NotImplementedError('Function teacher_model_forward ' - 'should be framework related.') + raise NotImplementedError("Function teacher_model_forward " "should be framework related.") def loss_cal(self, student_outputs): """Calculate loss. @@ -1213,8 +1265,7 @@ def loss_cal(self, student_outputs): Raises: NotImplementedError: NotImplementedError """ - raise NotImplementedError( - 'Function loss_cal should be framework related.') + raise NotImplementedError("Function loss_cal should be framework related.") def loss_cal_sloss(self, student_outputs, teacher_outputs, student_loss): """Calculate all losses between student model and teacher model. 
@@ -1237,12 +1288,19 @@ def __call__(self, student_outputs, targets): return 0 -class PyTorchSelfKnowledgeDistillationLoss( - SelfKnowledgeDistillationLoss -): +class PyTorchSelfKnowledgeDistillationLoss(SelfKnowledgeDistillationLoss): """PyTorch SelfKnowledge Distillation Loss.""" - def __init__(self, layer_mappings=[], loss_types=None, loss_weights=None, temperature=1.0,add_origin_loss=False, - student_model=None, teacher_model=None): + + def __init__( + self, + layer_mappings=[], + loss_types=None, + loss_weights=None, + temperature=1.0, + add_origin_loss=False, + student_model=None, + teacher_model=None, + ): """Initialize PyTorch SelfKnowledge Distillation Loss class. Args: @@ -1264,7 +1322,8 @@ def __init__(self, layer_mappings=[], loss_types=None, loss_weights=None, temper temperature=temperature, add_origin_loss=add_origin_loss, student_model=student_model, - teacher_model=teacher_model) + teacher_model=teacher_model, + ) def SoftCrossEntropy(self, logits, targets): """Return SoftCrossEntropy. @@ -1309,16 +1368,18 @@ def L2Divergence(self, feature1, feature2): def init_loss_funcs(self): """Init loss funcs.""" for loss_type in self.loss_types: - if loss_type == 'CE': + if loss_type == "CE": loss_func = self.SoftCrossEntropy - elif loss_type == 'KL': + elif loss_type == "KL": loss_func = self.KullbackLeiblerDivergence - elif loss_type == 'L2': + elif loss_type == "L2": loss_func = self.L2Divergence else: - raise NotImplementedError(f'Unsupported loss type {loss_type}, supported loss is' \ - ' CE for software CE, KL for Kullback-Leibler divergence and' \ - ' L2 for L2 distance.') + raise NotImplementedError( + f"Unsupported loss type {loss_type}, supported loss is" + " CE for software CE, KL for Kullback-Leibler divergence and" + " L2 for L2 distance." 
+ ) self.loss_funcs.append(loss_func) def loss_cal(self, student_outputs): @@ -1330,7 +1391,7 @@ def loss_cal(self, student_outputs): Returns: tensor: loss """ - self.loss = torch.FloatTensor([0.]) + self.loss = torch.FloatTensor([0.0]) tmp_loss = 0 temperature = self.temperature for loss_idx in range(len(self.layer_mappings)): @@ -1340,11 +1401,14 @@ def loss_cal(self, student_outputs): student_feature = student_outputs[student_layer] teacher_feature = student_outputs[teacher_layer] if loss_idx == 1: # soft logit - tmp_loss += self.loss_funcs[loss_idx]( - student_feature/temperature, teacher_feature/temperature) * self.loss_weights[loss_idx] + tmp_loss += ( + self.loss_funcs[loss_idx](student_feature / temperature, teacher_feature / temperature) + * self.loss_weights[loss_idx] + ) else: # feature learning - tmp_loss += self.loss_funcs[loss_idx]( - student_feature, teacher_feature) * self.loss_weights[loss_idx] + tmp_loss += ( + self.loss_funcs[loss_idx](student_feature, teacher_feature) * self.loss_weights[loss_idx] + ) if tmp_loss.device != self.loss.device: self.loss = self.loss.to(tmp_loss.device) self.loss += tmp_loss @@ -1364,8 +1428,9 @@ def teacher_model_forward(self, input, teacher_model=None, device=None): outputs = None if self.loss_weights[1] > 0: model = self.teacher_model if teacher_model is None else teacher_model - assert isinstance(model, torch.nn.Module), \ - 'Teacher model should be a torch Module instead of {}'.format(type(model)) + assert isinstance(model, torch.nn.Module), "Teacher model should be a torch Module instead of {}".format( + type(model) + ) model.eval() try: model_device = next(model.parameters()).device @@ -1381,9 +1446,10 @@ def teacher_model_forward(self, input, teacher_model=None, device=None): return outputs -@criterion_registry('SelfKnowledgeDistillationLoss', 'pytorch') +@criterion_registry("SelfKnowledgeDistillationLoss", "pytorch") class PyTorchSelfKnowledgeDistillationLossWrapper(object): """PyTorch SelfKnowledge Distillation Loss Wrapper.""" + def __init__(self, param_dict): """Initialize PyTorchSelfKnowledgeDistillationLossWrapper class. @@ -1394,44 +1460,40 @@ def __init__(self, param_dict): def _param_check(self): param_dict = self.param_dict - _params = ['temperature', 'layer_mappings', - 'loss_types', 'loss_weights', 'add_origin_loss'] - layer_mappings = param_dict['layer_mappings'] - if 'loss_types' not in param_dict: - param_dict['loss_types'] = ['CE'] * len(layer_mappings) - if 'loss_weights' not in param_dict: - param_dict['loss_weights'] = [ - 1.0 / len(layer_mappings)] * len(layer_mappings) - if 'add_origin_loss' not in param_dict: - param_dict['add_origin_loss'] = False - if 'temperature' not in param_dict: - param_dict['temperature'] = 1.0 - assert 'layer_mappings' in param_dict, \ - 'Key layer_mappings must be in input parameters.' - assert all(type(param_dict[k]) in [list, tuple] - for k in ['layer_mappings', 'loss_types', 'loss_weights']), \ - 'Type of loss_types and loss_weights must be list or tuple.' - assert isinstance(param_dict['add_origin_loss'], bool), \ - 'Type of add_origin_loss should be bool.' - assert len(param_dict['layer_mappings']) == \ - len(param_dict['loss_types']) == len(param_dict['loss_weights']),\ - 'Length of layer_mappings, loss_types and loss_weights must be the same.' - assert param_dict['temperature'] > 0.0,\ - 'Value of temperature must be positive.' 
- for items in param_dict['layer_mappings']: - assert all(type(it) in [list, tuple] and (len(it) == 2) - for it in items), \ - 'Elements of layer_mappings must be list or tuple and with length of 2.' + \ - 'element looks like [\'resblock.1.feature.output,' + \ - '\'resblock.deepst.feature.output\'], where ' + \ - '\'resblock.1.feature.output\' and \'resblock.deepst.feature.output\' ' + \ - 'represent resblock feature output of the student model and feature output of the' + \ - 'teacher model respectively.' - assert all(any(isinstance(e, t) for t in [str]) - for e in param_dict['loss_types']), \ - 'Type of loss_types element must be str.' - assert all(0. <= e <= 1. for e in param_dict['loss_weights']), \ - 'Element of loss_weights must be in interval [0, 1].' + _params = ["temperature", "layer_mappings", "loss_types", "loss_weights", "add_origin_loss"] + layer_mappings = param_dict["layer_mappings"] + if "loss_types" not in param_dict: + param_dict["loss_types"] = ["CE"] * len(layer_mappings) + if "loss_weights" not in param_dict: + param_dict["loss_weights"] = [1.0 / len(layer_mappings)] * len(layer_mappings) + if "add_origin_loss" not in param_dict: + param_dict["add_origin_loss"] = False + if "temperature" not in param_dict: + param_dict["temperature"] = 1.0 + assert "layer_mappings" in param_dict, "Key layer_mappings must be in input parameters." + assert all( + type(param_dict[k]) in [list, tuple] for k in ["layer_mappings", "loss_types", "loss_weights"] + ), "Type of loss_types and loss_weights must be list or tuple." + assert isinstance(param_dict["add_origin_loss"], bool), "Type of add_origin_loss should be bool." + assert ( + len(param_dict["layer_mappings"]) == len(param_dict["loss_types"]) == len(param_dict["loss_weights"]) + ), "Length of layer_mappings, loss_types and loss_weights must be the same." + assert param_dict["temperature"] > 0.0, "Value of temperature must be positive." + for items in param_dict["layer_mappings"]: + assert all(type(it) in [list, tuple] and (len(it) == 2) for it in items), ( + "Elements of layer_mappings must be list or tuple and with length of 2." + + "element looks like ['resblock.1.feature.output," + + "'resblock.deepst.feature.output'], where " + + "'resblock.1.feature.output' and 'resblock.deepst.feature.output' " + + "represent resblock feature output of the student model and feature output of the" + + "teacher model respectively." + ) + assert all( + any(isinstance(e, t) for t in [str]) for e in param_dict["loss_types"] + ), "Type of loss_types element must be str." + assert all( + 0.0 <= e <= 1.0 for e in param_dict["loss_weights"] + ), "Element of loss_weights must be in interval [0, 1]." 
new_dict = {} for k in _params: new_dict[k] = param_dict[k] diff --git a/neural_compressor/compression/hpo/__init__.py b/neural_compressor/compression/hpo/__init__.py index 54dfe3cd14c..3ed5a53f4a5 100644 --- a/neural_compressor/compression/hpo/__init__.py +++ b/neural_compressor/compression/hpo/__init__.py @@ -19,4 +19,3 @@ from .search_space import * from .sa_optimizer import * from .search_algorithms import * - diff --git a/neural_compressor/compression/hpo/sa_optimizer.py b/neural_compressor/compression/hpo/sa_optimizer.py index ba0c8b7e680..0c143af694c 100644 --- a/neural_compressor/compression/hpo/sa_optimizer.py +++ b/neural_compressor/compression/hpo/sa_optimizer.py @@ -1,4 +1,4 @@ -"""Simulated Annealing Optimizer""" +"""Simulated Annealing Optimizer.""" # -*- coding: utf-8 -*- # # Copyright (c) 2021 Intel Corporation @@ -15,8 +15,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import time import math +import time from random import random import numpy as np @@ -25,21 +25,22 @@ from neural_compressor.utils import logger except: import logging + logger = logging.getLogger("sa_optimizer") class SimulatedAnnealingOptimizer(object): def __init__( - self, - generate_func=None, - T0=100, - Tf=0.01, - higher_is_better=True, - alpha=None, - iter=500, - early_stop=50, - log_interval=50 - ): + self, + generate_func=None, + T0=100, + Tf=0.01, + higher_is_better=True, + alpha=None, + iter=500, + early_stop=50, + log_interval=50, + ): """Initialize.""" self.generate_func = generate_func self.T0 = T0 @@ -50,9 +51,9 @@ def __init__( self.iter = iter self.early_stop = early_stop self.log_interval = log_interval - self.best = (float('-inf'), None) if self.higher_is_better else (float('inf'), None) - self.history = {'T': [], 'F': []} - + self.best = (float("-inf"), None) if self.higher_is_better else (float("inf"), None) + self.history = {"T": [], "F": []} + def _metrospolis(self, f, f_new): if (not self.higher_is_better and f_new <= f) or (self.higher_is_better and f_new >= f): return 1 @@ -65,7 +66,7 @@ def _metrospolis(self, f, f_new): return 1 else: return 0 - + def _generate_new_points(self, points): new_points = np.array(points) new_points += self.T * (np.random.random(new_points.shape) - np.random.random(new_points.shape)) @@ -76,10 +77,10 @@ def gen_next_params(self, func, points): count = 0 last_modify = 0 self.T = self.T0 - self.best = (float('-inf'), None) if self.higher_is_better else (float('inf'), None) + self.best = (float("-inf"), None) if self.higher_is_better else (float("inf"), None) scores = func(points) - self.history = {'T': [], 'F': [], 'P': []} + self.history = {"T": [], "F": [], "P": []} st = time.time() while self.T > self.Tf: @@ -93,18 +94,19 @@ def gen_next_params(self, func, points): if self._metrospolis(scores[i], s): points[i] = new_points[i] scores[i] = s - if (not self.higher_is_better and scores[i] < self.best[0]) \ - or (self.higher_is_better and scores[i] > self.best[0]): + if (not self.higher_is_better and scores[i] < self.best[0]) or ( + self.higher_is_better and scores[i] > self.best[0] + ): last_modify = count self.best = (scores[i], [float(v) for v in points[i]]) - - self.history['T'].append(self.T) + + self.history["T"].append(self.T) if self.higher_is_better: - self.history['F'].append(max(scores)) - self.history['P'].append(points[np.argmax(scores)]) + self.history["F"].append(max(scores)) + self.history["P"].append(points[np.argmax(scores)]) else: - self.history['F'].append(min(scores)) - 
self.history['P'].append(points[np.argmax(scores)]) + self.history["F"].append(min(scores)) + self.history["P"].append(points[np.argmax(scores)]) if self.alpha: self.T *= self.alpha @@ -114,9 +116,11 @@ def gen_next_params(self, func, points): if self.log_interval and count % self.log_interval == 0: elapse = time.time() - st - logger.debug(f'SA iter: {count}\tlast_update: {last_modify}\t \ + logger.debug( + f"SA iter: {count}\tlast_update: {last_modify}\t \ max score: {self.best[0]}\tpoint: {self.best[1]}\t \ - temp: {self.T}\telasped: {elapse}') + temp: {self.T}\telasped: {elapse}" + ) if count - last_modify > self.early_stop: break diff --git a/neural_compressor/compression/hpo/search_algorithms.py b/neural_compressor/compression/hpo/search_algorithms.py index bcdbfed10bd..e3a51d36049 100644 --- a/neural_compressor/compression/hpo/search_algorithms.py +++ b/neural_compressor/compression/hpo/search_algorithms.py @@ -19,31 +19,33 @@ import xgboost as xgb from neural_compressor.strategy.bayesian import BayesianOptimization -from ...config import HPOConfig -from .search_space import BaseSearchSpace, DiscreteSearchSpace, ContinuousSearchSpace +from ...config import HPOConfig from .sa_optimizer import SimulatedAnnealingOptimizer +from .search_space import BaseSearchSpace, ContinuousSearchSpace, DiscreteSearchSpace try: from neural_compressor.utils import logger except: import logging + logger = logging.getLogger(__name__) SEARCHERS = {} - def prepare_hpo(config): - assert isinstance(config, HPOConfig), f'config should be {HPOConfig.__name__}' + assert isinstance(config, HPOConfig), f"config should be {HPOConfig.__name__}" assert config.searcher in SEARCHERS.keys(), f"current only support search algorithms: {SEARCHERS.keys()}" - if config.searcher == 'xgb': - return SEARCHERS[config.searcher](config.search_space, - higher_is_better=config.higher_is_better, - loss_type=config.loss_type, - min_train_samples=config.min_train_samples, - seed=config.seed) + if config.searcher == "xgb": + return SEARCHERS[config.searcher]( + config.search_space, + higher_is_better=config.higher_is_better, + loss_type=config.loss_type, + min_train_samples=config.min_train_samples, + seed=config.seed, + ) else: return SEARCHERS[config.searcher](config.search_space) @@ -61,9 +63,11 @@ def register_searcher(name): Returns: cls: The class of register. """ + def register(searcher): SEARCHERS[name] = searcher return searcher + return register @@ -73,17 +77,20 @@ class Searcher(object): Args: search_space (dict): A dictionary for defining the search space. """ + def __init__(self, search_space): - assert isinstance(search_space, dict) and search_space, \ - "Expect search_space to be a dict." + assert isinstance(search_space, dict) and search_space, "Expect search_space to be a dict." 
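As an orientation aid, here is a minimal editorial sketch (not part of the patch) of the suggest()/get_feedback() loop shared by the searchers defined in this module; the search-space values and the `evaluate` call are assumptions for illustration only.

```python
# Minimal usage sketch of the searcher interface; `evaluate` is a hypothetical
# user-supplied objective and the choices below are illustrative.
search_space = {
    "learning_rate": [1e-5, 5e-5, 1e-4],  # a list/tuple enumerates discrete choices
    "num_layers": [6, 8, 12],
}
searcher = GridSearcher(search_space)  # walks the full cartesian pool of choices

for _ in range(9):
    params = searcher.suggest()    # e.g. {"learning_rate": 1e-05, "num_layers": 6}
    metric = evaluate(params)      # hypothetical training/evaluation call
    searcher.get_feedback(metric)  # model-based searchers (bayesian/xgb) learn from this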
self.search_space = search_space self.search_space_keys = sorted(search_space.keys()) self.search_space_pool = self._create_search_space_pool() self.best = None for k in self.search_space_keys: - assert isinstance(self.search_space[k], (list, tuple, BaseSearchSpace)), \ - "Value of key \'{}\' must be a list, tuple,\ - CountinuousSearchSpace or DiscreteSearchSpace to specify choices".format(k) + assert isinstance( + self.search_space[k], (list, tuple, BaseSearchSpace) + ), "Value of key '{}' must be a list, tuple,\ + CountinuousSearchSpace or DiscreteSearchSpace to specify choices".format( + k + ) def _create_search_space_pool(self): """Build the search space pool.""" @@ -98,8 +105,7 @@ def _create_search_space_pool(self): def suggest(self): """Suggest the model hyperparameter.""" - raise NotImplementedError( - 'Depends on specific search algorithm.') # pragma: no cover + raise NotImplementedError("Depends on specific search algorithm.") # pragma: no cover def get_feedback(self, metric): """Get metric feedback for the search algorithm.""" @@ -113,8 +119,9 @@ def params_vec2params_dict(self, para_vec): Returns: Parameters dictionary defining the model hyperparameter. """ - assert len(para_vec) == len(self.search_space_keys), \ - "Length of para_vec and search_space_keys should be the same." + assert len(para_vec) == len( + self.search_space_keys + ), "Length of para_vec and search_space_keys should be the same." return {k: para_vec[i] for i, k in enumerate(self.search_space_keys)} @@ -127,15 +134,14 @@ class GridSearcher(Searcher): Args: search_space (dict): A dictionary for defining the search space. """ + def __init__(self, search_space): """Initialize the attributes.""" super().__init__(search_space) for space in self.search_space_pool: - if space.type == 'continuous': - raise TypeError( - "GridSearcher not support continuous datatype, please use other algorithm." - ) + if space.type == "continuous": + raise TypeError("GridSearcher not support continuous datatype, please use other algorithm.") self.idx = [0] * len(self.search_space_pool) @@ -164,7 +170,7 @@ def suggest(self): for i in range(len(self.idx)): param.append(self.search_space_pool[i].get_value(self.idx[i])) if not self._add_idx(): - logger.warning('run out of search space pool, rebuild...') + logger.warning("run out of search space pool, rebuild...") self.idx = [0] * len(self.search_space_pool) return self.params_vec2params_dict(param) @@ -178,6 +184,7 @@ class RandomSearcher(Searcher): Args: search_space (dict): A dictionary for defining the search space. """ + def __init__(self, search_space): """Initialize the attributes.""" super().__init__(search_space) @@ -201,6 +208,7 @@ class BayesianOptimizationSearcher(Searcher): Args: search_space (dict): A dictionary for defining the search space. """ + def __init__(self, search_space, seed=42): """Initialize the attributes.""" super().__init__(search_space) @@ -210,8 +218,7 @@ def __init__(self, search_space, seed=42): idx_search_space[key] = tuple(space.bound) else: idx_search_space[key] = (0, space.total_num - 1) - self.bo_agent = BayesianOptimization(idx_search_space, - random_seed=seed) + self.bo_agent = BayesianOptimization(idx_search_space, random_seed=seed) def suggest(self): """Suggest the model hyperparameter. 
@@ -221,21 +228,20 @@ def suggest(self): """ param_indices = self.bo_agent.gen_next_params() self.last_param_indices = param_indices - return self.params_vec2params_dict( - self.indices2params_vec(param_indices)) + return self.params_vec2params_dict(self.indices2params_vec(param_indices)) def get_feedback(self, metric): """Get metric feedback and register this metric.""" - assert self.last_param_indices is not None, "Need run suggest first " + \ - "to get parameters and the input metric is corresponding to this parameters." + assert self.last_param_indices is not None, ( + "Need run suggest first " + "to get parameters and the input metric is corresponding to this parameters." + ) try: self.bo_agent._space.register(self.last_param_indices, metric) except KeyError: # pragma: no cover logger.debug("Find registered params, skip it.") pass if self.best is None or self.best[1] < metric: - param = self.params_vec2params_dict( - self.indices2params_vec(self.last_param_indices)) + param = self.params_vec2params_dict(self.indices2params_vec(self.last_param_indices)) self.best = (param, metric) self.last_param_indices = None @@ -267,12 +273,8 @@ class XgbSearcher(Searcher): Args: search_space (dict): A dictionary for defining the search space. """ - def __init__(self, - search_space, - higher_is_better=True, - loss_type='reg', - min_train_samples=10, - seed=42): + + def __init__(self, search_space, higher_is_better=True, loss_type="reg", min_train_samples=10, seed=42): """Initialize the attributes.""" super().__init__(search_space) @@ -286,35 +288,34 @@ def __init__(self, self._x = [] self._y = [] if loss_type == "reg": - self.model = xgb.XGBRegressor(max_depth=3, - n_estimators=100, - gamma=0.0001, - min_child_weight=1, - subsample=1.0, - eta=0.3, - reg_lambda=1.00, - reg_alpha=0, - objective='reg:squarederror') + self.model = xgb.XGBRegressor( + max_depth=3, + n_estimators=100, + gamma=0.0001, + min_child_weight=1, + subsample=1.0, + eta=0.3, + reg_lambda=1.00, + reg_alpha=0, + objective="reg:squarederror", + ) elif loss_type == "rank": - self.model = xgb.XGBRanker(max_depth=3, - n_estimators=100, - gamma=0.0001, - min_child_weight=1, - subsample=1.0, - eta=0.3, - reg_lambda=1.00, - reg_alpha=0, - objective='rank:pairwise') + self.model = xgb.XGBRanker( + max_depth=3, + n_estimators=100, + gamma=0.0001, + min_child_weight=1, + subsample=1.0, + eta=0.3, + reg_lambda=1.00, + reg_alpha=0, + objective="rank:pairwise", + ) else: # pragma: no cover - raise RuntimeError( - "Invalid loss type: {}, only surport reg and rank".format( - loss_type)) + raise RuntimeError("Invalid loss type: {}, only surport reg and rank".format(loss_type)) self.optimizer = SimulatedAnnealingOptimizer( - generate_func=self._generate_new_points, - T0=100, - Tf=0, - alpha=0.9, - higher_is_better=self.higher_is_better) + generate_func=self._generate_new_points, T0=100, Tf=0, alpha=0.9, higher_is_better=self.higher_is_better + ) def _generate_new_points(self, points): new_points = [] @@ -334,21 +335,21 @@ def suggest(self): x_train, y_train = np.array(self._x), np.array(self._y) self.model.fit(x_train, y_train) - params = self.optimizer.gen_next_params(self.model.predict, - self._x) + params = self.optimizer.gen_next_params(self.model.predict, self._x) self.last_params = params return self.params_vec2params_dict(params) def get_feedback(self, metric): """Get metric feedback and register this metric.""" - assert self.last_params is not None, "Need run suggest first " + \ - "to get parameters and the input metric is corresponding to 
this parameters." + assert self.last_params is not None, ( + "Need run suggest first " + "to get parameters and the input metric is corresponding to this parameters." + ) if self.best is None or self.best[1] < metric: self.best = (self.params_vec2params_dict(self.last_params), metric) self._x.append(self.last_params) self._y.append(metric) - params_key = '_'.join([str(x) for x in self.last_params]) + params_key = "_".join([str(x) for x in self.last_params]) self.log[params_key] = metric self.last_params = None @@ -360,5 +361,5 @@ def feedback(self, param, metric): self.best = (param, metric) self._x.append(param_list) self._y.append(metric) - params_key = '_'.join([str(x) for x in param]) + params_key = "_".join([str(x) for x in param]) self.log[params_key] = metric diff --git a/neural_compressor/compression/hpo/search_space.py b/neural_compressor/compression/hpo/search_space.py index bab25a886e5..27557fe2ae6 100644 --- a/neural_compressor/compression/hpo/search_space.py +++ b/neural_compressor/compression/hpo/search_space.py @@ -37,13 +37,8 @@ class SearchSpace: 'weight_decay': SearchSpace((0.0001, 0.001), type='continuous') } """ - def __new__( - cls, - bound=None, - interval=None, - value=None, - type=None - ): + + def __new__(cls, bound=None, interval=None, value=None, type=None): if type is None: if interval is not None or value is not None: type = "discrete" @@ -63,6 +58,7 @@ def register_searchspace(name): Returns: cls: The class of register. """ + def register(search_space): SEARCHSPACE[name] = search_space return search_space @@ -72,13 +68,8 @@ def register(search_space): class BaseSearchSpace(object): """Base class for Search Space.""" - def __init__( - self, - bound=None, - interval=None, - value=None, - type=None - ): + + def __init__(self, bound=None, interval=None, value=None, type=None): """Initialize.""" if bound: if not isinstance(bound, (list, tuple)): # pragma: no cover @@ -93,7 +84,7 @@ def __init__( self.interval = interval self.value = value self.type = type - if type == 'discrete': + if type == "discrete": if value: self.total_num = len(value) else: @@ -102,23 +93,21 @@ def __init__( self.total_num = float("inf") def get_value(self): - """get one value from the search space.""" + """Get one value from the search space.""" pass @register_searchspace("discrete") class DiscreteSearchSpace(BaseSearchSpace): """Discrete Search Space.""" + def __init__(self, bound=None, interval=None, value=None, type=None): if bound and interval is None: if isinstance(bound[0], int) and isinstance(bound[1], int): interval = 1 else: interval = 0.01 - super().__init__(bound=bound, - interval=interval, - value=value, - type='discrete') + super().__init__(bound=bound, interval=interval, value=value, type="discrete") def get_random_value(self): """Get a random value from search space.""" @@ -137,7 +126,10 @@ def get_all(self): return [self.get_nth_value(i) for i in range(self.total_num)] def get_value(self, idx=None): - """Get number n value from search space if idx is given. Otherwise, get a random value.""" + """Get number n value from search space if idx is given. + + Otherwise, get a random value. 
+ """ if idx is not None: if not isinstance(idx, int): raise TypeError("The type of idx should be int, not {}".format(type(idx))) @@ -147,18 +139,19 @@ def get_value(self, idx=None): else: value = self.get_random_value() return value - + def index(self, value): """Return the index of the value.""" if self.value: return self.value.index(value) else: return int((value - self.bound[0]) / self.interval) - + @register_searchspace("continuous") class ContinuousSearchSpace(BaseSearchSpace): """Continuous Search Space.""" + def __init__(self, bound, interval=None, value=None, type=None): super().__init__(bound, interval, value, "continuous") diff --git a/neural_compressor/compression/pruner/README.md b/neural_compressor/compression/pruner/README.md index 3561ea7d396..254341b6b41 100644 --- a/neural_compressor/compression/pruner/README.md +++ b/neural_compressor/compression/pruner/README.md @@ -266,35 +266,35 @@ The following section exemplifies how to use hooks in user pass-in training func - Step 1: Define a dict-like configuration in your training codes. Usually only 5-7 configuration items need to be identified. For customized pruning, a configuration template is shown below: ```python - configs = [ - { ## Example of a regular configuration - "op_names": ['layer1.*'], # A list of modules that would be pruned. All linear/conv layers will be hooked when op_names is not explicitly defined. - "start_step": 1, # Step at which to begin pruning, if a gradient-based criterion is used (e.g., snip-momentum), start_step should be equal to or greater than 1. - "end_step": 10000, # Step at which to end pruning, for one-shot pruning start_step = end_step. - "excluded_op_names": ['.*embeddings*'], # A list of modules that would not be pruned. - 'target_sparsity': 0.9, # Target sparsity ratio of modules. - "pruning_frequency": 250, # Frequency of applying pruning, The recommended setting is one fortieth of the pruning steps. - "pattern": "4x1", # Default pruning pattern. - }, # The missing parameter items would be complemented by default settings (i.e. start_step = 1) - - - # It also supports setting multiple pruners, and fine-grained pruning by partition. - { ## pruner2 - 'target_sparsity': 0.9, # Target sparsity ratio of modules. - 'pruning_type': "snip_momentum", # Default pruning type. - 'pattern': "4x1", # Default pruning pattern. - 'op_names': ['layer2.*'], # A list of modules that would be pruned. - 'excluded_op_names': ['layer3.*'], # A list of modules that would not be pruned. - 'start_step': 1, # Step at which to begin pruning. - 'end_step': 10, # Step at which to end pruning. - 'pruning_scope': "global", # Default pruning scope. - 'pruning_frequency': 1, # Frequency of applying pruning. - 'min_sparsity_ratio_per_op': 0.0, # Minimum sparsity ratio of each module. - 'max_sparsity_ratio_per_op': 0.98, # Maximum sparsity ratio of each module. - 'sparsity_decay_type': "exp", # Function applied to control pruning rate. - 'pruning_op_types': ['Conv', 'Linear'], # Types of op that would be pruned. - } - ] + configs = [ + { ## Example of a regular configuration + "op_names": [ + "layer1.*" + ], # A list of modules that would be pruned. All linear/conv layers will be hooked when op_names is not explicitly defined. + "start_step": 1, # Step at which to begin pruning, if a gradient-based criterion is used (e.g., snip-momentum), start_step should be equal to or greater than 1. + "end_step": 10000, # Step at which to end pruning, for one-shot pruning start_step = end_step. 
+ "excluded_op_names": [".*embeddings*"], # A list of modules that would not be pruned. + "target_sparsity": 0.9, # Target sparsity ratio of modules. + "pruning_frequency": 250, # Frequency of applying pruning, The recommended setting is one fortieth of the pruning steps. + "pattern": "4x1", # Default pruning pattern. + }, # The missing parameter items would be complemented by default settings (i.e. start_step = 1) + # It also supports setting multiple pruners, and fine-grained pruning by partition. + { ## pruner2 + "target_sparsity": 0.9, # Target sparsity ratio of modules. + "pruning_type": "snip_momentum", # Default pruning type. + "pattern": "4x1", # Default pruning pattern. + "op_names": ["layer2.*"], # A list of modules that would be pruned. + "excluded_op_names": ["layer3.*"], # A list of modules that would not be pruned. + "start_step": 1, # Step at which to begin pruning. + "end_step": 10, # Step at which to end pruning. + "pruning_scope": "global", # Default pruning scope. + "pruning_frequency": 1, # Frequency of applying pruning. + "min_sparsity_ratio_per_op": 0.0, # Minimum sparsity ratio of each module. + "max_sparsity_ratio_per_op": 0.98, # Maximum sparsity ratio of each module. + "sparsity_decay_type": "exp", # Function applied to control pruning rate. + "pruning_op_types": ["Conv", "Linear"], # Types of op that would be pruned. + }, + ] ``` - Step 2: Enable pruning functionalities @@ -302,49 +302,50 @@ The following section exemplifies how to use hooks in user pass-in training func [**Experimental option** ]Modify model and optimizer. ```python - from neural_compressor import WeightPruningConfig - from neural_compressor.experimental.compression import prepare_pruning - config = WeightPruningConfig(configs) - prepare_pruning(config, model, optimizer) # modify model and optimizer - for epoch in range(num_train_epochs): - model.train() - for step, batch in enumerate(train_dataloader): - outputs = model(**batch) - loss = outputs.loss - loss.backward() - optimizer.step() - lr_scheduler.step() - model.zero_grad() - ``` + from neural_compressor import WeightPruningConfig + from neural_compressor.experimental.compression import prepare_pruning + + config = WeightPruningConfig(configs) + prepare_pruning(config, model, optimizer) # modify model and optimizer + for epoch in range(num_train_epochs): + model.train() + for step, batch in enumerate(train_dataloader): + outputs = model(**batch) + loss = outputs.loss + loss.backward() + optimizer.step() + lr_scheduler.step() + model.zero_grad() + ``` - [**Stable Option** ]Insert Hook functions in your codes. - - ```python - """ All you need is to insert following API functions to your codes: - on_train_begin() # Setup pruners - on_step_begin() # Prune weights - on_before_optimizer_step() # Do weight regularization - on_after_optimizer_step() # Update weights' criteria, mask weights - on_train_end() # End of pruner, print sparse information - """ - from neural_compressor.training import prepare_compression, WeightPruningConfig - config = WeightPruningConfig(configs) - compression_manager = prepare_compression(model, config) # Define a pruning object. 
- compression_manager.callbacks.on_train_begin() ## insert hook - for epoch in range(num_train_epochs): - model.train() - for step, batch in enumerate(train_dataloader): - compression_manager.callbacks.on_step_begin(step) - outputs = model(**batch) - loss = outputs.loss - loss.backward() - compression_manager.callbacks.on_before_optimizer_step() - optimizer.step() - compression_manager.callbacks.on_after_optimizer_step() - lr_scheduler.step() - model.zero_grad() - compression_manager.callbacks.on_train_end() + [**Stable Option** ]Insert Hook functions in your codes. + + ```python + """ All you need is to insert following API functions to your codes: + on_train_begin() # Setup pruners + on_step_begin() # Prune weights + on_before_optimizer_step() # Do weight regularization + on_after_optimizer_step() # Update weights' criteria, mask weights + on_train_end() # End of pruner, print sparse information + """ + from neural_compressor.training import prepare_compression, WeightPruningConfig + config = WeightPruningConfig(configs) + compression_manager = prepare_compression(model, config) # Define a pruning object. + compression_manager.callbacks.on_train_begin() ## insert hook + for epoch in range(num_train_epochs): + model.train() + for step, batch in enumerate(train_dataloader): + compression_manager.callbacks.on_step_begin(step) + outputs = model(**batch) + loss = outputs.loss + loss.backward() + compression_manager.callbacks.on_before_optimizer_step() + optimizer.step() + compression_manager.callbacks.on_after_optimizer_step() + lr_scheduler.step() + model.zero_grad() + compression_manager.callbacks.on_train_end() ``` In the case mentioned above, pruning process can be done by pre-defined hooks in Neural Compressor. Users need to place those hooks inside the training function. diff --git a/neural_compressor/compression/pruner/__init__.py b/neural_compressor/compression/pruner/__init__.py index dd30b9a1946..e9b9a53a3a2 100644 --- a/neural_compressor/compression/pruner/__init__.py +++ b/neural_compressor/compression/pruner/__init__.py @@ -1,4 +1,4 @@ -"""pruning init.""" +"""Pruning init.""" # !/usr/bin/env python # -*- coding: utf-8 -*- # @@ -23,11 +23,9 @@ from .utils import process_config, torch, logger from typing import Optional, Union -FRAMEWORK = { - 'pytorch': 'pt', - 'keras': 'keras' -} - +FRAMEWORK = {"pytorch": "pt", "keras": "keras"} + + def _register_on_step_begin(model): """Mount on_step_begin to the model. @@ -42,6 +40,7 @@ def hook(module, input): hook_handle = model.register_forward_pre_hook(hook) return hook_handle + # def _register_on_step_end(model: torch.nn.Module): # """Mount on_step_end to the model. @@ -53,7 +52,8 @@ def hook(module, input): # pruning.on_step_end() # hook_handle = model.register_backward_hook(hook) # return hook_handle - + + def _rewrite_optimizer_step(opt): """Mount on_before/after_optimizer_step to optimizer. @@ -74,23 +74,19 @@ def new_step(self, closure=None): for pruning in self.prunings: pruning.on_after_optimizer_step() return res - + if not isinstance(opt, torch.optim.Optimizer): logger.error("User optimizer should be a torch.optim.Optimizer object") - + opt.orig_step = opt.step import types + opt.step = types.MethodType(new_step, opt) - + return opt -def save( - obj: object, - f, - pickle_module=None, - pickle_protocol=None, - _use_new_zipfile_serialization=None -): + +def save(obj: object, f, pickle_module=None, pickle_protocol=None, _use_new_zipfile_serialization=None): """A rewrite function for torch save. 
:param obj: @@ -101,12 +97,12 @@ def save( :return: """ params = {} - if pickle_module != None: - params['pickle_module'] = pickle_module - if pickle_protocol != None: - params['pickle_protocol'] = pickle_protocol - if _use_new_zipfile_serialization != None: - params['_use_new_zipfile_serialization'] = _use_new_zipfile_serialization + if pickle_module is not None: + params["pickle_module"] = pickle_module + if pickle_protocol is not None: + params["pickle_protocol"] = pickle_protocol + if _use_new_zipfile_serialization is not None: + params["_use_new_zipfile_serialization"] = _use_new_zipfile_serialization if isinstance(obj, torch.nn.Module) and hasattr(obj, "prunings"): prunings = obj.prunings @@ -143,6 +139,7 @@ def save( else: torch.orig_save(obj, f) + def _prepare_hooks(model, pruning_list, opt=None): """Wrapper the model and optimizer to support all the pruning functionality. @@ -154,14 +151,15 @@ def _prepare_hooks(model, pruning_list, opt=None): if opt is not None: opt.prunings = pruning_list _rewrite_optimizer_step(opt) - + # Register automated hooks inc_hook_handle = _register_on_step_begin(model) model.inc_hook_handle = inc_hook_handle # Rewrite torch save torch.orig_save = torch.save - setattr(torch, 'save', save) - + setattr(torch, "save", save) + + # def complete_pruning(model: torch.nn.Module, opt: torch.optim): # """UnWrapper the model and optimizer # :param model: the modified model @@ -181,8 +179,10 @@ def _prepare_hooks(model, pruning_list, opt=None): # delattr(opt, "orig_step") # return model, opt -def prepare_pruning(config, model, optimizer=None, dataloader=None, - loss_func=None, framework='pytorch', device: str=None): + +def prepare_pruning( + config, model, optimizer=None, dataloader=None, loss_func=None, framework="pytorch", device: str = None +): """Get registered pruning class, wrapper the model and optimizer to support all the pruning functionality. Get a pruning object from PRUNINGS. @@ -199,39 +199,36 @@ def prepare_pruning(config, model, optimizer=None, dataloader=None, # assert framework in FRAMEWORK.keys(), \ # f"does not support {framework}, currently only support framework: {FRAMEWORK.keys()}" - assert framework=='pytorch', f"The Automation API currently only supports the 'pytorch' framework, " \ - f"but the framework given is: {framework}" + assert framework == "pytorch", ( + f"The Automation API currently only supports the 'pytorch' framework, " + f"but the framework given is: {framework}" + ) pruning_list = [] pruning_conf = process_config(config) if optimizer is not None: basic_conf = [] for pruner_info in pruning_conf: - if 'gpt' in pruner_info["pruning_type"] or 'retrain' in pruner_info["pruning_type"]: + if "gpt" in pruner_info["pruning_type"] or "retrain" in pruner_info["pruning_type"]: continue basic_conf.append(pruner_info) - pruning_list.append(PRUNINGS['basic_pruning'](basic_conf, model, optimizer)) + pruning_list.append(PRUNINGS["basic_pruning"](basic_conf, model, optimizer)) _prepare_hooks(model, pruning_list, opt=optimizer) if dataloader is not None: # The pruning will be done at initialization time, without inserting any hooks. 
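As an editorial sketch (not taken from the patch), the two entry points that prepare_pruning() supports mirror the optimizer / dataloader branches below; the toy model, optimizer, and config are assumptions for illustration.

```python
# Sketch of the two prepare_pruning() call patterns, under assumed toy inputs.
import torch

from neural_compressor import WeightPruningConfig
from neural_compressor.compression.pruner import prepare_pruning

model = torch.nn.Sequential(torch.nn.Linear(64, 64), torch.nn.ReLU(), torch.nn.Linear(64, 2))
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
config = WeightPruningConfig([{"target_sparsity": 0.9, "pattern": "4x1", "start_step": 1, "end_step": 100}])

# 1) Optimizer-driven pruning: hooks are mounted on the model and optimizer,
#    and pruning happens inside the user's normal training loop.
pruning = prepare_pruning(config, model, optimizer=optimizer)

# 2) Dataloader-driven pruning (pruning_type containing "gpt" or "retrain"):
#    pruning is applied at initialization time and no hooks are inserted.
#    `calib_dataloader` and `loss_fn` are user-supplied and shown only for illustration.
# pruning = prepare_pruning(config, model, dataloader=calib_dataloader, loss_func=loss_fn)
```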
sparse_gpt_conf = [] retrain_free_conf = [] for pruner_info in pruning_conf: - if 'gpt' in pruner_info["pruning_type"]: + if "gpt" in pruner_info["pruning_type"]: sparse_gpt_conf.append(pruner_info) - elif 'retrain' in pruner_info["pruning_type"]: + elif "retrain" in pruner_info["pruning_type"]: retrain_free_conf.append(pruner_info) if len(sparse_gpt_conf) > 0: - pruning_list.append(PRUNINGS['sparse_gpt_pruning'](sparse_gpt_conf, model, - dataloader, loss_func, device)) + pruning_list.append(PRUNINGS["sparse_gpt_pruning"](sparse_gpt_conf, model, dataloader, loss_func, device)) if len(retrain_free_conf) > 0: - pruning_list.append(PRUNINGS['retrain_free_pruning'](retrain_free_conf, - model, dataloader, loss_func)) - - assert len(pruning_list) >= 1, f"The pruning config is not standardized and cannot be initialized properly." + pruning_list.append(PRUNINGS["retrain_free_pruning"](retrain_free_conf, model, dataloader, loss_func)) + + assert len(pruning_list) >= 1, "The pruning config is not standardized and cannot be initialized properly." if len(pruning_list) > 1: - logger.info(f"Note that more than two pruning algorithms are currently used.") + logger.info("Note that more than two pruning algorithms are currently used.") return pruning_list return pruning_list[0] - - - diff --git a/neural_compressor/compression/pruner/criteria.py b/neural_compressor/compression/pruner/criteria.py index 3d0a62c4b2a..581d5a397d3 100644 --- a/neural_compressor/compression/pruner/criteria.py +++ b/neural_compressor/compression/pruner/criteria.py @@ -1,4 +1,4 @@ -"""pruning criterion.""" +"""Pruning criterion.""" # !/usr/bin/env python # -*- coding: utf-8 -*- # @@ -47,7 +47,7 @@ class PruningCriterion: Args: config: A config dict object that includes information about pruner and pruning criterion. modules: A dict {"module_name": Tensor} that stores the pruning modules' weights. - + Attributes: scores: A dict {"module_name": Tensor} that stores the scores of pruning modules. """ @@ -58,7 +58,7 @@ def __init__(self, modules, config, pattern): self.modules = modules self.config = config self.pattern = pattern - self.low_memory_usage = config['low_memory_usage'] + self.low_memory_usage = config["low_memory_usage"] def on_step_begin(self): """Calculate and store the pruning scores of pruning modules at the beginning of a step.""" @@ -73,17 +73,17 @@ def on_after_optimizer_step(self): pass -@register_criterion('magnitude') +@register_criterion("magnitude") class MagnitudeCriterion(PruningCriterion): """Pruning criterion. - + The magnitude criterion_class is derived from PruningCriterion. The magnitude value is used to score and determine if a weight is to be pruned. Args: config: A config dict object that includes information about pruner and pruning criterion. modules: A dict {"module_name": Tensor} that stores the pruning modules' weights. - + Attributes: scores: A dict {"module_name": Tensor} that stores the scores of pruning modules. """ @@ -101,17 +101,17 @@ def on_step_begin(self): self.scores[key] = self.pattern.reduce_score(torch.abs(p), key) -@register_criterion('gradient') +@register_criterion("gradient") class GradientCriterion(PruningCriterion): """Pruning criterion. - + The gradient criterion_class is derived from PruningCriterion. The absolute value of gradient is used to score and determine if a weight is to be pruned. Args: config: A config dict object that includes information about pruner and pruning criterion. modules: A dict {"module_name": Tensor} that stores the pruning modules' weights. 
- + Attributes: scores: A dict {"module_name": Tensor} that stores the scores of pruning modules. """ @@ -130,10 +130,10 @@ def on_before_optimizer_step(self): self.scores[key] = self.pattern.reduce_score(torch.abs(p.grad), key) -@register_criterion('snip') +@register_criterion("snip") class SnipCriterion(PruningCriterion): """Pruning criterion. - + The snip criterion_class is derived from PruningCriterion. The product of magnitude and gradient is used to score and determine if a weight is to be pruned. Please refer to SNIP: Single-shot Network Pruning based on Connection Sensitivity. @@ -142,7 +142,7 @@ class SnipCriterion(PruningCriterion): Args: config: A config dict object that includes information about pruner and pruning criterion. modules: A dict {"module_name": Tensor} that stores the pruning modules' weights. - + Attributes: scores: A dict {"module_name": Tensor} that stores the scores of pruning modules. """ @@ -161,11 +161,10 @@ def on_before_optimizer_step(self): self.scores[key] = self.pattern.reduce_score(torch.abs(p * p.grad), key) - -@register_criterion('snip_momentum') +@register_criterion("snip_momentum") class SnipMomentumCriterion(PruningCriterion): """Pruning criterion. - + The snip_momentum criterion_class is derived from PruningCriterion. A momentum mechanism is used to calculate snip score, which determines if a weight is to be pruned. @@ -174,7 +173,7 @@ class SnipMomentumCriterion(PruningCriterion): modules: A dict {"module_name": Tensor} that stores the pruning modules' weights. alpha: A parameter that determines how much of the snip score is preserved from last pruning step. beta: A parameter that determines how much of the snip score is updated at the current step. - + Attributes: scores: A dict {"module_name": Tensor} that stores the scores of pruning modules. """ @@ -187,9 +186,9 @@ def __init__(self, modules, config, pattern): p = modules[key].weight dtype = torch.float32 if self.low_memory_usage: - dtype = torch.bfloat16 if p.device.type == 'cpu' else torch.float16 + dtype = torch.bfloat16 if p.device.type == "cpu" else torch.float16 # self.scores[key] = torch.zeros(p.shape, dtype=dtype).to(p.device) - if hasattr(self.pattern, 'reduce_score'): + if hasattr(self.pattern, "reduce_score"): self.scores[key] = self.pattern.reduce_score(torch.zeros(p.shape, dtype=dtype).to(p.device), key) else: self.scores[key] = torch.zeros(p.shape, dtype=dtype).to(p.device) @@ -204,17 +203,17 @@ def on_before_optimizer_step(self): p = self.modules[key].weight self.scores[key] *= self.alpha tmp = torch.abs(p * p.grad) - if hasattr(self.pattern, 'reduce_score'): + if hasattr(self.pattern, "reduce_score"): tmp = self.pattern.reduce_score(tmp, key, force=True) if self.low_memory_usage: - tmp = tmp.bfloat16() if p.device.type == 'cpu' else tmp.half() + tmp = tmp.bfloat16() if p.device.type == "cpu" else tmp.half() self.scores[key] += self.beta * tmp -@register_criterion('block_mask') +@register_criterion("block_mask") class BlockMaskCriterion(PruningCriterion): """Pruning criterion. - + The block_mask criterion_class is derived from PruningCriterion. A momentum mechanism is used to calculate snip score, which determines if a block of weights is to be pruned. @@ -223,7 +222,7 @@ class BlockMaskCriterion(PruningCriterion): modules: A dict {"module_name": Tensor} that stores the pruning modules' weights. alpha: A parameter that determines how much of the snip score is preserved from last pruning step. 
beta: A parameter that determines how much of the snip score is updated at the current step. - + Attributes: scores: A dict {"module_name": Tensor} that stores the scores of pruning modules. """ @@ -236,7 +235,7 @@ def __init__(self, modules, config, pattern, masks, alpha=0.9, beta=1.0): mask = masks[key] dtype = torch.float32 if self.low_memory_usage: - dtype = torch.bfloat16 if mask.device.type == 'cpu' else torch.float16 + dtype = torch.bfloat16 if mask.device.type == "cpu" else torch.float16 self.scores[key] = torch.zeros(mask.shape, dtype=dtype).to(mask.device) self.alpha = alpha self.beta = beta @@ -248,15 +247,15 @@ def on_before_optimizer_step(self, masks): grad = masks[key].grad if self.low_memory_usage: # TODO check bf16 grad availability - grad = grad.bfloat16() if grad.device.type == 'cpu' else grad.half() + grad = grad.bfloat16() if grad.device.type == "cpu" else grad.half() self.scores[key] *= self.alpha self.scores[key] += self.beta * torch.abs(grad) -@register_criterion('retrain_free') +@register_criterion("retrain_free") class RetrainFreeCriterion(PruningCriterion): """Pruning criterion. - + The retrain_free criterion_class is derived from PruningCriterion. Args: @@ -264,7 +263,7 @@ class RetrainFreeCriterion(PruningCriterion): modules: A dict {"module_name": Tensor} that stores the pruning modules' weights. alpha: A parameter that determines how much of the snip score is preserved from last pruning step. beta: A parameter that determines how much of the snip score is updated at the current step. - + Attributes: scores: A dict {"module_name": Tensor} that stores the scores of pruning modules. """ @@ -276,14 +275,14 @@ def __init__(self, modules, config, pattern, masks): self.collected_grads = {} for key in self.modules.keys(): for name, param in self.modules[key].named_parameters(): - param.requires_grad_(False) # only for retrain-free criterion - + param.requires_grad_(False) # only for retrain-free criterion + if key not in masks.keys(): - continue # No corresponding block mask, skip. + continue # No corresponding block mask, skip. mask = masks[key] dtype = torch.float32 if self.low_memory_usage: - dtype = torch.bfloat16 if mask.device.type == 'cpu' else torch.float16 + dtype = torch.bfloat16 if mask.device.type == "cpu" else torch.float16 self.scores[key] = torch.zeros(mask.shape, dtype=dtype).to(mask.device) self.collected_grads[key] = [] @@ -293,6 +292,6 @@ def on_before_optimizer_step(self, masks): for key in masks.keys(): mask_grad = masks[key].grad.clone() if self.low_memory_usage: - mask_grad = mask_grad.bfloat16() if mask_grad.device.type == 'cpu' else mask_grad.half() + mask_grad = mask_grad.bfloat16() if mask_grad.device.type == "cpu" else mask_grad.half() self.collected_grads[key].append(mask_grad.cpu()) self.scores[key] += mask_grad.pow(2) diff --git a/neural_compressor/compression/pruner/model_slim/README.md b/neural_compressor/compression/pruner/model_slim/README.md index 1957d288d1c..9e3a7cb6809 100644 --- a/neural_compressor/compression/pruner/model_slim/README.md +++ b/neural_compressor/compression/pruner/model_slim/README.md @@ -1,75 +1,73 @@ -Auto Slim -============ - -1. [Introduction](#introduction) - - - [Channel Pruning and Model Slim](#channel-pruning-and-model-slim) - - - [Feed Forward Network Slim](#feed-forward-network-slim) - - - [Multi-head Attention Slim (Experimental)](#multi-head-attention-slim-experimental) - -2. [API for Auto Slim](#api-for-auto-slim) - -3. 
[Run Examples](#run-examples) - -## Introduction - -### Channel Pruning and Model Slim - - Channel-wise pruning means removing less salient channels on feature maps and it could directly shrink feature map widths. Users could set a channelx1 (or 1xchannel) pruning pattern to use this method. - - An interesting thing is that if we do channel-wise pruning for some layers in NLP models, we can permanently remove these all-zero channels without changing their accuracy. By applying this method to Transformer based models, including GPT, BERT and other large language models, users can obtain acceleration in inference. - - Based on this logic, we have developed **Model Slim** method, this method aims at optimizing Transformer's both two main modules: **multi-head attention** modules and **feed forward networks**. This leads to a promising application of Since these two modules take most computing overheads in the entire model, users can slim models and obtain inference speed gain without deploying them with particular hardware-aware repos. Plus, we have already verified that after applying model slim in some Transformer models, their inference speed and total size can be significantly improved. - -### Feed Forward Network Slim - - Feed forward network mainly consists two consecutive linear layers. We conduct the input channel pruning for the second linear layer (masking weights by column). We can remove these all-zero channels. Plus, we also remove the same indices' output channels in the first linear layers (masking weights by row), since their contribution for activation will be masked by the second layer's. - - - - The figure above shows how we execute the slim process for feed forward networks. In three sub-figures, **row number and column number refers to output channels and input channels respectively**. (a) shows a feed forward network's initial status with the second linear layer's input channel pruned. (b) shows projecting the sparse channel indices to the first layer's output channels. The channel with same index takes the same color, and white channels means their weights are pruned to be zero. Finally, in (c), we remove sparse channel in both linear layers and obtain two dense linear layers. - - This leads to no change for model's accuracy, but can obtain a significant acceleration for model's inference, because the transformer models' feed forward network parts take nearly 50% of entire computing overhead. Thus, compressing weights in feed forward network is really useful. - -### Multi-head Attention Slim - - Self attention modules are common in all Transformer-based models. These models use multi-head attention to enhance their abilities of linking contextual information. Transformer-based models usually stack a sequence of multi-head attention modules, and they take great storage and memory bandwidth. As an optimization method, head pruning removes attention heads which make minor contribution to model's contextual analysis. This method does not lead to much accuracy loss, but provides us with much opportunity for model acceleration. - -## API for Auto Slim - - Since feed forward networks and multi-head attention modules share similar structures in difference models, it would be more convenient if we can locate them automatically and execute the model slim schemes respectively. We provide API functions for you to complete the process above and slim your transformer models easily. Here is how to call our API functions. 
Simply provide a target sparsity value to our Our API function parse_auto_slim_config and it can generate the pruning_configs used by our pruning API. Such process is fully automatic and target multi-head attention layers will be included without manual setting. After pruning process finished, use API function model_slim to slim the model.
-
-  Since head pruning slim process is still under refinement progress, its auto slim function is limited. We will update its function right away.
-
-```python
-# auto slim config
-# part1 generate pruning configs for the second linear layers.
-pruning_configs = []
-from neural_compressor.compression.pruner import parse_auto_slim_config
-auto_slim_configs = parse_auto_slim_config(
-    model,
-    multi_head_attention_sparsity,
-    feed_forward_sparsity
-)
-pruning_configs += auto_slim_configs
-
-################
-"""
-# Training codes.
-......
-"""
-################
-
-from neural_compressor.compression.pruner import model_slim
-model = model_slim(model)
-```
-
-## Run Examples
-
-We have provided BERT-Base examples for both and feed forward networks and multi-head attention modules to explicit our slim potential and obtain best acceleration performance. Please follow this(../../../../examples/pytorch/nlp/huggingface_models/question-answering/model_slim/). More examples related to popular large language models will be included right away.
+Auto Slim
+============
+
+1. [Introduction](#introduction)
+
+   - [Channel Pruning and Model Slim](#channel-pruning-and-model-slim)
+
+   - [Feed Forward Network Slim](#feed-forward-network-slim)
+
+   - [Multi-head Attention Slim (Experimental)](#multi-head-attention-slim-experimental)
+
+2. [API for Auto Slim](#api-for-auto-slim)
+
+3. [Run Examples](#run-examples)
+
+## Introduction
+
+### Channel Pruning and Model Slim
+
+  Channel-wise pruning means removing less salient channels from feature maps, which directly shrinks feature map widths. Users can set a channelx1 (or 1xchannel) pruning pattern to use this method.
+
+  An interesting observation is that if we apply channel-wise pruning to some layers in NLP models, we can permanently remove these all-zero channels without changing the model's accuracy. By applying this method to Transformer-based models, including GPT, BERT and other large language models, users can obtain inference acceleration.
+
+  Based on this logic, we have developed the **Model Slim** method, which optimizes a Transformer's two main modules: **multi-head attention** modules and **feed forward networks**. Since these two modules take most of the computing overhead in the entire model, users can slim models and obtain an inference speed gain without deploying them with particular hardware-aware repos. Plus, we have already verified that after applying model slim to some Transformer models, their inference speed and total size can be significantly improved.
+
+### Feed Forward Network Slim
+
+  A feed forward network mainly consists of two consecutive linear layers. We conduct input channel pruning for the second linear layer (masking weights by column), so these all-zero channels can be removed. Plus, we also remove the output channels with the same indices in the first linear layer (masking weights by row), since their contribution to the activation will be masked by the second layer's.
+
+
+
+  The figure above shows how we execute the slim process for feed forward networks. In the three sub-figures, **row number and column number refer to output channels and input channels, respectively**. (a) shows a feed forward network's initial status with the second linear layer's input channels pruned. (b) shows projecting the sparse channel indices to the first layer's output channels. Channels with the same index take the same color, and white channels mean their weights are pruned to zero. Finally, in (c), we remove the sparse channels in both linear layers and obtain two dense linear layers.
+
+  This leads to no change in the model's accuracy, but yields a significant inference acceleration, because the feed forward network takes nearly 50% of a Transformer model's entire computing overhead. Thus, compressing weights in the feed forward network is highly effective.
+
+### Multi-head Attention Slim
+
+  Self attention modules are common in all Transformer-based models. These models use multi-head attention to enhance their ability to link contextual information. Transformer-based models usually stack a sequence of multi-head attention modules, which consume significant storage and memory bandwidth. As an optimization method, head pruning removes attention heads that make a minor contribution to the model's contextual analysis. This method does not lead to much accuracy loss, but provides significant opportunity for model acceleration.
+
+## API for Auto Slim
+
+  Since feed forward networks and multi-head attention modules share similar structures across different models, it is more convenient to locate them automatically and apply the corresponding slim schemes. We provide API functions for you to complete the process above and slim your transformer models easily. Simply provide a target sparsity value to the API function parse_auto_slim_config and it will generate the pruning_configs used by our pruning API. This process is fully automatic, and target multi-head attention layers are included without manual setting. After the pruning process finishes, use the API function model_slim to slim the model.
+
+  Since the head pruning slim process is still being refined, its auto slim function is limited. We will update it soon.
+
+```python
+# auto slim config
+# part1 generate pruning configs for the second linear layers.
+pruning_configs = []
+from neural_compressor.compression.pruner import parse_auto_slim_config
+
+auto_slim_configs = parse_auto_slim_config(model, multi_head_attention_sparsity, feed_forward_sparsity)
+pruning_configs += auto_slim_configs
+
+################
+"""
+# Training codes.
+......
+"""
+################
+
+from neural_compressor.compression.pruner import model_slim
+
+model = model_slim(model)
+```
+
+## Run Examples
+
+We have provided BERT-Base examples for both feed forward networks and multi-head attention modules to demonstrate the slim potential and obtain the best acceleration performance. Please follow [this example](../../../../examples/pytorch/nlp/huggingface_models/question-answering/model_slim/). More examples related to popular large language models will be included soon.
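Where the README above describes removing the pruned input channels of the second FFN linear layer and the matching output channels of the first, a small self-contained sketch (an editorial illustration, not code from the repository) can make the equivalence concrete: the dropped channels contribute exactly zero, so the slimmed network produces the same output.

```python
# Numerical sketch of FFN slimming: column-pruned lin2 + row-slimmed lin1
# give the same output as the original dense pair.
import torch

lin1 = torch.nn.Linear(4, 6)   # first FFN linear: 4 -> 6
lin2 = torch.nn.Linear(6, 4)   # second FFN linear: 6 -> 4
keep = [0, 2, 3, 5]            # intermediate channels whose lin2 input columns are NOT pruned

with torch.no_grad():
    pruned_cols = [i for i in range(6) if i not in keep]
    lin2.weight[:, pruned_cols] = 0.0  # column pruning of the second linear layer

# Slim: drop the zero columns of lin2 and the matching rows (and bias entries) of lin1.
slim1 = torch.nn.Linear(4, len(keep))
slim2 = torch.nn.Linear(len(keep), 4)
with torch.no_grad():
    slim1.weight.copy_(lin1.weight[keep, :])
    slim1.bias.copy_(lin1.bias[keep])
    slim2.weight.copy_(lin2.weight[:, keep])
    slim2.bias.copy_(lin2.bias)

x = torch.randn(2, 4)
dense = lin2(torch.relu(lin1(x)))   # pruned channels are multiplied by zero columns
slim = slim2(torch.relu(slim1(x)))  # pruned channels are removed entirely
print(torch.allclose(dense, slim, atol=1e-6))  # True
```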
diff --git a/neural_compressor/compression/pruner/model_slim/__init__.py b/neural_compressor/compression/pruner/model_slim/__init__.py index 0f0bc1b2a94..347823e7f30 100644 --- a/neural_compressor/compression/pruner/model_slim/__init__.py +++ b/neural_compressor/compression/pruner/model_slim/__init__.py @@ -16,4 +16,4 @@ # See the License for the specific language governing permissions and # limitations under the License. from .auto_slim import parse_auto_slim_config -from .auto_slim import model_slim \ No newline at end of file +from .auto_slim import model_slim diff --git a/neural_compressor/compression/pruner/model_slim/auto_slim.py b/neural_compressor/compression/pruner/model_slim/auto_slim.py index 0e4b6ccd09f..90207da561f 100644 --- a/neural_compressor/compression/pruner/model_slim/auto_slim.py +++ b/neural_compressor/compression/pruner/model_slim/auto_slim.py @@ -19,6 +19,7 @@ from ..utils import logger + def model_slim(model, dataloader=None, round_multiplier=32): """Slim the sparse model automatically.""" try: @@ -31,7 +32,8 @@ def model_slim(model, dataloader=None, round_multiplier=32): logger.warning("model MHA slim failed.") return model -def model_slim_ffn2(model, dataloader = None, round_multiplier=32): + +def model_slim_ffn2(model, dataloader=None, round_multiplier=32): """Remove some sparse part in the model permanently and obtain acceleration directly. Args: @@ -40,7 +42,8 @@ def model_slim_ffn2(model, dataloader = None, round_multiplier=32): """ from .pattern_analyzer import Linear2LinearSearcher from .weight_slim import LinearCompressionIterator - logger.warning(f"You are using model slim methods, some weight channels will be removed permanently.") + + logger.warning("You are using model slim methods, some weight channels will be removed permanently.") pa_obj = Linear2LinearSearcher(model, dataloader) layers = pa_obj.search() layers = pa_obj.from_layer_name_to_object(layers) @@ -48,17 +51,19 @@ def model_slim_ffn2(model, dataloader = None, round_multiplier=32): linear_pruner(masks=None, round_value=round_multiplier) return model -def model_slim_mha(model, dataloader = None): + +def model_slim_mha(model, dataloader=None): """Remove some sparse part in the model permanently and obtain acceleration directly. Args: model: a sprase model. 
""" - from .weight_slim import MHACompression from .pattern_analyzer import SelfMHASearcher - logger.warning(f"You are using model slim methods, some attention heads will be removed permanently.") + from .weight_slim import MHACompression + + logger.warning("You are using model slim methods, some attention heads will be removed permanently.") pa_obj = SelfMHASearcher(model, dataloader) - layers, _ = pa_obj.search(split_qkv_ffn = False) + layers, _ = pa_obj.search(split_qkv_ffn=False) layers = pa_obj.obtain_mha_module(layers) layers = pa_obj.from_layer_name_to_object(layers) for layer in layers: @@ -66,8 +71,9 @@ def model_slim_mha(model, dataloader = None): mha_compression() return model + # auto slim config -def parse_auto_slim_config(model, dataloader = None, ffn2_sparsity = .0, mha_sparsity = .0, **kwargs): +def parse_auto_slim_config(model, dataloader=None, ffn2_sparsity=0.0, mha_sparsity=0.0, **kwargs): """Get model slim pruning configs.""" auto_slim_configs = [] if ffn2_sparsity > 0 and ffn2_sparsity < 1: @@ -76,34 +82,26 @@ def parse_auto_slim_config(model, dataloader = None, ffn2_sparsity = .0, mha_spa auto_slim_configs += generate_mha_pruning_config(model, dataloader, mha_sparsity, **kwargs) return auto_slim_configs + def generate_ffn2_pruning_config(model, dataloader, ffn2_sparsity, **kwargs): """Get consecutive linear layers pruning configs.""" from .pattern_analyzer import Linear2LinearSearcher + searcher = Linear2LinearSearcher(model, dataloader) layers = searcher.search() # extract the second linear layer - ffn_layers = [ffn2_module['root_linear'] for ffn2_module in layers] - ffn2_pruning_config = [ - { - "op_names": ffn_layers, - "pattern": "channelx1", - "target_sparsity": ffn2_sparsity - } - ] + ffn_layers = [ffn2_module["root_linear"] for ffn2_module in layers] + ffn2_pruning_config = [{"op_names": ffn_layers, "pattern": "channelx1", "target_sparsity": ffn2_sparsity}] # append kwargs to generated config for item in ffn2_pruning_config: item.update(kwargs) return ffn2_pruning_config + def generate_mha_pruning_config(model, dataloader, mha_sparsity, **kwargs): """Get multi-head attention layers pruning configs.""" # method 1: apply real mha pruning - mha_pruning_config = [ - { - "pattern": "mha", - "target_sparsity": mha_sparsity - } - ] + mha_pruning_config = [{"pattern": "mha", "target_sparsity": mha_sparsity}] # append kwargs to generated config for item in mha_pruning_config: item.update(kwargs) diff --git a/neural_compressor/compression/pruner/model_slim/pattern_analyzer.py b/neural_compressor/compression/pruner/model_slim/pattern_analyzer.py index 7df005fbbc6..d055793bb76 100644 --- a/neural_compressor/compression/pruner/model_slim/pattern_analyzer.py +++ b/neural_compressor/compression/pruner/model_slim/pattern_analyzer.py @@ -16,60 +16,65 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ..utils import logger import re + from ....utils.utility import LazyImport -torch = LazyImport('torch') -tf = LazyImport('tensorflow') +from ..utils import logger + +torch = LazyImport("torch") +tf = LazyImport("tensorflow") -JIT_SUPPORT_OPS = ['linear', 'dropout', 'gelu', 'silu', 'relu', 'mul', 'add'] +JIT_SUPPORT_OPS = ["linear", "dropout", "gelu", "silu", "relu", "mul", "add"] # MHA_SUPPORT_NAMES = ["q", "k", "v"] + def get_attributes(module, attrs: str): """Get a multi-level descent module of module. Args: module (torch.nn.Module): The torch module. attrs (str): The attributes' calling path. 
- + Returns: attr: The target attribute of the module. """ assert isinstance(module, torch.nn.Module) - attrs_list = attrs.split('.') + attrs_list = attrs.split(".") sub_module = module while attrs_list: attr = attrs_list.pop(0) sub_module = getattr(sub_module, attr) return sub_module + def get_common_module(layer1: str, layer2: str): - """Get the module which contains layer1 and layer2 (nearest father nodes) - """ - attribute_seq1 = layer1.split('.') - attribute_seq2 = layer2.split('.') + """Get the module which contains layer1 and layer2 (nearest father nodes)""" + attribute_seq1 = layer1.split(".") + attribute_seq2 = layer2.split(".") target_module = [] for idx in range(min(len(attribute_seq1), len(attribute_seq2))): if attribute_seq1[idx] != attribute_seq2[idx]: break else: target_module.append(attribute_seq1[idx]) - return '.'.join(target_module) + return ".".join(target_module) + def print_iterables(data_iters): """Print the auto slim logs.""" for data in data_iters: try: - logger.info(f"{data}: {data_iters[data]}") # dict + logger.info(f"{data}: {data_iters[data]}") # dict except: - logger.info(f"{data}") # list + logger.info(f"{data}") # list + class RecipeSearcher(object): """Searcher class which searches patterns with a pre-defined recipe. - A Recipe is a dict type data which contains the root module's name and - its sub-modules' levelwise calling way. + A Recipe is a dict type data which contains the root module's name and + its sub-modules' levelwise calling way. For example, for the self-attention module in Huggingface bert-model, if we want to obtain its linear ops (query, key, value and output), the recipe should be like: @@ -80,7 +85,7 @@ class RecipeSearcher(object): Args: model (torch.nn.Module): The PyTorch model for searching. recipe (dict): A dict containing infomation of the searching pattern. - + Attributes: model: The PyTorch model for searching. recipe: A dict containing infomation of the searching pattern. @@ -105,7 +110,7 @@ def search(self, target_name): self.search_results.clear() self.dfs_search(self.model, type(self.model).__name__, target_name) return self.search_results - + def dfs_search(self, module, module_name, target_name): """Operations called for one single search step.""" module_type = type(module).__name__ @@ -116,6 +121,7 @@ def dfs_search(self, module, module_name, target_name): for n, m in module.named_children(): self.dfs_search(m, n, target_name) + class JitBasicSearcher(object): """Static graph searcher class which searches patterns with PyTorch static graph and its input/output information. @@ -126,7 +132,7 @@ class JitBasicSearcher(object): Args: model (torch.nn.Module): The PyTorch model for searching. - + Attributes: model: The PyTorch model for searching. device: The model's current device type. @@ -136,7 +142,7 @@ class JitBasicSearcher(object): searching_results: The list/dict which store matched patterns. 
""" - def __init__(self, model, dataloader = None, placeholder_shape = None, placeholder_dtype = None): + def __init__(self, model, dataloader=None, placeholder_shape=None, placeholder_dtype=None): """Initialize the attributes.""" assert isinstance(model, torch.nn.Module) if "PyTorchFXModel" in type(model).__name__: @@ -149,35 +155,34 @@ def __init__(self, model, dataloader = None, placeholder_shape = None, placehold except: self.device = next(self.model.parameters()).device # use torch.jit to generate static graph - self.dataloader = dataloader # user can set a dataloader to help trace static graph - self.placeholder_shape = placeholder_shape # dummy input's shape - self.placeholder_dtype = placeholder_dtype # dummy input's data + self.dataloader = dataloader # user can set a dataloader to help trace static graph + self.placeholder_shape = placeholder_shape # dummy input's shape + self.placeholder_dtype = placeholder_dtype # dummy input's data self.static_graph = None self.flatten_static_graph = None self.analyze_dummy_input() self.generate_static_graph() # save the searching results - self.target_layers = ['linear'] + self.target_layers = ["linear"] self.search_results = [] - + def analyze_dummy_input(self): """Analyze the model's input type. - + If no dataloader is specified, searcher will automatically generate a dummy input to obtain static graph. - """ # if the user already set the dummy inputs, no need to analyze the model - if self.placeholder_dtype != None and self.placeholder_dtype != None: + if self.placeholder_dtype is not None and self.placeholder_dtype is not None: return # analyze the model automatically first_parameter = None for n, p in self.model.named_parameters(): - if first_parameter != None: + if first_parameter is not None: break else: first_parameter = p - if len(first_parameter.shape) == 4: + if len(first_parameter.shape) == 4: # conv op, indicating that this is a cv model self.placeholder_shape = [1, 3, 512, 512] self.placeholder_dtype = torch.float32 @@ -197,23 +202,20 @@ def generate_static_graph_with_dummyinput(self): dummy_inputs = self.generate_dummy_inputs() dummy_inputs = [] + [dummy_inputs] if type(self.model).__name__ == "WhisperForConditionalGeneration": - dummy_inputs = [ - torch.ones([1, 80, 3000]), - torch.ones([1, 448], dtype=torch.int64) - ] - logger.info(f"Generating static graph from original model using auto dummy input: start.") + dummy_inputs = [torch.ones([1, 80, 3000]), torch.ones([1, 448], dtype=torch.int64)] + logger.info("Generating static graph from original model using auto dummy input: start.") try: self.static_graph = torch.jit.trace(self.model, dummy_inputs, strict=False) - # re-org from original static codes. - self.flatten_static_graph = [l.strip() for l in self.static_graph.inlined_graph.__str__().split('\n')] - logger.info(f"Generating static graph from original model using auto dummy input: success.") + # re-org from original static codes. 
+ self.flatten_static_graph = [l.strip() for l in self.static_graph.inlined_graph.__str__().split("\n")] + logger.info("Generating static graph from original model using auto dummy input: success.") except: - logger.info(f"Generating static graph from original model using auto dummy input: failed.") - + logger.info("Generating static graph from original model using auto dummy input: failed.") + def generate_static_graph_with_dataloader(self): """Generate static graph from a external dataloader.""" # dummy_input = self.dataloader[0] - logger.info(f"Generating static graph from original model using external data: start.") + logger.info("Generating static graph from original model using external data: start.") for dummy_input in self.dataloader: if isinstance(dummy_input, dict): try: @@ -232,29 +234,29 @@ def generate_static_graph_with_dataloader(self): self.static_graph = torch.jit.trace(self.model, dummy_input.to(self.device), strict=False) except: pass - if self.static_graph != None: + if self.static_graph is not None: # if jit graph is successfully generated, end iteration break try: - self.flatten_static_graph = [l.strip() for l in self.static_graph.inlined_graph.__str__().split('\n')] - logger.info(f"Generating static graph from original model using external data: success.") + self.flatten_static_graph = [l.strip() for l in self.static_graph.inlined_graph.__str__().split("\n")] + logger.info("Generating static graph from original model using external data: success.") except: - logger.warning(f"Generating static graph from original model using external data: failed.") + logger.warning("Generating static graph from original model using external data: failed.") def generate_static_graph(self): """Generate static graph with two methods: using dataloader or dummy input.""" # first do the jit trace using dataloader - if self.dataloader != None: + if self.dataloader is not None: self.generate_static_graph_with_dataloader() # if dataloader based jit trace cannot work or not chosen, use dummy input - if self.static_graph != None: + if self.static_graph is not None: return else: self.generate_static_graph_with_dummyinput() - + def generate_dummy_inputs(self): """Generate dummy inputs for the model's static graph. - + Return: A torch.Tensor passed into the model to generate static graph. """ @@ -262,19 +264,19 @@ def generate_dummy_inputs(self): def filter_static_code(self, list_in, kw): """Obtain sub-list which contains some key words. - + Args: list_in: list. kw: string. - - Return: a sub-list of list_in, whose members contains kw. + + Return: a sub-list of list_in, whose members contains kw. """ list_out = [] for info in list_in: if kw in info: list_out.append(info) return list_out - + def refine_strings(self, string_list): """Remove space and tabs in strings.""" return [s.strip() for s in string_list] @@ -284,7 +286,7 @@ def analyze_jit_code(self, code): Args: code: a str presenting static graph forwarding code - + Return: A dict: { @@ -294,36 +296,38 @@ def analyze_jit_code(self, code): op_trace: "the absolute dir to get this model, in torch.nn.Module's attribute style." 
} """ + def remove_weight_or_bias_getattr_op(input_name): # %weight and %bias are not related to graph search, therefore skip return "%weight" not in input_name and "bias" not in input_name + # step1 : find outputs' name - output_names = code.split(":")[0].strip().split(',') + output_names = code.split(":")[0].strip().split(",") output_names = self.refine_strings(output_names) # step2: find inputs' name # use pattern match to find aten::op which includes inputs' name - aten_pattern = re.compile('aten::.*,') + aten_pattern = re.compile("aten::.*,") aten_regex = aten_pattern.search(code)[0] input_pattern = re.compile("\(.*\)") input_names = input_pattern.search(aten_regex)[0][1:-1].split(",") input_names = filter(remove_weight_or_bias_getattr_op, input_names) input_names = self.refine_strings(input_names) # step3: obtain the tensor shape of ops - shape_pattern = re.compile('Float\(.* strides') + shape_pattern = re.compile("Float\(.* strides") try: op_shape = shape_pattern.search(code)[0][6:-9] except: op_shape = None # step4: find the op name (linear, or a act type) - aten_op_pattern = re.compile('aten::.*\(') + aten_op_pattern = re.compile("aten::.*\(") op_type = aten_op_pattern.search(code)[0][6:-1] - # step5: find the attribute calling code - op_trace_pattern = re.compile('scope\:.*\#') + # step5: find the attribute calling code + op_trace_pattern = re.compile("scope\:.*\#") op_trace = self.get_layer_path_from_jit_code(op_trace_pattern.search(code)[0]) # step6: compile all information in a dict and return res = { - "output_names": output_names, # should be a list - "input_names": input_names, # shoule be a list + "output_names": output_names, # should be a list + "input_names": input_names, # shoule be a list "op_shape": op_shape, "op_type": op_type, "op_trace": op_trace, @@ -339,17 +343,17 @@ def get_layer_object_from_jit_codes(self, scope_code): In jit, scope keyword is a item which use to trace a layer from a model For example, for a intermediate layer in Huggingface bert-base, its scope is like: - scope: __module.bert/__module.bert.encoder/__module.bert.encoder.layer.0/ + scope: __module.bert/__module.bert.encoder/__module.bert.encoder.layer.0/ __module.bert.encoder.layer.0.intermediate/__module.bert.encoder.layer.0.intermediate.dense # example: '__module.bert.encoder.layer.11.intermediate.intermediate_act_fn' Args: - scope_code: a string representing a operator's forward code. - + scope_code: a string representing a operator's forward code. + Return: a torch.nn.module: the layer/operator corresponding with scope_code. """ - scope_regex = re.compile('scope\: .* \#') + scope_regex = re.compile("scope\: .* \#") try: scope_part = scope_regex.search(scope_code)[0] except: @@ -358,19 +362,17 @@ def get_layer_object_from_jit_codes(self, scope_code): # strip scope keyword, only keep contrete items scope_part = scope_part[7:-2].strip() # the last content contains the complete route from top to down - scope_contents = scope_part.split('/')[-1] - attrs = scope_contents.split('.')[1:] + scope_contents = scope_part.split("/")[-1] + attrs = scope_contents.split(".")[1:] sub_module = self.model # iteratively locate the target layer from top(model) to down(layer) for attr in attrs: sub_module = getattr(sub_module, attr) return sub_module - + def get_layer_path_from_jit_code(self, scope_code): - """ - Get the module name from its static graph scope code. 
- """ - scope_regex = re.compile('scope\: .* \#') + """Get the module name from its static graph scope code.""" + scope_regex = re.compile("scope\: .* \#") try: scope_part = scope_regex.search(scope_code)[0] except: @@ -378,11 +380,12 @@ def get_layer_path_from_jit_code(self, scope_code): return "" # strip scope keyword, only keep contrete items scope_part = scope_part[7:-2].strip() - scope_contents = scope_part.split('/')[-1] - level_names = scope_contents.split('.') - level_names_main = ".".join(level_names[1:]) + scope_contents = scope_part.split("/")[-1] + level_names = scope_contents.split(".") + level_names_main = ".".join(level_names[1:]) return level_names_main + class Linear2LinearSearcher(JitBasicSearcher): """Static graph searcher for consecutive linear layers. @@ -391,7 +394,7 @@ class Linear2LinearSearcher(JitBasicSearcher): Args: model (torch.nn.Module): The PyTorch model for searching. - + Attributes: model: The PyTorch model for searching. device: The model's current device type. @@ -403,7 +406,7 @@ class Linear2LinearSearcher(JitBasicSearcher): current_pattern: a searching path to store searching status. """ - def __init__(self, model, dataloader = None, placeholder_shape = None, placeholder_dtype = None): + def __init__(self, model, dataloader=None, placeholder_shape=None, placeholder_dtype=None): """Initialize.""" assert isinstance(model, torch.nn.Module) super(Linear2LinearSearcher, self).__init__(model, dataloader, placeholder_shape, placeholder_dtype) @@ -411,21 +414,21 @@ def __init__(self, model, dataloader = None, placeholder_shape = None, placehold self.current_pattern = [] # initialize target_op_lut for op in JIT_SUPPORT_OPS: - self.target_op_lut[op] = JitBasicSearcher.filter_static_code(self, self.flatten_static_graph, "aten::"+op) + self.target_op_lut[op] = JitBasicSearcher.filter_static_code(self, self.flatten_static_graph, "aten::" + op) def search_frontier_ops_from_node(self, node_name): """Search the frontier nodes from a original op's input nodes. Args: node_name: a node string (%input xxx, %yyy, etc.) - + Return: a list of ops, whose output is node_name. 
""" target_frontier_ops = [] for op_type, op_codes in self.target_op_lut.items(): for op_code in op_codes: - output_names = JitBasicSearcher.analyze_jit_code(self, op_code)['output_names'] + output_names = JitBasicSearcher.analyze_jit_code(self, op_code)["output_names"] if output_names.__len__() == 1 and node_name == output_names[0]: target_frontier_ops.append(op_code) else: @@ -436,26 +439,28 @@ def search_from_root_linear(self, linear_code): """Search frontier linears from a linear op.""" self.current_pattern.clear() linear_info = JitBasicSearcher.analyze_jit_code(self, linear_code) - root_linear_trace = linear_info['op_trace'] + root_linear_trace = linear_info["op_trace"] # data structure to save the results results = { "root_linear": root_linear_trace, "target_frontier_linears": [], } + # start dfs def dfs(root_op_code): - """a dfs step code.""" + """A dfs step code.""" op_info = JitBasicSearcher.analyze_jit_code(self, root_op_code) - op_inputs = op_info['input_names'] + op_inputs = op_info["input_names"] for op_input in op_inputs: frontier_ops = self.search_frontier_ops_from_node(op_input) # retrively search the ops for frontier_op in frontier_ops: frontier_op_info = JitBasicSearcher.analyze_jit_code(self, frontier_op) - if frontier_op_info['op_type'] == 'linear': - results['target_frontier_linears'].append(frontier_op_info['op_trace']) + if frontier_op_info["op_type"] == "linear": + results["target_frontier_linears"].append(frontier_op_info["op_trace"]) else: dfs(frontier_op) + dfs(linear_code) return results @@ -467,7 +472,7 @@ def search(self): | \ / B Z A, B, X, Y, Z are all linear layers, some ops including add, mul, dropout can be ignored. - When we prune B or Z, we can also prune A or X & Y of same channel indices. + When we prune B or Z, we can also prune A or X & Y of same channel indices. Return: A list [ { @@ -477,9 +482,9 @@ def search(self): ] """ all_linear_structure_results = [] - for linear_code in self.target_op_lut['linear']: + for linear_code in self.target_op_lut["linear"]: search_res = self.search_from_root_linear(linear_code) - if search_res['target_frontier_linears'].__len__() > 0: + if search_res["target_frontier_linears"].__len__() > 0: all_linear_structure_results.append(search_res) # Summary print_iterables(all_linear_structure_results) @@ -487,7 +492,7 @@ def search(self): if all_linear_structure_results.__len__() == 0: logger.warning("No linear2linear modules are hooked.") return all_linear_structure_results - + def from_layer_name_to_object(self, l2l_search_layers): """Obtain the layer objects themselves from their names. { @@ -506,13 +511,14 @@ def from_layer_name_to_object(self, l2l_search_layers): "root_linear": None, "target_frontier_linears": [], } - layer_obj['root_linear'] = get_attributes(self.model, item['root_linear']) - layer_obj['target_frontier_linears'] = [ - get_attributes(self.model, linfo) for linfo in item['target_frontier_linears'] + layer_obj["root_linear"] = get_attributes(self.model, item["root_linear"]) + layer_obj["target_frontier_linears"] = [ + get_attributes(self.model, linfo) for linfo in item["target_frontier_linears"] ] layer_objs.append(layer_obj) return layer_objs + class SelfMHASearcher(JitBasicSearcher): """Static graph searcher for multi-head attention modules. @@ -521,7 +527,7 @@ class SelfMHASearcher(JitBasicSearcher): Args: model (torch.nn.Module): The PyTorch model for searching. - + Attributes: model: The PyTorch model for searching. device: The model's current device type. 
@@ -529,7 +535,7 @@ class SelfMHASearcher(JitBasicSearcher): flatten_static_graph: A list of string with the model's static graph inference details. """ - def __init__(self, model, dataloader = None, placeholder_shape = None, placeholder_dtype = None): + def __init__(self, model, dataloader=None, placeholder_shape=None, placeholder_dtype=None): """Initialize.""" assert isinstance(model, torch.nn.Module) super(SelfMHASearcher, self).__init__(model, dataloader, placeholder_shape, placeholder_dtype) @@ -541,7 +547,7 @@ def get_head_pattern(self): qkv_pattern = str(head_size) + "xchannel" ffn_pattern = "channelx" + str(head_size) return qkv_pattern, ffn_pattern - + def gather_mha_inputs(self): """Search the multi-head attention modules' query, key, as well as value layers.""" linears = JitBasicSearcher.filter_static_code(self, self.flatten_static_graph, "aten::linear") @@ -550,10 +556,10 @@ def gather_mha_inputs(self): input_counts = {} # get all linear modules for linfo in linear_infos: - for input_name in linfo['input_names']: - if linfo['op_type'] == 'linear' and input_name in input_counts: + for input_name in linfo["input_names"]: + if linfo["op_type"] == "linear" and input_name in input_counts: input_counts[input_name] += 1 - elif linfo['op_type'] == 'linear' and input_name not in input_counts: + elif linfo["op_type"] == "linear" and input_name not in input_counts: input_counts[input_name] = 1 else: # op which is not linear, skip @@ -564,17 +570,17 @@ def gather_mha_inputs(self): if v >= 3: # attention's number input_counts_filtered[k] = v - else: + else: continue return input_counts_filtered - + def gather_linear_from_input(self, input_names: dict): """Gather query, key and value layers of the same self-attention module together.""" linear_clusters = {} linears = JitBasicSearcher.filter_static_code(self, self.flatten_static_graph, "aten::linear") for li in linears: linfo = JitBasicSearcher.analyze_jit_code(self, li) - for input_name in linfo['input_names']: + for input_name in linfo["input_names"]: if input_name in input_names: if input_name in linear_clusters: linear_clusters[input_name].append(li) @@ -589,7 +595,7 @@ def extract_qkv_from_linears(self, linears): Args: linears: A dict, key is input name, value is a list of linear layers jit code - + Return: A dict contains only qkv linear layers. """ @@ -599,8 +605,8 @@ def extract_qkv_from_linears(self, linears): # step 1: statistics of linears clusters with same shape. 
op_shape_lut = {} for linfo in linfos: - op_shape = linfo['op_shape'] - if op_shape_lut.get(op_shape, None) == None: + op_shape = linfo["op_shape"] + if op_shape_lut.get(op_shape, None) is None: op_shape_lut[op_shape] = 1 else: op_shape_lut[op_shape] += 1 @@ -613,8 +619,8 @@ def extract_qkv_from_linears(self, linears): # step 2: extract qkv layers qkv_linears = [] for linfo in linfos: - if linfo['op_shape'] in qkv_related_op_shape: - qkv_linears.append(linfo['op_trace']) + if linfo["op_shape"] in qkv_related_op_shape: + qkv_linears.append(linfo["op_trace"]) else: continue qkv_clusters[input_name] = qkv_linears @@ -630,15 +636,12 @@ def search_ffn_from_qkv(self, qkv_clusters): for n, m in self.model.named_modules(): if type(m).__name__ == "Linear": linear_lut.append(n) - # initialize the qkv data structure + # initialize the qkv data structure self_attn_list = [] for input_name in qkv_clusters: - self_attn = { - "qkv": qkv_clusters[input_name][:], - "ffn": [] - } + self_attn = {"qkv": qkv_clusters[input_name][:], "ffn": []} for idx in range(len(linear_lut)): - if idx >= 1 and (linear_lut[idx-1] in self_attn["qkv"]) and (linear_lut[idx] not in self_attn["qkv"]): + if idx >= 1 and (linear_lut[idx - 1] in self_attn["qkv"]) and (linear_lut[idx] not in self_attn["qkv"]): # this means we find the first linear layer after qkv self_attn["ffn"].append(linear_lut[idx]) break @@ -648,19 +651,18 @@ def search_ffn_from_qkv(self, qkv_clusters): del self_attn return self_attn_list - def search(self, split_qkv_ffn = True): + def search(self, split_qkv_ffn=True): """Operations called for entire searching process. Args: split_qkv_ffn: a bool. Whether to rearrange searched attention heads' linear layers. - if True: return two lists: one contains all query, key and value layers, + if True: return two lists: one contains all query, key and value layers, the other contains all forward layers. - if False: only return one list containing self-attention's linear layers, - query, key, value layers and forward layers are not splited. - + if False: only return one list containing self-attention's linear layers, + query, key, value layers and forward layers are not splited. + Return: two lists containing self-attention modules' layer names. 
- """ input_names_for_linears = self.gather_mha_inputs() linear_clusters = self.gather_linear_from_input(input_names_for_linears) @@ -679,7 +681,7 @@ def search(self, split_qkv_ffn = True): ffn_list = [] for item in self_attn_list: qkv_list += item["qkv"] - ffn_list += item['ffn'] + ffn_list += item["ffn"] return qkv_list, ffn_list def from_layer_name_to_object(self, mha_search_layers): @@ -710,18 +712,18 @@ def from_layer_name_to_object(self, mha_search_layers): for mha_search_layer in mha_search_layers: # copy layer names layer_obj = { - "qkv_name": mha_search_layer['qkv'][:], - "ffn_name": mha_search_layer['ffn'][:], - "mha_name": mha_search_layer['mha_name'][:], + "qkv_name": mha_search_layer["qkv"][:], + "ffn_name": mha_search_layer["ffn"][:], + "mha_name": mha_search_layer["mha_name"][:], } # obtain pytorch module - layer_obj['qkv_module'] = [get_attributes(self.model, layer_name) for layer_name in mha_search_layer['qkv']] - layer_obj['ffn_module'] = [get_attributes(self.model, layer_name) for layer_name in mha_search_layer['ffn']] + layer_obj["qkv_module"] = [get_attributes(self.model, layer_name) for layer_name in mha_search_layer["qkv"]] + layer_obj["ffn_module"] = [get_attributes(self.model, layer_name) for layer_name in mha_search_layer["ffn"]] # we can directly copy since we have already obtained this module before - layer_obj['mha_module'] = mha_search_layer['mha_module'][:] + layer_obj["mha_module"] = mha_search_layer["mha_module"][:] layer_objs.append(layer_obj) return layer_objs - + def obtain_mha_module(self, self_attention_list): """Return the attention module object (qkv & ffn's common module). @@ -747,17 +749,18 @@ def obtain_mha_module(self, self_attention_list): for idx in range(len(self_attention_list)): # get query layer name # get attn_output layer name - qkv_layer_name = self_attention_list[idx]['qkv'] - ffn_layer_name = self_attention_list[idx]['ffn'] + qkv_layer_name = self_attention_list[idx]["qkv"] + ffn_layer_name = self_attention_list[idx]["ffn"] # problematic implementations # mha_module_name = get_common_module(qkv_layer_name, ffn_layer_name) mha_module_name = get_common_module(qkv_layer_name[0], qkv_layer_name[-1]) - self_attention_list[idx]['mha_name'] = [mha_module_name] - self_attention_list[idx]['mha_module'] = [ - get_attributes(self.model, mha_module_name) for mha_module_name in self_attention_list[idx]['mha_name'] + self_attention_list[idx]["mha_name"] = [mha_module_name] + self_attention_list[idx]["mha_module"] = [ + get_attributes(self.model, mha_module_name) for mha_module_name in self_attention_list[idx]["mha_name"] ] return self_attention_list - + + class ClassifierHeadSearcher(object): """Static graph searcher for multi-head attention modules. @@ -766,7 +769,7 @@ class ClassifierHeadSearcher(object): Args: model (torch.nn.Module): The PyTorch model for searching. - + Attributes: model: The PyTorch model for searching. device: The model's current device type. 
@@ -780,8 +783,8 @@ def __init__(self, model): super(ClassifierHeadSearcher, self).__init__() self.model = model self.pruning_ops = ["Linear", "Conv2d"] - self.excluded_ops = ["Dropout"] # to be extended - + self.excluded_ops = ["Dropout"] # to be extended + def search(self, return_name=True): all_modules = [] all_lc_modules = [] @@ -793,8 +796,11 @@ def search(self, return_name=True): else: continue last_lc = all_lc_modules[-1] - if last_lc == all_modules[-1]: return last_lc - else: return None + if last_lc == all_modules[-1]: + return last_lc + else: + return None + class ClassifierHeadSearcherTF(object): """Static graph searcher for multi-head attention modules. @@ -804,7 +810,7 @@ class ClassifierHeadSearcherTF(object): Args: model (tf.keras.Model): The Keras model for searching. - + Attributes: model: The Keras model for searching. device: The model's current device type. @@ -818,8 +824,8 @@ def __init__(self, model): super(ClassifierHeadSearcherTF, self).__init__() self.model = model self.pruning_ops = ["Dense", "Conv2d"] - self.excluded_ops = ["Dropout"] # to be extended - + self.excluded_ops = ["Dropout"] # to be extended + def search(self, return_name=True): all_modules = [] all_lc_modules = [] @@ -831,6 +837,6 @@ def search(self, return_name=True): else: continue last_lc = all_lc_modules[-1] - if last_lc == all_modules[-1]: + if last_lc == all_modules[-1]: return last_lc - return None \ No newline at end of file + return None diff --git a/neural_compressor/compression/pruner/model_slim/weight_slim.py b/neural_compressor/compression/pruner/model_slim/weight_slim.py index 626d95b759a..169bf1ca589 100644 --- a/neural_compressor/compression/pruner/model_slim/weight_slim.py +++ b/neural_compressor/compression/pruner/model_slim/weight_slim.py @@ -16,8 +16,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ..utils import torch, logger -import random +import random + +from ..utils import logger, torch # since we have to modify the attribute's name in MHA module after slim, # we need to locate them automatically. 
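The classifier-head searchers above share one heuristic: a head is only reported when the model's very last leaf module is also its last Linear/Conv2d (Dense/Conv2d for Keras). A hedged, self-contained approximation of that check in plain PyTorch (the toy model is illustrative, and the real search method additionally skips excluded ops such as Dropout):

    import torch

    model = torch.nn.Sequential(
        torch.nn.Flatten(),
        torch.nn.Linear(32, 16),
        torch.nn.ReLU(),
        torch.nn.Linear(16, 10),   # candidate classifier head
    )
    all_modules, all_lc_modules = [], []
    for name, mod in model.named_modules():
        if len(list(mod.named_children())) > 0:
            continue                                   # skip containers, keep leaves
        all_modules.append(name)
        if type(mod).__name__ in ("Linear", "Conv2d"):
            all_lc_modules.append(name)
    # report the head only if the last leaf module is also the last Linear/Conv2d
    head = all_lc_modules[-1] if all_lc_modules and all_lc_modules[-1] == all_modules[-1] else None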
@@ -28,6 +29,7 @@ "hidden_size": ["all_head_size", "embed_dim", "hidden_size"], } + class PostCompressionUtils(object): """Operations library related to weight compression.""" @@ -36,7 +38,7 @@ def obtain_output_masks(tensor): """Obtain a lower dimension mask for an sparse weight matrix.""" # dim 1 is input channel tensor_reduce = torch.sum(tensor.abs(), 1) - mask_reduce = torch.where(tensor_reduce==0.0, 0, 1) + mask_reduce = torch.where(tensor_reduce == 0.0, 0, 1) return mask_reduce @staticmethod @@ -44,7 +46,7 @@ def obtain_input_masks(tensor): """Obtain a lower dimension mask for an sparse weight matrix.""" # dim 0 is output channel tensor_reduce = torch.sum(tensor.abs(), 0) - mask_reduce = torch.where(tensor_reduce==0.0, 0, 1) + mask_reduce = torch.where(tensor_reduce == 0.0, 0, 1) return mask_reduce @staticmethod @@ -52,10 +54,10 @@ def get_mha_output_indice(tensor, hidden_size, head_nums): """Obtain a lower dimension mask for an sparse weight matrix.""" head_size = hidden_size // head_nums tensor_reduce = torch.sum(tensor.abs(), 1) - mask_reduce = torch.where(tensor_reduce==0.0, 0, 1) + mask_reduce = torch.where(tensor_reduce == 0.0, 0, 1) mask_reduce_headwise = mask_reduce.reshape(mask_reduce.shape[0] // head_size, head_size).sum(1) / head_size mask_reduce_indice = torch.nonzero(torch.where(mask_reduce_headwise <= 0.00001, 1, 0) == 1).squeeze().tolist() - if isinstance(mask_reduce_indice, int): + if isinstance(mask_reduce_indice, int): # only one channel is pruned return [mask_reduce_indice] return mask_reduce_indice @@ -65,10 +67,10 @@ def get_mha_input_indice(tensor, hidden_size, head_nums): """Obtain a lower dimension mask for an sparse weight matrix.""" head_size = hidden_size // head_nums tensor_reduce = torch.sum(tensor.abs(), 0) - mask_reduce = torch.where(tensor_reduce==0.0, 0, 1) + mask_reduce = torch.where(tensor_reduce == 0.0, 0, 1) mask_reduce_headwise = mask_reduce.reshape(mask_reduce.shape[0] // head_size, head_size).sum(1) / head_size mask_reduce_indice = torch.nonzero(torch.where(mask_reduce_headwise <= 0.00001, 1, 0) == 1).squeeze().tolist() - if isinstance(mask_reduce_indice, int): + if isinstance(mask_reduce_indice, int): # only one channel is pruned return [mask_reduce_indice] return mask_reduce_indice @@ -92,7 +94,7 @@ def find_pruneable_indices(indice, n_heads, head_size=1, round_option=0): n_head: int. number of head to prune head_size: for head pruning, it is head size, for channel pruning, it is 1 round_option: if pruning channel number does not equals to 32x, (16x), round it to a 32x int - + Return: indice: the mask' zero-value elements indice indice_to_keep: the masks one-value elements indice @@ -118,9 +120,9 @@ def find_pruneable_indices(indice, n_heads, head_size=1, round_option=0): return indice, indice_to_keep @staticmethod - def prune_linear(layer, index, device, dim = 0, prune_bias = True): + def prune_linear(layer, index, device, dim=0, prune_bias=True): """Operation to compress a sparse linear layer's weight. - + Args: layer (torch.nn.Linear): a linear layer index (list): the indice which channels/heads should be kept @@ -128,11 +130,11 @@ def prune_linear(layer, index, device, dim = 0, prune_bias = True): prune_bias (bool): if output channel is pruned, bias should also be pruned. Return: - The same layer object whose weight (probability also bias) has been compressed. + The same layer object whose weight (probability also bias) has been compressed. 
""" index = index.to(device) _w = layer.weight.index_select(dim, index).clone().detach() - if layer.bias != None: + if layer.bias is not None: if prune_bias: _b = layer.bias[index].clone().detach() else: @@ -144,7 +146,7 @@ def prune_linear(layer, index, device, dim = 0, prune_bias = True): setattr(layer, "in_features", new_size[1]) setattr(layer, "out_features", new_size[0]) setattr(layer, "weight", torch.nn.Parameter(_w.clone())) - if _b != None: + if _b is not None: setattr(layer, "bias", torch.nn.Parameter(_b.clone())) else: setattr(layer, "bias", None) @@ -154,17 +156,18 @@ def prune_linear(layer, index, device, dim = 0, prune_bias = True): layer.weight.copy_(_w.contiguous()) layer.weight.requires_grad = True - if prune_bias and layer.bias != None: + if prune_bias and layer.bias is not None: layer.bias.requires_grad = False layer.bias.copy_(_b.contiguous()) layer.bias.requires_grad = True + class LinearCompression(object): """Class which automatically compresses two consecutive linear layers. - For two consecutive linear layer, when the second layer's input channel is pruned, - then the first layer's output channel can also be pruned, - while the second layer's output hidden state value is identical. + For two consecutive linear layer, when the second layer's input channel is pruned, + then the first layer's output channel can also be pruned, + while the second layer's output hidden state value is identical. for example, two consecutive linears have following structure. x = layer_1(input) x = act_fn(x) @@ -189,12 +192,12 @@ def __init__(self, root_linear, target_linears): self.target_linears = target_linears self.device = self.root_linear.weight.device self.log = { - 'root_before': [self.root_linear.out_features, self.root_linear.in_features], - 'target_before': [ + "root_before": [self.root_linear.out_features, self.root_linear.in_features], + "target_before": [ [linear_layer.out_features, linear_layer.in_features] for linear_layer in self.target_linears ], } - + def __call__(self, mask=None, round_value=32): """Operation to execute weight compression process. @@ -202,7 +205,6 @@ def __call__(self, mask=None, round_value=32): mask: the predefined mask of the second layer's input channel. if is None, the API automatically detects the sparse channel and generates the mask. 
round_value (int): if pruning channel number does not equals to 32x, (16x), round it to a 32x int - """ if mask is not None: root_linear_mask = mask.clone().to(self.device) @@ -210,30 +212,26 @@ def __call__(self, mask=None, round_value=32): root_linear_mask = PostCompressionUtils.obtain_input_masks(self.root_linear.weight) root_linear_indice_to_prune = PostCompressionUtils.get_mask_indices(root_linear_mask) _, root_linear_indice_to_keep = PostCompressionUtils.find_pruneable_indices( - root_linear_indice_to_prune, - self.root_linear.in_features, 1, round_value - ) # 1 refer to channel-wise pruning + root_linear_indice_to_prune, self.root_linear.in_features, 1, round_value + ) # 1 refer to channel-wise pruning # slim the root linear layer PostCompressionUtils.prune_linear( - self.root_linear, - root_linear_indice_to_keep, - device=self.device, dim=1, prune_bias=False + self.root_linear, root_linear_indice_to_keep, device=self.device, dim=1, prune_bias=False ) for target_linear in self.target_linears: PostCompressionUtils.prune_linear( - target_linear, - root_linear_indice_to_keep, - device=self.device, dim=0, prune_bias=True + target_linear, root_linear_indice_to_keep, device=self.device, dim=0, prune_bias=True ) # Summary: - self.log['root_after'] = [self.root_linear.out_features, self.root_linear.in_features] - self.log['target_after'] = [ + self.log["root_after"] = [self.root_linear.out_features, self.root_linear.in_features] + self.log["target_after"] = [ [linear_layer.out_features, linear_layer.in_features] for linear_layer in self.target_linears ] logger.info(f"linear compression: {self.log['root_before']} -> {self.log['root_after']}") - for idx in range(len(self.log['target_before'])): + for idx in range(len(self.log["target_before"])): logger.info(f"linear compression: {self.log['target_before'][idx]} -> {self.log['target_after'][idx]}") + class LinearCompressionIterator(object): """Pruner of a sequence of consecutive linear patterns. @@ -252,7 +250,6 @@ def __call__(self, masks=None, round_value=32): mask: the predefined masks of the second layers input channel. if is None, the API automatically detects the sparse channel and generates the mask. 
round_value (int): if pruning channel number does not equals to 32x, (16x), round it to a 32x int - """ # masks should have same length as layers patterns # self.linear_patterns: a list or dict @@ -262,7 +259,7 @@ def __call__(self, masks=None, round_value=32): mask_len = masks.shape[0] layer_idx = 0 for pattern in self.linear_patterns: - linear_pruner = LinearCompression(pattern['root_linear'], pattern['target_frontier_linears']) + linear_pruner = LinearCompression(pattern["root_linear"], pattern["target_frontier_linears"]) # if isinstance(self.linear_patterns, dict): # linear_pruner = LinearCompression(self.linear_patterns[pattern][0], self.linear_patterns[pattern][1]) # elif isinstance(self.linear_patterns, list): @@ -270,13 +267,14 @@ def __call__(self, masks=None, round_value=32): # else: # raise NotImplementedError # compression - if masks != None and layer_idx < len(masks): + if masks is not None and layer_idx < len(masks): linear_pruner(mask=masks[layer_idx], round_value=round_value) else: linear_pruner(round_value=round_value) layer_idx += 1 del linear_pruner - logger.info(f"Post pruning model slim finished.") + logger.info("Post pruning model slim finished.") + class MHACompression(object): def __init__(self, mha_object): @@ -290,14 +288,14 @@ def __init__(self, mha_object): 'qkv_module': [torch.nn.Linear, torch.nn.Linear, torch.nn.Linear], 'ffn_module': [torch.nn.Linear], 'mha_module': [torch.nn.Module] (keep not change), - } + } """ - self.qkv_name = mha_object['qkv_name'] # list - self.ffn_name = mha_object['ffn_name'] # list - self.mha_name = mha_object['mha_name'] # list - self.qkv = mha_object['qkv_module'] # list - self.ffn = mha_object['ffn_module'] # list - self.mha = mha_object['mha_module'] # list + self.qkv_name = mha_object["qkv_name"] # list + self.ffn_name = mha_object["ffn_name"] # list + self.mha_name = mha_object["mha_name"] # list + self.qkv = mha_object["qkv_module"] # list + self.ffn = mha_object["ffn_module"] # list + self.mha = mha_object["mha_module"] # list self.attributes_for_this_mha = self.check_mha_attributes(self.mha[0]) logger.info(f"Following attributes are hooked and might be modified: {self.attributes_for_this_mha}") @@ -317,7 +315,7 @@ def check_mha_attributes(self, mha: torch.nn.Module): if hasattr(mha, attr_name): attributes_for_this_mha[k] = attr_name for k, v in attributes_for_this_mha.items(): - if v == None: + if v is None: logger.warning(f"Cannot locate attributes {k} in {type(mha).__name__}, please set them manually.") raise NotImplementedError return attributes_for_this_mha @@ -328,13 +326,13 @@ def find_common_indice(self, d): common_indice = set(common_indice) & set(v) return list(common_indice) - def mask_mha_weights(self, head_mask = None): - head_size = getattr(self.mha[0], self.attributes_for_this_mha['head_size']) - head_nums = getattr(self.mha[0], self.attributes_for_this_mha['head_nums']) + def mask_mha_weights(self, head_mask=None): + head_size = getattr(self.mha[0], self.attributes_for_this_mha["head_size"]) + head_nums = getattr(self.mha[0], self.attributes_for_this_mha["head_nums"]) # check assert head_mask.numel() == head_nums, f"Module {self.mha_name}'s head num and head mask does not match." 
# extend the masks - ffn_mask = torch.repeat_interleave(head_mask, head_size, dim = -1) + ffn_mask = torch.repeat_interleave(head_mask, head_size, dim=-1) qkv_mask = ffn_mask.permute(1, 0) # mask the weight data for qkv_linear in self.qkv: @@ -343,29 +341,29 @@ def mask_mha_weights(self, head_mask = None): for ffn_linear in self.ffn: # 1 linears ffn_linear.weight.data = ffn_linear.weight.data * ffn_mask.to(self.device) - - def __call__(self, head_mask = None): - """ - for qkv, prune output channel, for output, prune input channel - four linear shares identical masks (attention mask) - """ + + def __call__(self, head_mask=None): + """For qkv, prune output channel, for output, prune input channel + four linear shares identical masks (attention mask)""" # obtain mha attributes - hidden_size = getattr(self.mha[0], self.attributes_for_this_mha['hidden_size']) - head_nums = getattr(self.mha[0], self.attributes_for_this_mha['head_nums']) - head_size = getattr(self.mha[0], self.attributes_for_this_mha['head_size']) + hidden_size = getattr(self.mha[0], self.attributes_for_this_mha["hidden_size"]) + head_nums = getattr(self.mha[0], self.attributes_for_this_mha["head_nums"]) + head_size = getattr(self.mha[0], self.attributes_for_this_mha["head_size"]) qkv_indice = [ PostCompressionUtils.get_mha_output_indice( - layer.weight, - hidden_size, + layer.weight, + hidden_size, head_nums, - ) for layer in self.qkv + ) + for layer in self.qkv ] ffn_indice = [ PostCompressionUtils.get_mha_input_indice( - layer.weight, - hidden_size, + layer.weight, + hidden_size, head_nums, - ) for layer in self.ffn + ) + for layer in self.ffn ] all_indice_to_prune = { "qkv": qkv_indice, @@ -374,7 +372,7 @@ def __call__(self, head_mask = None): all_indice_to_prune_list = [] for k, v in all_indice_to_prune.items(): all_indice_to_prune_list += v - + # alignment, take the least heads to prune # logger.info(all_indice_to_prune) prune_indice = self.find_common_indice(all_indice_to_prune_list) @@ -390,7 +388,7 @@ def __call__(self, head_mask = None): # Update hyper params and store pruned heads, this is critical for mha slim for mha in self.mha: - new_head_nums = getattr(mha, self.attributes_for_this_mha['head_nums']) - len(prune_indice) - new_hidden_size = getattr(mha, self.attributes_for_this_mha['head_size']) * new_head_nums - setattr(mha, self.attributes_for_this_mha['head_nums'], new_head_nums) - setattr(mha, self.attributes_for_this_mha['hidden_size'], new_hidden_size) + new_head_nums = getattr(mha, self.attributes_for_this_mha["head_nums"]) - len(prune_indice) + new_hidden_size = getattr(mha, self.attributes_for_this_mha["head_size"]) * new_head_nums + setattr(mha, self.attributes_for_this_mha["head_nums"], new_head_nums) + setattr(mha, self.attributes_for_this_mha["hidden_size"], new_hidden_size) diff --git a/neural_compressor/compression/pruner/patterns/__init__.py b/neural_compressor/compression/pruner/patterns/__init__.py index 99cc128840b..13335748b8e 100644 --- a/neural_compressor/compression/pruner/patterns/__init__.py +++ b/neural_compressor/compression/pruner/patterns/__init__.py @@ -24,16 +24,13 @@ modules = glob.glob(join(dirname(__file__), "*.py")) for f in modules: - if isfile(f) and not f.startswith('__') and not f.endswith('__init__.py'): + if isfile(f) and not f.startswith("__") and not f.endswith("__init__.py"): __import__(basename(f)[:-3], globals(), locals(), level=1) -FRAMEWORK = { - 'pytorch': 'pt', - 'keras': 'keras' -} +FRAMEWORK = {"pytorch": "pt", "keras": "keras"} -def get_pattern(config, modules, 
framework='pytorch'): +def get_pattern(config, modules, framework="pytorch"): """Get registered pattern class. Get a Pattern object from PATTERNS. @@ -51,7 +48,7 @@ def get_pattern(config, modules, framework='pytorch'): assert framework in FRAMEWORK.keys(), f"does not support {framework}, currently only support {FRAMEWORK.keys()}" name = config.pattern - name = name.split('_')[-1] + name = name.split("_")[-1] pattern = FRAMEWORK[framework] if "x" in name: pattern += "NxM" diff --git a/neural_compressor/compression/pruner/patterns/base.py b/neural_compressor/compression/pruner/patterns/base.py index 678cc2ca3cd..e0f72933958 100644 --- a/neural_compressor/compression/pruner/patterns/base.py +++ b/neural_compressor/compression/pruner/patterns/base.py @@ -1,4 +1,4 @@ -"""pruning patterns.""" +"""Pruning patterns.""" # !/usr/bin/env python # -*- coding: utf-8 -*- # @@ -19,7 +19,8 @@ from collections import namedtuple import numpy as np -from ..utils import torch, tf + +from ..utils import tf, torch PATTERNS = {} @@ -45,7 +46,7 @@ def register(pattern): return register -SparsityInfo = namedtuple("SparsityInfo", ['zero_cnt', 'total_cnt', 'sparsity_ratio']) +SparsityInfo = namedtuple("SparsityInfo", ["zero_cnt", "total_cnt", "sparsity_ratio"]) class ProgressivePatternUtils(object): @@ -118,8 +119,8 @@ def update_new_added_masks(pre_masks, cur_masks): for key in pre_masks.keys(): pre_mask = pre_masks[key] cur_mask = cur_masks[key] - zero = torch.tensor([0.]).to(pre_mask.device) - one = torch.tensor([1.]).to(cur_mask.device) + zero = torch.tensor([0.0]).to(pre_mask.device) + one = torch.tensor([1.0]).to(cur_mask.device) new_added_masks[key] = torch.where(pre_mask == cur_mask, one, zero) return new_added_masks @@ -167,18 +168,16 @@ def update_progressive_masks_global_scores(pre_masks, cur_masks, scores, progres score = scores[key] new_added_filter = 1 - new_added_mask score_masked = (score * new_added_filter).abs() - zero = torch.tensor([0.]).to(score.device) - one = torch.tensor([1.]).to(score.device) + zero = torch.tensor([0.0]).to(score.device) + one = torch.tensor([1.0]).to(score.device) progressive_mask = (new_added_mask + torch.where(score_masked <= threshold, zero, one)) * pre_masks[key] progressive_masks[key] = progressive_mask return progressive_masks @staticmethod - def update_progressive_masks_local_scores(pre_masks, - cur_masks, - scores, - progressive_step, - progressive_configs): # pragma: no cover + def update_progressive_masks_local_scores( + pre_masks, cur_masks, scores, progressive_step, progressive_configs + ): # pragma: no cover """Generate the progressive masks. Args: @@ -200,11 +199,7 @@ def update_progressive_masks_local_scores(pre_masks, cur_masks_for_this = {key: cur_masks[key]} scores_for_this = {key: scores[key]} progressive_masks_for_this = ProgressivePatternUtils.update_progressive_masks_global_scores( - pre_masks_for_this, - cur_masks_for_this, - scores_for_this, - progressive_step, - progressive_configs + pre_masks_for_this, cur_masks_for_this, scores_for_this, progressive_step, progressive_configs ) progressive_masks.update(progressive_masks_for_this) return progressive_masks @@ -223,21 +218,18 @@ def update_progressive_masks_scores_order(pre_masks, cur_masks, scores, progress Returns: A dict{"layer_name": Tensor} that stores the masks generated in progressive pruning. 
""" - if progressive_configs['use_global']: - return ProgressivePatternUtils.update_progressive_masks_global_scores(pre_masks, cur_masks, scores, \ - progressive_step, progressive_configs) + if progressive_configs["use_global"]: + return ProgressivePatternUtils.update_progressive_masks_global_scores( + pre_masks, cur_masks, scores, progressive_step, progressive_configs + ) else: - return ProgressivePatternUtils.update_progressive_masks_local_scores(pre_masks, cur_masks, scores, \ - progressive_step, progressive_configs) + return ProgressivePatternUtils.update_progressive_masks_local_scores( + pre_masks, cur_masks, scores, progressive_step, progressive_configs + ) @staticmethod def update_progressive_masks_linear_order( - pre_masks, - cur_masks, - scores, - progressive_step, - progressive_configs: dict, - block_sizes: dict + pre_masks, cur_masks, scores, progressive_step, progressive_configs: dict, block_sizes: dict ): """Generate the progressive masks. @@ -264,14 +256,24 @@ def update_progressive_masks_linear_order( # progressive masks are generated in the direction of block's large dim. if block_size[0] >= block_size[1]: # NxM (N>=M), output channel pruning - new_shape = [shape[0] // block_size[0], progressive_steps, block_size[0] // progressive_steps, - shape[1] // block_size[1], block_size[1]] + new_shape = [ + shape[0] // block_size[0], + progressive_steps, + block_size[0] // progressive_steps, + shape[1] // block_size[1], + block_size[1], + ] new_added_mask_reshape = new_added_mask.reshape(new_shape) new_added_mask_reshape[:, progressive_step:, :, :, :] = 1.0 else: # NxM (N= gap_cnt: return need_adjust, adjust_sparsity_ratio else: @@ -488,7 +500,7 @@ def adjust_ratio(self, masks: dict, layer_name: str, key_new_sparsity: SparsityI class PytorchBasePattern(BasePattern): def __init__(self, config, modules): super().__init__(config, modules) - # If you need to use it, you can set it in example + # If you need to use it, you can set it in example # and start the environment variable: exaport CUBLAS_WORKSPACE_CONFIG=:'4096:8' # torch.use_deterministic_algorithms(True, warn_only=True) @@ -502,7 +514,7 @@ def reduce_tensor(self, data, dim): Returns: The reduced tensor. """ - name = self.config['criterion_reduce_type'] + name = self.config["criterion_reduce_type"] if name == "mean": return torch.mean(data, dim=dim) elif name == "sum": @@ -576,7 +588,7 @@ def get_sparsity_ratio_progressive(self, pre_masks, return_dict=False): zero_cnt += float(torch.sum(pre_masks[key] == 0.0).data.item()) total_cnt += float(pre_masks[key].numel()) - return (zero_cnt / total_cnt) + return zero_cnt / total_cnt def get_pattern_lock_masks(self, modules): """Obtain masks from original weight map according the pattern and weights' zero positions. @@ -617,7 +629,7 @@ def get_sparsity_ratio_each_layer(self, masks): continue reduced_mask = masks[key] if self.block else self.get_reduced_masks_from_data(masks[key], key) - zero_cnt = (int(torch.sum(reduced_mask == 0.0).data.item())) + zero_cnt = int(torch.sum(reduced_mask == 0.0).data.item()) total_cnt = int(reduced_mask.numel()) sparsity_ratio = float(zero_cnt) / total_cnt val = SparsityInfo(zero_cnt, total_cnt, sparsity_ratio) @@ -643,7 +655,7 @@ def reduce_tensor(self, data, dim): Returns: The reduced tensor. 
""" - name = self.config['criterion_reduce_type'] + name = self.config["criterion_reduce_type"] if name == "mean": return tf.math.reduce_mean(data, dim) elif name == "sum": @@ -667,8 +679,8 @@ def get_single_mask_per_target_ratio(self, score, exact_sparsity_ratio): k = int(exact_sparsity_ratio * flattern_score.size) threshold = np.partition(flattern_score, kth=k)[k] if not k < 1: - zero = tf.convert_to_tensor([0.]) - one = tf.convert_to_tensor([1.]) + zero = tf.convert_to_tensor([0.0]) + one = tf.convert_to_tensor([1.0]) mask = tf.where(score <= threshold, zero, one) else: mask = tf.ones_like(score.shape) @@ -704,4 +716,3 @@ def get_sparsity_ratio_each_layer(self, masks): sparsity_ratio = float(zero_cnts) / total_cnts return infos, SparsityInfo(zero_cnts, total_cnts, sparsity_ratio) - diff --git a/neural_compressor/compression/pruner/patterns/mha.py b/neural_compressor/compression/pruner/patterns/mha.py index 7897f7f2b9d..6df381635bb 100644 --- a/neural_compressor/compression/pruner/patterns/mha.py +++ b/neural_compressor/compression/pruner/patterns/mha.py @@ -15,13 +15,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from .base import (register_pattern, - PytorchBasePattern - ) from ..utils import torch +from .base import PytorchBasePattern, register_pattern -@register_pattern('ptMHA') +@register_pattern("ptMHA") class PatternMHA(PytorchBasePattern): """Pruning Pattern. @@ -44,7 +42,7 @@ def __init__(self, config, modules=None): def get_masks_global(self, scores, target_sparsity_ratio, pre_masks): # gather all score items into one tensor - if target_sparsity_ratio <= .0: + if target_sparsity_ratio <= 0.0: return pre_masks flatten_score = torch.cat(list(scores.values())).flatten() k = int(target_sparsity_ratio * flatten_score.numel()) @@ -52,8 +50,8 @@ def get_masks_global(self, scores, target_sparsity_ratio, pre_masks): return pre_masks threshold, _ = torch.kthvalue(flatten_score, k) head_masks = {} - zero = torch.tensor([0.]).to(threshold.device) - one = torch.tensor([1.]).to(threshold.device) + zero = torch.tensor([0.0]).to(threshold.device) + one = torch.tensor([1.0]).to(threshold.device) for mha_name, mha_score in scores.items(): head_masks[mha_name] = torch.where(mha_score <= threshold, zero, one).permute(1, 0) head_masks[mha_name] = head_masks[mha_name].bool() diff --git a/neural_compressor/compression/pruner/patterns/ninm.py b/neural_compressor/compression/pruner/patterns/ninm.py index 5fce8b9723f..d31e6b5204e 100644 --- a/neural_compressor/compression/pruner/patterns/ninm.py +++ b/neural_compressor/compression/pruner/patterns/ninm.py @@ -15,14 +15,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from .base import (register_pattern, - PytorchBasePattern, - SparsityInfo, - ProgressivePatternUtils) -from ..utils import logger, torch, tf, nn +from ..utils import logger, nn, tf, torch +from .base import ProgressivePatternUtils, PytorchBasePattern, SparsityInfo, register_pattern -@register_pattern('ptN:M') +@register_pattern("ptN:M") class PytorchPatternNInM(PytorchBasePattern): """Pruning Pattern. 
@@ -41,9 +38,9 @@ class PytorchPatternNInM(PytorchBasePattern): def __init__(self, config, modules): """Initialize the basic pruning unit of N:M pattern.""" super(PytorchPatternNInM, self).__init__(config, modules) - pattern = self.pattern.split('_')[-1] - self.N = int(pattern.split(':')[0]) - self.M = int(pattern.split(':')[1]) # m is bigger + pattern = self.pattern.split("_")[-1] + self.N = int(pattern.split(":")[0]) + self.M = int(pattern.split(":")[1]) # m is bigger self.check_layer_validity(self.modules, (self.N, self.M)) def check_layer_validity(self, datas: dict, block_size: tuple): @@ -135,8 +132,7 @@ def get_sparsity_ratio(self, pre_masks, return_dict=False): sparsity_ratio = float(zero_cnt) / total_cnt * self.N / self.M if return_dict: - return {"sparsity_ratio": sparsity_ratio, "zero_cnt": zero_cnt, - "total_cnt": total_cnt} + return {"sparsity_ratio": sparsity_ratio, "zero_cnt": zero_cnt, "total_cnt": total_cnt} else: return sparsity_ratio @@ -203,7 +199,7 @@ def reshape_reduced_to_orig(self, data, key, orig_shape): """ data = data.repeat_interleave(self.M, dim=-1) return self._reshape_2dims_to_orig(data, orig_shape) - + def get_least_ninm_masks(self, scores): least_ninm_masks = {} for key in scores.keys(): @@ -215,7 +211,6 @@ def get_least_ninm_masks(self, scores): mask = self.get_least_ninm_mask_from_data(current_score) least_ninm_masks[key] = mask return least_ninm_masks - def reduce_score(self, score, key, force=False): if not force: @@ -283,12 +278,11 @@ def get_ele_mask_per_threshold(self, score, threshold, block_size, least_ninm_ma mask = torch.where(score <= threshold, zero, one) mask = mask.repeat_interleave(block_size[1], dim=-1) # both zero will be zero - mask = (mask + least_ninm_mask) + mask = mask + least_ninm_mask mask = torch.where(mask <= 0, zero, one) return mask - def get_masks_global(self, scores, cur_target_sparsity_ratio, pre_masks, - keep_exact_sparsity_ratio=True): + def get_masks_global(self, scores, cur_target_sparsity_ratio, pre_masks, keep_exact_sparsity_ratio=True): """Generate masks for layers. Gather all layer's scores together and calculate a common threshold. 
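For the ptN:M pattern being reformatted here, the per-block selection can be pictured as follows: inside every contiguous group of M weights along the input dimension, the N smallest-magnitude entries are the pruning candidates. A hedged sketch (shapes and the plain magnitude criterion are illustrative; the real pattern routes scores through reduce_score and the registered criterion):

    import torch

    N, M = 2, 4
    weight = torch.randn(8, 16)                      # out_features x in_features
    blocks = weight.abs().reshape(8, 16 // M, M)     # group M weights along the input dim
    # indices of the N smallest-magnitude entries inside each block
    _, prune_idx = torch.topk(blocks, N, dim=-1, largest=False)
    mask = torch.ones_like(blocks)
    mask.scatter_(-1, prune_idx, 0.0)
    mask = mask.reshape(weight.shape)                # 1 = keep, 0 = prune
    sparsity = (mask == 0).float().mean().item()     # equals N / M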
@@ -327,19 +321,23 @@ def get_masks_global(self, scores, cur_target_sparsity_ratio, pre_masks, total_cnt = info["total_cnt"] current_sparsity_ratio = float(zero_cnt) / total_cnt key_new_sparsity = SparsityInfo(zero_cnt, total_cnt, current_sparsity_ratio) - need_adjust, adjust_ratio = self.adjust_ratio(masks, key, key_new_sparsity, - self.max_sparsity_ratio_per_op * self.M / self.N, - self.min_sparsity_ratio_per_op * self.M / self.N, - self.target_sparsity_ratio * self.M / self.N) + need_adjust, adjust_ratio = self.adjust_ratio( + masks, + key, + key_new_sparsity, + self.max_sparsity_ratio_per_op * self.M / self.N, + self.min_sparsity_ratio_per_op * self.M / self.N, + self.target_sparsity_ratio * self.M / self.N, + ) if need_adjust: self.keep_mask_layers[key] = True masks[key] = self.get_single_mask_per_target_ratio(new_scores[key], adjust_ratio) masks[key] = masks[key].repeat_interleave(self.M, dim=-1) # both zero will be zero - masks[key] = (masks[key] + least_ninm_masks[key]) - zero = torch.tensor([0.]).to(score.device) - one = torch.tensor([1.]).to(score.device) + masks[key] = masks[key] + least_ninm_masks[key] + zero = torch.tensor([0.0]).to(score.device) + one = torch.tensor([1.0]).to(score.device) masks[key] = torch.where(masks[key] <= 0, zero, one) if keep_exact_sparsity_ratio: zero_cnt = self.get_sparsity_ratio({key: masks[key]}, return_dict=True)["zero_cnt"] @@ -364,7 +362,7 @@ def get_masks_global(self, scores, cur_target_sparsity_ratio, pre_masks, mask = self._reshape_2dims_to_orig(mask, orig_shape) masks[key] = mask layer_ratio = torch.sum(masks[key] == 0.0).data.item() / masks[key].numel() - logger.info(f'layer {key} sparsity_ratio is {layer_ratio}') + logger.info(f"layer {key} sparsity_ratio is {layer_ratio}") return masks def get_pattern_lock_masks(self, modules): @@ -390,16 +388,18 @@ def get_pattern_lock_masks(self, modules): return pattern_lock_masks def update_progressive_masks(self, pre_masks, cur_masks, scores, progressive_step, progressive_configs): - assert progressive_configs['progressive_type'] == "scores", "N:M progressive pruning only supports 'scores'." + assert progressive_configs["progressive_type"] == "scores", "N:M progressive pruning only supports 'scores'." 
# we only have to handle global score or local score new_scores = {} for key in scores.keys(): new_scores[key] = self.reshape_reduced_to_orig(scores[key], key, pre_masks[key].shape) - return ProgressivePatternUtils.update_progressive_masks_scores_order(pre_masks, cur_masks, new_scores, - progressive_step, progressive_configs) - - def fasterprune(self, gpt, blocksize=128, percdamp=.01): + return ProgressivePatternUtils.update_progressive_masks_scores_order( + pre_masks, cur_masks, new_scores, progressive_step, progressive_configs + ) + + def fasterprune(self, gpt, blocksize=128, percdamp=0.01): import transformers + W = gpt.module.weight.data.clone() dev = gpt.dev rows = gpt.rows @@ -414,13 +414,13 @@ def fasterprune(self, gpt, blocksize=128, percdamp=.01): dead = torch.diag(H) == 0 H[dead, dead] = 1 W[:, dead] = 0 - + Losses = torch.zeros(rows, device=dev) - damp = percdamp * torch.mean(torch.diag(H)) # λI + damp = percdamp * torch.mean(torch.diag(H)) # λI diag = torch.arange(columns, device=dev) - H[diag, diag] += damp # H = (X*X.t() + λI) - H = torch.linalg.cholesky(H) # te default is lower triangle + H[diag, diag] += damp # H = (X*X.t() + λI) + H = torch.linalg.cholesky(H) # te default is lower triangle H = torch.cholesky_inverse(H) H = torch.linalg.cholesky(H, upper=True) Hinv = H @@ -444,14 +444,14 @@ def fasterprune(self, gpt, blocksize=128, percdamp=.01): d = Hinv1[i, i] if N != 0 and i % M == 0: - tmp = W1[:, i:(i + M)] ** 2 / (torch.diag(Hinv1)[i:(i + M)].reshape((1, -1))) ** 2 + tmp = W1[:, i : (i + M)] ** 2 / (torch.diag(Hinv1)[i : (i + M)].reshape((1, -1))) ** 2 mask1.scatter_(1, i + torch.topk(tmp, N, dim=1, largest=False)[1], True) q = w.clone() q[mask1[:, i]] = 0 Q1[:, i] = q - Losses1[:, i] = (w - q) ** 2 / d ** 2 + Losses1[:, i] = (w - q) ** 2 / d**2 err1 = (w - q) / d W1[:, i:] -= err1.unsqueeze(1).matmul(Hinv1[i, i:].unsqueeze(0)) @@ -469,5 +469,3 @@ def fasterprune(self, gpt, blocksize=128, percdamp=.01): module.weight.data = W.reshape(module.weight.shape).to(dtype=module.weight.data.dtype) if torch.cuda.is_available(): torch.cuda.empty_cache() - - diff --git a/neural_compressor/compression/pruner/patterns/nxm.py b/neural_compressor/compression/pruner/patterns/nxm.py index a5d6408a381..6403aee43e4 100644 --- a/neural_compressor/compression/pruner/patterns/nxm.py +++ b/neural_compressor/compression/pruner/patterns/nxm.py @@ -17,15 +17,11 @@ # limitations under the License. import numpy as np -from .base import (register_pattern, - PytorchBasePattern, - KerasBasePattern, - SparsityInfo, - ProgressivePatternUtils) +from ..utils import logger, nn, tf, torch +from .base import KerasBasePattern, ProgressivePatternUtils, PytorchBasePattern, SparsityInfo, register_pattern -from ..utils import logger, torch, tf, nn -@register_pattern('ptNxM') +@register_pattern("ptNxM") class PytorchPatternNxM(PytorchBasePattern): """Pruning Pattern. 
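The fasterprune hunks above follow the SparseGPT recipe: dampen the calibration Hessian H = X Xᵀ with λ = percdamp · mean(diag(H)), then invert it through Cholesky factorizations before the block-wise weight updates. A compact sketch of just that regularization step (X is a made-up calibration activation matrix):

    import torch

    percdamp = 0.01
    X = torch.randn(16, 64)                                  # (columns, n_samples), illustrative
    H = X @ X.t()                                            # calibration Hessian
    damp = percdamp * torch.mean(torch.diag(H))              # lambda
    H[torch.arange(16), torch.arange(16)] += damp            # H + lambda * I
    L = torch.linalg.cholesky(H)                             # lower-triangular factor
    Hinv = torch.cholesky_inverse(L)                         # (H + lambda * I)^-1
    Hinv_upper = torch.linalg.cholesky(Hinv, upper=True)     # upper factor used in the updates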
@@ -44,15 +40,15 @@ class PytorchPatternNxM(PytorchBasePattern): def __init__(self, config, modules): """Initialize the basic pruning unit of NXM pattern.""" super().__init__(config, modules) - pattern = self.pattern.split('_')[-1] - self.N = pattern.split('x')[0] - self.M = pattern.split('x')[1] + pattern = self.pattern.split("_")[-1] + self.N = pattern.split("x")[0] + self.M = pattern.split("x")[1] if self.N == "channel": # channel-wise pruning mode self.block_size = ["channel", int(self.M)] elif self.M == "channel": # channel-wise pruning mode self.block_size = [int(self.N), "channel"] else: - self.block_size = [int(pattern.split('x')[0]), int(pattern.split('x')[1])] + self.block_size = [int(pattern.split("x")[0]), int(pattern.split("x")[1])] self.total_params_cnt = -1 self.block_size = self.get_block_size_dict() @@ -133,9 +129,10 @@ def get_sparsity_ratio(self, pre_masks, return_dict=False): for key in pre_masks.keys(): if key in self.invalid_layers: continue - reduced_mask = pre_masks[key].float() if self.block \ - else self.get_reduced_masks_from_data(pre_masks[key].float(), key) - zero_cnt += (int(torch.sum(reduced_mask == 0.0).data.item())) + reduced_mask = ( + pre_masks[key].float() if self.block else self.get_reduced_masks_from_data(pre_masks[key].float(), key) + ) + zero_cnt += int(torch.sum(reduced_mask == 0.0).data.item()) total_cnt += int(reduced_mask.numel()) if total_cnt == 0: sparsity_ratio = 0.0 @@ -181,8 +178,7 @@ def _reshape_2dims_to_orig(self, data, orig_shape): Reshaped data. """ if len(orig_shape) == 4: - data = data.reshape(orig_shape[0], orig_shape[2], orig_shape[3], - orig_shape[1]) + data = data.reshape(orig_shape[0], orig_shape[2], orig_shape[3], orig_shape[1]) if isinstance(data, np.ndarray): # pragma: no cover data = np.transpose(data, (0, 3, 1, 2)) else: @@ -208,8 +204,7 @@ def reshape_orig_to_pattern(self, data, key): block_size = self.block_size[key] data = self._reshape_orig_to_2dims(data) shape = data.shape - new_shape = [shape[0] // block_size[0], block_size[0], shape[1] // block_size[1], - block_size[1]] + new_shape = [shape[0] // block_size[0], block_size[0], shape[1] // block_size[1], block_size[1]] data = data.reshape(new_shape) return data @@ -232,7 +227,7 @@ def reshape_reduced_to_orig(self, data, key, orig_shape): data = data.repeat_interleave(block_size[0], dim=0).repeat_interleave(block_size[1], dim=-1) data = self._reshape_2dims_to_orig(data, orig_shape) return data - + def reduce_score(self, score, key, force=False): """Recalculate the pruning score after reducing the data. @@ -289,8 +284,7 @@ def get_mask_per_threshold(self, score, threshold, block_size): mask = mask.float() return mask - def get_masks_global(self, scores, cur_target_sparsity_ratio, pre_masks, - keep_exact_sparsity_ratio=True): + def get_masks_global(self, scores, cur_target_sparsity_ratio, pre_masks, keep_exact_sparsity_ratio=True): """Generate masks for layers. Gather all layer's scores together and calculate a common threshold. 
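The NxM pattern reshaping above boils down to scoring each N x M block of a weight matrix, thresholding the block scores, and expanding the block mask back to the weight's shape. A hedged sketch using summed magnitude as the block score (the patch instead goes through reduce_score / criterion_reduce_type, and supports "channel" block dims):

    import torch

    N, M = 2, 2
    weight = torch.randn(8, 8)
    blocks = weight.abs().reshape(8 // N, N, 8 // M, M)       # one N x M tile per block
    block_score = blocks.sum(dim=(1, 3))                      # one score per block
    threshold = block_score.flatten().kthvalue(8)[0]          # prune 8 of the 16 blocks
    block_mask = (block_score > threshold).float()
    # expand the block mask back to the original weight shape
    mask = block_mask.repeat_interleave(N, dim=0).repeat_interleave(M, dim=-1)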
@@ -312,7 +306,7 @@ def get_masks_global(self, scores, cur_target_sparsity_ratio, pre_masks, k_blockwise = self.update_residual_cnt(masks, cur_target_sparsity_ratio) if k_blockwise <= 0: return masks - new_scores = scores # if self.block else self.reduce_scores(scores) + new_scores = scores # if self.block else self.reduce_scores(scores) not_exceed_layers = [] residual_k = k_blockwise if self.min_sparsity_ratio_per_op > 0: @@ -335,10 +329,14 @@ def get_masks_global(self, scores, cur_target_sparsity_ratio, pre_masks, total_cnt = info["total_cnt"] current_sparsity_ratio = float(zero_cnt) / total_cnt key_new_sparsity = SparsityInfo(zero_cnt, total_cnt, current_sparsity_ratio) - need_adjust, adjust_ratio = self.adjust_ratio(masks, key, key_new_sparsity, - self.max_sparsity_ratio_per_op, - self.min_sparsity_ratio_per_op, - self.target_sparsity_ratio) + need_adjust, adjust_ratio = self.adjust_ratio( + masks, + key, + key_new_sparsity, + self.max_sparsity_ratio_per_op, + self.min_sparsity_ratio_per_op, + self.target_sparsity_ratio, + ) if need_adjust: # uptade status self.keep_mask_layers[key] = True @@ -350,7 +348,7 @@ def get_masks_global(self, scores, cur_target_sparsity_ratio, pre_masks, residual_k -= zero_cnt else: masks[key] = mask - + if not self.block: masks[key] = masks[key].bool() if not keep_exact_sparsity_ratio: @@ -366,7 +364,7 @@ def get_masks_global(self, scores, cur_target_sparsity_ratio, pre_masks, mask = self._reshape_2dims_to_orig(mask, orig_shape) masks[key] = mask layer_ratio = torch.sum(masks[key] == 0.0).data.item() / masks[key].numel() - logger.info(f'{key} sparsity is {layer_ratio}') + logger.info(f"{key} sparsity is {layer_ratio}") return masks def get_pattern_lock_masks(self, modules): @@ -408,8 +406,9 @@ def register_block_masks(self): module = self.modules[key] weight = module.weight if type(module).__name__ not in ["Linear"]: - logger.warning(f"Currently only support Linear block mask pruning," - f"{type(module).__name__} won't be pruned.") + logger.warning( + f"Currently only support Linear block mask pruning," f"{type(module).__name__} won't be pruned." + ) continue block_mask = self.get_reduced_masks_from_data(weight.detach(), key).to(dtype=weight.dtype) masks[key] = block_mask @@ -424,8 +423,12 @@ def mask_block_weights(self, masks): module = self.modules[key] block_size = self.block_size[key] org_shape = module.weight.shape - mask = masks[key].data.repeat_interleave(\ - block_size[0], dim=0).repeat_interleave(block_size[1], dim=-1).to(module.weight.device) + mask = ( + masks[key] + .data.repeat_interleave(block_size[0], dim=0) + .repeat_interleave(block_size[1], dim=-1) + .to(module.weight.device) + ) reshaped_weight = self._reshape_orig_to_2dims(module.weight.data) * mask module.weight.data = self._reshape_2dims_to_orig(reshaped_weight, org_shape) @@ -442,22 +445,24 @@ def update_progressive_masks(self, pre_masks, cur_masks, scores, progressive_ste Returns: A dict{"layer_name": Tensor} that stores the masks generated in progressive pruning. 
""" - score_or_linear = progressive_configs['progressive_type'] # "scores" or "linear" + score_or_linear = progressive_configs["progressive_type"] # "scores" or "linear" new_scores = {} for key in scores.keys(): new_scores[key] = self.reshape_reduced_to_orig(scores[key], key, pre_masks[key].shape) if score_or_linear == "scores": - return ProgressivePatternUtils.update_progressive_masks_scores_order(pre_masks, cur_masks, new_scores, - progressive_step, progressive_configs) + return ProgressivePatternUtils.update_progressive_masks_scores_order( + pre_masks, cur_masks, new_scores, progressive_step, progressive_configs + ) elif score_or_linear == "linear": - return ProgressivePatternUtils.update_progressive_masks_linear_order(pre_masks, cur_masks, new_scores, - progressive_step, progressive_configs, - self.block_size) + return ProgressivePatternUtils.update_progressive_masks_linear_order( + pre_masks, cur_masks, new_scores, progressive_step, progressive_configs, self.block_size + ) else: raise NotImplementedError - - def fasterprune(self, gpt, blocksize=128, percdamp=.01): + + def fasterprune(self, gpt, blocksize=128, percdamp=0.01): import transformers + sparsity = self.target_sparsity_ratio W = gpt.module.weight.data.clone() dev = gpt.dev @@ -475,10 +480,10 @@ def fasterprune(self, gpt, blocksize=128, percdamp=.01): W[:, dead] = 0 Losses = torch.zeros(rows, device=dev) - damp = percdamp * torch.mean(torch.diag(H)) # λI + damp = percdamp * torch.mean(torch.diag(H)) # λI diag = torch.arange(columns, device=dev) - H[diag, diag] += damp # H = (X*X.t() + λI) - H = torch.linalg.cholesky(H) # the default is lower triangle + H[diag, diag] += damp # H = (X*X.t() + λI) + H = torch.linalg.cholesky(H) # the default is lower triangle H = torch.cholesky_inverse(H) H = torch.linalg.cholesky(H, upper=True) Hinv = H @@ -492,7 +497,7 @@ def fasterprune(self, gpt, blocksize=128, percdamp=.01): Losses1 = torch.zeros_like(W1) Hinv1 = Hinv[i1:i2, i1:i2] - tmp = W1 ** 2 / (torch.diag(Hinv1).reshape((1, -1))) ** 2 + tmp = W1**2 / (torch.diag(Hinv1).reshape((1, -1))) ** 2 thresh = torch.sort(tmp.flatten())[0][int(tmp.numel() * sparsity)] mask1 = tmp <= thresh @@ -503,7 +508,7 @@ def fasterprune(self, gpt, blocksize=128, percdamp=.01): q[mask1[:, i]] = 0 Q1[:, i] = q - Losses1[:, i] = (w - q) ** 2 / d ** 2 + Losses1[:, i] = (w - q) ** 2 / d**2 err1 = (w - q) / d W1[:, i:] -= err1.unsqueeze(1).matmul(Hinv1[i, i:].unsqueeze(0)) @@ -523,7 +528,7 @@ def fasterprune(self, gpt, blocksize=128, percdamp=.01): torch.cuda.empty_cache() -@register_pattern('kerasNxM') +@register_pattern("kerasNxM") class KerasPatternNxM(KerasBasePattern): """Pruning Pattern. 
@@ -542,15 +547,15 @@ class KerasPatternNxM(KerasBasePattern): def __init__(self, config, modules): """Initialize the basic pruning unit of NXM pattern.""" super().__init__(config, modules) - pattern = self.pattern.split('_')[-1] - self.N = pattern.split('x')[0] - self.M = pattern.split('x')[1] + pattern = self.pattern.split("_")[-1] + self.N = pattern.split("x")[0] + self.M = pattern.split("x")[1] if self.N == "channel": # channel-wise pruning mode self.block_size = ["channel", int(self.M)] elif self.M == "channel": # channel-wise pruning mode self.block_size = [int(self.N), "channel"] else: - self.block_size = [int(pattern.split('x')[0]), int(pattern.split('x')[1])] + self.block_size = [int(pattern.split("x")[0]), int(pattern.split("x")[1])] self.total_params_cnt = -1 self.block_size = self.get_block_size_dict() @@ -670,8 +675,7 @@ def _reshape_2dims_to_orig(self, data, orig_shape): Reshaped data. """ if len(orig_shape) == 4: - data = data.reshape(orig_shape[0], orig_shape[2], orig_shape[3], - orig_shape[1]) + data = data.reshape(orig_shape[0], orig_shape[2], orig_shape[3], orig_shape[1]) if isinstance(data, np.ndarray): data = np.transpose(data, (0, 3, 1, 2)) else: @@ -691,8 +695,7 @@ def reshape_orig_to_pattern(self, data, key): block_size = self.block_size[key] data = self._reshape_orig_to_2dims(data) shape = data.shape - new_shape = [shape[0] // block_size[0], block_size[0], shape[1] // block_size[1], - block_size[1]] + new_shape = [shape[0] // block_size[0], block_size[0], shape[1] // block_size[1], block_size[1]] data = data.reshape(new_shape) return data @@ -740,8 +743,8 @@ def reduce_scores(self, scores): def get_mask_per_threshold(self, score, threshold, block_size): """Get the mask per threshold.""" - zero = tf.convert_to_tensor([0.]) - one = tf.convert_to_tensor([1.]) + zero = tf.convert_to_tensor([0.0]) + one = tf.convert_to_tensor([1.0]) mask = tf.where(score <= threshold, zero, one) if not self.block: mask = tf.repeat(mask, repeats=block_size[0], axis=0) @@ -749,8 +752,7 @@ def get_mask_per_threshold(self, score, threshold, block_size): mask = mask.numpy() return mask - def get_masks_global(self, scores, cur_target_sparsity_ratio, pre_masks, - keep_exact_sparsity_ratio=True): + def get_masks_global(self, scores, cur_target_sparsity_ratio, pre_masks, keep_exact_sparsity_ratio=True): """Generate masks for layers. Gather all layer's scores together and calculate a common threshold. 
@@ -794,10 +796,14 @@ def get_masks_global(self, scores, cur_target_sparsity_ratio, pre_masks, total_cnt = info["total_cnt"] current_sparsity_ratio = float(zero_cnt) / total_cnt key_new_sparsity = SparsityInfo(zero_cnt, total_cnt, current_sparsity_ratio) - need_adjust, adjust_ratio = self.adjust_ratio(masks, key, key_new_sparsity, - self.max_sparsity_ratio_per_op, - self.min_sparsity_ratio_per_op, - self.target_sparsity_ratio) + need_adjust, adjust_ratio = self.adjust_ratio( + masks, + key, + key_new_sparsity, + self.max_sparsity_ratio_per_op, + self.min_sparsity_ratio_per_op, + self.target_sparsity_ratio, + ) if need_adjust: # uptade status self.keep_mask_layers[key] = True @@ -822,7 +828,5 @@ def get_masks_global(self, scores, cur_target_sparsity_ratio, pre_masks, mask = self._reshape_2dims_to_orig(mask, orig_shape) masks[key] = mask layer_ratio = np.sum(masks[key] == 0.0) / masks[key].size - logger.info(f'{key} sparsity is {layer_ratio}') + logger.info(f"{key} sparsity is {layer_ratio}") return masks - - diff --git a/neural_compressor/compression/pruner/pruners/__init__.py b/neural_compressor/compression/pruner/pruners/__init__.py index 8a308fd79a9..227f879281e 100644 --- a/neural_compressor/compression/pruner/pruners/__init__.py +++ b/neural_compressor/compression/pruner/pruners/__init__.py @@ -1,4 +1,4 @@ -"""pruning patterns.""" +"""Pruning patterns.""" # !/usr/bin/env python # -*- coding: utf-8 -*- # @@ -24,13 +24,10 @@ modules = glob.glob(join(dirname(__file__), "*.py")) for f in modules: - if isfile(f) and not f.startswith('__') and not f.endswith('__init__.py'): + if isfile(f) and not f.startswith("__") and not f.endswith("__init__.py"): __import__(basename(f)[:-3], globals(), locals(), level=1) -FRAMEWORK = { - 'pytorch': 'pt', - 'keras': 'keras' -} +FRAMEWORK = {"pytorch": "pt", "keras": "keras"} def parse_valid_pruner_types(): @@ -43,7 +40,7 @@ def parse_valid_pruner_types(): return valid_pruner_types -def get_pruner(config, modules, framework='pytorch'): +def get_pruner(config, modules, framework="pytorch"): """Get registered pruner class. Get a Pruner object from PRUNERS. @@ -59,11 +56,12 @@ def get_pruner(config, modules, framework='pytorch'): """ # do the ugly work here # check if it is doing self-multihead-attention pruning - assert framework in FRAMEWORK.keys(), \ - f"does not support {framework}, currently only support framework: {FRAMEWORK.keys()}" + assert ( + framework in FRAMEWORK.keys() + ), f"does not support {framework}, currently only support framework: {FRAMEWORK.keys()}" if "mha" in config["pattern"]: - assert framework == 'pytorch', 'cuurently mha only support pytorch framework.' + assert framework == "pytorch", "cuurently mha only support pytorch framework." return PRUNERS[f"{FRAMEWORK[framework]}_mha"](config, modules) # if enable progressive pruning or not. if "progressive" not in config["pruning_type"]: @@ -75,18 +73,18 @@ def get_pruner(config, modules, framework='pytorch'): config["progressive"] = True if name in CRITERIA: if config["progressive"] is False: - config['criterion_type'] = name + config["criterion_type"] = name if "block" in name or "free" in name: assert ":" not in config["pattern"], f"{name} pruner type does not support {config['pattern']} pattern." 
else: name = "basic" # return the basic pruner else: - config['criterion_type'] = name + config["criterion_type"] = name # name = "progressive" # return the progressive pruner name = "progressive" - name = f'{FRAMEWORK[framework]}_{name}' + name = f"{FRAMEWORK[framework]}_{name}" if name not in PRUNERS.keys(): assert False, f"does not support {name}, currently only support {parse_valid_pruner_types()}" return PRUNERS[name](config, modules) diff --git a/neural_compressor/compression/pruner/pruners/base.py b/neural_compressor/compression/pruner/pruners/base.py index 7e914234df9..2571988930e 100644 --- a/neural_compressor/compression/pruner/pruners/base.py +++ b/neural_compressor/compression/pruner/pruners/base.py @@ -17,7 +17,8 @@ # limitations under the License. import numpy as np -from ..utils import torch, F, tf + +from ..utils import F, tf, torch PRUNERS = {} @@ -70,7 +71,7 @@ class BasePruner: max_sparsity_ratio_per_op: A float showing the maximum sparsity ratio for every module. """ - def __init__(self, config, modules, framework='pytorch'): + def __init__(self, config, modules, framework="pytorch"): """Initialize.""" self.modules = modules self.config = config @@ -78,22 +79,21 @@ def __init__(self, config, modules, framework='pytorch'): self.masks = {} self.global_step = 0 self.handled_global_step = -1 - self.start_step = self.config['start_step'] - self.end_step = self.config['end_step'] - self.pruning_frequency = self.config['pruning_frequency'] + self.start_step = self.config["start_step"] + self.end_step = self.config["end_step"] + self.pruning_frequency = self.config["pruning_frequency"] # this is different with original code - self.total_prune_cnt = (self.end_step - self.start_step + self.pruning_frequency) \ - // self.pruning_frequency + self.total_prune_cnt = (self.end_step - self.start_step + self.pruning_frequency) // self.pruning_frequency self.completed_pruned_cnt = 0 self.total_prune_cnt -= 1 # not pruning at step 0 if self.total_prune_cnt == 0: self.total_prune_cnt = 1 self.completed_pruned_cnt = 1 - self.target_sparsity_ratio = self.config['target_sparsity'] + self.target_sparsity_ratio = self.config["target_sparsity"] self.current_sparsity_ratio = 0.0 self.init_sparsity_ratio = 0.0 - self.low_memory_usage = self.config['low_memory_usage'] + self.low_memory_usage = self.config["low_memory_usage"] def _init(self): """Auxiliary function for initializing.""" @@ -199,6 +199,7 @@ class PytorchBasePruner(BasePruner): target_sparsity_ratio: A float showing the final sparsity after pruning. max_sparsity_ratio_per_op: A float showing the maximum sparsity ratio for every module. """ + def __init__(self, config, modules): super().__init__(config, modules) for key in self.modules.keys(): @@ -244,6 +245,7 @@ class KerasBasePruner(BasePruner): target_sparsity_ratio: A float showing the final sparsity after pruning. max_sparsity_ratio_per_op: A float showing the maximum sparsity ratio for every module. 
""" + def __init__(self, config, modules): super().__init__(config, modules) for key in self.modules.keys(): @@ -259,4 +261,3 @@ def mask_weights(self): for key in self.modules.keys(): module = self.modules[key] module.set_weights([module.get_weights()[0] * self.masks[key]] + module.get_weights()[1:]) - diff --git a/neural_compressor/compression/pruner/pruners/basic.py b/neural_compressor/compression/pruner/pruners/basic.py index 04fea3b7cc1..003ae6cce21 100644 --- a/neural_compressor/compression/pruner/pruners/basic.py +++ b/neural_compressor/compression/pruner/pruners/basic.py @@ -16,15 +16,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .base import (register_pruner, - PytorchBasePruner, - KerasBasePruner) -from ..schedulers import get_scheduler -from ..patterns import get_pattern from ..criteria import get_criterion -from ..tf_criteria import get_tf_criterion +from ..patterns import get_pattern from ..regs import get_reg +from ..schedulers import get_scheduler +from ..tf_criteria import get_tf_criterion from ..utils import logger +from .base import KerasBasePruner, PytorchBasePruner, register_pruner @register_pruner("pt_basic") @@ -45,6 +43,7 @@ class PytorchBasicPruner(PytorchBasePruner): scheduler: A Scheduler object that defines how the model's sparsity changes as training/pruning proceeds. reg: A Reg object that defines regulization terms. """ + def __init__(self, config, modules): """Initialize.""" super().__init__(config, modules) @@ -76,7 +75,7 @@ def set_global_step(self, global_step): def update_masks(self, local_step): """Update the masks at a given local step.""" if self.global_step == self.start_step: - if self.config['lock_init_sparsity']: + if self.config["lock_init_sparsity"]: self.masks = self.pattern.get_pattern_lock_masks(self.modules) self.init_sparsity_ratio = self.pattern.get_sparsity_ratio(self.masks) self.current_sparsity_ratio = self.init_sparsity_ratio @@ -88,10 +87,13 @@ def update_masks(self, local_step): return self.criterion.on_step_begin() - current_target_sparsity_ratio = self.scheduler.update_sparsity_ratio(self.target_sparsity_ratio, - self.completed_pruned_cnt, - self.total_prune_cnt, self.masks, - self.init_sparsity_ratio) + current_target_sparsity_ratio = self.scheduler.update_sparsity_ratio( + self.target_sparsity_ratio, + self.completed_pruned_cnt, + self.total_prune_cnt, + self.masks, + self.init_sparsity_ratio, + ) logger.info(f"current target ratio is {current_target_sparsity_ratio}") self.completed_pruned_cnt += 1 @@ -136,9 +138,10 @@ class KerasBasicPruner(KerasBasePruner): scheduler: A Scheduler object that defines how the model's sparsity changes as training/pruning proceeds. reg: A Reg object that defines regulization terms. 
""" + def _init(self): """Auxiliary function for initializing.""" - self.pattern = get_pattern(self.config, self.modules, framework='keras') + self.pattern = get_pattern(self.config, self.modules, framework="keras") self.scheduler = get_scheduler(self.config) self.criterion = get_tf_criterion(self.config, self.modules) self.reg = get_reg(self.config, self.modules, self.pattern) @@ -163,7 +166,7 @@ def set_global_step(self, global_step): def update_masks(self, local_step): """Update the masks at a given local step.""" if self.global_step == self.start_step: - if self.config['lock_init_sparsity']: + if self.config["lock_init_sparsity"]: self.masks = self.pattern.get_pattern_lock_masks(self.modules) self.init_sparsity_ratio = self.pattern.get_sparsity_ratio(self.masks) self.current_sparsity_ratio = self.init_sparsity_ratio @@ -175,10 +178,13 @@ def update_masks(self, local_step): return self.criterion.on_step_begin() - current_target_sparsity_ratio = self.scheduler.update_sparsity_ratio(self.target_sparsity_ratio, - self.completed_pruned_cnt, - self.total_prune_cnt, self.masks, - self.init_sparsity_ratio) + current_target_sparsity_ratio = self.scheduler.update_sparsity_ratio( + self.target_sparsity_ratio, + self.completed_pruned_cnt, + self.total_prune_cnt, + self.masks, + self.init_sparsity_ratio, + ) logger.info(f"current target ratio is {current_target_sparsity_ratio}") self.completed_pruned_cnt += 1 diff --git a/neural_compressor/compression/pruner/pruners/block_mask.py b/neural_compressor/compression/pruner/pruners/block_mask.py index 887d3ea2247..3d7236d555c 100644 --- a/neural_compressor/compression/pruner/pruners/block_mask.py +++ b/neural_compressor/compression/pruner/pruners/block_mask.py @@ -1,4 +1,4 @@ -"""block mask pruner.""" +"""Block mask pruner.""" # !/usr/bin/env python # -*- coding: utf-8 -*- # @@ -17,16 +17,16 @@ # limitations under the License. from functools import partial -from .base import (register_pruner, - PytorchBasePruner) -from ..schedulers import get_scheduler -from ..patterns import get_pattern + from ..criteria import get_criterion +from ..patterns import get_pattern from ..regs import get_reg -from ..utils import logger, torch, F +from ..schedulers import get_scheduler +from ..utils import F, logger, torch +from .base import PytorchBasePruner, register_pruner -@register_pruner('pt_block_mask') +@register_pruner("pt_block_mask") class PytorchBlockMaskPruner(PytorchBasePruner): """Pruning Pruner. 
@@ -62,15 +62,17 @@ def _init(self): if "channel" not in self.pattern.pattern: logger.info("Enabling channel-wise pattern would be a better choice.") - + def _rewrite_forward(self, pruner_masks): def forward(self, input): block_mask = pruner_masks[0][self.mask_name] - block_mask.requires_grad_(True) # Makesure that the gradient of block mask is always avilible - block_size = [self.weight.shape[0] // block_mask.shape[0], - self.weight.shape[1] // block_mask.shape[1]] - mask = block_mask.repeat_interleave(block_size[0], dim=0).repeat_interleave( - block_size[1], dim=-1).to(self.weight.device) + block_mask.requires_grad_(True) # Makesure that the gradient of block mask is always avilible + block_size = [self.weight.shape[0] // block_mask.shape[0], self.weight.shape[1] // block_mask.shape[1]] + mask = ( + block_mask.repeat_interleave(block_size[0], dim=0) + .repeat_interleave(block_size[1], dim=-1) + .to(self.weight.device) + ) return F.linear(input, self.weight * mask, self.bias) for key in self.masks.keys(): @@ -82,10 +84,10 @@ def _recover_forward(self): with torch.no_grad(): for key in self.masks.keys(): module = self.modules[key] - delattr(module, 'mask_name') + delattr(module, "mask_name") self.masks[key].requires_grad_(False) module.forward = partial(torch.nn.Linear.forward, module) - + # def on_step_begin(self, local_step): # """Implement at the start of each step. @@ -96,7 +98,7 @@ def _recover_forward(self): def update_masks(self, local_step): """Update the masks at a given local step.""" if self.global_step == self.start_step: - if self.config['lock_init_sparsity']: + if self.config["lock_init_sparsity"]: self.init_sparsity_ratio = self.pattern.get_sparsity_ratio(self.masks) self.current_sparsity_ratio = self.init_sparsity_ratio @@ -107,10 +109,13 @@ def update_masks(self, local_step): return self.criterion.on_step_begin() - current_target_sparsity_ratio = self.scheduler.update_sparsity_ratio(self.target_sparsity_ratio, - self.completed_pruned_cnt, - self.total_prune_cnt, self.masks, - self.init_sparsity_ratio) + current_target_sparsity_ratio = self.scheduler.update_sparsity_ratio( + self.target_sparsity_ratio, + self.completed_pruned_cnt, + self.total_prune_cnt, + self.masks, + self.init_sparsity_ratio, + ) logger.info(f"current target ratio is {current_target_sparsity_ratio}") self.completed_pruned_cnt += 1 @@ -118,7 +123,7 @@ def update_masks(self, local_step): return self.masks = self.pattern.get_masks(self.criterion.scores, current_target_sparsity_ratio, self.masks) self.pruner_masks[0] = self.masks - + self.mask_weights() self.current_sparsity_ratio = self.pattern.get_sparsity_ratio(self.masks) diff --git a/neural_compressor/compression/pruner/pruners/mha.py b/neural_compressor/compression/pruner/pruners/mha.py index fda126430fd..93a36227c8d 100644 --- a/neural_compressor/compression/pruner/pruners/mha.py +++ b/neural_compressor/compression/pruner/pruners/mha.py @@ -1,4 +1,4 @@ -"""mha pruner.""" +"""Mha pruner.""" # !/usr/bin/env python # -*- coding: utf-8 -*- # @@ -16,17 +16,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from .base import (register_pruner, - PytorchBasePruner) -from ..schedulers import get_scheduler -from ..patterns import get_pattern from ..criteria import get_criterion -from ..utils import logger - -from ..utils import torch +from ..patterns import get_pattern +from ..schedulers import get_scheduler +from ..utils import logger, torch +from .base import PytorchBasePruner, register_pruner -@register_pruner('pt_mha') +@register_pruner("pt_mha") class PythonMultiheadAttentionPruner(PytorchBasePruner): """Pruning Pruner. @@ -71,22 +68,21 @@ def __init__(self, config, mha_modules): self.mha_modules = mha_modules self.global_step = 0 self.handled_global_step = -1 - self.start_step = self.config['start_step'] - self.end_step = self.config['end_step'] - self.pruning_frequency = self.config['pruning_frequency'] + self.start_step = self.config["start_step"] + self.end_step = self.config["end_step"] + self.pruning_frequency = self.config["pruning_frequency"] # this is different with original code - self.total_prune_cnt = (self.end_step - self.start_step + self.pruning_frequency) \ - // self.pruning_frequency + self.total_prune_cnt = (self.end_step - self.start_step + self.pruning_frequency) // self.pruning_frequency self.completed_pruned_cnt = 0 self.total_prune_cnt -= 1 # not pruning at step 0 if self.total_prune_cnt == 0: self.total_prune_cnt = 1 self.completed_pruned_cnt = 1 - self.target_sparsity_ratio = self.config['target_sparsity'] + self.target_sparsity_ratio = self.config["target_sparsity"] self.current_sparsity_ratio = 0.0 self.init_sparsity_ratio = 0.0 - self.criterion_reduce_type = self.config['criterion_reduce_type'] - self.pruning_scope = self.config['pruning_scope'] + self.criterion_reduce_type = self.config["criterion_reduce_type"] + self.pruning_scope = self.config["pruning_scope"] # ------------------------Custom attributes for MHA Pruner-------------------------------------- # main initialize process. # define some attributes. @@ -107,24 +103,24 @@ def __init__(self, config, mha_modules): def _init_mha_attrs(self): """Initialize self.mha_compressions, self.linear_layers, self.head_masks # similar to original mha slim process, but only hook mha modules and their attributes, - # do not call slim main functions. - """ + # do not call slim main functions.""" # auto slim related: head pruning objects from ..model_slim.weight_slim import MHACompression + for mha_module in self.mha_modules: # initialize self.mha_compressions mha_comp = MHACompression(mha_module) - self.mha_compressions[mha_module['mha_name'][0]] = mha_comp - head_nums_for_this_mha = getattr(mha_comp.mha[0], mha_comp.attributes_for_this_mha['head_nums']) + self.mha_compressions[mha_module["mha_name"][0]] = mha_comp + head_nums_for_this_mha = getattr(mha_comp.mha[0], mha_comp.attributes_for_this_mha["head_nums"]) # initialize head_masks # why use 1 x head_num shape? 
because this provides convenience for permute mask for qkv and ffn - self.head_masks[mha_module['mha_name'][0]] = torch.ones(1, head_nums_for_this_mha) + self.head_masks[mha_module["mha_name"][0]] = torch.ones(1, head_nums_for_this_mha) # initialize self.linear_layers - for idx in range(mha_module['qkv_name'].__len__()): + for idx in range(mha_module["qkv_name"].__len__()): # update qkv layers - self.linear_layers[mha_module['qkv_name'][idx]] = mha_module['qkv_module'][idx] - for idx in range(mha_module['ffn_name'].__len__()): - self.linear_layers[mha_module['ffn_name'][idx]] = mha_module['ffn_module'][idx] + self.linear_layers[mha_module["qkv_name"][idx]] = mha_module["qkv_module"][idx] + for idx in range(mha_module["ffn_name"].__len__()): + self.linear_layers[mha_module["ffn_name"][idx]] = mha_module["ffn_module"][idx] def reduce_mha_scores(self, score, dim=0): # an 2D tensor, return its compiled scores @@ -145,8 +141,8 @@ def update_mha_scores(self): for mha_name, mha_comp in self.mha_compressions.items(): device = mha_comp.device # step 0: obtain hooked attributes in mha modules - head_size = getattr(mha_comp.mha[0], mha_comp.attributes_for_this_mha['head_size']) - head_nums = getattr(mha_comp.mha[0], mha_comp.attributes_for_this_mha['head_nums']) + head_size = getattr(mha_comp.mha[0], mha_comp.attributes_for_this_mha["head_size"]) + head_nums = getattr(mha_comp.mha[0], mha_comp.attributes_for_this_mha["head_nums"]) # step 1: gather qkv and ffn which belong to same mha together qkv_scores_for_this_mha = {} ffn_scores_for_this_mha = {} @@ -165,7 +161,7 @@ def update_mha_scores(self): qkv_shape[0] // qkv_block_size[0], qkv_block_size[0], qkv_shape[1] // qkv_block_size[1], - qkv_block_size[1] + qkv_block_size[1], ] for qkv_name, qkv_score in qkv_scores_for_this_mha.items(): qkv_score_new = qkv_score.reshape(qkv_new_shape) @@ -179,7 +175,7 @@ def update_mha_scores(self): ffn_shape[0] // ffn_block_size[0], ffn_block_size[0], ffn_shape[1] // ffn_block_size[1], - ffn_block_size[1] + ffn_block_size[1], ] for ffn_name, ffn_score in ffn_scores_for_this_mha.items(): ffn_score_new = ffn_score.reshape(ffn_new_shape) @@ -188,13 +184,13 @@ def update_mha_scores(self): ffn_gather_scores += ffn_score_new # step 3: compile qkv ffn scores to obtain individual head's score self.mha_scores[mha_name] = qkv_gather_scores + ffn_gather_scores.permute(1, 0) - self.mha_scores[mha_name] /= (len(qkv_scores_for_this_mha) + len(ffn_scores_for_this_mha)) # should be 4 + self.mha_scores[mha_name] /= len(qkv_scores_for_this_mha) + len(ffn_scores_for_this_mha) # should be 4 return True def update_masks(self, local_step): """Update the masks at a given local step.""" if self.global_step == self.start_step: - if self.config['lock_init_sparsity']: + if self.config["lock_init_sparsity"]: self.masks = self.pattern.get_pattern_lock_masks(self.modules) self.init_sparsity_ratio = self.pattern.get_sparsity_ratio(self.masks) self.current_sparsity_ratio = self.init_sparsity_ratio @@ -206,11 +202,13 @@ def update_masks(self, local_step): return self.criterion.on_step_begin() - current_target_sparsity_ratio = self.scheduler.update_sparsity_ratio(self.target_sparsity_ratio, - self.completed_pruned_cnt, - self.total_prune_cnt, - self.head_masks, - self.init_sparsity_ratio) + current_target_sparsity_ratio = self.scheduler.update_sparsity_ratio( + self.target_sparsity_ratio, + self.completed_pruned_cnt, + self.total_prune_cnt, + self.head_masks, + self.init_sparsity_ratio, + ) logger.info(f"current target ratio is 
{current_target_sparsity_ratio}") self.completed_pruned_cnt += 1 diff --git a/neural_compressor/compression/pruner/pruners/pattern_lock.py b/neural_compressor/compression/pruner/pruners/pattern_lock.py index 1d6270723bc..a2786b871c4 100644 --- a/neural_compressor/compression/pruner/pruners/pattern_lock.py +++ b/neural_compressor/compression/pruner/pruners/pattern_lock.py @@ -1,4 +1,4 @@ -"""pattern lock pruner.""" +"""Pattern lock pruner.""" # !/usr/bin/env python # -*- coding: utf-8 -*- # @@ -16,13 +16,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .base import (register_pruner, - PytorchBasePruner, - KerasBasePruner) from ..patterns import get_pattern +from .base import KerasBasePruner, PytorchBasePruner, register_pruner -@register_pruner('pt_pattern_lock') +@register_pruner("pt_pattern_lock") class PytorchPatternLockPruner(PytorchBasePruner): """Pruning Pruner. diff --git a/neural_compressor/compression/pruner/pruners/progressive.py b/neural_compressor/compression/pruner/pruners/progressive.py index 8739643986c..fe7e29a0a27 100644 --- a/neural_compressor/compression/pruner/pruners/progressive.py +++ b/neural_compressor/compression/pruner/pruners/progressive.py @@ -1,4 +1,4 @@ -"""progressive pruner.""" +"""Progressive pruner.""" # !/usr/bin/env python # -*- coding: utf-8 -*- # @@ -18,15 +18,15 @@ import copy -from .base import register_pruner, PytorchBasePruner -from ..schedulers import get_scheduler -from ..patterns import get_pattern from ..criteria import get_criterion +from ..patterns import get_pattern from ..regs import get_reg +from ..schedulers import get_scheduler from ..utils import logger, torch +from .base import PytorchBasePruner, register_pruner -@register_pruner('pt_progressive') +@register_pruner("pt_progressive") class PytorchProgressivePruner(PytorchBasePruner): """Pruning Pruner. @@ -56,11 +56,7 @@ def _init(self): self.use_progressive = self.config["progressive"] # progressive parameters # dict passed to Pattern's functions - self.progressive_configs = { - "progressive_steps": 4, - "progressive_type": "scores", - "use_global": True - } + self.progressive_configs = {"progressive_steps": 4, "progressive_type": "scores", "use_global": True} self.progressive_steps = self.progressive_configs["progressive_steps"] self.progressive_type = self.progressive_configs["progressive_type"] self.use_global = self.progressive_configs["use_global"] @@ -116,7 +112,8 @@ def check_progressive_validity(self): if progressive_direction % self.progressive_steps != 0: raise ValueError( f"In layer {key}, its pruning pattern is {block_size}, " - f"while progressive steps {self.progressive_steps} is indivisible.") + f"while progressive steps {self.progressive_steps} is indivisible." + ) else: # score based progressive pruning, support both NxM and N:M patterns if type(self.pattern).__name__ == "PytorchPatternNxM": @@ -126,12 +123,14 @@ def check_progressive_validity(self): if total_block_size < self.progressive_steps: raise ValueError( f"In layer {key}, its pruning pattern is {block_size}, " - f"while progressive steps {self.progressive_steps} is overflowing.") + f"while progressive steps {self.progressive_steps} is overflowing." 
+ ) elif type(self.pattern).__name__ == "PytorchPatternNInM": if self.pattern.N < self.progressive_steps: raise ValueError( f"Pruning pattern is {self.pattern.N} in {self.pattern.M}, " - f"while progressive steps {self.progressive_steps} is overflowing.") + f"while progressive steps {self.progressive_steps} is overflowing." + ) else: raise NotImplementedError @@ -154,29 +153,31 @@ def check_is_pruned_progressive_step(self, step): def update_masks_progressive(self, local_step): """Update the masks in progressive pruning mode at a given local step.""" if self.global_step == self.start_step: - if self.config['lock_init_sparsity']: + if self.config["lock_init_sparsity"]: self.masks = self.pattern.get_pattern_lock_masks(self.modules) self.init_sparsity_ratio = self.pattern.get_sparsity_ratio(self.masks) self.current_sparsity_ratio = self.init_sparsity_ratio # case 1: step is not in [start_step, end_step] or it is not either pruning or progressive pruning step. if (self.check_is_pruned_step(self.global_step) is False) and ( - self.check_is_pruned_progressive_step(self.global_step) is False): + self.check_is_pruned_progressive_step(self.global_step) is False + ): return if self.current_sparsity_ratio > self.target_sparsity_ratio: return # case 2: step which does progressive update, but it is not a pruning step in case 3 - if self.check_is_pruned_progressive_step(self.global_step) \ - and self.check_is_pruned_step(self.global_step) is False: + if ( + self.check_is_pruned_progressive_step(self.global_step) + and self.check_is_pruned_step(self.global_step) is False + ): # do not do global pruning, only do the progressive mask update. step_offset = self.global_step - self.structured_update_step progressive_idx = step_offset // self.pruning_frequency_progressive if progressive_idx < (self.progressive_steps - 1): - self.progressive_masks = self.pattern.update_progressive_masks(self.pre_masks, self.masks, - self.criterion.scores, - progressive_idx + 1, - self.progressive_configs) + self.progressive_masks = self.pattern.update_progressive_masks( + self.pre_masks, self.masks, self.criterion.scores, progressive_idx + 1, self.progressive_configs + ) else: # in the end, directly use new masks. for n in self.masks.keys(): @@ -189,9 +190,9 @@ def update_masks_progressive(self, local_step): # case 3: a pruning step, generate new masks, progressive masks also update. 
tmp_step = self.global_step self.structured_update_step = tmp_step - current_target_sparsity_ratio = self.scheduler.update_sparsity_ratio(self.target_sparsity_ratio, - self.completed_pruned_cnt, - self.total_prune_cnt, self.masks) + current_target_sparsity_ratio = self.scheduler.update_sparsity_ratio( + self.target_sparsity_ratio, self.completed_pruned_cnt, self.total_prune_cnt, self.masks + ) logger.info(f"current target ratio is {current_target_sparsity_ratio}") self.criterion.on_step_begin() self.completed_pruned_cnt += 1 @@ -201,10 +202,14 @@ def update_masks_progressive(self, local_step): self.pre_masks[n] = self.masks[n].clone() # update new masks if not self.use_progressive: - self.masks = self.pattern.get_masks(self.criterion.scores, current_target_sparsity_ratio, self.masks, ) - self.progressive_masks = self.pattern.update_progressive_masks(self.pre_masks, self.masks, - self.criterion.scores, 1, - self.progressive_configs) + self.masks = self.pattern.get_masks( + self.criterion.scores, + current_target_sparsity_ratio, + self.masks, + ) + self.progressive_masks = self.pattern.update_progressive_masks( + self.pre_masks, self.masks, self.criterion.scores, 1, self.progressive_configs + ) self.mask_weights_general(self.progressive_masks) if self.progressive_logger: self.print_progressive_sparsity() @@ -242,7 +247,7 @@ def on_after_optimizer_step(self): self.mask_weights_general(self.progressive_masks) self.global_step += 1 - + def mask_weights_general(self, input_masks): """Apply input masks to corresponding modules' weights. diff --git a/neural_compressor/compression/pruner/pruners/retrain_free.py b/neural_compressor/compression/pruner/pruners/retrain_free.py index 07a536e90c3..c79534e5f87 100644 --- a/neural_compressor/compression/pruner/pruners/retrain_free.py +++ b/neural_compressor/compression/pruner/pruners/retrain_free.py @@ -1,4 +1,4 @@ -"""retrain free pruner.""" +"""Retrain free pruner.""" # !/usr/bin/env python # -*- coding: utf-8 -*- # @@ -17,15 +17,16 @@ # limitations under the License. from functools import partial -from .base import (register_pruner, - PytorchBasePruner) -from ..schedulers import get_scheduler -from ..patterns import get_pattern + from ..criteria import get_criterion +from ..patterns import get_pattern from ..regs import get_reg -from ..utils import logger, torch, F +from ..schedulers import get_scheduler +from ..utils import F, logger, torch +from .base import PytorchBasePruner, register_pruner -@register_pruner('pt_retrain_free') + +@register_pruner("pt_retrain_free") class PytorchRetrainFreePruner(PytorchBasePruner): """Pruning Pruner. The retrain_free pruner_class is derived from BasePruner. @@ -64,17 +65,20 @@ def _init(self): self.criterion = get_criterion(modules=self.modules, config=self.config, pattern=self.pattern, masks=self.masks) self.reg = get_reg(self.config, self.modules, self.pattern) logger.warning("Retrain-free pruner fixed the weights, please DO NOT turn on gradient update.") - assert "channel" in self.pattern.pattern, \ - "retrain-free pruner only supports large patterns like channel-wise pruning." + assert ( + "channel" in self.pattern.pattern + ), "retrain-free pruner only supports large patterns like channel-wise pruning." 
def _rewrite_forward(self, pruner_masks): def forward(self, input): block_mask = pruner_masks[0][self.mask_name] - block_mask.requires_grad_(True) # Makesure that the gradient of block mask is always avilible - block_size = [self.weight.shape[0] // block_mask.shape[0], - self.weight.shape[1] // block_mask.shape[1]] - mask = block_mask.repeat_interleave(block_size[0], dim=0).repeat_interleave( - block_size[1], dim=-1).to(self.weight.device) + block_mask.requires_grad_(True) # Makesure that the gradient of block mask is always avilible + block_size = [self.weight.shape[0] // block_mask.shape[0], self.weight.shape[1] // block_mask.shape[1]] + mask = ( + block_mask.repeat_interleave(block_size[0], dim=0) + .repeat_interleave(block_size[1], dim=-1) + .to(self.weight.device) + ) return F.linear(input, self.weight * mask, self.bias) for key in self.masks.keys(): @@ -86,10 +90,10 @@ def _recover_forward(self): with torch.no_grad(): for key in self.masks.keys(): module = self.modules[key] - delattr(module, 'mask_name') + delattr(module, "mask_name") self.masks[key].requires_grad_(False) module.forward = partial(torch.nn.Linear.forward, module) - + # def on_step_begin(self, local_step): # """Implement at the start of each step. @@ -100,7 +104,7 @@ def _recover_forward(self): def update_masks(self, local_step): """Update the masks at a given local step.""" if self.global_step == self.start_step: - if self.config['lock_init_sparsity']: + if self.config["lock_init_sparsity"]: self.init_sparsity_ratio = self.pattern.get_sparsity_ratio(self.masks) self.current_sparsity_ratio = self.init_sparsity_ratio @@ -111,10 +115,13 @@ def update_masks(self, local_step): return self.criterion.on_step_begin() - current_target_sparsity_ratio = self.scheduler.update_sparsity_ratio(self.target_sparsity_ratio, - self.completed_pruned_cnt, - self.total_prune_cnt, self.masks, - self.init_sparsity_ratio) + current_target_sparsity_ratio = self.scheduler.update_sparsity_ratio( + self.target_sparsity_ratio, + self.completed_pruned_cnt, + self.total_prune_cnt, + self.masks, + self.init_sparsity_ratio, + ) logger.info(f"current target ratio is {current_target_sparsity_ratio}") self.completed_pruned_cnt += 1 @@ -139,7 +146,7 @@ def on_step_end(self): # recover forward method at last prune step self._recover_forward() self.global_step += 1 - + def mask_weights(self): """Apply block masks to corresponding modules' weights. 
@@ -174,8 +181,7 @@ def rearrange_masks(self, masks): new_mask = torch.ones(len(indicies)).to(block_mask.device) new_mask[masked_indicies] = 0 - new_mask = new_mask * torch.ones_like(block_mask, - device=block_mask.device, dtype=block_mask.dtype) + new_mask = new_mask * torch.ones_like(block_mask, device=block_mask.device, dtype=block_mask.dtype) block_mask.data = new_mask.data return masks @@ -189,4 +195,3 @@ def zero_mask_grad(self): else: mask.grad.requires_grad_(False) mask.grad.zero_() - diff --git a/neural_compressor/compression/pruner/pruners/sparse_gpt.py b/neural_compressor/compression/pruner/pruners/sparse_gpt.py index a7f395f9364..260af7f1cd2 100644 --- a/neural_compressor/compression/pruner/pruners/sparse_gpt.py +++ b/neural_compressor/compression/pruner/pruners/sparse_gpt.py @@ -1,20 +1,36 @@ -from .base import (register_pruner, - PytorchBasePruner) -from ..schedulers import get_scheduler -from ..patterns import get_pattern -from ..criteria import get_criterion -from ..regs import get_reg -from ..utils import logger, torch, nn +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import gc import math -@register_pruner('pt_sparse_gpt') + +from ..criteria import get_criterion +from ..patterns import get_pattern +from ..regs import get_reg +from ..schedulers import get_scheduler +from ..utils import logger, nn, torch +from .base import PytorchBasePruner, register_pruner + + +@register_pruner("pt_sparse_gpt") class SparseGPTPruner(PytorchBasePruner): """Pruning Pruner. The sparse_gpt pruner_class is derived from PytorchBasePruner. SparseGPTPruner supports one-shot pruning of most Large Language Models(LLMs). Please refer to SparseGPT: Massive Language Models Can be Accurately Pruned in One-shot. (https://arxiv.org/abs/2301.00774) - + Args: modules: A dict {"module_name": Tensor} that stores the pruning modules' weights. @@ -26,25 +42,27 @@ class SparseGPTPruner(PytorchBasePruner): scheduler: A Scheduler object that defines how the model's sparsity changes as training/pruning proceeds. reg: A Reg object that defines regulization terms. """ - - def __init__(self, config, modules, framework='pytorch'): + + def __init__(self, config, modules, framework="pytorch"): """Initialize.""" super(SparseGPTPruner, self).__init__(config, modules) - + def _init(self): """Initialize.""" self.pattern = get_pattern(self.config, self.modules) - + # self.criterion = get_criterion(config=self.config, modules=self.modules) gc.collect() self.gpts = {} logger.warning("sparse_gpt pruner fixed the weights, Please DO NOT train or update gradients.") - assert "1x1" in self.pattern.pattern or ":" in self.pattern.pattern, \ - "sparse_gpt pruner type only supports 1x1 and N:M patterns." - - class SparseGPT(): + assert ( + "1x1" in self.pattern.pattern or ":" in self.pattern.pattern + ), "sparse_gpt pruner type only supports 1x1 and N:M patterns." 
+ + class SparseGPT: def __init__(self, module): import transformers + self.module = module self.dev = self.module.weight.device W = module.weight.data.clone() @@ -59,10 +77,11 @@ def __init__(self, module): def add_batch(self, inp, blocksize=1024): import transformers + if len(inp.shape) == 2: inp = inp.unsqueeze(0) - sample_num = inp.shape[0] # batchsize - if isinstance(self.module , nn.Linear) or isinstance(self.module, transformers.Conv1D): + sample_num = inp.shape[0] # batchsize + if isinstance(self.module, nn.Linear) or isinstance(self.module, transformers.Conv1D): if len(inp.shape) == 3: inp = inp.reshape((-1, inp.shape[-1])) inp = inp.t() @@ -73,22 +92,23 @@ def add_batch(self, inp, blocksize=1024): def register_gpt_hook(self, op_names): self.gpts = {} + def add_batch(gpt): def tmp(_, inp): - gpt.add_batch(inp[0].data) # get layer-wise matrix, H = (XX> + λI) + gpt.add_batch(inp[0].data) # get layer-wise matrix, H = (XX> + λI) + return tmp + handles = [] for name in op_names: module = self.modules[name] self.gpts[name] = self.SparseGPT(module) handles.append(module.register_forward_pre_hook(add_batch(self.gpts[name]))) return handles - + def fasterprune(self, op_names): with torch.no_grad(): for name in op_names: logger.info(f"module: {name}\t target ratio: {self.target_sparsity_ratio}") module = self.modules[name] - self.pattern.fasterprune(self.gpts[name]) # is there necessary to add a hyperparameter of blocksize - - + self.pattern.fasterprune(self.gpts[name]) # is there necessary to add a hyperparameter of blocksize diff --git a/neural_compressor/compression/pruner/pruning.py b/neural_compressor/compression/pruner/pruning.py index 3450649b522..37dd6c3639b 100644 --- a/neural_compressor/compression/pruner/pruning.py +++ b/neural_compressor/compression/pruner/pruning.py @@ -16,13 +16,21 @@ # See the License for the specific language governing permissions and # limitations under the License. -from neural_compressor.compression.pruner.utils import parse_to_prune, get_sparsity_ratio -from neural_compressor.compression.pruner.pruners import get_pruner -from neural_compressor.compression.pruner.utils import logger, torch, collect_layer_inputs, get_layers from typing import Optional +from neural_compressor.compression.pruner.pruners import get_pruner +from neural_compressor.compression.pruner.utils import ( + collect_layer_inputs, + get_layers, + get_sparsity_ratio, + logger, + parse_to_prune, + torch, +) + PRUNINGS = {} + def register_pruning(name): """Class decorator to register a pruning subclass to the registry. @@ -59,24 +67,25 @@ class BasePruning: pruners: A list. A list of Pruner objects. pruner_info: A config dict object that contains pruners' information. 
""" - + def __init__(self, config, model, opt=None): """Initialize.""" self._model = model self.pruners_info = config self.pruners = self._generate_pruners() - + def _generate_pruners(self): """Obtain Pruner objects.""" pruners = [] # model auto slim related - # assert isinstance(self._model, torch.nn.Module) # mha only for torch + # assert isinstance(self._model, torch.nn.Module) # mha only for torch from .model_slim.pattern_analyzer import SelfMHASearcher + for info in self.pruners_info: - if 'mha' in info['pattern']: + if "mha" in info["pattern"]: # head pruning pa_obj = SelfMHASearcher(self._model) - modules, _ = pa_obj.search(split_qkv_ffn = False) + modules, _ = pa_obj.search(split_qkv_ffn=False) modules = pa_obj.obtain_mha_module(modules) modules = pa_obj.from_layer_name_to_object(modules) if len(modules) == 0: @@ -89,10 +98,10 @@ def _generate_pruners(self): logger.warning("one pruner hooks no layers, please have a check") pruners.append(get_pruner(info, modules)) - info['modules'] = [key for key in modules.keys()] - info['len_of_modules'] = len(info['modules']) + info["modules"] = [key for key in modules.keys()] + info["len_of_modules"] = len(info["modules"]) logger.info(info) - + return pruners def on_train_begin(self, dataloader=None): @@ -102,12 +111,12 @@ def on_train_begin(self, dataloader=None): """ for pruner in self.pruners: pruner.on_train_begin(dataloader) - + def on_step_begin(self, local_step=0): """Implement at the beginning of every step.""" for pruner in self.pruners: pruner.on_step_begin(local_step) - + def on_step_end(self): """Implement at the end of each step.""" for pruner in self.pruners: @@ -117,7 +126,7 @@ def on_before_optimizer_step(self): """Implement before optimizer.step().""" for pruner in self.pruners: pruner.on_before_optimizer_step() - + def on_after_optimizer_step(self): """Implement after optimizer.step().""" for pruner in self.pruners: @@ -127,18 +136,18 @@ def on_epoch_begin(self, epoch): # pragma: no cover """Implement at the beginning of every epoch.""" for pruner in self.pruners: pruner.on_epoch_begin(epoch) - + def on_epoch_end(self): # pragma: no cover """Implement the end of every epoch.""" for pruner in self.pruners: pruner.on_epoch_end() - + def on_train_end(self): """Implement the end of training phase.""" for pruner in self.pruners: pruner.on_train_end() get_sparsity_ratio(self.pruners, self._model) - + @register_pruning("basic_pruning") class BasicPruning(BasePruning): @@ -151,31 +160,29 @@ def __init__(self, config, model, opt=None): class SparseGPTPruning(BasePruning): """SparseGPT Pruning The SparseGPT pruning_class is derived from BasePruning. - + Args: config: A config dict object that contains the pruner information. model: The model that need to be pruned. dataloader: Processed datasets, which is necessary for sparseGPT pruning. device: avilable device of pruning. """ - - def __init__(self, config, model, dataloader, - framework='pytorch', device: str =None): + + def __init__(self, config, model, dataloader, framework="pytorch", device: str = None): """Initialize.""" super().__init__(config, model) if device is None: self.dev = model.device else: - assert 'cpu' in device or 'cuda' in device, f"Only cpu and cuda are supported." + assert "cpu" in device or "cuda" in device, "Only cpu and cuda are supported." self.dev = torch.device(device) self._layers = [] self._dataloader = dataloader if dataloader is not None: self._prepare_pruners() - + def _prepare_pruners(self): - """One-shot post-training pruning. 
- """ + """One-shot post-training pruning.""" self.model_dev = self._model.device self._layers = get_layers(self._model) self._do_pruning() @@ -184,16 +191,18 @@ def _prepare_pruners(self): def _do_pruning(self): from tqdm.auto import tqdm + layers = self._layers self._model = self._model.cpu() - inputs, inp_dict = collect_layer_inputs(model=self._model, layers=layers, layer_idx=0, - layer_inputs=self._dataloader, device=self.dev) - if 'cuda' in self.dev.type: + inputs, inp_dict = collect_layer_inputs( + model=self._model, layers=layers, layer_idx=0, layer_inputs=self._dataloader, device=self.dev + ) + if "cuda" in self.dev.type: torch.cuda.empty_cache() with torch.no_grad(): for i in tqdm(range(len(layers))): layer = layers[i].to(self.dev) - layer_index_str = '.' + str(i) + '.' + layer_index_str = "." + str(i) + "." handles_list = [] for pruner in self.pruners: layer_op_names = [key for key in pruner.modules.keys() if layer_index_str in key] @@ -210,37 +219,41 @@ def _do_pruning(self): # the weights of current layer have been pruned, get the latest outputs as the inputs for next layer inputs[j] = layer(inputs[j], **inp_dict)[0] layers[i] = layer.cpu() - if 'cuda' in self.dev.type: + if "cuda" in self.dev.type: torch.cuda.empty_cache() - + def on_train_begin(self, dataloader): # pragma: no cover if self._dataloader is not None: - logger.info("The sparseGPT pruning is already done at initialization time, " \ - "calling on_train_begin() is a redundant operation.") + logger.info( + "The sparseGPT pruning is already done at initialization time, " + "calling on_train_begin() is a redundant operation." + ) elif dataloader is None: - logger.error("The sparseGPT pruning must be passed the 'dataloader' argument " \ - "when initializing or calling on_train_begin()") + logger.error( + "The sparseGPT pruning must be passed the 'dataloader' argument " + "when initializing or calling on_train_begin()" + ) self._dataloader = dataloader self._prepare_pruners() - - - + + @register_pruning("retrain_free_pruning") class RetrainFreePruning(BasePruning): - def __init__(self, config, model, dataloader=None, loss_func=None, framework='pytorch'): + def __init__(self, config, model, dataloader=None, loss_func=None, framework="pytorch"): """Initialize.""" super().__init__(config, model) self._dataloader = dataloader self._loss_func = loss_func if dataloader is not None: self._prepare_pruners() - + def _prepare_pruners(self): self._do_pruning() get_sparsity_ratio(self.pruners, self._model) def _do_pruning(self): from tqdm.auto import tqdm + length = len(self._dataloader.dataset) if self._dataloader.batch_sampler is not None: length = len(self._dataloader.batch_sampler) @@ -261,7 +274,7 @@ def _do_pruning(self): loss.backward() self.on_step_end() progress_bar.update(1) - + # def on_train_begin(self, dataloader): # if self._dataloader is not None: # logger.info("The retrain_free pruning is already done at initialization time, " \ @@ -271,6 +284,3 @@ def _do_pruning(self): # "when initializing or calling on_train_begin()") # self._dataloader = dataloader # self._prepare_pruners() - - - diff --git a/neural_compressor/compression/pruner/regs.py b/neural_compressor/compression/pruner/regs.py index 0f15f936118..68bedd219fa 100644 --- a/neural_compressor/compression/pruner/regs.py +++ b/neural_compressor/compression/pruner/regs.py @@ -24,7 +24,7 @@ def register_reg(name): """Register a regularizator to the registry. - + Args: name: A string that defines the scheduler type. 
@@ -41,26 +41,26 @@ def register(reg): def get_reg_type(config): """Obtain the regularizer type. - + Args: config: A config dict object that includes information of the regularizer. """ for key in REGS.keys(): ##assume there is only one reg - if config.get(key, None) != None: + if config.get(key, None) is not None: return key return None def get_reg(config, modules, pattern): """Get registered regularizator class. - + Args: config: A config dict object that includes information of the regularizer. modules: A dict {"module_name": Tensor} that stores the pruning modules' weights. - pattern: A config dict object that includes information of the pattern. + pattern: A config dict object that includes information of the pattern. """ reg_type = config["reg_type"] - if reg_type == None: + if reg_type is None: return BaseReg(config, modules, pattern) if reg_type not in REGS.keys(): assert False, f"regularizator does not support {reg_type}, currently only support {REGS.keys()}" @@ -75,7 +75,7 @@ class BaseReg: Args: modules: A dict {"module_name": Tensor} that stores the pruning modules' weights. config: A config dict object that includes information of the regularizer. - pattern: A config dict object that includes information of the pattern. + pattern: A config dict object that includes information of the pattern. """ def __init__(self, config: dict, modules: dict, pattern: PytorchBasePattern): @@ -103,7 +103,7 @@ class GroupLasso(BaseReg): Args: modules: A dict {"module_name": Tensor} that stores the pruning modules' weights. config: A config dict object that includes information of the regularizer. - pattern: A config dict object that includes information of the pattern. + pattern: A config dict object that includes information of the pattern. Attributes: reg_terms: A dict {"module_name": Tensor} of regularization terms. @@ -121,7 +121,7 @@ def __init__(self, config: dict, modules: dict, pattern: PytorchBasePattern, coe def on_before_optimizer_step(self): """Calculate the group-lasso score map.""" with torch.no_grad(): - if self.pattern.invalid_layers == None: + if self.pattern.invalid_layers is None: self.pattern.check_layer_validity() for key in self.modules.keys(): if key in self.pattern.invalid_layers: @@ -138,7 +138,7 @@ def on_after_optimizer_step(self): ##decoupled with grad descent for key in self.modules.keys(): if key in self.pattern.invalid_layers: continue - reg_term = self.pattern.reshape_reduced_to_orig(self.reg_terms[key], key, - self.modules[key].weight.shape) + reg_term = self.pattern.reshape_reduced_to_orig( + self.reg_terms[key], key, self.modules[key].weight.shape + ) self.modules[key].weight -= reg_term * self.modules[key].weight - diff --git a/neural_compressor/compression/pruner/schedulers.py b/neural_compressor/compression/pruner/schedulers.py index 22235918482..2c20fcfb12e 100644 --- a/neural_compressor/compression/pruner/schedulers.py +++ b/neural_compressor/compression/pruner/schedulers.py @@ -1,4 +1,4 @@ -"""scheduler module.""" +"""Scheduler module.""" # !/usr/bin/env python # -*- coding: utf-8 -*- # @@ -23,17 +23,18 @@ def register_scheduler(name): """Class decorator used to register a Scheduler subclass to the registry. - + Decorator function used before a Scheduler subclass. Make sure that the Scheduler class decorated by this function can be registered in SCHEDULERS. - + Args: cls (class): The class of register. name: A string that defines the scheduler type. - + Returns: cls: The class of register. 
""" + def register(scheduler): SCHEDULERS[name] = scheduler return scheduler @@ -43,12 +44,12 @@ def register(scheduler): def get_scheduler(config): """Get registered scheduler class. - + Get a scheduler object from SCHEDULERS. - + Args: config: A config dict object that contains the scheduler information. - + Returns: A Scheduler object. """ @@ -65,14 +66,14 @@ class PruningScheduler: Mainly contains two types: 1. iterative scheduler. Prune the model from dense to target sparsity gradually. 2. one-shot scheduler. Prune the model in a single step and reach the target sparsity. - + Args: config: A config dict object that contains the scheduler information. - + Attributes: config: A config dict object that contains the scheduler information. """ - + def __init__(self, config): """Initialize.""" self.config = config @@ -82,16 +83,16 @@ def update_sparsity_ratio(self, target_ratio, current_prune_step, total_prune_st raise NotImplementedError -@register_scheduler('oneshot') +@register_scheduler("oneshot") class OneshotScheduler(PruningScheduler): """Pruning Scheduler. - + A Scheduler class derived from Scheduler. Prune the model to target sparsity once. - + Args: config: A config dict object that contains the scheduler information. - + Attributes: Inherit from parent class Scheduler. """ @@ -102,13 +103,13 @@ def __init__(self, config): def update_sparsity_ratio(self, target_ratio, current_prune_step, total_prune_steps, masks=None, init_ratio=0.0): """Update sparsity ratio. - + Args: target_ratio: A float representing the sparsity ratio after pruning. current_prune_step: An integer representing the current pruning step. total_prune_steps: An integer representing the total number of steps of the pruning process. masks: A dict {"module_name": Tensor} that stores the masks for modules' weights. - init_ratio: A float representing the sparsity ratio before pruning. + init_ratio: A float representing the sparsity ratio before pruning. Return: A float representing the sparsity ratio that the model will reach after the next pruning step. @@ -116,13 +117,13 @@ def update_sparsity_ratio(self, target_ratio, current_prune_step, total_prune_st return target_ratio -@register_scheduler('iterative') +@register_scheduler("iterative") class IterativeScheduler(PruningScheduler): """Pruning Scheduler. - + A Scheduler class derived from Scheduler. Prune the model from dense to target sparsity in several steps. - + Args: config: A config dict object that contains the scheduler information. @@ -134,8 +135,9 @@ def __init__(self, config): """Initialize.""" super(IterativeScheduler, self).__init__(config) - def update_sparsity_ratio(self, target_ratio, current_prune_step, total_prune_steps, masks, - init_sparsity_ratio=0.0): + def update_sparsity_ratio( + self, target_ratio, current_prune_step, total_prune_steps, masks, init_sparsity_ratio=0.0 + ): """Obtain new target sparsity ratio according to the step. Args: @@ -144,31 +146,35 @@ def update_sparsity_ratio(self, target_ratio, current_prune_step, total_prune_st total_prune_steps: A integer. The total steps included in the pruning progress. masks: A dict{"module_name": Tensor}. The masks for modules' weights. init_sparsity_ratio: - + Returns: A float representing the target sparsity ratio the model will reach after the next pruning step. 
""" aggressive_ratio = target_ratio - aggressive_ratio = min(self.config.max_sparsity_ratio_per_op, - aggressive_ratio) ##legacy issue + aggressive_ratio = min(self.config.max_sparsity_ratio_per_op, aggressive_ratio) ##legacy issue decay_type = self.config.sparsity_decay_type if decay_type == "cos": current_target_sparsity = (aggressive_ratio - init_sparsity_ratio) * ( - 1.0 - math.cos(float(current_prune_step) / total_prune_steps * (math.pi / 2))) + init_sparsity_ratio + 1.0 - math.cos(float(current_prune_step) / total_prune_steps * (math.pi / 2)) + ) + init_sparsity_ratio elif decay_type == "exp": target_dense_change_ratio = ((1.0 - aggressive_ratio) / (1.0 - init_sparsity_ratio)) ** ( - 1 / total_prune_steps) - current_target_sparsity = 1.0 - ( - 1.0 - init_sparsity_ratio) * target_dense_change_ratio ** current_prune_step + 1 / total_prune_steps + ) + current_target_sparsity = ( + 1.0 - (1.0 - init_sparsity_ratio) * target_dense_change_ratio**current_prune_step + ) elif decay_type == "linear": current_target_sparsity = (aggressive_ratio - init_sparsity_ratio) * float( - current_prune_step) / total_prune_steps + init_sparsity_ratio + current_prune_step + ) / total_prune_steps + init_sparsity_ratio elif decay_type == "cube": current_target_sparsity = (aggressive_ratio - init_sparsity_ratio) * ( - (float(current_prune_step) / total_prune_steps) ** 3) + init_sparsity_ratio + (float(current_prune_step) / total_prune_steps) ** 3 + ) + init_sparsity_ratio else: assert False, "{} is not supported".format(decay_type) diff --git a/neural_compressor/compression/pruner/tf_criteria.py b/neural_compressor/compression/pruner/tf_criteria.py index 55ea57286cf..a953fb731e6 100644 --- a/neural_compressor/compression/pruner/tf_criteria.py +++ b/neural_compressor/compression/pruner/tf_criteria.py @@ -1,4 +1,4 @@ -"""tensorflow pruning criterion.""" +"""Tensorflow pruning criterion.""" # !/usr/bin/env python # -*- coding: utf-8 -*- # @@ -69,7 +69,7 @@ def on_after_optimizer_step(self): pass -@register_criterion('magnitude') +@register_criterion("magnitude") class MagnitudeCriterion(PruningCriterion): """Pruning criterion. @@ -94,5 +94,3 @@ def on_step_begin(self): for key in self.modules.keys(): p = self.modules[key].get_weights()[0] self.scores[key] = np.abs(p) - - diff --git a/neural_compressor/compression/pruner/utils.py b/neural_compressor/compression/pruner/utils.py index 0c2b25bc99c..c65dca60742 100644 --- a/neural_compressor/compression/pruner/utils.py +++ b/neural_compressor/compression/pruner/utils.py @@ -1,4 +1,4 @@ -"""prune utils.""" +"""Prune utils.""" # !/usr/bin/env python # -*- coding: utf-8 -*- # @@ -17,59 +17,78 @@ # limitations under the License. 
import re -import yaml + import numpy as np +import yaml + from ...config import WeightPruningConfig as WeightPruningConf try: - - from ...conf.pythonic_config import WeightPruningConfig - from ...conf.config import PrunerV2 - from ...utils.utility import LazyImport + from neural_compressor.conf.config import Pruner from neural_compressor.conf.dotdict import DotDict from neural_compressor.utils import logger - from neural_compressor.conf.config import Pruner - torch = LazyImport('torch') - nn = LazyImport('torch.nn') - F = LazyImport('torch.nn.functional') - tf = LazyImport('tensorflow') + + from ...conf.config import PrunerV2 + from ...conf.pythonic_config import WeightPruningConfig + from ...utils.utility import LazyImport + + torch = LazyImport("torch") + nn = LazyImport("torch.nn") + F = LazyImport("torch.nn.functional") + tf = LazyImport("tensorflow") except: + import logging + + import tensorflow as tf import torch import torch.nn as nn - import tensorflow as tf import torch.nn.functional as F - from .dot_dict import DotDict ##TODO - import logging + + from .dot_dict import DotDict # #TODO + logger = logging.getLogger(__name__) from .schema_check import PrunerV2 class WeightPruningConfig: """Similiar to torch optimizer's interface.""" - def __init__(self, pruning_configs=[{}], ##empty dict will use global values - target_sparsity=0.9, pruning_type="snip_momentum", pattern="4x1", op_names=[], - excluded_op_names=[], - start_step=0, end_step=0, pruning_scope="global", pruning_frequency=1, - min_sparsity_ratio_per_op=0.0, max_sparsity_ratio_per_op=0.98, - sparsity_decay_type="exp", pruning_op_types=['Conv', 'Linear'], - **kwargs): + def __init__( + self, + pruning_configs=[{}], ##empty dict will use global values + target_sparsity=0.9, + pruning_type="snip_momentum", + pattern="4x1", + op_names=[], + excluded_op_names=[], + start_step=0, + end_step=0, + pruning_scope="global", + pruning_frequency=1, + min_sparsity_ratio_per_op=0.0, + max_sparsity_ratio_per_op=0.98, + sparsity_decay_type="exp", + pruning_op_types=["Conv", "Linear"], + **kwargs, + ): """Init a WeightPruningConfig object.""" self.pruning_configs = pruning_configs - self._weight_compression = DotDict({ - 'target_sparsity': target_sparsity, - 'pruning_type': pruning_type, - 'pattern': pattern, - 'op_names': op_names, - 'excluded_op_names': excluded_op_names, ##global only - 'start_step': start_step, - 'end_step': end_step, - 'pruning_scope': pruning_scope, - 'pruning_frequency': pruning_frequency, - 'min_sparsity_ratio_per_op': min_sparsity_ratio_per_op, - 'max_sparsity_ratio_per_op': max_sparsity_ratio_per_op, - 'sparsity_decay_type': sparsity_decay_type, - 'pruning_op_types': pruning_op_types, - }) + self._weight_compression = DotDict( + { + "target_sparsity": target_sparsity, + "pruning_type": pruning_type, + "pattern": pattern, + "op_names": op_names, + "excluded_op_names": excluded_op_names, ##global only + "start_step": start_step, + "end_step": end_step, + "pruning_scope": pruning_scope, + "pruning_frequency": pruning_frequency, + "min_sparsity_ratio_per_op": min_sparsity_ratio_per_op, + "max_sparsity_ratio_per_op": max_sparsity_ratio_per_op, + "sparsity_decay_type": sparsity_decay_type, + "pruning_op_types": pruning_op_types, + } + ) self._weight_compression.update(kwargs) @property @@ -94,13 +113,13 @@ def get_sparsity_ratio(pruners, model): """ pattern_sparsity_cnt = 0 element_sparsity_cnt = 0 - if hasattr(model, 'model'): + if hasattr(model, "model"): model = model.model for pruner in pruners: if 
"MultiheadAttentionPruner" in type(pruner).__name__: logger.info("Calculate multihead-attention sparsity") - mha_total = .0 - mha_sparse = .0 + mha_total = 0.0 + mha_sparse = 0.0 for k, v in pruner.head_masks.items(): mha_total += v.numel() mha_sparse += v.numel() - torch.count_nonzero(v) @@ -113,7 +132,7 @@ def get_sparsity_ratio(pruners, model): cnt += modules[key].weight.numel() pattern_sparsity_cnt += int(cnt * sparsity_ratio) for key in pruner.masks.keys(): - block_num = 1 + block_num = 1 if pruner.pattern.block: block_size = pruner.pattern.block_size[key] block_num = block_size[0] * block_size[1] @@ -122,7 +141,7 @@ def get_sparsity_ratio(pruners, model): linear_conv_cnt = 0 param_cnt = 0 for name, module in model.named_modules(): - if type(module).__name__ in ["Linear"] or re.search(r'Conv.d', type(module).__name__) != None: + if type(module).__name__ in ["Linear"] or re.search(r"Conv.d", type(module).__name__) is not None: linear_conv_cnt += module.weight.numel() for n, param in model.named_parameters(): @@ -136,16 +155,17 @@ def get_sparsity_ratio(pruners, model): if param_cnt == 0: elementwise_over_all = 0 else: - elementwise_over_all = float( - element_sparsity_cnt) / param_cnt + elementwise_over_all = float(element_sparsity_cnt) / param_cnt logger.info( f"elementwise_over_matmul_gemm_conv:{elementwise_over_matmul_gemm_conv}," f" elementwise_over_all:{elementwise_over_all}," - f"blockwise_over_matmul_gemm_conv:{blockwise_over_matmul_gemm_conv}") + f"blockwise_over_matmul_gemm_conv:{blockwise_over_matmul_gemm_conv}" + ) return elementwise_over_matmul_gemm_conv, elementwise_over_all, blockwise_over_matmul_gemm_conv + def get_sparsity_ratio_tf(pruners, model): """Calculate sparsity ratio of a module/layer. @@ -157,7 +177,7 @@ def get_sparsity_ratio_tf(pruners, model): """ pattern_sparsity_cnt = 0 element_sparsity_cnt = 0 - if hasattr(model, 'model'): + if hasattr(model, "model"): model = model.model for pruner in pruners: modules = pruner.modules @@ -176,7 +196,7 @@ def get_sparsity_ratio_tf(pruners, model): linear_conv_cnt = 0 param_cnt = 0 for layer in model.layers: - if layer.__class__.__name__ in ["Dense"] or re.search(r'Conv.d', layer.__class__.__name__) != None: + if layer.__class__.__name__ in ["Dense"] or re.search(r"Conv.d", layer.__class__.__name__) is not None: linear_conv_cnt += layer.get_weights()[0].size for layer in model.layers: @@ -192,16 +212,17 @@ def get_sparsity_ratio_tf(pruners, model): if param_cnt == 0: elementwise_over_all = 0 else: - elementwise_over_all = float( - element_sparsity_cnt) / param_cnt + elementwise_over_all = float(element_sparsity_cnt) / param_cnt logger.info( f"elementwise_over_matmul_gemm_conv:{elementwise_over_matmul_gemm_conv}," f" elementwise_over_all:{elementwise_over_all}," - f"blockwise_over_matmul_gemm_conv:{blockwise_over_matmul_gemm_conv}") + f"blockwise_over_matmul_gemm_conv:{blockwise_over_matmul_gemm_conv}" + ) return elementwise_over_matmul_gemm_conv, elementwise_over_all, blockwise_over_matmul_gemm_conv + def check_config(prune_config): """Check if the configuration dict is valid for running Pruning object. @@ -214,23 +235,25 @@ def check_config(prune_config): Raises: AssertionError. 
""" - assert prune_config['start_step'] >= 0, "start_step should be greater than 0" - assert prune_config['end_step'] >= -1, "end_step should be greater than 0" - assert prune_config['end_step'] >= prune_config['start_step'], \ - "end_step should be greater than start_step" - assert prune_config['target_sparsity'] >= 0 and prune_config['target_sparsity'] < 1.0, \ - "begin_pruning_step should be in range [0,1)" - assert prune_config['pruning_frequency'] > 0, "pruning_frequency should be greater than 0" - assert prune_config['max_sparsity_ratio_per_op'] >= 0 and prune_config['max_sparsity_ratio_per_op'] < 1, \ - "pruning_frequency should be greater than 0" - assert prune_config['pruning_scope'] == "global" or prune_config['pruning_scope'] == "local", \ - "only support 'global' and 'local' prune domain" + assert prune_config["start_step"] >= 0, "start_step should be greater than 0" + assert prune_config["end_step"] >= -1, "end_step should be greater than 0" + assert prune_config["end_step"] >= prune_config["start_step"], "end_step should be greater than start_step" + assert ( + prune_config["target_sparsity"] >= 0 and prune_config["target_sparsity"] < 1.0 + ), "begin_pruning_step should be in range [0,1)" + assert prune_config["pruning_frequency"] > 0, "pruning_frequency should be greater than 0" + assert ( + prune_config["max_sparsity_ratio_per_op"] >= 0 and prune_config["max_sparsity_ratio_per_op"] < 1 + ), "pruning_frequency should be greater than 0" + assert ( + prune_config["pruning_scope"] == "global" or prune_config["pruning_scope"] == "local" + ), "only support 'global' and 'local' prune domain" try: - prune_config['resume_from_pruned_checkpoint'] = bool(prune_config['resume_from_pruned_checkpoint']) + prune_config["resume_from_pruned_checkpoint"] = bool(prune_config["resume_from_pruned_checkpoint"]) except: assert False, "resume_from_pruned_checkpoint should be bool value" if "x" in prune_config["pattern"]: - pattern = prune_config["pattern"].split('_')[-1].split('x') + pattern = prune_config["pattern"].split("_")[-1].split("x") if pattern[0] == "channel" or pattern[1] == "channel": pass else: @@ -242,7 +265,7 @@ def check_config(prune_config): assert N > 0, "N should be greater than 0" assert M > 0, "M should be greater than 0" if ":" in prune_config["pattern"]: - pattern = prune_config["pattern"].split('_')[-1].split(':') + pattern = prune_config["pattern"].split("_")[-1].split(":") try: N = int(pattern[0]) M = int(pattern[1]) @@ -251,16 +274,18 @@ def check_config(prune_config): assert N > 0, "N should be greater than 0" assert M > N, "M should be greater than N" max_ratio = float(N) / M - if prune_config['pruning_type']!="pattern_lock": - assert prune_config['target_sparsity'] <= max_ratio, \ - "in N:M pattern, the max sparsity is N/M={}".format(max_ratio) - prune_config['max_sparsity_ratio_per_op'] = min(max_ratio, prune_config['max_sparsity_ratio_per_op']) - if prune_config['reg_coeff'] != None: - prune_config['reg_coeff'] = float(prune_config['reg_coeff']) - assert prune_config['reg_coeff'] >= 0, "only support positive reg_type" - assert prune_config["min_sparsity_ratio_per_op"] >= 0 and prune_config["min_sparsity_ratio_per_op"] <= \ - prune_config['max_sparsity_ratio_per_op'], \ - "min_sparsity_ratio_per_op should in[0, max_sparsity_ratio_per_op]" + if prune_config["pruning_type"] != "pattern_lock": + assert prune_config["target_sparsity"] <= max_ratio, "in N:M pattern, the max sparsity is N/M={}".format( + max_ratio + ) + prune_config["max_sparsity_ratio_per_op"] = 
min(max_ratio, prune_config["max_sparsity_ratio_per_op"]) + if prune_config["reg_coeff"] is not None: + prune_config["reg_coeff"] = float(prune_config["reg_coeff"]) + assert prune_config["reg_coeff"] >= 0, "only support positive reg_type" + assert ( + prune_config["min_sparsity_ratio_per_op"] >= 0 + and prune_config["min_sparsity_ratio_per_op"] <= prune_config["max_sparsity_ratio_per_op"] + ), "min_sparsity_ratio_per_op should in[0, max_sparsity_ratio_per_op]" def reset_none_to_default(obj, key, default): @@ -271,19 +296,20 @@ def reset_none_to_default(obj, key, default): key: A string representing the key in obj. default: When the key is not in obj, add key by the default item in original obj. """ - if obj == None: + if obj is None: return None if isinstance(obj, dict): - if (not key in obj.keys()) or obj[key] == None: + if (key not in obj.keys()) or obj[key] is None: return default else: return obj[key] else: - if not hasattr(obj, key) or getattr(obj, key) == None: + if not hasattr(obj, key) or getattr(obj, key) is None: return default else: return getattr(obj, key) + def update_params(info): """Update parameters.""" if "parameters" in info.keys(): @@ -309,7 +335,6 @@ def process_weight_config(global_config, local_configs, default_config): default_all[key] = reset_none_to_default(default_all, key, default_config[key]) if len(local_configs) == 0: ##only one - update_params(default_all) check_config(default_all) pruner_info = DotDict(default_all) @@ -360,6 +385,7 @@ def process_yaml_config(global_config, local_configs, default_config): return pruners_info + def check_key_validity(template_config, user_config): """Check the validity of keys. @@ -367,6 +393,7 @@ def check_key_validity(template_config, user_config): template_config: A default config dict object that contains pruning parameters and configurations. user_config: A user config dict object that contains pruning parameters and configurations. """ + def check_key_validity_dict(template_config, usr_cfg_dict): """Check the validity of keys in the dict.. @@ -404,22 +431,36 @@ def check_key_validity_prunerv2(template_config, usr_cfg_dict): check_key_validity_prunerv2(template_config, user_config) return + def process_and_check_config(val): """Process and check configurations. Args: val: A dict that contains the layer-specific pruning configurations. 
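The pattern checks above accept two string formats: "NxM" blocks such as "4x1" (or "channelxN" for whole channels) and "N:M" structured sparsity such as "2:4", where an N:M pattern caps the reachable sparsity at N/M. A small hedged helper showing just that cap:

def nm_max_sparsity(pattern):
    # Sketch: for an "N:M" pattern such as "2:4", at most N of every M elements are pruned.
    n, m = (int(v) for v in pattern.split("_")[-1].split(":"))
    assert 0 < n < m, "N:M requires 0 < N < M"
    return float(n) / m


# e.g. nm_max_sparsity("2:4") == 0.5, so a target_sparsity above 0.5 is rejected for this pattern.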
""" - default_global_config = {'target_sparsity': 0.9, 'pruning_type': 'snip_momentum', 'pattern': '4x1', 'op_names': [], - 'excluded_op_names': [], - 'start_step': 0, 'end_step': 0, 'pruning_scope': 'global', 'pruning_frequency': 1, - 'min_sparsity_ratio_per_op': 0.0, 'max_sparsity_ratio_per_op': 0.98, - 'sparsity_decay_type': 'exp', "criterion_type": "snip_momentum", - 'pruning_op_types': ['Conv', 'Linear'], - 'low_memory_usage': False - } - default_local_config = {'resume_from_pruned_checkpoint': False, 'reg_type': None, - 'criterion_reduce_type': "mean", 'parameters': {"reg_coeff": 0.0}} + default_global_config = { + "target_sparsity": 0.9, + "pruning_type": "snip_momentum", + "pattern": "4x1", + "op_names": [], + "excluded_op_names": [], + "start_step": 0, + "end_step": 0, + "pruning_scope": "global", + "pruning_frequency": 1, + "min_sparsity_ratio_per_op": 0.0, + "max_sparsity_ratio_per_op": 0.98, + "sparsity_decay_type": "exp", + "criterion_type": "snip_momentum", + "pruning_op_types": ["Conv", "Linear"], + "low_memory_usage": False, + } + default_local_config = { + "resume_from_pruned_checkpoint": False, + "reg_type": None, + "criterion_reduce_type": "mean", + "parameters": {"reg_coeff": 0.0}, + } params_default_config = {"reg_coeff": 0.0} @@ -441,6 +482,7 @@ def process_and_check_config(val): check_key_validity(default_config, global_configs) return process_yaml_config(global_configs, pruning_configs, default_config) + def process_config(config): """Obtain a config dict object from the config file. @@ -452,27 +494,24 @@ def process_config(config): """ if isinstance(config, str): try: - with open(config, 'r') as f: + with open(config, "r") as f: content = f.read() val = yaml.safe_load(content) ##schema.validate(val) return process_and_check_config(val) except FileNotFoundError as f: logger.error("{}.".format(f)) - raise RuntimeError( - "The yaml file is not exist. Please check the file name or path." - ) + raise RuntimeError("The yaml file is not exist. Please check the file name or path.") except Exception as e: logger.error("{}.".format(e)) - raise RuntimeError( - "The yaml file format is not correct. Please refer to document." - ) + raise RuntimeError("The yaml file format is not correct. Please refer to document.") if isinstance(config, WeightPruningConfig) or isinstance(config, WeightPruningConf): return process_and_check_config(config) else: assert False, f"not supported type {config}" + def parse_last_linear(model): """Locate the last linear layers of the model. While pruning, the final linear often acts like classifier head, which might cause @@ -482,10 +521,12 @@ def parse_last_linear(model): model: The model to be pruned. """ from .model_slim.pattern_analyzer import ClassifierHeadSearcher + searcher = ClassifierHeadSearcher(model) layer = searcher.search(return_name=True) return layer + def parse_last_linear_tf(model): """Locate the last linear layers of the model. While pruning, the final linear often acts like classifier head, which might cause @@ -495,10 +536,12 @@ def parse_last_linear_tf(model): model(tf.keras.Model): The model to be pruned. """ from .model_slim.pattern_analyzer import ClassifierHeadSearcherTF + searcher = ClassifierHeadSearcherTF(model) layer = searcher.search(return_name=True) return layer + def parse_to_prune(config, model): """Keep target pruned layers. 
@@ -509,10 +552,10 @@ def parse_to_prune(config, model): modules = {} # additional function: exclude last layer (often a classifier head and not suitable to be pruned) classifier_head_name = parse_last_linear(model) - if classifier_head_name != None: + if classifier_head_name is not None: config["excluded_op_names"].append(classifier_head_name) # locate target layers - if config["op_names"] == None or config["op_names"] == []: + if config["op_names"] is None or config["op_names"] == []: config["op_names"] = [".*"] for raw in config["op_names"]: try: @@ -537,6 +580,7 @@ def parse_to_prune(config, model): new_modules[name] = modules[name] return new_modules + def parse_to_prune_tf(config, model): """Keep target pruned layers. @@ -547,10 +591,10 @@ def parse_to_prune_tf(config, model): modules = {} # additional function: exclude last layer (often a classifier head and not suitable to be pruned) classifier_head_name = parse_last_linear_tf(model) - if classifier_head_name != None: + if classifier_head_name is not None: config["excluded_op_names"].append(classifier_head_name) # locate target layers - if config["op_names"] == None or config["op_names"] == []: + if config["op_names"] is None or config["op_names"] == []: config["op_names"] = [".*"] for layer in model.layers: @@ -571,6 +615,7 @@ def parse_to_prune_tf(config, model): new_modules[name] = modules[name] return new_modules + def generate_pruner_config(info): """Generate pruner config object from prune information. @@ -580,17 +625,19 @@ def generate_pruner_config(info): Returns: pruner: A pruner config object. """ - return Pruner(initial_sparsity=0, - method=info.method, - target_sparsity=info.target_sparsity, - start_epoch=info.start_step, - end_epoch=info.end_step, - update_frequency=info.pruning_frequency, - ) + return Pruner( + initial_sparsity=0, + method=info.method, + target_sparsity=info.target_sparsity, + start_epoch=info.start_step, + end_epoch=info.end_step, + update_frequency=info.pruning_frequency, + ) + def get_layers(model): """Get each layer's name and its module. - + Args: model: The model to be pruned. @@ -598,19 +645,20 @@ def get_layers(model): """ layers = [] search_flag = False + def unfoldLayer(module): """Unfold each layer. - + Args: module: The modules. - + Returns: The ModuleList of model """ nonlocal search_flag nonlocal layers if search_flag: return - if hasattr(type(module),"__name__") and 'ModuleList' in type(module).__name__: + if hasattr(type(module), "__name__") and "ModuleList" in type(module).__name__: layers = module search_flag = True layer_list = list(module.named_children()) @@ -622,13 +670,14 @@ def unfoldLayer(module): unfoldLayer(model) return layers -def collect_layer_inputs(model, layers, layer_idx, layer_inputs, device='cuda:0'): + +def collect_layer_inputs(model, layers, layer_idx, layer_inputs, device="cuda:0"): """Getting the forward input of a layer. - + Args: model: The model to be pruned. layers: Selectable layers of the model. - layer_idx: The layer index. + layer_idx: The layer index. layer_inputs: The dataloader or the output of the previous layer. device: Specify the type of device to return. Returns: input list. 
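collect_layer_inputs above captures calibration data by temporarily replacing the first block's forward with a closure that records its arguments and then raises, so the rest of the model never runs. A stripped-down sketch of that capture trick with hypothetical names and a single tensor input:

from functools import partial

import torch


def capture_first_block_inputs(model, first_block, batches, device="cpu"):
    # Sketch: record what the first transformer block receives without running the full model.
    captured = []
    original_forward = first_block.forward

    def forward(self, hidden_states, **kwargs):
        captured.append(hidden_states.to(device))
        raise ValueError  # abort the forward pass once the input is recorded

    first_block.forward = partial(forward, first_block)
    with torch.no_grad():
        for batch in batches:
            try:
                model(batch.to(device))
            except ValueError:
                pass
    first_block.forward = original_forward  # restore the real forward
    return captured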
@@ -637,33 +686,35 @@ def collect_layer_inputs(model, layers, layer_idx, layer_inputs, device='cuda:0' model_dev = model.device attention_mask = None # 'alibi' is a necessary attribute for the bloom models - inputs_info = {'attention_mask': None} - if hasattr(model, 'config'): + inputs_info = {"attention_mask": None} + if hasattr(model, "config"): model_type = model.config.model_type - else : - model_type = 'null' - if 'bloom' in model_type: - inputs_info['alibi'] = None - + else: + model_type = "null" + if "bloom" in model_type: + inputs_info["alibi"] = None + with torch.no_grad(): if layer_idx == 0: layer = layers[layer_idx] + def forward(self, hidden_states, **kwargs): # TODO solve the problem of batchsize!=1 inputs.append(hidden_states.to(device)) - inputs_info['attention_mask'] = kwargs['attention_mask'] - if 'alibi' in kwargs.keys(): - inputs_info['alibi'] = kwargs['alibi'] + inputs_info["attention_mask"] = kwargs["attention_mask"] + if "alibi" in kwargs.keys(): + inputs_info["alibi"] = kwargs["alibi"] raise ValueError - + forward_cache = layers[layer_idx].forward from functools import partial + layer.forward = partial(forward, layer) for batch in layer_inputs: try: - if 'values' in dir(batch): + if "values" in dir(batch): hidden_states = list(batch.values())[0].to(model_dev) - else : + else: hidden_states = batch[0].to(model_dev).to(model_dev) model(hidden_states) # model(**batch) @@ -674,13 +725,11 @@ def forward(self, hidden_states, **kwargs): if inputs_info[key] is not None: inputs_info[key] = inputs_info[key].to(device) else: - prev_layer = layers[layer_idx-1] - + prev_layer = layers[layer_idx - 1] + for batch in layer_inputs: prev_output = prev_layer(*batch) batch[0] = prev_output[0] inputs.append(batch) - - return inputs, inputs_info - + return inputs, inputs_info diff --git a/neural_compressor/conf/__init__.py b/neural_compressor/conf/__init__.py index 96a9dc31f6b..5193c828c62 100644 --- a/neural_compressor/conf/__init__.py +++ b/neural_compressor/conf/__init__.py @@ -14,4 +14,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - diff --git a/neural_compressor/conf/config.py b/neural_compressor/conf/config.py index e7ffa8a2dcc..12d046cfc28 100644 --- a/neural_compressor/conf/config.py +++ b/neural_compressor/conf/config.py @@ -15,19 +15,22 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import copy +import datetime +import itertools +import os +import re +from collections import OrderedDict + import yaml -from schema import Schema, And, Use, Optional, Or, Hook +from schema import And, Hook, Optional, Or, Schema, Use from ..adaptor import FRAMEWORKS from ..objective import OBJECTIVES from ..utils import logger from ..version import __version__ -import re -import copy -import itertools -from collections import OrderedDict from .dotdict import DotDict, deep_set -import os, datetime + # TODO WA for avoid circular import # from ..experimental.strategy import EXP_STRATEGIES EXP_STRATEGIES = ['basic', 'auto_mixed_precision', 'bayesian', 'conservative',\ @@ -83,9 +86,7 @@ def __init__(self, start_epoch=None, end_epoch=None, initial_sparsity=None, @constructor_register class PrunerV2: - """ - similiar to torch optimizer's interface - """ + """Similiar to torch optimizer's interface.""" def __init__(self, target_sparsity=None, pruning_type=None, pattern=None, op_names=None, @@ -1325,15 +1326,14 @@ def percent_to_float(data): 'loss_weights': [0.5, 0.5]}}}}): dict, Optional('evaluation', default={'accuracy': {'metric': {'topk': 1}}}): dict - + }) class Conf(object): - """config parser. + """Config parser. Args: cfg_fname (string): The path to the configuration file. - """ def __init__(self, cfg_fname): assert cfg_fname is not None @@ -1342,8 +1342,8 @@ def __init__(self, cfg_fname): def _read_cfg(self, cfg_fname): """Load a config file following yaml syntax. - Args: - cfg_fname(string): The name of configuration yaml file + Args: + cfg_fname(string): The name of configuration yaml file """ try: with open(cfg_fname, 'r') as f: @@ -1540,11 +1540,10 @@ def _convert_cfg(self, src, dst): return dst class Quantization_Conf(Conf): - """config parser. + """Config parser. Args: cfg: The path to the configuration file or DotDict object or None. - """ def __init__(self, cfg=None): @@ -1603,11 +1602,10 @@ def modelwise_tune_space(self, model_wise_quant): return self._model_wise_tune_space class Pruning_Conf(Conf): - """config parser. + """Config parser. Args: cfg: The path to the configuration file or DotDict object or None. - """ def __init__(self, cfg=None): @@ -1621,11 +1619,10 @@ def __init__(self, cfg=None): self.usr_cfg = DotDict(pruning_default_schema.validate(dict())) class Graph_Optimization_Conf(Quantization_Conf): - """config parser. + """Config parser. Args: cfg: The path to the configuration file or DotDict object or None. - """ def __init__(self, cfg=None): @@ -1638,11 +1635,10 @@ def __init__(self, cfg=None): self.usr_cfg = DotDict(graph_optimization_default_schema.validate(dict())) class MixedPrecision_Conf(Quantization_Conf): - """config parser. + """Config parser. Args: cfg: The path to the configuration file or DotDict object or None. - """ def __init__(self, cfg=None): @@ -1655,11 +1651,10 @@ def __init__(self, cfg=None): self.usr_cfg = DotDict(mixed_precision_default_schema.validate(dict())) class Benchmark_Conf(Conf): - """config parser. + """Config parser. Args: cfg: The path to the configuration file or DotDict object or None. - """ def __init__(self, cfg=None): @@ -1672,11 +1667,10 @@ def __init__(self, cfg=None): self.usr_cfg = DotDict(benchmark_default_schema.validate(dict())) class Distillation_Conf(Conf): - """config parser. + """Config parser. Args: cfg: The path to the configuration file or DotDict object or None. 
- """ def __init__(self, cfg=None): @@ -1689,12 +1683,11 @@ def __init__(self, cfg=None): self.usr_cfg = DotDict(distillation_default_schema.validate(dict())) class NASConfig(Conf): - """config parser. + """Config parser. Args: approach: The approach of the NAS. search_algorithm: The search algorithm for NAS procedure. - """ def __init__(self, approach=None, search_space=None, search_algorithm=None): diff --git a/neural_compressor/conf/dotdict.py b/neural_compressor/conf/dotdict.py index 1e9868363f1..6d474dab694 100644 --- a/neural_compressor/conf/dotdict.py +++ b/neural_compressor/conf/dotdict.py @@ -17,45 +17,44 @@ from functools import reduce -def deep_get(dictionary, keys, default=None): - """get the dot key's item in nested dict +def deep_get(dictionary, keys, default=None): + """Get the dot key's item in nested dict eg person = {'person':{'name':{'first':'John'}}} - deep_get(person, "person.name.first") will output 'John' - - Args: - dictionary (dict): The dict object to get keys - keys (dict): The deep keys - default (object): The return item if key not exists - Returns: - item: the item of the deep dot keys + deep_get(person, "person.name.first") will output 'John'. + + Args: + dictionary (dict): The dict object to get keys + keys (dict): The deep keys + default (object): The return item if key not exists + Returns: + item: the item of the deep dot keys """ - return reduce(lambda d, key: d.get(key, default) \ - if isinstance(d, dict) else default, keys.split("."), dictionary) + return reduce(lambda d, key: d.get(key, default) if isinstance(d, dict) else default, keys.split("."), dictionary) -def deep_set(dictionary, keys, value): - """set the dot key's item in nested dict +def deep_set(dictionary, keys, value): + """Set the dot key's item in nested dict eg person = {'person':{'name':{'first':'John'}}} deep_set(person, "person.sex", 'male') will output {'person': {'name': {'first': 'John'}, 'sex': 'male'}} - Args: - dictionary (dict): The dict object to get keys - keys (dict): The deep keys - value (object): The value of the setting key + Args: + dictionary (dict): The dict object to get keys + keys (dict): The deep keys + value (object): The value of the setting key """ - keys = keys.split('.') + keys = keys.split(".") for key in keys[:-1]: dictionary = dictionary.setdefault(key, DotDict()) dictionary[keys[-1]] = value + class DotDict(dict): - """access yaml using attributes instead of using the dictionary notation. + """Access yaml using attributes instead of using the dictionary notation. Args: value (dict): The dict object to access. 
- """ def __init__(self, value=None): @@ -65,7 +64,7 @@ def __init__(self, value=None): for key in value: self.__setitem__(key, value[key]) else: - raise TypeError('expected dict') + raise TypeError("expected dict") def __getitem__(self, key): value = self.get(key, None) @@ -74,11 +73,9 @@ def __getitem__(self, key): def __setitem__(self, key, value): if isinstance(value, dict) and not isinstance(value, DotDict): value = DotDict(value) - if isinstance(value, list) and len(value) == 1 and isinstance( - value[0], dict): + if isinstance(value, list) and len(value) == 1 and isinstance(value[0], dict): value = DotDict(value[0]) - if isinstance(value, list) and len(value) > 1 and all(isinstance( - v, dict) for v in value): + if isinstance(value, list) and len(value) > 1 and all(isinstance(v, dict) for v in value): value = DotDict({k: v for d in value for k, v in d.items()}) super(DotDict, self).__setitem__(key, value) @@ -89,4 +86,3 @@ def __setstate__(self, d): self.__dict__.update(d) __setattr__, __getattr__ = __setitem__, __getitem__ - diff --git a/neural_compressor/conf/pythonic_config.py b/neural_compressor/conf/pythonic_config.py index fe3c8b8a021..1d19089c15e 100644 --- a/neural_compressor/conf/pythonic_config.py +++ b/neural_compressor/conf/pythonic_config.py @@ -15,11 +15,12 @@ # See the License for the specific language governing permissions and # limitations under the License. """Configs for Neural Compressor 1.x.""" -import logging import datetime -from schema import Schema, And, Optional -from .dotdict import DotDict +import logging +from schema import And, Optional, Schema + +from .dotdict import DotDict logger = logging.getLogger("neural_compressor") default_workspace = './nc_workspace/{}/'.format( @@ -116,7 +117,6 @@ class Options: set_workspace("workspace_path") set_resume_from("workspace_path") set_tensorboard(True) - """ def __init__(self, random_seed=1978, workspace=default_workspace, resume_from=None, tensorboard=False): @@ -187,7 +187,7 @@ class AccuracyCriterion: from neural_compressor.config import AccuracyCriterion accuracy_criterion = AccuracyCriterion( - higher_is_better=True, # optional. + higher_is_better=True, # optional. criterion='relative', # optional. Available values are 'relative' and 'absolute'. tolerable_loss=0.01, # optional. ) @@ -340,7 +340,7 @@ class _BaseQuantizationConfig: excluded_precisions: Precisions to be excluded, Default value is empty list. Neural compressor enable the mixed precision with fp32 + bf16 + int8 by default. If you want to disable bf16 data type, you can specify excluded_precisions = ['bf16]. - quant_level: Support auto, 0 and 1, 0 is conservative strategy, 1 is basic or user-specified + quant_level: Support auto, 0 and 1, 0 is conservative strategy, 1 is basic or user-specified strategy, auto (default) is the combination of 0 and 1. accuracy_criterion: Accuracy constraint settings. use_distributed_tuning: Whether use distributed tuning or not. 
@@ -786,7 +786,7 @@ def keys(self): def __getitem__(self, item): """Get the dict.""" return getattr(self, item) - + @property def backend(self): """Get backend.""" @@ -904,7 +904,7 @@ def intra_num_of_threads(self, intra_num_of_threads): def model(self): """Get model.""" return self._model - + @model.setter def model(self, model): """Set model.""" @@ -920,12 +920,12 @@ def model_name(self, model_name): """Set model name.""" if _check_value("model_name", model_name, str): self._model_name = model_name - + @property def framework(self): """Set framework.""" return self._framework - + @framework.setter def framework(self, framework): """Get framework.""" @@ -991,7 +991,7 @@ def approach(self, approach): class WeightPruningConfig: """Config Class for Pruning. Define a single or a sequence of pruning configs. - + Args: pruning_configs (list of dicts, optional): Local pruning configs only valid to linked layers. Parameters defined out of pruning_configs are valid for all layers. @@ -1000,8 +1000,8 @@ class WeightPruningConfig: target_sparsity (float, optional): Sparsity ratio the model can reach after pruning. Supports a float between 0 and 1. Default to 0.90. - pruning_type (str, optional): A string define the criteria for pruning. - Supports "magnitude", "snip", "snip_momentum", + pruning_type (str, optional): A string define the criteria for pruning. + Supports "magnitude", "snip", "snip_momentum", "magnitude_progressive", "snip_progressive", "snip_momentum_progressive", "pattern_lock" Default to "snip_momentum", which is the most feasible pruning criteria under most situations. pattern (str, optional): Sparsity's structure (or unstructure) types. @@ -1017,15 +1017,15 @@ class WeightPruningConfig: end_step: (int, optional): The step to end pruning. Supports an integer. Default to 0. - pruning_scope (str, optional): Determine layers' scores should be gather together to sort - Supports "global" and "local". + pruning_scope (str, optional): Determine layers' scores should be gather together to sort + Supports "global" and "local". Default: "global", since this leads to less accuracy loss. pruning_frequency: the frequency of pruning operation. Supports an integer. Default to 1. min_sparsity_ratio_per_op (float, optional): Minimum restriction for every layer's sparsity. Supports a float between 0 and 1. - Default to 0.0. + Default to 0.0. max_sparsity_ratio_per_op (float, optional): Maximum restriction for every layer's sparsity. Supports a float between 0 and 1. Default to 0.98. @@ -1151,7 +1151,7 @@ class KnowledgeDistillationLossConfig: loss_types (list[str], optional): loss types, should be a list of length 2. First item is the loss type for student model output and groundtruth label, second item is the loss type for student model output and teacher model output. - Supported tpyes for first item are "CE", "MSE". + Supported tpyes for first item are "CE", "MSE". Supported tpyes for second item are "CE", "MSE", "KL". Defaults to ['CE', 'CE']. loss_weights (list[float], optional): loss weights, should be a list of length 2 and sum to 1.0. 
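The WeightPruningConfig keyword interface documented above is unchanged by this formatting patch; a minimal example of configuring a structured 4x1 snip_momentum pruning run (all values are illustrative, and the excluded op name is hypothetical):

from neural_compressor.config import WeightPruningConfig

pruning_config = WeightPruningConfig(
    pruning_type="snip_momentum",  # default criterion
    target_sparsity=0.9,  # overall sparsity goal
    pattern="4x1",  # structured 4x1 blocks
    start_step=1000,
    end_step=10000,
    pruning_frequency=100,  # prune every 100 steps between start_step and end_step
    excluded_op_names=["classifier"],  # keep the (hypothetical) head layer dense
)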
diff --git a/neural_compressor/config.py b/neural_compressor/config.py index 940c9c24b1c..2d0d72fe463 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -17,12 +17,13 @@ """Configs for Neural Compressor 2.x.""" import datetime import logging -from schema import Schema, And, Optional, Or + +from schema import And, Optional, Or, Schema + from .utils import alias_param logger = logging.getLogger("neural_compressor") -default_workspace = './nc_workspace/{}/'.format( - datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')) +default_workspace = "./nc_workspace/{}/".format(datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")) QUANTMAPPING = { "auto": "post_training_auto_quant", @@ -32,43 +33,50 @@ } -ops_schema = Schema({ - Optional('weight', default=None): { - Optional('granularity'): And( - list, - lambda s: all(i in ['per_channel', 'per_tensor'] for i in s)), - Optional('scheme'): And( - list, - lambda s: all(i in ['asym', 'sym', 'asym_float'] for i in s)), - Optional('dtype'): And( - list, - lambda s: all(i in ['int', 'int4', 'nf4', 'fp4', 'fp4_e2m1_bnb', 'fp4_e2m1', 'int8', 'uint8', - 'fp32', 'bf16', 'fp16'] for i in s)), - Optional('algorithm'): And( - list, # TODO: allow AWQ+GPTQ algo - lambda s: all(i in ['minmax', 'RTN', 'AWQ', 'GPTQ', 'TEQ'] for i in s)), - Optional('bits'): And( - list, - lambda s: all(0 < i <= 8 and type(i)==int for i in s)), - Optional('group_size'): And( - list, - lambda s: all(i >= -1 and i != 0 and type(i)==int for i in s)), - }, - Optional('activation', default=None): { - Optional('granularity'): And( - list, - lambda s: all(i in ['per_channel', 'per_tensor'] for i in s)), - Optional('scheme'): And( - list, - lambda s: all(i in ['asym', 'sym'] for i in s)), - Optional('dtype'): And( - list, - lambda s: all(i in ['int8', 'uint8', 'fp32', 'bf16', 'fp16', 'None'] for i in s)), - Optional('algorithm'): And( - list, - lambda s: all(i in ['minmax', 'kl', 'placeholder', 'percentile'] for i in s)) +ops_schema = Schema( + { + Optional("weight", default=None): { + Optional("granularity"): And(list, lambda s: all(i in ["per_channel", "per_tensor"] for i in s)), + Optional("scheme"): And(list, lambda s: all(i in ["asym", "sym", "asym_float"] for i in s)), + Optional("dtype"): And( + list, + lambda s: all( + i + in [ + "int", + "int4", + "nf4", + "fp4", + "fp4_e2m1_bnb", + "fp4_e2m1", + "int8", + "uint8", + "fp32", + "bf16", + "fp16", + ] + for i in s + ), + ), + Optional("algorithm"): And( + list, # TODO: allow AWQ+GPTQ algo + lambda s: all(i in ["minmax", "RTN", "AWQ", "GPTQ", "TEQ"] for i in s), + ), + Optional("bits"): And(list, lambda s: all(0 < i <= 8 and type(i) == int for i in s)), + Optional("group_size"): And(list, lambda s: all(i >= -1 and i != 0 and type(i) == int for i in s)), + }, + Optional("activation", default=None): { + Optional("granularity"): And(list, lambda s: all(i in ["per_channel", "per_tensor"] for i in s)), + Optional("scheme"): And(list, lambda s: all(i in ["asym", "sym"] for i in s)), + Optional("dtype"): And( + list, lambda s: all(i in ["int8", "uint8", "fp32", "bf16", "fp16", "None"] for i in s) + ), + Optional("algorithm"): And( + list, lambda s: all(i in ["minmax", "kl", "placeholder", "percentile"] for i in s) + ), + }, } -}) +) def _check_value(name, src, supported_type, supported_value=[]): @@ -83,20 +91,21 @@ def datatype(self, datatype): self._datatype = datatype """ if isinstance(src, list) and any([not isinstance(i, supported_type) for i in src]): - assert False, ("Type of {} items should be {} but not 
{}".format( - name, str(supported_type), [type(i) for i in src])) + assert False, "Type of {} items should be {} but not {}".format( + name, str(supported_type), [type(i) for i in src] + ) elif not isinstance(src, list) and not isinstance(src, supported_type): - assert False, ("Type of {} should be {} but not {}".format( - name, str(supported_type), type(src))) + assert False, "Type of {} should be {} but not {}".format(name, str(supported_type), type(src)) if len(supported_value) > 0: if isinstance(src, str) and src not in supported_value: - assert False, ("{} is not in supported {}: {}. Skip setting it.".format( - src, name, str(supported_value))) - elif isinstance(src, list) and all([isinstance(i, str) for i in src]) and \ - any([i not in supported_value for i in src]): - assert False, ("{} is not in supported {}: {}. Skip setting it.".format( - src, name, str(supported_value))) + assert False, "{} is not in supported {}: {}. Skip setting it.".format(src, name, str(supported_value)) + elif ( + isinstance(src, list) + and all([isinstance(i, str) for i in src]) + and any([i not in supported_value for i in src]) + ): + assert False, "{} is not in supported {}: {}. Skip setting it.".format(src, name, str(supported_value)) return True @@ -126,7 +135,6 @@ class DotDict(dict): Args: value (dict): The dict object to access. - """ def __init__(self, value=None): @@ -137,7 +145,7 @@ def __init__(self, value=None): for key in value: self.__setitem__(key, value[key]) else: - raise TypeError('expected dict') + raise TypeError("expected dict") def __getitem__(self, key): """Get the key.""" @@ -148,11 +156,9 @@ def __setitem__(self, key, value): """Set the value to the key.""" if isinstance(value, dict) and not isinstance(value, DotDict): value = DotDict(value) - if isinstance(value, list) and len(value) == 1 and isinstance( - value[0], dict): + if isinstance(value, list) and len(value) == 1 and isinstance(value[0], dict): value = DotDict(value[0]) - if isinstance(value, list) and len(value) > 1 and all(isinstance( - v, dict) for v in value): + if isinstance(value, list) and len(value) > 1 and all(isinstance(v, dict) for v in value): value = DotDict({k: v for d in value for k, v in d.items()}) super(DotDict, self).__setitem__(key, value) @@ -198,10 +204,9 @@ class Options: set_workspace("workspace_path") set_resume_from("workspace_path") set_tensorboard(True) - """ - def __init__(self, random_seed=1978, workspace=default_workspace, - resume_from=None, tensorboard=False): + + def __init__(self, random_seed=1978, workspace=default_workspace, resume_from=None, tensorboard=False): """Init an Option object.""" self.random_seed = random_seed self.workspace = workspace @@ -216,7 +221,7 @@ def random_seed(self): @random_seed.setter def random_seed(self, random_seed): """Set random seed.""" - if _check_value('random_seed', random_seed, int): + if _check_value("random_seed", random_seed, int): self._random_seed = random_seed @property @@ -227,7 +232,7 @@ def workspace(self): @workspace.setter def workspace(self, workspace): """Set workspace.""" - if _check_value('workspace', workspace, str): + if _check_value("workspace", workspace, str): self._workspace = workspace @property @@ -238,7 +243,7 @@ def resume_from(self): @resume_from.setter def resume_from(self, resume_from): """Set resume_from.""" - if resume_from is None or _check_value('resume_from', resume_from, str): + if resume_from is None or _check_value("resume_from", resume_from, str): self._resume_from = resume_from @property @@ -249,7 +254,7 @@ def 
tensorboard(self): @tensorboard.setter def tensorboard(self, tensorboard): """Set tensorboard.""" - if _check_value('tensorboard', tensorboard, bool): + if _check_value("tensorboard", tensorboard, bool): self._tensorboard = tensorboard @@ -282,19 +287,22 @@ class BenchmarkConfig: conf = BenchmarkConfig(iteration=100, cores_per_instance=4, num_of_instance=7) fit(model='./int8.pb', conf=conf, b_dataloader=eval_dataloader) """ - def __init__(self, - inputs=[], - outputs=[], - backend="default", - device="cpu", - warmup=5, - iteration=-1, - model_name="", - cores_per_instance=None, - num_of_instance=1, - inter_num_of_threads=None, - intra_num_of_threads=None, - diagnosis=False): + + def __init__( + self, + inputs=[], + outputs=[], + backend="default", + device="cpu", + warmup=5, + iteration=-1, + model_name="", + cores_per_instance=None, + num_of_instance=1, + inter_num_of_threads=None, + intra_num_of_threads=None, + diagnosis=False, + ): """Init a BenchmarkConfig object.""" self.inputs = inputs self.outputs = outputs @@ -312,9 +320,20 @@ def __init__(self, def keys(self): """Returns keys of the dict.""" - return ('inputs', 'outputs', 'backend', 'device', 'warmup', 'iteration', - 'model_name', 'cores_per_instance', 'num_of_instance', 'framework', - 'inter_num_of_threads', 'intra_num_of_threads') + return ( + "inputs", + "outputs", + "backend", + "device", + "warmup", + "iteration", + "model_name", + "cores_per_instance", + "num_of_instance", + "framework", + "inter_num_of_threads", + "intra_num_of_threads", + ) def __getitem__(self, item): """Get the dict.""" @@ -328,8 +347,12 @@ def backend(self): @backend.setter def backend(self, backend): """Set backend.""" - if _check_value('backend', backend, str, [ - 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep', 'onnxrt_dml_ep']): + if _check_value( + "backend", + backend, + str, + ["default", "itex", "ipex", "onnxrt_trt_ep", "onnxrt_cuda_ep", "onnxrt_dnnl_ep", "onnxrt_dml_ep"], + ): self._backend = backend @property @@ -339,7 +362,7 @@ def device(self): @device.setter def device(self, device): - if _check_value('device', device, str, ['cpu', 'gpu']): + if _check_value("device", device, str, ["cpu", "gpu"]): self._device = device @property @@ -350,7 +373,7 @@ def outputs(self): @outputs.setter def outputs(self, outputs): """Set outputs.""" - if _check_value('outputs', outputs, str): + if _check_value("outputs", outputs, str): self._outputs = outputs @property @@ -361,7 +384,7 @@ def inputs(self): @inputs.setter def inputs(self, inputs): """Set inputs.""" - if _check_value('inputs', inputs, str): + if _check_value("inputs", inputs, str): self._inputs = inputs @property @@ -372,7 +395,7 @@ def warmup(self): @warmup.setter def warmup(self, warmup): """Set warmup.""" - if _check_value('warmup', warmup, int): + if _check_value("warmup", warmup, int): self._warmup = warmup @property @@ -383,7 +406,7 @@ def iteration(self): @iteration.setter def iteration(self, iteration): """Set iteration.""" - if _check_value('iteration', iteration, int): + if _check_value("iteration", iteration, int): self._iteration = iteration @property @@ -394,8 +417,7 @@ def cores_per_instance(self): @cores_per_instance.setter def cores_per_instance(self, cores_per_instance): """Set cores_per_instance.""" - if cores_per_instance is None or _check_value('cores_per_instance', cores_per_instance, - int): + if cores_per_instance is None or _check_value("cores_per_instance", cores_per_instance, int): self._cores_per_instance = cores_per_instance @property @@ 
-406,7 +428,7 @@ def num_of_instance(self): @num_of_instance.setter def num_of_instance(self, num_of_instance): """Set num_of_instance.""" - if _check_value('num_of_instance', num_of_instance, int): + if _check_value("num_of_instance", num_of_instance, int): self._num_of_instance = num_of_instance @property @@ -417,8 +439,7 @@ def inter_num_of_threads(self): @inter_num_of_threads.setter def inter_num_of_threads(self, inter_num_of_threads): """Set inter_num_of_threads.""" - if inter_num_of_threads is None or _check_value('inter_num_of_threads', - inter_num_of_threads, int): + if inter_num_of_threads is None or _check_value("inter_num_of_threads", inter_num_of_threads, int): self._inter_num_of_threads = inter_num_of_threads @property @@ -429,8 +450,7 @@ def intra_num_of_threads(self): @intra_num_of_threads.setter def intra_num_of_threads(self, intra_num_of_threads): """Get intra_num_of_threads.""" - if intra_num_of_threads is None or _check_value('intra_num_of_threads', - intra_num_of_threads, int): + if intra_num_of_threads is None or _check_value("intra_num_of_threads", intra_num_of_threads, int): self._intra_num_of_threads = intra_num_of_threads @property @@ -441,7 +461,7 @@ def diagnosis(self): @diagnosis.setter def diagnosis(self, diagnosis): """Set diagnosis property.""" - if _check_value('diagnosis', diagnosis, bool): + if _check_value("diagnosis", diagnosis, bool): self._diagnosis = diagnosis @property @@ -487,7 +507,8 @@ class AccuracyCriterion: tolerable_loss=0.01, # optional. ) """ - def __init__(self, higher_is_better=True, criterion='relative', tolerable_loss=0.01): + + def __init__(self, higher_is_better=True, criterion="relative", tolerable_loss=0.01): """Init an AccuracyCriterion object.""" self.higher_is_better = higher_is_better self.criterion = criterion @@ -501,33 +522,33 @@ def higher_is_better(self): @higher_is_better.setter def higher_is_better(self, higher_is_better): """Set higher_is_better.""" - if _check_value('higher_is_better', higher_is_better, bool): + if _check_value("higher_is_better", higher_is_better, bool): self._higher_is_better = higher_is_better @property def relative(self): """Get tolerable_loss when criterion is relative.""" - if self.criterion != 'relative': + if self.criterion != "relative": return None return self.tolerable_loss @relative.setter def relative(self, relative): """Set tolerable_loss and criterion to relative.""" - self.criterion = 'relative' + self.criterion = "relative" self.tolerable_loss = relative @property def absolute(self): """Get tolerable_loss when criterion is absolute.""" - if self.criterion != 'absolute': + if self.criterion != "absolute": return None return self.tolerable_loss @absolute.setter def absolute(self, absolute): """Set tolerable_loss and criterion to absolute.""" - self.criterion = 'absolute' + self.criterion = "absolute" self.tolerable_loss = absolute @property @@ -538,7 +559,7 @@ def criterion(self): @criterion.setter def criterion(self, criterion): """Set criterion.""" - if _check_value('criterion', criterion, str, ['relative', 'absolute']): + if _check_value("criterion", criterion, str, ["relative", "absolute"]): self._criterion = criterion @property @@ -549,7 +570,7 @@ def tolerable_loss(self): @tolerable_loss.setter def tolerable_loss(self, tolerable_loss): """Set tolerable_loss.""" - if _check_value('tolerable_loss', tolerable_loss, float): + if _check_value("tolerable_loss", tolerable_loss, float): self._tolerable_loss = tolerable_loss def __str__(self): @@ -558,7 +579,7 @@ def __str__(self): def 
keys(self): """Returns keys of the dict.""" - return ('higher_is_better', 'criterion', 'tolerable_loss') + return ("higher_is_better", "criterion", "tolerable_loss") def __getitem__(self, item): """Get the dict.""" @@ -590,8 +611,8 @@ class TuningCriterion: strategy_kwargs=None, ) """ - def __init__(self, strategy="basic", strategy_kwargs=None, timeout=0, - max_trials=100, objective="performance"): + + def __init__(self, strategy="basic", strategy_kwargs=None, timeout=0, max_trials=100, objective="performance"): """Init a TuningCriterion object.""" self.strategy = strategy self.timeout = timeout @@ -607,7 +628,7 @@ def max_trials(self): @max_trials.setter def max_trials(self, max_trials): """Set max_trials.""" - if _check_value('max_trials', max_trials, int): + if _check_value("max_trials", max_trials, int): self._max_trials = max_trials @property @@ -618,7 +639,7 @@ def timeout(self): @timeout.setter def timeout(self, timeout): """Set timeout.""" - if _check_value('timeout', timeout, int): + if _check_value("timeout", timeout, int): self._timeout = timeout @property @@ -632,7 +653,7 @@ def objective(self, objective): Args: objective: objective name or list of objective names - + Examples: objective = "performance" objective = ["performance"] @@ -644,22 +665,21 @@ def objective(self, objective): """ if isinstance(objective, list): for val in objective: - assert _check_value('objective', val, str, ['performance', 'accuracy', 'modelsize', 'footprint']) + assert _check_value("objective", val, str, ["performance", "accuracy", "modelsize", "footprint"]) self._objective = objective return - if _check_value('objective', objective, str, - ['performance', 'accuracy', 'modelsize', 'footprint']): + if _check_value("objective", objective, str, ["performance", "accuracy", "modelsize", "footprint"]): self._objective = [objective] return - if _check_value('objective', objective, dict): - if 'weight' in objective.keys() and isinstance(objective['weight'], list): - assert len(objective['objective']) == len(objective['weight']) + if _check_value("objective", objective, dict): + if "weight" in objective.keys() and isinstance(objective["weight"], list): + assert len(objective["objective"]) == len(objective["weight"]) for k, v in objective.items(): - _check_value('objective', k, str, ['objective', 'weight', 'higher_is_better']) - if k == 'objective': - _check_value('objective', v, str, ['performance', 'accuracy', 'modelsize', 'footprint']) + _check_value("objective", k, str, ["objective", "weight", "higher_is_better"]) + if k == "objective": + _check_value("objective", v, str, ["performance", "accuracy", "modelsize", "footprint"]) self._objective = objective @property @@ -670,8 +690,12 @@ def strategy(self): @strategy.setter def strategy(self, strategy): """Set strategy.""" - if _check_value('strategy', strategy, str, - ['basic', 'mse', 'bayesian', 'random', 'exhaustive', 'sigopt', 'tpe', 'mse_v2', 'hawq_v2']): + if _check_value( + "strategy", + strategy, + str, + ["basic", "mse", "bayesian", "random", "exhaustive", "sigopt", "tpe", "mse_v2", "hawq_v2"], + ): self._strategy = strategy @property @@ -756,25 +780,28 @@ class _BaseQuantizationConfig: strategy, auto (default) is the combination of 0 and 1. accuracy_criterion: Accuracy constraint settings. 
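The AccuracyCriterion and TuningCriterion setters reformatted above validate plain keyword arguments; a hedged example wiring them into a post-training quantization config (constructor defaults as documented in this file, values illustrative):

from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig, TuningCriterion

accuracy_criterion = AccuracyCriterion(
    higher_is_better=True,
    criterion="relative",  # 'relative' or 'absolute'
    tolerable_loss=0.01,  # tolerate up to 1% relative accuracy drop
)
tuning_criterion = TuningCriterion(
    strategy="basic",
    timeout=0,  # tuning time limit in seconds; 0 keeps the default behavior
    max_trials=100,
    objective="performance",
)
conf = PostTrainingQuantConfig(
    accuracy_criterion=accuracy_criterion,
    tuning_criterion=tuning_criterion,
)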
""" - def __init__(self, - inputs=[], - outputs=[], - backend="default", - domain="auto", - model_name="", - recipes={}, - quant_format="default", - device="cpu", - calibration_sampling_size=[100], - example_inputs=None, - op_type_dict=None, - op_name_dict=None, - reduce_range=None, - excluded_precisions=[], - quant_level="auto", - accuracy_criterion=accuracy_criterion, - tuning_criterion=tuning_criterion, - diagnosis=False): + + def __init__( + self, + inputs=[], + outputs=[], + backend="default", + domain="auto", + model_name="", + recipes={}, + quant_format="default", + device="cpu", + calibration_sampling_size=[100], + example_inputs=None, + op_type_dict=None, + op_name_dict=None, + reduce_range=None, + excluded_precisions=[], + quant_level="auto", + accuracy_criterion=accuracy_criterion, + tuning_criterion=tuning_criterion, + diagnosis=False, + ): """Initialize _BaseQuantizationConfig class.""" self.inputs = inputs self.outputs = outputs @@ -805,8 +832,7 @@ def domain(self): @domain.setter def domain(self, domain): """Set domain.""" - if _check_value("domain", domain, str, - ["auto", "cv", "object_detection", "nlp", "recommendation_system"]): + if _check_value("domain", domain, str, ["auto", "cv", "object_detection", "nlp", "recommendation_system"]): self._domain = domain @property @@ -856,8 +882,9 @@ def smooth_quant_args(val=None): else: logger.warning("Ignore the alpha as it's not a list, int or float.") if isinstance(val[k], list): - assert all([vv >= 0.0 and vv <=1.0 for vv in val[k]]), \ - "The candidate value of smooth quantization alpha should be between 0 and 1." + assert all( + [vv >= 0.0 and vv <= 1.0 for vv in val[k]] + ), "The candidate value of smooth quantization alpha should be between 0 and 1." return True else: @@ -919,8 +946,12 @@ def gemm_to_matmul(val=None): def graph_optimization_level(val=None): if val is not None: - return _check_value("graph_optimization_level", val, str, - ["DISABLE_ALL", "ENABLE_BASIC", "ENABLE_EXTENDED", "ENABLE_ALL"]) + return _check_value( + "graph_optimization_level", + val, + str, + ["DISABLE_ALL", "ENABLE_BASIC", "ENABLE_EXTENDED", "ENABLE_ALL"], + ) else: return None @@ -960,25 +991,26 @@ def dedicated_qdq_pair(val=None): else: return False - RECIPES = {"smooth_quant": smooth_quant, - "smooth_quant_args": smooth_quant_args, - "layer_wise_quant": layer_wise_quant, - "layer_wise_quant_args": layer_wise_quant_args, - "fast_bias_correction": fast_bias_correction, - "weight_correction": weight_correction, - "gemm_to_matmul": gemm_to_matmul, - "graph_optimization_level": graph_optimization_level, - "first_conv_or_matmul_quantization": first_conv_or_matmul_quantization, - "last_conv_or_matmul_quantization": last_conv_or_matmul_quantization, - "pre_post_process_quantization": pre_post_process_quantization, - "add_qdq_pair_to_weight": add_qdq_pair_to_weight, - "optypes_to_exclude_output_quant": optypes_to_exclude_output_quant, - "dedicated_qdq_pair": dedicated_qdq_pair, - "rtn_args": rtn_args, - "awq_args": awq_args, - "gptq_args": gptq_args, - "teq_args": teq_args, - } + RECIPES = { + "smooth_quant": smooth_quant, + "smooth_quant_args": smooth_quant_args, + "layer_wise_quant": layer_wise_quant, + "layer_wise_quant_args": layer_wise_quant_args, + "fast_bias_correction": fast_bias_correction, + "weight_correction": weight_correction, + "gemm_to_matmul": gemm_to_matmul, + "graph_optimization_level": graph_optimization_level, + "first_conv_or_matmul_quantization": first_conv_or_matmul_quantization, + "last_conv_or_matmul_quantization": 
last_conv_or_matmul_quantization, + "pre_post_process_quantization": pre_post_process_quantization, + "add_qdq_pair_to_weight": add_qdq_pair_to_weight, + "optypes_to_exclude_output_quant": optypes_to_exclude_output_quant, + "dedicated_qdq_pair": dedicated_qdq_pair, + "rtn_args": rtn_args, + "awq_args": awq_args, + "gptq_args": gptq_args, + "teq_args": teq_args, + } self._recipes = {} for k in RECIPES.keys(): if k in recipes and RECIPES[k](recipes[k]): @@ -1030,7 +1062,7 @@ def reduce_range(self): @reduce_range.setter def reduce_range(self, reduce_range): - if reduce_range is None or _check_value('reduce_range', reduce_range, bool): + if reduce_range is None or _check_value("reduce_range", reduce_range, bool): self._reduce_range = reduce_range @property @@ -1047,8 +1079,7 @@ def op_name_dict(self, op_name_dict): ops_schema.validate(v) self._op_name_dict = op_name_dict else: - assert False, ("Type of op_name_dict should be dict but not {}, ".format( - type(op_name_dict))) + assert False, "Type of op_name_dict should be dict but not {}, ".format(type(op_name_dict)) @property def op_type_dict(self): @@ -1064,8 +1095,7 @@ def op_type_dict(self, op_type_dict): ops_schema.validate(v) self._op_type_dict = op_type_dict else: - assert False, ("Type of op_type_dict should be dict but not {}".format( - type(op_type_dict))) + assert False, "Type of op_type_dict should be dict but not {}".format(type(op_type_dict)) @property def calibration_sampling_size(self): @@ -1073,7 +1103,7 @@ def calibration_sampling_size(self): @calibration_sampling_size.setter def calibration_sampling_size(self, sampling_size): - if _check_value('calibration_sampling_size', sampling_size, int): + if _check_value("calibration_sampling_size", sampling_size, int): if isinstance(sampling_size, int): sampling_size = [sampling_size] self._calibration_sampling_size = sampling_size @@ -1084,7 +1114,7 @@ def device(self): @device.setter def device(self, device): - if _check_value('device', device, str, ['cpu', 'gpu']): + if _check_value("device", device, str, ["cpu", "gpu"]): self._device = device @property @@ -1093,8 +1123,7 @@ def quant_format(self): @quant_format.setter def quant_format(self, quant_format): - if _check_value('quant_format', quant_format, str, - ['default', 'QDQ', 'QOperator']): + if _check_value("quant_format", quant_format, str, ["default", "QDQ", "QOperator"]): self._quant_format = quant_format @property @@ -1103,8 +1132,12 @@ def backend(self): @backend.setter def backend(self, backend): - if _check_value('backend', backend, str, [ - 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep', 'onnxrt_dml_ep']): + if _check_value( + "backend", + backend, + str, + ["default", "itex", "ipex", "onnxrt_trt_ep", "onnxrt_cuda_ep", "onnxrt_dnnl_ep", "onnxrt_dml_ep"], + ): self._backend = backend @property @@ -1113,7 +1146,7 @@ def outputs(self): @outputs.setter def outputs(self, outputs): - if _check_value('outputs', outputs, str): + if _check_value("outputs", outputs, str): self._outputs = outputs @property @@ -1122,7 +1155,7 @@ def inputs(self): @inputs.setter def inputs(self, inputs): - if _check_value('inputs', inputs, str): + if _check_value("inputs", inputs, str): self._inputs = inputs @property @@ -1174,7 +1207,7 @@ class PostTrainingQuantConfig(_BaseQuantizationConfig): quant_format: Support 'default', 'QDQ' and 'QOperator', only required in ONNXRuntime. inputs: Inputs of model, only required in tensorflow. outputs: Outputs of model, only required in tensorflow. 
- approach: Post-Training Quantization method. Neural compressor support 'static', 'dynamic', + approach: Post-Training Quantization method. Neural compressor support 'static', 'dynamic', 'weight_only' and 'auto' method. Default value is 'static'. For strategy 'basic', 'auto' method means neural compressor will quantize all OPs support PTQ static @@ -1239,43 +1272,48 @@ class PostTrainingQuantConfig(_BaseQuantizationConfig): ), ) """ - def __init__(self, - device="cpu", - backend="default", - domain="auto", - recipes={}, - quant_format="default", - inputs=[], - outputs=[], - approach="static", - calibration_sampling_size=[100], - op_type_dict=None, - op_name_dict=None, - reduce_range=None, - example_inputs=None, - excluded_precisions=[], - quant_level="auto", - accuracy_criterion=accuracy_criterion, - tuning_criterion=tuning_criterion, - diagnosis=False): + + def __init__( + self, + device="cpu", + backend="default", + domain="auto", + recipes={}, + quant_format="default", + inputs=[], + outputs=[], + approach="static", + calibration_sampling_size=[100], + op_type_dict=None, + op_name_dict=None, + reduce_range=None, + example_inputs=None, + excluded_precisions=[], + quant_level="auto", + accuracy_criterion=accuracy_criterion, + tuning_criterion=tuning_criterion, + diagnosis=False, + ): """Init a PostTrainingQuantConfig object.""" - super().__init__(inputs=inputs, - outputs=outputs, - device=device, - backend=backend, - domain=domain, - recipes=recipes, - quant_format=quant_format, - calibration_sampling_size=calibration_sampling_size, - op_type_dict=op_type_dict, - op_name_dict=op_name_dict, - reduce_range=reduce_range, - example_inputs=example_inputs, - excluded_precisions=excluded_precisions, - quant_level=quant_level, - accuracy_criterion=accuracy_criterion, - tuning_criterion=tuning_criterion, - diagnosis=diagnosis) + super().__init__( + inputs=inputs, + outputs=outputs, + device=device, + backend=backend, + domain=domain, + recipes=recipes, + quant_format=quant_format, + calibration_sampling_size=calibration_sampling_size, + op_type_dict=op_type_dict, + op_name_dict=op_name_dict, + reduce_range=reduce_range, + example_inputs=example_inputs, + excluded_precisions=excluded_precisions, + quant_level=quant_level, + accuracy_criterion=accuracy_criterion, + tuning_criterion=tuning_criterion, + diagnosis=diagnosis, + ) self.approach = approach self.diagnosis = diagnosis @@ -1287,10 +1325,10 @@ def approach(self): @approach.setter def approach(self, approach): """Set approach.""" - if 'static' in approach: - approach = 'static' - if 'dynamic' in approach: - approach = 'dynamic' + if "static" in approach: + approach = "static" + if "dynamic" in approach: + approach = "dynamic" if _check_value("approach", approach, str, ["static", "dynamic", "auto", "weight_only"]): self._approach = QUANTMAPPING[approach] @@ -1302,7 +1340,7 @@ def diagnosis(self): @diagnosis.setter def diagnosis(self, diagnosis): """Set diagnosis.""" - if _check_value('diagnosis', diagnosis, bool): + if _check_value("diagnosis", diagnosis, bool): self._diagnosis = diagnosis @@ -1369,35 +1407,40 @@ class QuantizationAwareTrainingConfig(_BaseQuantizationConfig): ) compression_manager = prepare_compression(model, conf) """ - def __init__(self, - device="cpu", - backend="default", - inputs=[], - outputs=[], - op_type_dict=None, - op_name_dict=None, - reduce_range=None, - model_name="", - quant_format="default", - excluded_precisions=[], - quant_level="auto", - accuracy_criterion=accuracy_criterion, - 
tuning_criterion=tuning_criterion): + + def __init__( + self, + device="cpu", + backend="default", + inputs=[], + outputs=[], + op_type_dict=None, + op_name_dict=None, + reduce_range=None, + model_name="", + quant_format="default", + excluded_precisions=[], + quant_level="auto", + accuracy_criterion=accuracy_criterion, + tuning_criterion=tuning_criterion, + ): """Init a QuantizationAwareTrainingConfig object.""" - super().__init__(inputs=inputs, - outputs=outputs, - device=device, - backend=backend, - op_type_dict=op_type_dict, - op_name_dict=op_name_dict, - reduce_range=reduce_range, - model_name=model_name, - quant_format=quant_format, - excluded_precisions=excluded_precisions, - quant_level=quant_level, - accuracy_criterion=accuracy_criterion, - tuning_criterion=tuning_criterion) - self._approach = 'quant_aware_training' + super().__init__( + inputs=inputs, + outputs=outputs, + device=device, + backend=backend, + op_type_dict=op_type_dict, + op_name_dict=op_name_dict, + reduce_range=reduce_range, + model_name=model_name, + quant_format=quant_format, + excluded_precisions=excluded_precisions, + quant_level=quant_level, + accuracy_criterion=accuracy_criterion, + tuning_criterion=tuning_criterion, + ) + self._approach = "quant_aware_training" self._framework = None @property @@ -1487,33 +1530,47 @@ class WeightPruningConfig: prune.model = self.model """ - def __init__(self, pruning_configs=[{}], ##empty dict will use global values - target_sparsity=0.9, pruning_type="snip_momentum", pattern="4x1", op_names=[], - excluded_op_names=[], backend=None, - start_step=0, end_step=0, pruning_scope="global", pruning_frequency=1, - min_sparsity_ratio_per_op=0.0, max_sparsity_ratio_per_op=0.98, - sparsity_decay_type="exp", pruning_op_types=['Conv', 'Linear'], - low_memory_usage=False, - **kwargs): + def __init__( + self, + pruning_configs=[{}], ##empty dict will use global values + target_sparsity=0.9, + pruning_type="snip_momentum", + pattern="4x1", + op_names=[], + excluded_op_names=[], + backend=None, + start_step=0, + end_step=0, + pruning_scope="global", + pruning_frequency=1, + min_sparsity_ratio_per_op=0.0, + max_sparsity_ratio_per_op=0.98, + sparsity_decay_type="exp", + pruning_op_types=["Conv", "Linear"], + low_memory_usage=False, + **kwargs, + ): """Init a WeightPruningConfig object.""" self.backend = backend self.pruning_configs = pruning_configs - self._weight_compression = DotDict({ - 'target_sparsity': target_sparsity, - 'pruning_type': pruning_type, - 'pattern': pattern, - 'op_names': op_names, - 'excluded_op_names': excluded_op_names, ##global only - 'start_step': start_step, - 'end_step': end_step, - 'pruning_scope': pruning_scope, - 'pruning_frequency': pruning_frequency, - 'min_sparsity_ratio_per_op': min_sparsity_ratio_per_op, - 'max_sparsity_ratio_per_op': max_sparsity_ratio_per_op, - 'sparsity_decay_type': sparsity_decay_type, - 'pruning_op_types': pruning_op_types, - 'low_memory_usage': low_memory_usage - }) + self._weight_compression = DotDict( + { + "target_sparsity": target_sparsity, + "pruning_type": pruning_type, + "pattern": pattern, + "op_names": op_names, + "excluded_op_names": excluded_op_names, ##global only + "start_step": start_step, + "end_step": end_step, + "pruning_scope": pruning_scope, + "pruning_frequency": pruning_frequency, + "min_sparsity_ratio_per_op": min_sparsity_ratio_per_op, + "max_sparsity_ratio_per_op": max_sparsity_ratio_per_op, + "sparsity_decay_type": sparsity_decay_type, + "pruning_op_types": pruning_op_types, + "low_memory_usage": 
low_memory_usage, + } + ) self._weight_compression.update(kwargs) @property @@ -1529,22 +1586,18 @@ def weight_compression(self, weight_compression): class HPOConfig: """Config class for hyperparameter optimization. - + Args: search_space (dict): A dictionary for defining the search space. searcher(str): The name of search algorithms, currently support: grid, random, bo and xgb. higher_is_better(bool, optional): This flag indicates whether the metric higher is the better. min_train_sample(int, optional): The min number of samples to start training the search model. seed(int, optional): Random seed. - """ - def __init__(self, - search_space, - searcher='xgb', - higher_is_better=True, - loss_type='reg', - min_train_samples=10, - seed=42): + + def __init__( + self, search_space, searcher="xgb", higher_is_better=True, loss_type="reg", min_train_samples=10, seed=42 + ): """Init an HPOConfig object.""" self.search_space = search_space self.searcher = searcher @@ -1553,6 +1606,7 @@ def __init__(self, self.min_train_samples = min_train_samples self.seed = seed + class KnowledgeDistillationLossConfig: """Config Class for Knowledge Distillation Loss. @@ -1580,15 +1634,18 @@ class KnowledgeDistillationLossConfig: compression_manager = prepare_compression(model, d_conf) model = compression_manager.model """ - def __init__(self, temperature=1.0, loss_types=['CE', 'CE'], loss_weights=[0.5, 0.5]): + + def __init__(self, temperature=1.0, loss_types=["CE", "CE"], loss_weights=[0.5, 0.5]): """Init a KnowledgeDistillationLossConfig object.""" - self.config = DotDict({ - 'KnowledgeDistillationLoss': { - 'temperature': temperature, - 'loss_types': loss_types, - 'loss_weights': loss_weights + self.config = DotDict( + { + "KnowledgeDistillationLoss": { + "temperature": temperature, + "loss_types": loss_types, + "loss_weights": loss_weights, + } } - }) + ) class IntermediateLayersKnowledgeDistillationLossConfig: @@ -1639,16 +1696,19 @@ class IntermediateLayersKnowledgeDistillationLossConfig: compression_manager = prepare_compression(model, d_conf) model = compression_manager.model """ + def __init__(self, layer_mappings=[], loss_types=[], loss_weights=[], add_origin_loss=False): """Init an IntermediateLayersKnowledgeDistillationLossConfig object.""" - self.config = DotDict({ - 'IntermediateLayersKnowledgeDistillationLoss': { - 'layer_mappings': layer_mappings, - 'loss_types': loss_types, - 'loss_weights': loss_weights, - 'add_origin_loss': add_origin_loss + self.config = DotDict( + { + "IntermediateLayersKnowledgeDistillationLoss": { + "layer_mappings": layer_mappings, + "loss_types": loss_types, + "loss_weights": loss_weights, + "add_origin_loss": add_origin_loss, + } } - }) + ) class SelfKnowledgeDistillationLossConfig: @@ -1692,26 +1752,25 @@ class SelfKnowledgeDistillationLossConfig: compression_manager = prepare_compression(model, conf) model = compression_manager.model """ - def __init__(self, - layer_mappings=[], - temperature=1.0, - loss_types=[], - loss_weights=[], - add_origin_loss=False): + + def __init__(self, layer_mappings=[], temperature=1.0, loss_types=[], loss_weights=[], add_origin_loss=False): """Init a SelfKnowledgeDistillationLossConfig object.""" - self.config = DotDict({ - 'SelfKnowledgeDistillationLoss': { - 'layer_mappings': layer_mappings, - 'temperature': temperature, - 'loss_types': loss_types, - 'loss_weights': loss_weights, - 'add_origin_loss': add_origin_loss, + self.config = DotDict( + { + "SelfKnowledgeDistillationLoss": { + "layer_mappings": layer_mappings, + "temperature": 
temperature, + "loss_types": loss_types, + "loss_weights": loss_weights, + "add_origin_loss": add_origin_loss, + } } - }) + ) criterion = KnowledgeDistillationLossConfig() + class DistillationConfig: """Config of distillation. @@ -1734,12 +1793,8 @@ class DistillationConfig: compression_manager = prepare_compression(model, conf) model = compression_manager.model """ - def __init__(self, - teacher_model=None, - criterion=criterion, - optimizer={'SGD': { - 'learning_rate': 0.0001 - }}): + + def __init__(self, teacher_model=None, criterion=criterion, optimizer={"SGD": {"learning_rate": 0.0001}}): """Init a DistillationConfig object.""" self.criterion = criterion self.optimizer = optimizer @@ -1832,21 +1887,24 @@ class MixedPrecisionConfig(object): conf = MixedPrecisionConfig() converted_model = mix_precision.fit(model, conf=conf) """ + @alias_param("precisions", param_alias="precision") - def __init__(self, - device="cpu", - backend="default", - precisions="bf16", - model_name="", - inputs=[], - outputs=[], - quant_level="auto", - tuning_criterion=tuning_criterion, - accuracy_criterion=accuracy_criterion, - excluded_precisions=[], - op_name_dict={}, - op_type_dict={}, - example_inputs=None): + def __init__( + self, + device="cpu", + backend="default", + precisions="bf16", + model_name="", + inputs=[], + outputs=[], + quant_level="auto", + tuning_criterion=tuning_criterion, + accuracy_criterion=accuracy_criterion, + excluded_precisions=[], + op_name_dict={}, + op_type_dict={}, + example_inputs=None, + ): """Init a MixedPrecisionConfig object.""" self.inputs = inputs self.outputs = outputs @@ -1876,8 +1934,9 @@ def precisions(self, precision): assert precision in ["fp16", "bf16"], "Only support 'fp16' and 'bf16' for mix precision." self._precisions = [precision] elif isinstance(precision, list): - assert all([i in ["fp16", "bf16"] for i in precision]), "Only " \ - "support 'fp16' and 'bf16' for mix precision." + assert all([i in ["fp16", "bf16"] for i in precision]), ( + "Only " "support 'fp16' and 'bf16' for mix precision." 
+ ) self._precisions = precision @property @@ -1931,7 +1990,7 @@ def device(self): @device.setter def device(self, device): """Set device.""" - if _check_value('device', device, str, ['cpu', 'gpu']): + if _check_value("device", device, str, ["cpu", "gpu"]): self._device = device @property @@ -1942,8 +2001,12 @@ def backend(self): @backend.setter def backend(self, backend): """Set backend.""" - if _check_value('backend', backend, str, [ - 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', 'onnxrt_dnnl_ep', 'onnxrt_dml_ep']): + if _check_value( + "backend", + backend, + str, + ["default", "itex", "ipex", "onnxrt_trt_ep", "onnxrt_cuda_ep", "onnxrt_dnnl_ep", "onnxrt_dml_ep"], + ): self._backend = backend @property @@ -1954,7 +2017,7 @@ def outputs(self): @outputs.setter def outputs(self, outputs): """Set outputs.""" - if _check_value('outputs', outputs, str): + if _check_value("outputs", outputs, str): self._outputs = outputs @property @@ -1965,7 +2028,7 @@ def inputs(self): @inputs.setter def inputs(self, inputs): """Set inputs.""" - if _check_value('inputs', inputs, str): + if _check_value("inputs", inputs, str): self._inputs = inputs @property @@ -2005,8 +2068,7 @@ def op_name_dict(self, op_name_dict): ops_schema.validate(v) self._op_name_dict = op_name_dict else: - assert False, ("Type of op_name_dict should be dict but not {}, ".format( - type(op_name_dict))) + assert False, "Type of op_name_dict should be dict but not {}, ".format(type(op_name_dict)) @property def op_type_dict(self): @@ -2023,8 +2085,7 @@ def op_type_dict(self, op_type_dict): ops_schema.validate(v) self._op_type_dict = op_type_dict else: - assert False, ("Type of op_type_dict should be dict but not {}".format( - type(op_type_dict))) + assert False, "Type of op_type_dict should be dict but not {}".format(type(op_type_dict)) @property def example_inputs(self): @@ -2051,6 +2112,7 @@ class ExportConfig: output_names (list, optional): A list of model output names. Defaults to None. dynamic_axes (dict, optional): A dictionary of dynamic axes information. Defaults to None. 
""" + def __init__( self, dtype="int8", @@ -2143,6 +2205,7 @@ def dynamic_axes(self, dynamic_axes): class ONNXQlinear2QDQConfig: """Config Class for ONNXQlinear2QDQ.""" + def __init__(self): """Init an ONNXQlinear2QDQConfig object.""" pass @@ -2180,16 +2243,17 @@ class Torch2ONNXConfig(ExportConfig): ) q_model.export('int8-model.onnx', int8_onnx_config) """ + def __init__( - self, - dtype="int8", - opset_version=14, - quant_format="QDQ", - example_inputs=None, - input_names=None, - output_names=None, - dynamic_axes=None, - **kwargs, + self, + dtype="int8", + opset_version=14, + quant_format="QDQ", + example_inputs=None, + input_names=None, + output_names=None, + dynamic_axes=None, + **kwargs, ): """Init a Torch2ONNXConfig object.""" super().__init__( @@ -2227,16 +2291,17 @@ class TF2ONNXConfig(ExportConfig): config = TF2ONNXConfig() q_model.export(output_graph, config) """ + def __init__( - self, - dtype="int8", - opset_version=14, - quant_format="QDQ", - example_inputs=None, - input_names=None, - output_names=None, - dynamic_axes=None, - **kwargs, + self, + dtype="int8", + opset_version=14, + quant_format="QDQ", + example_inputs=None, + input_names=None, + output_names=None, + dynamic_axes=None, + **kwargs, ): """Init a TF2ONNXConfig object.""" super().__init__( @@ -2253,20 +2318,32 @@ def __init__( class NASConfig: """Config class for NAS approaches.""" - def __init__(self, approach=None, search_space=None, search_algorithm=None, - metrics=[], higher_is_better=[], max_trials=3, seed=42, dynas=None): + + def __init__( + self, + approach=None, + search_space=None, + search_algorithm=None, + metrics=[], + higher_is_better=[], + max_trials=3, + seed=42, + dynas=None, + ): """Init a NASConfig object.""" self._approach = approach - self._search = DotDict({ - 'search_space': search_space, - 'search_algorithm': search_algorithm, - 'metrics': metrics, - 'higher_is_better': higher_is_better, - 'max_trials': max_trials, - 'seed': seed - }) + self._search = DotDict( + { + "search_space": search_space, + "search_algorithm": search_algorithm, + "metrics": metrics, + "higher_is_better": higher_is_better, + "max_trials": max_trials, + "seed": seed, + } + ) self.dynas = None - if approach == 'dynas' and dynas: + if approach == "dynas" and dynas: self.dynas = dynas.config @property @@ -2292,6 +2369,7 @@ def search(self, search): class MXNet: """Base config class for MXNet.""" + def __init__(self, precisions=None): """Init an MXNet object.""" self._precisions = precisions @@ -2307,12 +2385,13 @@ def precisions(self, precisions): if not isinstance(precisions, list): precisions = [precisions] for pr in precisions: - _check_value('precisions', pr, str, ['int8', 'uint8', 'fp32', 'bf16', 'fp16']) + _check_value("precisions", pr, str, ["int8", "uint8", "fp32", "bf16", "fp16"]) self._precisions = precisions class ONNX(MXNet): """Config class for ONNX.""" + def __init__(self, graph_optimization_level=None, precisions=None): """Init an ONNX object.""" super().__init__(precisions) @@ -2326,13 +2405,18 @@ def graph_optimization_level(self): @graph_optimization_level.setter def graph_optimization_level(self, graph_optimization_level): """Set graph optimization level.""" - if _check_value('graph_optimization_level', graph_optimization_level, str, - ['DISABLE_ALL', 'ENABLE_BASIC', 'ENABLE_EXTENDED', 'ENABLE_ALL']): + if _check_value( + "graph_optimization_level", + graph_optimization_level, + str, + ["DISABLE_ALL", "ENABLE_BASIC", "ENABLE_EXTENDED", "ENABLE_ALL"], + ): self._graph_optimization_level = 
graph_optimization_level class TensorFlow(MXNet): """Config class for TensorFlow.""" + def __init__(self, precisions=None): """Init a TensorFlow object.""" super().__init__(precisions) @@ -2340,6 +2424,7 @@ def __init__(self, precisions=None): class Keras(MXNet): """Config class for Keras.""" + def __init__(self, precisions=None): """Init a Keras object.""" super().__init__(precisions) @@ -2347,6 +2432,7 @@ def __init__(self, precisions=None): class PyTorch(MXNet): """Config class for PyTorch.""" + def __init__(self, precisions=None): """Init a PyTorch object.""" super().__init__(precisions) @@ -2368,19 +2454,22 @@ def __init__(self, precisions=None): class _Config: """Main config class.""" - def __init__(self, - quantization=quantization, - benchmark=benchmark, - mixed_precision=mixed_precision, - pruning=pruning, - distillation=distillation, - nas=nas, - onnxruntime=onnxruntime_config, - tensorflow=tensorflow_config, - pytorch=pytorch_config, - mxnet=mxnet_config, - keras=keras_config, - diagnosis=None): + + def __init__( + self, + quantization=quantization, + benchmark=benchmark, + mixed_precision=mixed_precision, + pruning=pruning, + distillation=distillation, + nas=nas, + onnxruntime=onnxruntime_config, + tensorflow=tensorflow_config, + pytorch=pytorch_config, + mxnet=mxnet_config, + keras=keras_config, + diagnosis=None, + ): """Init a config object.""" self._quantization = quantization self._benchmark = benchmark @@ -2395,8 +2484,7 @@ def __init__(self, self._keras = keras if diagnosis is None: diagnosis = False - if (quantization is not None and quantization.diagnosis) or \ - (benchmark is not None and benchmark.diagnosis): + if (quantization is not None and quantization.diagnosis) or (benchmark is not None and benchmark.diagnosis): diagnosis = True if diagnosis: tuning_criterion.max_trials = 1 @@ -2457,10 +2545,10 @@ def onnxruntime(self): """Get the onnxruntime object.""" return self._onnxruntime - @property def diagnosis(self): """Get the diagnosis value.""" return self._diagnosis + config = _Config() diff --git a/neural_compressor/contrib/__init__.py b/neural_compressor/contrib/__init__.py index c2b506951b7..a3ff38fab97 100644 --- a/neural_compressor/contrib/__init__.py +++ b/neural_compressor/contrib/__init__.py @@ -14,6 +14,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Built-in strategy for multiple framework backends.""" -from .strategy import * \ No newline at end of file +from .strategy import * diff --git a/neural_compressor/contrib/strategy/__init__.py b/neural_compressor/contrib/strategy/__init__.py index b7c3b803bda..18b6ba3bb31 100644 --- a/neural_compressor/contrib/strategy/__init__.py +++ b/neural_compressor/contrib/strategy/__init__.py @@ -14,7 +14,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- """Built-in strategy for multiple framework backends.""" from os.path import dirname, basename, isfile, join import glob @@ -22,6 +21,5 @@ modules = glob.glob(join(dirname(__file__), "*.py")) for f in modules: - if isfile(f) and not f.startswith('__') and not f.endswith('__init__.py'): + if isfile(f) and not f.startswith("__") and not f.endswith("__init__.py"): __import__(basename(f)[:-3], globals(), locals(), level=1) - diff --git a/neural_compressor/contrib/strategy/sigopt.py b/neural_compressor/contrib/strategy/sigopt.py index 336c9adf91c..81b819c2899 100644 --- a/neural_compressor/contrib/strategy/sigopt.py +++ b/neural_compressor/contrib/strategy/sigopt.py @@ -14,17 +14,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """The SigOpt Tuning Strategy provides support for the quantization process.""" import copy -from neural_compressor.utils import logger -from neural_compressor.utils.utility import LazyImport -from neural_compressor.strategy.strategy import strategy_registry, TuneStrategy from collections import OrderedDict + +from neural_compressor.strategy.strategy import TuneStrategy, strategy_registry from neural_compressor.strategy.utils.tuning_sampler import OpWiseTuningSampler from neural_compressor.strategy.utils.tuning_structs import OpTuningConfig +from neural_compressor.utils import logger +from neural_compressor.utils.utility import LazyImport + +sigopt = LazyImport("sigopt") -sigopt = LazyImport('sigopt') @strategy_registry class SigOptTuneStrategy(TuneStrategy): @@ -69,19 +70,20 @@ def eval_func(model): return accuracy dicts (dict, optional): The dict containing resume information. Defaults to None. - """ - def __init__(self, - model, - conf, - q_dataloader=None, - q_func=None, - eval_func=None, - eval_dataloader=None, - eval_metric=None, - resume=None, - q_hooks=None): + def __init__( + self, + model, + conf, + q_dataloader=None, + q_func=None, + eval_func=None, + eval_dataloader=None, + eval_metric=None, + resume=None, + q_hooks=None, + ): """Initialize the SigOpt tuning strategy if the user specified to use it. Args: @@ -98,16 +100,18 @@ def __init__(self, q_hooks: The dict of training hooks, supported keys are: on_epoch_begin, on_epoch_end, on_step_begin, on_step_end. Their values are functions to be executed in adaptor layer.. Defaults to None. """ - super().__init__(model=model, - conf=conf, - q_dataloader=q_dataloader, - q_func=q_func, - eval_func=eval_func, - eval_dataloader=eval_dataloader, - eval_metric=eval_metric, - resume=resume, - q_hooks=q_hooks) - logger.info(f"*** Initialize SigOpt tuning") + super().__init__( + model=model, + conf=conf, + q_dataloader=q_dataloader, + q_func=q_func, + eval_func=eval_func, + eval_dataloader=eval_dataloader, + eval_metric=eval_metric, + resume=resume, + q_hooks=q_hooks, + ) + logger.info("*** Initialize SigOpt tuning") self.config = self._initialize_config(conf) strategy_name = self.config.tuning_criterion.strategy if strategy_name.lower() == "sigopt": @@ -117,38 +121,45 @@ def __init__(self, try: import subprocess import sys + subprocess.check_call([sys.executable, "-m", "pip", "install", "sigopt"]) - import sigopt # pylint: disable=import-error + import sigopt # pylint: disable=import-error except: assert False, "Unable to import sigopt from the local environment." 
else: pass # SigOpt init strategy_kwargs = self.config.tuning_criterion.strategy_kwargs - client_token = strategy_kwargs.get('sigopt_api_token', None) - self.project_id = strategy_kwargs.get('sigopt_project_id', None) - self.experiment_name = strategy_kwargs.get('sigopt_experiment_name', None) + client_token = strategy_kwargs.get("sigopt_api_token", None) + self.project_id = strategy_kwargs.get("sigopt_project_id", None) + self.experiment_name = strategy_kwargs.get("sigopt_experiment_name", None) try: - assert client_token != None - except(AssertionError): - logger.error("`sigopt_api_token` field in yaml file is required. " \ - "Please refer to details in /docs/sigopt_strategy.md.") + assert client_token is not None + except AssertionError: + logger.error( + "`sigopt_api_token` field in yaml file is required. " + "Please refer to details in /docs/sigopt_strategy.md." + ) exit(0) try: - assert self.project_id != None - logger.warning('Project id is {}, ' \ - 'Please check whether it is created in the sigopt account.'\ - .format(self.project_id)) - except(AssertionError): - logger.error("`sigopt_project_id` field in yaml file is required. " \ - "Please refer to details in /docs/sigopt_strategy.md.") + assert self.project_id is not None + logger.warning( + "Project id is {}, " "Please check whether it is created in the sigopt account.".format(self.project_id) + ) + except AssertionError: + logger.error( + "`sigopt_project_id` field in yaml file is required. " + "Please refer to details in /docs/sigopt_strategy.md." + ) exit(0) - if self.experiment_name == 'nc-tune': - logger.info("Default experiment name `nc-tune` is used, " \ - "Please refer to details in /docs/sigopt_strategy.md " \ - "if user wants to modify it.") + if self.experiment_name == "nc-tune": + logger.info( + "Default experiment name `nc-tune` is used, " + "Please refer to details in /docs/sigopt_strategy.md " + "if user wants to modify it." 
+ ) else: - logger.info("Experiment name is {}.".format(self.experiment_name)) + logger.info("Experiment name is {}.".format(self.experiment_name)) self.conn = sigopt.Connection(client_token) self.experiment = None @@ -156,14 +167,14 @@ def __init__(self, def params_to_tune_configs(self, params): """Get the parameters of the tuning strategy.""" op_tuning_cfg = {} - calib_sampling_size_lst = self.tuning_space.root_item.get_option_by_name('calib_sampling_size').options + calib_sampling_size_lst = self.tuning_space.root_item.get_option_by_name("calib_sampling_size").options for op_name_type, configs in self.op_configs.items(): if len(configs) == 1: op_tuning_cfg[op_name_type] = configs[0] else: op_tuning_cfg[op_name_type] = configs[min(len(configs) - 1, int(params[op_name_type[0]]))] - calib_sampling_size = calib_sampling_size_lst[min(len(configs) - 1, int(params['calib_sampling_size']))] - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + calib_sampling_size = calib_sampling_size_lst[min(len(configs) - 1, int(params["calib_sampling_size"]))] + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size return op_tuning_cfg def next_tune_cfg(self): @@ -172,20 +183,20 @@ def next_tune_cfg(self): suggestion = self.conn.experiments(self.experiment.id).suggestions().create() yield self.params_to_tune_configs(suggestion.assignments) values = [ - dict(name='accuracy', value=self.last_tune_result[0]), - dict(name='latency', value=self.last_tune_result[1]) + dict(name="accuracy", value=self.last_tune_result[0]), + dict(name="latency", value=self.last_tune_result[1]), ] - obs = self.conn.experiments(self.experiment.id).observations().create( - suggestion=suggestion.id, values=values) - logger.debug("`suggestion_id` is {}, `observation_id` is {}.". - format(suggestion.id, obs.id)) + obs = ( + self.conn.experiments(self.experiment.id).observations().create(suggestion=suggestion.id, values=values) + ) + logger.debug("`suggestion_id` is {}, `observation_id` is {}.".format(suggestion.id, obs.id)) self.experiment = self.conn.experiments(self.experiment.id).fetch() def get_acc_target(self, base_acc): """Get the tuning target of the accuracy criterion.""" accuracy_criterion_conf = self.config.accuracy_criterion - if accuracy_criterion_conf.criterion == 'relative': - return base_acc * (1.
- accuracy_criterion_conf.tolerable_loss) + if accuracy_criterion_conf.criterion == "relative": + return base_acc * (1.0 - accuracy_criterion_conf.tolerable_loss) else: return base_acc - accuracy_criterion_conf.tolerable_loss @@ -196,10 +207,19 @@ def traverse(self): """ self._prepare_tuning() - baseline_msg = '[Accuracy: {:.4f}'.format(self.baseline[0]) + \ - ''.join([', {}: {:.4f}'.format(x,y) for x,y in zip( \ - self.objectives.representation, self.baseline[1]) if x != 'Accuracy']) + ']' \ - if self.baseline else 'n/a' + baseline_msg = ( + "[Accuracy: {:.4f}".format(self.baseline[0]) + + "".join( + [ + ", {}: {:.4f}".format(x, y) + for x, y in zip(self.objectives.representation, self.baseline[1]) + if x != "Accuracy" + ] + ) + + "]" + if self.baseline + else "n/a" + ) logger.info("FP32 baseline is: {}".format(baseline_msg)) self.experiment = self.create_exp(acc_target=self.get_acc_target(self.baseline[0])) trials_count = 0 @@ -208,15 +228,14 @@ def traverse(self): trials_count += 1 tuning_history = self._find_tuning_history(tune_cfg) if tuning_history and trials_count < self.config.tuning_criterion.max_trials: - self.last_tune_result = tuning_history['last_tune_result'] - self.best_tune_result = tuning_history['best_tune_result'] + self.last_tune_result = tuning_history["last_tune_result"] + self.best_tune_result = tuning_history["best_tune_result"] logger.warn("Find evaluated tuning config, skip.") continue logger.debug("Dump current tuning configuration:") logger.debug(tune_cfg) - self.last_qmodel = self.adaptor.quantize( - tune_cfg, self.model, self.calib_dataloader, self.q_func) + self.last_qmodel = self.adaptor.quantize(tune_cfg, self.model, self.calib_dataloader, self.q_func) assert self.last_qmodel # Return the last quantized model as a result. if performance only. if self._not_tuning: @@ -240,16 +259,17 @@ def create_exp(self, acc_target): """Set the config for the experiment.""" params = [] from copy import deepcopy + tuning_space = self.tuning_space initial_op_tuning_cfg = {} for item in tuning_space.root_item.options: - if item.item_type == 'op': + if item.item_type == "op": op_name, op_type = item.name - initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space) - calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options + initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, "fp32", tuning_space) + calib_sampling_size_lst = tuning_space.root_item.get_option_by_name("calib_sampling_size").options # step1. 
collect the ops that support static and dynamic quant_mode_wise_items = OrderedDict() - query_order = ['static', 'dynamic', 'bf16', 'fp16', 'fp32'] + query_order = ["static", "dynamic", "bf16", "fp16", "fp32"] pre_items = set() for quant_mode in query_order: items = tuning_space.query_items_by_quant_mode(quant_mode) @@ -266,22 +286,20 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): for quant_mode, quant_mode_items in quant_mode_wise_items.items(): initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) - op_wise_pool = OpWiseTuningSampler(tuning_space, [], [], - op_item_dtype_dict, initial_op_tuning_cfg) + op_wise_pool = OpWiseTuningSampler(tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg) self.op_configs = op_wise_pool.get_opwise_candidate() for op, configs in self.op_configs.items(): if len(configs) > 1: - params.append(dict(name=op[0], type='int', - bounds=dict(min=0, max=len(configs) - 1))) - params.append(dict(name='calib_sampling_size', type='int', - bounds=dict(min=0, max=len(calib_sampling_size_lst) - 1))) + params.append(dict(name=op[0], type="int", bounds=dict(min=0, max=len(configs) - 1))) + params.append( + dict(name="calib_sampling_size", type="int", bounds=dict(min=0, max=len(calib_sampling_size_lst) - 1)) + ) experiment = self.conn.experiments().create( name=self.experiment_name, parameters=params, metrics=[ - dict(name='accuracy', objective='maximize', strategy='constraint', \ - threshold=acc_target), - dict(name='latency', objective='minimize', strategy='optimize'), + dict(name="accuracy", objective="maximize", strategy="constraint", threshold=acc_target), + dict(name="latency", objective="minimize", strategy="optimize"), ], parallel_bandwidth=1, # Define an Observation Budget for your experiment @@ -289,7 +307,6 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): project=self.project_id, ) - logger.debug("Create experiment at https://app.sigopt.com/experiment/{}". - format(experiment.id)) + logger.debug("Create experiment at https://app.sigopt.com/experiment/{}".format(experiment.id)) return experiment diff --git a/neural_compressor/contrib/strategy/tpe.py b/neural_compressor/contrib/strategy/tpe.py index 18bf0a76105..73f5e57610c 100644 --- a/neural_compressor/contrib/strategy/tpe.py +++ b/neural_compressor/contrib/strategy/tpe.py @@ -14,21 +14,22 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
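Similarly, the TPE strategy reformatted below is reached through the same configuration path; its __init__ asserts that the approach is post-training static quantization and it honors max_trials from TuningCriterion and tolerable_loss from AccuracyCriterion. A minimal sketch under the same assumptions (model, calib_dataloader and eval_func defined elsewhere):

    from neural_compressor import quantization
    from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig, TuningCriterion

    conf = PostTrainingQuantConfig(
        approach="static",  # TPE only supports post-training static quantization
        tuning_criterion=TuningCriterion(strategy="tpe", max_trials=50),
        accuracy_criterion=AccuracyCriterion(criterion="relative", tolerable_loss=0.01),
    )
    q_model = quantization.fit(model, conf, calib_dataloader=calib_dataloader, eval_func=eval_func)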
- """Fefine the tuning strategy that uses tpe search in tuning space.""" import copy import os -from pathlib import Path +from collections import OrderedDict from functools import partial +from pathlib import Path + import numpy as np -from neural_compressor.utils import logger -from neural_compressor.utils.utility import LazyImport -from neural_compressor.strategy.strategy import strategy_registry, TuneStrategy -from collections import OrderedDict + +from neural_compressor.strategy.strategy import TuneStrategy, strategy_registry from neural_compressor.strategy.utils.tuning_sampler import OpWiseTuningSampler from neural_compressor.strategy.utils.tuning_structs import OpTuningConfig +from neural_compressor.utils import logger +from neural_compressor.utils.utility import LazyImport -hyperopt = LazyImport('hyperopt') +hyperopt = LazyImport("hyperopt") try: import pandas as pd @@ -80,22 +81,25 @@ def eval_func(model): return accuracy dicts (dict, optional): The dict containing resume information. Defaults to None. - """ - def __init__(self, - model, - conf, - q_dataloader=None, - q_func=None, - eval_func=None, - eval_dataloader=None, - eval_metric=None, - resume=None, - q_hooks=None): + + def __init__( + self, + model, + conf, + q_dataloader=None, + q_func=None, + eval_func=None, + eval_dataloader=None, + eval_metric=None, + resume=None, + q_hooks=None, + ): """Initialize the tpe tuning strategy if the user specified to use it.""" self.config = self._initialize_config(conf) - assert self.config.approach == 'post_training_static_quant', \ - "TPE strategy is only for post training static quantization!" + assert ( + self.config.approach == "post_training_static_quant" + ), "TPE strategy is only for post training static quantization!" """Initialize the tpe tuning strategy if the user specified to use it.""" strategy_name = self.config.tuning_criterion.strategy if strategy_name.lower() == "tpe": @@ -105,8 +109,9 @@ def __init__(self, try: import subprocess import sys + subprocess.check_call([sys.executable, "-m", "pip", "install", "hyperopt"]) - import hyperopt # pylint: disable=import-error + import hyperopt # pylint: disable=import-error except: assert False, "Unable to import hyperopt from the local environment." 
else: @@ -119,37 +124,26 @@ def __init__(self, if self.config.tuning_criterion.max_trials: self.max_trials = self.config.tuning_criterion.max_trials - self.loss_function_config = { - 'acc_th': 0.01, - 'acc_weight': 1.0, - 'lat_weight': 1.0 - } + self.loss_function_config = {"acc_th": 0.01, "acc_weight": 1.0, "lat_weight": 1.0} accuracy_criterion = self.config.accuracy_criterion - if accuracy_criterion.criterion == 'relative': - self.loss_function_config['acc_th'] = accuracy_criterion.tolerable_loss - - self.tpe_params = { - 'n_initial_point': 10, - 'gamma': 0.3, - 'n_EI_candidates': 100, - 'prior_weight': 1.0 - } - self.best_result = { - 'best_loss': float('inf'), - 'best_acc_loss': float('inf'), - 'best_lat_diff': 0.0 - } + if accuracy_criterion.criterion == "relative": + self.loss_function_config["acc_th"] = accuracy_criterion.tolerable_loss + + self.tpe_params = {"n_initial_point": 10, "gamma": 0.3, "n_EI_candidates": 100, "prior_weight": 1.0} + self.best_result = {"best_loss": float("inf"), "best_acc_loss": float("inf"), "best_lat_diff": 0.0} self._algo = None - super().__init__(model=model, - conf=conf, - q_dataloader=q_dataloader, - q_func=q_func, - eval_func=eval_func, - eval_dataloader=eval_dataloader, - eval_metric=eval_metric, - resume=resume, - q_hooks=q_hooks) + super().__init__( + model=model, + conf=conf, + q_dataloader=q_dataloader, + q_func=q_func, + eval_func=eval_func, + eval_dataloader=eval_dataloader, + eval_metric=eval_metric, + resume=resume, + q_hooks=q_hooks, + ) def __getstate__(self): """Magic method for pickle saving. @@ -158,13 +152,13 @@ def __getstate__(self): dict: Saved dict for resuming """ for history in self.tuning_history: - if self._same_conf(history['cfg'], self.conf): - history['warm_start'] = True - history['hpopt_trials'] = self.hpopt_trials - history['loss_function_config'] = self.loss_function_config - history['tpe_params'] = self.tpe_params - history['hpopt_search_space'] = self.hpopt_search_space - history['_algo'] = self._algo + if self._same_conf(history["cfg"], self.conf): + history["warm_start"] = True + history["hpopt_trials"] = self.hpopt_trials + history["loss_function_config"] = self.loss_function_config + history["tpe_params"] = self.tpe_params + history["hpopt_search_space"] = self.hpopt_search_space + history["_algo"] = self._algo save_dict = super().__getstate__() return save_dict @@ -174,18 +168,19 @@ def _configure_hpopt_search_space_and_params(self, search_space): for param, configs in search_space.items(): self.hpopt_search_space[(param)] = hyperopt.hp.choice((param[0]), configs) # Find minimum number of choices for params with more than one choice - multichoice_params = [len(configs) for param, configs in search_space.items() - if len(configs) > 1] + multichoice_params = [len(configs) for param, configs in search_space.items() if len(configs) > 1] if not multichoice_params: return False min_param_size = min(multichoice_params) if len(multichoice_params) > 0 else 1 - self.tpe_params['n_EI_candidates'] = min_param_size - self.tpe_params['prior_weight'] = 1 / min_param_size - self._algo = partial(hyperopt.tpe.suggest, - n_startup_jobs=self.tpe_params['n_initial_point'], - gamma=self.tpe_params['gamma'], - n_EI_candidates=self.tpe_params['n_EI_candidates'], - prior_weight=self.tpe_params['prior_weight']) + self.tpe_params["n_EI_candidates"] = min_param_size + self.tpe_params["prior_weight"] = 1 / min_param_size + self._algo = partial( + hyperopt.tpe.suggest, + n_startup_jobs=self.tpe_params["n_initial_point"], + 
gamma=self.tpe_params["gamma"], + n_EI_candidates=self.tpe_params["n_EI_candidates"], + prior_weight=self.tpe_params["prior_weight"], + ) return True def traverse(self): @@ -193,26 +188,26 @@ def traverse(self): logger.info("Start to run tpe strategy.") self._prepare_tuning() # prepare log file - trials_file = os.path.join(os.path.dirname(self.history_path), 'tpe_trials.csv') - best_result_file = os.path.join(os.path.dirname(self.history_path), 'tpe_best_result.csv') - logger.debug("trials_file: {} ".format(trials_file) + \ - "best_result_file: {}".format(best_result_file)) + trials_file = os.path.join(os.path.dirname(self.history_path), "tpe_trials.csv") + best_result_file = os.path.join(os.path.dirname(self.history_path), "tpe_best_result.csv") + logger.debug("trials_file: {} ".format(trials_file) + "best_result_file: {}".format(best_result_file)) if Path(trials_file).exists(): os.remove(trials_file) status = True tuning_history = self._find_self_tuning_history() from copy import deepcopy + tuning_space = self.tuning_space initial_op_tuning_cfg = {} for item in tuning_space.root_item.options: - if item.item_type == 'op': + if item.item_type == "op": op_name, op_type = item.name - initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space) - calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options + initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, "fp32", tuning_space) + calib_sampling_size_lst = tuning_space.root_item.get_option_by_name("calib_sampling_size").options # step1. collect the ops that support static and dynamic quant_mode_wise_items = OrderedDict() - query_order = ['static', 'dynamic', 'bf16', 'fp16', 'fp32'] + query_order = ["static", "dynamic", "bf16", "fp16", "fp32"] pre_items = set() for quant_mode in query_order: items = tuning_space.query_items_by_quant_mode(quant_mode) @@ -227,48 +222,43 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): op_item_dtype_dict = OrderedDict() for quant_mode, quant_mode_items in quant_mode_wise_items.items(): initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) - op_wise_pool = OpWiseTuningSampler(tuning_space, [], [], - op_item_dtype_dict, initial_op_tuning_cfg) + op_wise_pool = OpWiseTuningSampler(tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg) self.op_configs = op_wise_pool.get_opwise_candidate() self.opwise_tune_cfgs = {} for key, val in self.op_configs.items(): - self.opwise_tune_cfgs[key[0]] =val - self.opwise_tune_cfgs['calib_sampling_size'] = \ - self.tuning_space.root_item.get_option_by_name('calib_sampling_size').options + self.opwise_tune_cfgs[key[0]] = val + self.opwise_tune_cfgs["calib_sampling_size"] = self.tuning_space.root_item.get_option_by_name( + "calib_sampling_size" + ).options if tuning_history and not self.warm_start: # prepare loss function scaling (best result from basic can be used) best_lat, worse_acc_loss = 0, 0 - for history in tuning_history['history']: - acc_loss, lat_diff = self._calculate_acc_lat_diff( - history['tune_result'][0], - history['tune_result'][1]) + for history in tuning_history["history"]: + acc_loss, lat_diff = self._calculate_acc_lat_diff(history["tune_result"][0], history["tune_result"][1]) if lat_diff > best_lat: best_lat = lat_diff if acc_loss > worse_acc_loss: worse_acc_loss = acc_loss - self._calculate_loss_function_scaling_components( - worse_acc_loss, - best_lat, - self.loss_function_config) - first_run_cfg = 
self.add_loss_to_tuned_history_and_find_best(tuning_history['history']) + self._calculate_loss_function_scaling_components(worse_acc_loss, best_lat, self.loss_function_config) + first_run_cfg = self.add_loss_to_tuned_history_and_find_best(tuning_history["history"]) # Prepare hpopt config with best cfg from history self._configure_hpopt_search_space_and_params(first_run_cfg) # Run first iteration with best result from history trials_count = len(self.hpopt_trials.trials) + 1 - hyperopt.fmin(partial(self.object_evaluation, model=self.model), - space=self.hpopt_search_space, - algo=self._algo, - max_evals=trials_count, - trials=self.hpopt_trials, - show_progressbar=False) + hyperopt.fmin( + partial(self.object_evaluation, model=self.model), + space=self.hpopt_search_space, + algo=self._algo, + max_evals=trials_count, + trials=self.hpopt_trials, + show_progressbar=False, + ) if pd is not None: self._save_trials(trials_file) self._update_best_result(best_result_file) # Prepare full hpopt search space - new_tune_cfgs = self._prepare_final_searchspace( - first_run_cfg, - self.opwise_tune_cfgs) + new_tune_cfgs = self._prepare_final_searchspace(first_run_cfg, self.opwise_tune_cfgs) status = self._configure_hpopt_search_space_and_params(new_tune_cfgs) elif not self.warm_start: self._calculate_loss_function_scaling_components(0.01, 2, self.loss_function_config) @@ -282,27 +272,36 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): self.baseline = self._evaluate(self.model) self._add_tuning_history() - baseline_msg = '[Accuracy: {:.4f}'.format(self.baseline[0]) + \ - ''.join([', {}: {:.4f}'.format(x,y) for x,y in zip( \ - self.objectives.representation, self.baseline[1]) if x != 'Accuracy']) \ - + ']' if self.baseline else 'n/a' + baseline_msg = ( + "[Accuracy: {:.4f}".format(self.baseline[0]) + + "".join( + [ + ", {}: {:.4f}".format(x, y) + for x, y in zip(self.objectives.representation, self.baseline[1]) + if x != "Accuracy" + ] + ) + + "]" + if self.baseline + else "n/a" + ) logger.info("FP32 baseline is: {}".format(baseline_msg)) if not self.objectives.relative: - self.loss_function_config['acc_th'] =\ - (self.baseline[0] - self.objectives.acc_goal) / self.baseline[0] + self.loss_function_config["acc_th"] = (self.baseline[0] - self.objectives.acc_goal) / self.baseline[0] # start trials exit = False while not exit: self.cfg_evaluated = False - logger.debug("Trial iteration start: {} / {}.".format( - trials_count, self.max_trials)) - hyperopt.fmin(partial(self.object_evaluation, model=self.model), - space=self.hpopt_search_space, - algo=self._algo, - max_evals=trials_count, - trials=self.hpopt_trials, - show_progressbar=False) + logger.debug("Trial iteration start: {} / {}.".format(trials_count, self.max_trials)) + hyperopt.fmin( + partial(self.object_evaluation, model=self.model), + space=self.hpopt_search_space, + algo=self._algo, + max_evals=trials_count, + trials=self.hpopt_trials, + show_progressbar=False, + ) trials_count += 1 if pd is not None: self._save_trials(trials_file) @@ -330,62 +329,62 @@ def add_loss_to_tuned_history_and_find_best(self, tuning_history_list): first_run_cfg = None for history in tuning_history_list: result = self._compute_metrics( - history['tune_cfg']['op'], - history['tune_result'][0], - history['tune_result'][1]) - if best_loss is None or result['loss'] < best_loss: - best_loss = result['loss'] - first_run_cfg = history['tune_cfg']['op'].copy() - result['source'] = 'finetune' - history['result'] = result + history["tune_cfg"]["op"], 
history["tune_result"][0], history["tune_result"][1] + ) + if best_loss is None or result["loss"] < best_loss: + best_loss = result["loss"] + first_run_cfg = history["tune_cfg"]["op"].copy() + result["source"] = "finetune" + history["result"] = result logger.debug( - "Resumed iteration loss is {}, acc_loss is {}, lat_diff is {}, " \ - "quantization_ratio is {}.".format(result['loss'], - result['acc_loss'], - result['lat_diff'], - result['quantization_ratio'])) + "Resumed iteration loss is {}, acc_loss is {}, lat_diff is {}, " + "quantization_ratio is {}.".format( + result["loss"], result["acc_loss"], result["lat_diff"], result["quantization_ratio"] + ) + ) for op, cfg in first_run_cfg.items(): - first_run_cfg[op] = [cfg,] + first_run_cfg[op] = [ + cfg, + ] return first_run_cfg def object_evaluation(self, tune_cfg, model): """Check if config was alredy evaluated.""" for k, v in self.op_configs.items(): - tune_cfg.update({k : tune_cfg.pop(k[0])}) + tune_cfg.update({k: tune_cfg.pop(k[0])}) op_cfgs = self._tune_cfg_converter(tune_cfg) self.last_qmodel = self.adaptor.quantize(op_cfgs, self.model, self.calib_dataloader) self.last_tune_cfg = copy.deepcopy(tune_cfg) self.last_tune_result = self._evaluate(self.last_qmodel) - logger.info("The last tune result is {}.".format( - (self.last_tune_result[0], self.last_tune_result[1][0]))) + logger.info("The last tune result is {}.".format((self.last_tune_result[0], self.last_tune_result[1][0]))) saved_tune_cfg = copy.deepcopy(op_cfgs) saved_last_tune_result = copy.deepcopy(self.last_tune_result) # prepare result - result = self._compute_metrics( - op_cfgs['op'], - self.last_tune_result[0], - self.last_tune_result[1][0]) - result['source'] = 'tpe' + result = self._compute_metrics(op_cfgs["op"], self.last_tune_result[0], self.last_tune_result[1][0]) + result["source"] = "tpe" self._add_tuning_history(saved_tune_cfg, saved_last_tune_result, result=result) - logger.info("Current iteration loss is {}, acc_loss is {}, lat_diff is {}, " \ - "quantization_ratio is {}.".format(result['loss'], - result['acc_loss'], - result['lat_diff'], - result['quantization_ratio'])) + logger.info( + "Current iteration loss is {}, acc_loss is {}, lat_diff is {}, " + "quantization_ratio is {}.".format( + result["loss"], result["acc_loss"], result["lat_diff"], result["quantization_ratio"] + ) + ) return result def _compute_metrics(self, tune_cfg, acc, lat): - quantization_ratio = 1 - len([param for param in tune_cfg.values() - if param['activation']['dtype'] =='fp32']) / len(tune_cfg) + quantization_ratio = 1 - len( + [param for param in tune_cfg.values() if param["activation"]["dtype"] == "fp32"] + ) / len(tune_cfg) acc_diff, lat_diff = self._calculate_acc_lat_diff(acc, lat) return { - 'loss': self.calculate_loss(acc_diff, lat_diff, self.loss_function_config), - 'acc' : acc, - 'lat' : lat, - 'acc_loss': acc_diff, - 'lat_diff': lat_diff, - 'quantization_ratio': quantization_ratio, - 'status': hyperopt.STATUS_OK} + "loss": self.calculate_loss(acc_diff, lat_diff, self.loss_function_config), + "acc": acc, + "lat": lat, + "acc_loss": acc_diff, + "lat_diff": lat_diff, + "quantization_ratio": quantization_ratio, + "status": hyperopt.STATUS_OK, + } def _calculate_acc_lat_diff(self, acc, lat): int8_acc = acc @@ -401,13 +400,13 @@ def calculate_loss(self, acc_diff, lat_diff, config): gamma_penalty = 40 # penalty term acc_loss_component = self._calculate_acc_loss_component(acc_diff) lat_loss_component = self._calculate_lat_diff_component(lat_diff) - acc_weight = config['acc_weight'] if 
acc_diff > config['acc_th'] else 0.0 - if acc_weight == 0 and config['lat_weight'] == 0: + acc_weight = config["acc_weight"] if acc_diff > config["acc_th"] else 0.0 + if acc_weight == 0 and config["lat_weight"] == 0: acc_weight = 1.0 - loss = acc_weight * (config['acc_scale'] * (acc_loss_component - config['acc_min'])) \ - + config['lat_weight']\ - * (config['lat_scale'] * (lat_loss_component - config['lat_min'])) - if acc_diff > config['acc_th']: + loss = acc_weight * (config["acc_scale"] * (acc_loss_component - config["acc_min"])) + config["lat_weight"] * ( + config["lat_scale"] * (lat_loss_component - config["lat_min"]) + ) + if acc_diff > config["acc_th"]: loss += 2 * gamma_penalty return loss @@ -421,16 +420,16 @@ def _calculate_loss_function_scaling_components(self, acc_loss, lat_diff, config acc_min = self._calculate_acc_loss_component(0) acc_max = self._calculate_acc_loss_component(acc_loss) if acc_max == acc_min: - acc_max = self._calculate_acc_loss_component(config['acc_th']) - config['acc_min'] = acc_min - config['acc_scale'] = 10 / np.abs(acc_max - acc_min) + acc_max = self._calculate_acc_loss_component(config["acc_th"]) + config["acc_min"] = acc_min + config["acc_scale"] = 10 / np.abs(acc_max - acc_min) lat_min = self._calculate_lat_diff_component(lat_diff) lat_max = self._calculate_lat_diff_component(1) if lat_min == lat_max: lat_min = self._calculate_lat_diff_component(2) - config['lat_min'] = lat_min - config['lat_scale'] = 10 / np.abs(lat_max - lat_min) + config["lat_min"] = lat_min + config["lat_scale"] = 10 / np.abs(lat_max - lat_min) def _save_trials(self, trials_log): """Save the trial result to the log file.""" @@ -440,50 +439,56 @@ def _save_trials(self, trials_log): def _update_best_result(self, best_result_file): if not self.hpopt_trials: - raise Exception( - 'No trials loaded to get best result') + raise Exception("No trials loaded to get best result") trials_results = pd.DataFrame(self.hpopt_trials.results) - if not trials_results[trials_results.acc_loss <= - self.loss_function_config['acc_th']].empty: + if not trials_results[trials_results.acc_loss <= self.loss_function_config["acc_th"]].empty: # If accuracy threshold reached, choose best latency - best_result = trials_results[trials_results.acc_loss <= - self.loss_function_config['acc_th']] \ - .reset_index(drop=True).sort_values(by=['lat_diff', 'acc_loss'], - ascending=[False, True]) \ - .reset_index(drop=True).loc[0] + best_result = ( + trials_results[trials_results.acc_loss <= self.loss_function_config["acc_th"]] + .reset_index(drop=True) + .sort_values(by=["lat_diff", "acc_loss"], ascending=[False, True]) + .reset_index(drop=True) + .loc[0] + ) else: # If accuracy threshold is not reached, choose based on loss function - best_result = \ - trials_results.sort_values('loss', ascending=True).reset_index(drop=True).loc[0] + best_result = trials_results.sort_values("loss", ascending=True).reset_index(drop=True).loc[0] update_best_result = False - if not self.best_result['best_loss']: + if not self.best_result["best_loss"]: update_best_result = True - elif self.best_result['best_acc_loss'] <= self.loss_function_config['acc_th']: - if best_result['acc_loss'] <= self.loss_function_config['acc_th'] \ - and best_result['lat_diff'] > self.best_result['best_lat_diff']: + elif self.best_result["best_acc_loss"] <= self.loss_function_config["acc_th"]: + if ( + best_result["acc_loss"] <= self.loss_function_config["acc_th"] + and best_result["lat_diff"] > self.best_result["best_lat_diff"] + ): update_best_result = True 
else: - if best_result['acc_loss'] <= self.loss_function_config['acc_th'] or \ - best_result['loss'] < self.best_result['best_loss']: + if ( + best_result["acc_loss"] <= self.loss_function_config["acc_th"] + or best_result["loss"] < self.best_result["best_loss"] + ): update_best_result = True if update_best_result: best_result.to_csv(best_result_file, header=False) - self.best_result['best_loss'] = best_result['loss'] - self.best_result['best_acc_loss'] = best_result['acc_loss'] - self.best_result['best_lat_diff'] = best_result['lat_diff'] - self.best_result['quantization_ratio'] = best_result['quantization_ratio'] - - logger.info("Trial iteration end is {} / {}, best loss is {}, acc_loss is {}, " \ - "lat_diff is {}, quantization_ratio is {}.".format( - len(self.hpopt_trials.trials), - self.max_trials, - self.best_result['best_loss'], - self.best_result['best_acc_loss'], - self.best_result['best_lat_diff'], - self.best_result['quantization_ratio'])) + self.best_result["best_loss"] = best_result["loss"] + self.best_result["best_acc_loss"] = best_result["acc_loss"] + self.best_result["best_lat_diff"] = best_result["lat_diff"] + self.best_result["quantization_ratio"] = best_result["quantization_ratio"] + + logger.info( + "Trial iteration end is {} / {}, best loss is {}, acc_loss is {}, " + "lat_diff is {}, quantization_ratio is {}.".format( + len(self.hpopt_trials.trials), + self.max_trials, + self.best_result["best_loss"], + self.best_result["best_acc_loss"], + self.best_result["best_lat_diff"], + self.best_result["quantization_ratio"], + ) + ) def stop(self, timeout, trials_count): """Check if need to stop traversing the tuning space, either accuracy goal is met or timeout is reach. @@ -502,21 +507,35 @@ def stop(self, timeout, trials_count): else: del self.last_qmodel - last_tune_msg = '[Accuracy ({}|fp32): {:.4f}|{:.4f}'.format( \ - 'int8', self.last_tune_result[0], self.baseline[0]) + \ - ''.join([', {} ({}|fp32): {:.4f}|{:.4f}'.format(x,'int8',y,z) \ - for x,y,z in zip(self.objectives.representation, \ - self.last_tune_result[1], self.baseline[1]) if x != 'Accuracy']) + ']' \ - if self.last_tune_result else 'n/a' - - best_tune_msg = '[Accuracy: {:.4f}'.format(self.best_tune_result[0]) + \ - ''.join([', {}: {:.4f}'.format(x,y) for x,y in zip( \ - self.objectives.representation, self.best_tune_result[1]) if x != 'Accuracy']) \ - + ']' if self.best_tune_result else 'n/a' - - logger.info("Tune {} result is: {}, Best tune result is: {}".format(trials_count, - last_tune_msg, - best_tune_msg)) + last_tune_msg = ( + "[Accuracy ({}|fp32): {:.4f}|{:.4f}".format("int8", self.last_tune_result[0], self.baseline[0]) + + "".join( + [ + ", {} ({}|fp32): {:.4f}|{:.4f}".format(x, "int8", y, z) + for x, y, z in zip(self.objectives.representation, self.last_tune_result[1], self.baseline[1]) + if x != "Accuracy" + ] + ) + + "]" + if self.last_tune_result + else "n/a" + ) + + best_tune_msg = ( + "[Accuracy: {:.4f}".format(self.best_tune_result[0]) + + "".join( + [ + ", {}: {:.4f}".format(x, y) + for x, y in zip(self.objectives.representation, self.best_tune_result[1]) + if x != "Accuracy" + ] + ) + + "]" + if self.best_tune_result + else "n/a" + ) + + logger.info("Tune {} result is: {}, Best tune result is: {}".format(trials_count, last_tune_msg, best_tune_msg)) if timeout == 0 and self.best_tune_result: need_stop = True diff --git a/neural_compressor/data/__init__.py b/neural_compressor/data/__init__.py index 68fbe546f49..e1ade06b247 100644 --- a/neural_compressor/data/__init__.py +++ 
b/neural_compressor/data/__init__.py @@ -42,24 +42,25 @@ "IterableDataset", "COCORecordDataset", "dataset_registry", - 'TensorflowImageRecord', + "TensorflowImageRecord", "TRANSFORMS", "BaseTransform", "ComposeTransform", "transform_registry", "Postprocess", - 'LabelShift', - "ResizeTFTransform", + "LabelShift", + "ResizeTFTransform", "RescaleTFTransform", - 'TensorflowShiftRescale', + "TensorflowShiftRescale", "NormalizeTFTransform", "ParseDecodeCocoTransform", - 'BilinearImagenetTransform', + "BilinearImagenetTransform", "TensorflowResizeWithRatio", - 'TensorflowResizeCropImagenetTransform', + "TensorflowResizeCropImagenetTransform", "FILTERS", "Filter", "filter_registry", "LabelBalanceCOCORecordFilter", "TFSquadV1PostTransform", - "TFSquadV1ModelZooPostTransform"] + "TFSquadV1ModelZooPostTransform", +] diff --git a/neural_compressor/data/dataloaders/__init__.py b/neural_compressor/data/dataloaders/__init__.py index b2568bad678..9ce168deec2 100644 --- a/neural_compressor/data/dataloaders/__init__.py +++ b/neural_compressor/data/dataloaders/__init__.py @@ -18,7 +18,4 @@ from .dataloader import DataLoader, DATALOADERS -__all__ = [ - "DataLoader", - "DATALOADERS" -] \ No newline at end of file +__all__ = ["DataLoader", "DATALOADERS"] diff --git a/neural_compressor/data/dataloaders/base_dataloader.py b/neural_compressor/data/dataloaders/base_dataloader.py index 9349760239e..597bdfa5664 100644 --- a/neural_compressor/data/dataloaders/base_dataloader.py +++ b/neural_compressor/data/dataloaders/base_dataloader.py @@ -20,18 +20,27 @@ from abc import abstractmethod -class BaseDataLoader: # pragma: no cover +class BaseDataLoader: # pragma: no cover """Base class for all DataLoaders. _generate_dataloader is needed to create a dataloader object from the general params like batch_size and sampler. The dynamic batching is just to generate a new dataloader by setting batch_size and last_batch. - """ - - def __init__(self, dataset, batch_size=1, last_batch='rollover', collate_fn=None, - sampler=None, batch_sampler=None, num_workers=0, pin_memory=False, - shuffle=False, distributed=False): + + def __init__( + self, + dataset, + batch_size=1, + last_batch="rollover", + collate_fn=None, + sampler=None, + batch_sampler=None, + num_workers=0, + pin_memory=False, + shuffle=False, + distributed=False, + ): """Initialize BaseDataLoader. Args: @@ -58,7 +67,7 @@ def __init__(self, dataset, batch_size=1, last_batch='rollover', collate_fn=None self.shuffle = shuffle self.distributed = distributed self.last_batch = last_batch - self.drop_last = False if last_batch == 'rollover' else True + self.drop_last = False if last_batch == "rollover" else True self.dataloader = self._generate_dataloader( self.dataset, @@ -70,7 +79,8 @@ def __init__(self, dataset, batch_size=1, last_batch='rollover', collate_fn=None num_workers=num_workers, pin_memory=pin_memory, shuffle=shuffle, - distributed=distributed) + distributed=distributed, + ) def batch(self, batch_size, last_batch=None): """Set batch size for dataloader. 
@@ -94,7 +104,8 @@ def batch(self, batch_size, last_batch=None): self.num_workers, self.pin_memory, self.shuffle, - self.distributed) + self.distributed, + ) @property def batch_size(self): @@ -114,6 +125,17 @@ def __iter__(self): return iter(self.dataloader) @abstractmethod - def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, sampler, - batch_sampler, num_workers, pin_memory, shuffle, distributed): + def _generate_dataloader( + self, + dataset, + batch_size, + last_batch, + collate_fn, + sampler, + batch_sampler, + num_workers, + pin_memory, + shuffle, + distributed, + ): raise NotImplementedError diff --git a/neural_compressor/data/dataloaders/dataloader.py b/neural_compressor/data/dataloaders/dataloader.py index 2ce38dd3f3b..2b8c3909ae9 100644 --- a/neural_compressor/data/dataloaders/dataloader.py +++ b/neural_compressor/data/dataloaders/dataloader.py @@ -14,92 +14,117 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Built-in dataloaders for multiple framework backends.""" -from .tensorflow_dataloader import TensorflowDataLoader from .mxnet_dataloader import MXNetDataLoader -from .pytorch_dataloader import PyTorchDataLoader from .onnxrt_dataloader import ONNXRTDataLoader +from .pytorch_dataloader import PyTorchDataLoader +from .tensorflow_dataloader import TensorflowDataLoader + +DATALOADERS = { + "tensorflow": TensorflowDataLoader, + "tensorflow_itex": TensorflowDataLoader, + "keras": TensorflowDataLoader, + "mxnet": MXNetDataLoader, + "pytorch": PyTorchDataLoader, + "pytorch_ipex": PyTorchDataLoader, + "pytorch_fx": PyTorchDataLoader, + "onnxruntime": ONNXRTDataLoader, + "onnxrt_qlinearops": ONNXRTDataLoader, + "onnxrt_integerops": ONNXRTDataLoader, + "onnxrt_qdq": ONNXRTDataLoader, + "onnxrt_qoperator": ONNXRTDataLoader, +} -DATALOADERS = {"tensorflow": TensorflowDataLoader, - "tensorflow_itex": TensorflowDataLoader, - "keras": TensorflowDataLoader, - "mxnet": MXNetDataLoader, - "pytorch": PyTorchDataLoader, - "pytorch_ipex": PyTorchDataLoader, - "pytorch_fx": PyTorchDataLoader, - "onnxruntime": ONNXRTDataLoader, - "onnxrt_qlinearops": ONNXRTDataLoader, - "onnxrt_integerops": ONNXRTDataLoader, - "onnxrt_qdq": ONNXRTDataLoader, - "onnxrt_qoperator": ONNXRTDataLoader, - } class DataLoader(object): """Entrance of all configured DataLoaders.""" - def __new__(cls, framework, dataset, batch_size=1, collate_fn=None, - last_batch='rollover', sampler=None, batch_sampler=None, - num_workers=0, pin_memory=False, shuffle=False, distributed=False): + def __new__( + cls, + framework, + dataset, + batch_size=1, + collate_fn=None, + last_batch="rollover", + sampler=None, + batch_sampler=None, + num_workers=0, + pin_memory=False, + shuffle=False, + distributed=False, + ): """Initialize a Dataloader with needed information. Args: framework (str): different frameworks, such as tensorflow, pytorch, onnx. - dataset (object): A dataset object from which to get data. Dataset must implement + dataset (object): A dataset object from which to get data. Dataset must implement __iter__ or __getitem__ method. batch_size (int, optional): How many samples per batch to load. Defaults to 1. - collate_fn (Callable, optional): Callable function that processes the batch you + collate_fn (Callable, optional): Callable function that processes the batch you want to return from your dataloader. Defaults to None. 
- last_batch (str, optional): How to handle the last batch if the batch size does - not evenly divide by the number of examples in the dataset. 'discard': throw + last_batch (str, optional): How to handle the last batch if the batch size does + not evenly divide by the number of examples in the dataset. 'discard': throw it away. 'rollover': insert the examples to the beginning of the next batch. Defaults to 'rollover'. sampler (Iterable, optional): Defines the strategy to draw samples from the dataset. Defaults to None. batch_sampler (Iterable, optional): Returns a batch of indices at a time. Defaults to None. - num_workers (int, optional): how many subprocesses to use for data loading. + num_workers (int, optional): how many subprocesses to use for data loading. 0 means that the data will be loaded in the main process. Defaults to 0. - pin_memory (bool, optional): If True, the data loader will copy Tensors into device + pin_memory (bool, optional): If True, the data loader will copy Tensors into device pinned memory before returning them. Defaults to False. shuffle (bool, optional): Set to ``True`` to have the data reshuffled at every epoch. Defaults to False. - distributed (bool, optional): Set to ``True`` to support distributed computing. + distributed (bool, optional): Set to ``True`` to support distributed computing. Defaults to False. """ - assert framework in ('tensorflow', 'tensorflow_itex', 'keras',\ - 'pytorch', 'pytorch_ipex', 'pytorch_fx', 'onnxruntime', 'onnxrt_qdqops', \ - 'onnxrt_qlinearops', 'onnxrt_integerops', 'mxnet'), \ - "framework support tensorflow pytorch mxnet onnxruntime" - return DATALOADERS[framework](dataset=dataset, - batch_size=batch_size, - last_batch=last_batch, - collate_fn=collate_fn, - sampler=sampler, - batch_sampler=batch_sampler, - num_workers=num_workers, - pin_memory=pin_memory, - shuffle=shuffle, - distributed=distributed) + assert framework in ( + "tensorflow", + "tensorflow_itex", + "keras", + "pytorch", + "pytorch_ipex", + "pytorch_fx", + "onnxruntime", + "onnxrt_qdqops", + "onnxrt_qlinearops", + "onnxrt_integerops", + "mxnet", + ), "framework support tensorflow pytorch mxnet onnxruntime" + return DATALOADERS[framework]( + dataset=dataset, + batch_size=batch_size, + last_batch=last_batch, + collate_fn=collate_fn, + sampler=sampler, + batch_sampler=batch_sampler, + num_workers=num_workers, + pin_memory=pin_memory, + shuffle=shuffle, + distributed=distributed, + ) def _generate_common_dataloader(dataloader, framework, distributed=False): """Generate common dataloader. Args: - dataloader (generator): A dataloader which can yield tuple of (input, label)/(input, _) + dataloader (generator): A dataloader which can yield tuple of (input, label)/(input, _) batched data. framework (str): The string of supported framework. - distributed (bool, optional): Set to ``True`` to support distributed computing. + distributed (bool, optional): Set to ``True`` to support distributed computing. Defaults to False. 
Returns: BaseDataLoader: neural_compressor built-in dataloader """ if not isinstance(dataloader, DataLoader): - assert hasattr(dataloader, '__iter__') and \ - hasattr(dataloader, 'batch_size'), \ - 'dataloader must implement __iter__ method and batch_size attribute' - assert not distributed, "Please use \ + assert hasattr(dataloader, "__iter__") and hasattr( + dataloader, "batch_size" + ), "dataloader must implement __iter__ method and batch_size attribute" + assert ( + not distributed + ), "Please use \ neural_compressor.data.DataLoader to support distributed computing" return dataloader else: @@ -113,12 +138,13 @@ def _generate_common_dataloader(dataloader, framework, distributed=False): num_workers=dataloader.num_workers, pin_memory=dataloader.pin_memory, shuffle=dataloader.shuffle, - distributed=bool(dataloader.distributed or distributed)) + distributed=bool(dataloader.distributed or distributed), + ) def check_dataloader(dataloader): """Check if the dataloader meets requirement of neural_compressor.""" - assert hasattr(dataloader, '__iter__') and \ - hasattr(dataloader, 'batch_size'), \ - 'dataloader must implement __iter__ method and batch_size attribute' + assert hasattr(dataloader, "__iter__") and hasattr( + dataloader, "batch_size" + ), "dataloader must implement __iter__ method and batch_size attribute" return True diff --git a/neural_compressor/data/dataloaders/default_dataloader.py b/neural_compressor/data/dataloaders/default_dataloader.py index d9a2d74fb26..14e3bfc9ae9 100644 --- a/neural_compressor/data/dataloaders/default_dataloader.py +++ b/neural_compressor/data/dataloaders/default_dataloader.py @@ -18,14 +18,17 @@ """Default dataloader for multiple framework backends.""" import collections -import numpy as np -from math import ceil, floor from abc import abstractmethod -from .sampler import IterableSampler, SequentialSampler, BatchSampler -from .fetcher import FETCHERS +from math import ceil, floor + +import numpy as np + from .base_dataloader import BaseDataLoader +from .fetcher import FETCHERS +from .sampler import BatchSampler, IterableSampler, SequentialSampler + -def default_collate(batch): # pragma: no cover +def default_collate(batch): # pragma: no cover """Merge data with outer dimension batch size.""" elem = batch[0] if isinstance(elem, collections.abc.Mapping): @@ -41,12 +44,23 @@ def default_collate(batch): # pragma: no cover else: return batch -class DefaultDataLoader(BaseDataLoader): # pragma: no cover + +class DefaultDataLoader(BaseDataLoader): # pragma: no cover """DefaultDataLoader for multiple framework backends.""" - - def __init__(self, dataset, batch_size=1, last_batch='rollover', collate_fn=None, - sampler=None, batch_sampler=None, num_workers=0, pin_memory=False, - shuffle=False, distributed=False): + + def __init__( + self, + dataset, + batch_size=1, + last_batch="rollover", + collate_fn=None, + sampler=None, + batch_sampler=None, + num_workers=0, + pin_memory=False, + shuffle=False, + distributed=False, + ): """Initialize DefaultDataLoader. Args: @@ -61,7 +75,7 @@ def __init__(self, dataset, batch_size=1, last_batch='rollover', collate_fn=None num_workers (int, optional): number of subprocesses to use for data loading. Defaults to 0. pin_memory (bool, optional): whether to copy data into pinned memory before returning. Defaults to False. shuffle (bool, optional): whether to shuffle data. Defaults to False. - distributed (bool, optional): whether the dataloader is distributed. Defaults to False. 
+ distributed (bool, optional): whether the dataloader is distributed. Defaults to False. """ self.dataset = dataset self.last_batch = last_batch @@ -73,11 +87,11 @@ def __init__(self, dataset, batch_size=1, last_batch='rollover', collate_fn=None self._batch_size = batch_size self.shuffle = shuffle self.distributed = distributed - self.drop_last = False if last_batch == 'rollover' else True - if self.collate_fn == None: + self.drop_last = False if last_batch == "rollover" else True + if self.collate_fn is None: self.collate_fn = default_collate - def batch(self, batch_size, last_batch='rollover'): + def batch(self, batch_size, last_batch="rollover"): """Set batch_size and last_batch.""" self._batch_size = batch_size self.last_batch = last_batch @@ -99,7 +113,8 @@ def __iter__(self): num_workers=self.num_workers, pin_memory=self.pin_memory, shuffle=self.shuffle, - distributed=self.distributed) + distributed=self.distributed, + ) def __len__(self): """Get dataset length.""" @@ -110,17 +125,29 @@ def __len__(self): for _ in self.dataset: dataset_len += 1 except Exception: - raise ValueError(f"{self.dataset} is invalid, {self.dataset}" \ - " does not support calculating the length of its dataloader") - if self.drop_last == False: + raise ValueError( + f"{self.dataset} is invalid, {self.dataset}" + " does not support calculating the length of its dataloader" + ) + if self.drop_last is False: dataloader_len = ceil(dataset_len / self.batch_size) else: dataloader_len = floor(dataset_len / self.batch_size) return dataloader_len - def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, sampler, - batch_sampler, num_workers, pin_memory, shuffle, distributed): - + def _generate_dataloader( + self, + dataset, + batch_size, + last_batch, + collate_fn, + sampler, + batch_sampler, + num_workers, + pin_memory, + shuffle, + distributed, + ): sampler = self._generate_sampler(dataset, distributed) self.batch_sampler = BatchSampler(sampler, batch_size, self.drop_last) self.fetcher = FETCHERS[self.dataset_type](dataset, collate_fn, self.drop_last, distributed) @@ -134,10 +161,10 @@ def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, samp def _generate_sampler(self, dataset, distributed): if hasattr(dataset, "__getitem__"): - self.dataset_type = 'index' + self.dataset_type = "index" return SequentialSampler(dataset, distributed) elif hasattr(dataset, "__iter__"): - self.dataset_type = 'iter' + self.dataset_type = "iter" return IterableSampler(dataset) else: raise ValueError("dataset type only support (index, iter)") diff --git a/neural_compressor/data/dataloaders/fetcher.py b/neural_compressor/data/dataloaders/fetcher.py index 01ab6d895fa..e035fa06d31 100644 --- a/neural_compressor/data/dataloaders/fetcher.py +++ b/neural_compressor/data/dataloaders/fetcher.py @@ -19,14 +19,15 @@ from abc import abstractmethod -class Fetcher(object): # pragma: no cover + +class Fetcher(object): # pragma: no cover """Base class for different fetchers.""" def __init__(self, dataset, collate_fn, drop_last): """Initialize Fetcher. 
Args: - dataset (object): dataset object from which to get data + dataset (object): dataset object from which to get data collate_fn (callable): merge data with outer dimension batch size drop_last (bool): whether to drop the last batch if it is incomplete """ @@ -40,11 +41,11 @@ def __call__(self, batched_indices): Args: batched_indices (list): fetch data according to batched_indices - """ raise NotImplementedError -class IterableFetcher(Fetcher): # pragma: no cover + +class IterableFetcher(Fetcher): # pragma: no cover """Iterate to get next batch-size samples as a batch.""" def __init__(self, dataset, collate_fn, drop_last, distributed): @@ -55,38 +56,39 @@ def __init__(self, dataset, collate_fn, drop_last, distributed): collate_fn (callable): merge data with outer dimension batch size drop_last (bool): whether to drop the last batch if it is incomplete distributed (bool): whether the dataloader is distributed - """ super(IterableFetcher, self).__init__(dataset, collate_fn, drop_last) self.dataset_iter = iter(dataset) self.index_whole = 0 - self.process_rank = 0 # The default rank is 0, which represents the main process - self.process_size = 1 # By default, process_size=1, only the main process is running + self.process_rank = 0 # The default rank is 0, which represents the main process + self.process_size = 1 # By default, process_size=1, only the main process is running if distributed: import horovod.tensorflow as hvd + hvd.init() self.process_rank = hvd.rank() self.process_size = hvd.size() if self.process_size < 2: - raise EnvironmentError("The program is now trying to traverse" \ - " the distributed TensorFlow DefaultDataLoader in only one process." \ - " If you do not want to use distributed DataLoader, please set" \ - " 'distributed: False'. Or If you want to use distributed DataLoader," \ - " please set 'distributed: True' and launch multiple processes.") + raise EnvironmentError( + "The program is now trying to traverse" + " the distributed TensorFlow DefaultDataLoader in only one process." + " If you do not want to use distributed DataLoader, please set" + " 'distributed: False'. Or If you want to use distributed DataLoader," + " please set 'distributed: True' and launch multiple processes." + ) def __call__(self, batched_indices): """Fetch data. 
Args: batched_indices (list): fetch data according to batched_indices - """ batch_data = [] batch_size = len(batched_indices) while True: try: iter_data = next(self.dataset_iter) - if (self.index_whole-self.process_rank)%self.process_size == 0: + if (self.index_whole - self.process_rank) % self.process_size == 0: batch_data.append(iter_data) self.index_whole += 1 if len(batch_data) == batch_size: @@ -97,7 +99,8 @@ def __call__(self, batched_indices): raise StopIteration return self.collate_fn(batch_data) -class IndexFetcher(Fetcher): # pragma: no cover + +class IndexFetcher(Fetcher): # pragma: no cover """Take single index or a batch of indices to fetch samples as a batch.""" def __init__(self, dataset, collate_fn, drop_last, distributed): @@ -116,9 +119,12 @@ def __call__(self, batched_indices): Args: batched_indices (list): fetch data according to batched_indices - """ data = [self.dataset[idx] for idx in batched_indices] return self.collate_fn(data) -FETCHERS = {"index": IndexFetcher, "iter": IterableFetcher, } + +FETCHERS = { + "index": IndexFetcher, + "iter": IterableFetcher, +} diff --git a/neural_compressor/data/dataloaders/mxnet_dataloader.py b/neural_compressor/data/dataloaders/mxnet_dataloader.py index 352f63fc731..17772c7a4a3 100644 --- a/neural_compressor/data/dataloaders/mxnet_dataloader.py +++ b/neural_compressor/data/dataloaders/mxnet_dataloader.py @@ -17,26 +17,41 @@ # ============================================================================== """MXNet Dataloader implementation.""" +import logging + from neural_compressor.utils.utility import LazyImport + from .base_dataloader import BaseDataLoader -import logging -mx = LazyImport('mxnet') -class MXNetDataLoader(BaseDataLoader): # pragma: no cover +mx = LazyImport("mxnet") + + +class MXNetDataLoader(BaseDataLoader): # pragma: no cover """Subclass of BaseDataLoader.""" - def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, - sampler, batch_sampler, num_workers, pin_memory, - shuffle, distributed): + + def _generate_dataloader( + self, + dataset, + batch_size, + last_batch, + collate_fn, + sampler, + batch_sampler, + num_workers, + pin_memory, + shuffle, + distributed, + ): """Overwrite _generate_dataloader function.""" if shuffle: - logging.warning('Shuffle is not supported yet in MXNetDataLoader, ' \ - 'ignoring shuffle keyword.') + logging.warning("Shuffle is not supported yet in MXNetDataLoader, " "ignoring shuffle keyword.") return mx.gluon.data.DataLoader( - dataset, - batch_size=batch_size, - batchify_fn=collate_fn, - last_batch=last_batch, - num_workers=num_workers, - pin_memory=pin_memory, - sampler=sampler, - batch_sampler=batch_sampler) + dataset, + batch_size=batch_size, + batchify_fn=collate_fn, + last_batch=last_batch, + num_workers=num_workers, + pin_memory=pin_memory, + sampler=sampler, + batch_sampler=batch_sampler, + ) diff --git a/neural_compressor/data/dataloaders/onnxrt_dataloader.py b/neural_compressor/data/dataloaders/onnxrt_dataloader.py index 028bcdb6981..0e22b8098af 100644 --- a/neural_compressor/data/dataloaders/onnxrt_dataloader.py +++ b/neural_compressor/data/dataloaders/onnxrt_dataloader.py @@ -17,66 +17,102 @@ # ============================================================================== """Built-in dataloaders for onnxruntime framework backends.""" +import logging + from neural_compressor.utils.utility import LazyImport + +from ..datasets.bert_dataset import ONNXRTBertDataset from .base_dataloader import BaseDataLoader from .default_dataloader import 
DefaultDataLoader -from ..datasets.bert_dataset import ONNXRTBertDataset -import logging -torch = LazyImport('torch') -class ONNXRTBertDataLoader(DefaultDataLoader): # pragma: no cover +torch = LazyImport("torch") + + +class ONNXRTBertDataLoader(DefaultDataLoader): # pragma: no cover """Built-in dataloader for onnx bert model and its varients.""" - def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, - sampler, batch_sampler, num_workers, pin_memory, - shuffle, distributed): + def _generate_dataloader( + self, + dataset, + batch_size, + last_batch, + collate_fn, + sampler, + batch_sampler, + num_workers, + pin_memory, + shuffle, + distributed, + ): import numpy as np from torch.utils.data import DataLoader, SequentialSampler + sampler = SequentialSampler(dataset) - dataloader = DataLoader(dataset, sampler=sampler, \ - batch_size=batch_size) + dataloader = DataLoader(dataset, sampler=sampler, batch_size=batch_size) dynamic_length = dataset.dynamic_length model_type = dataset.model_type max_seq_length = dataset.max_seq_length for batch in dataloader: try: - batch_seq_length = max_seq_length if not dynamic_length \ - else torch.max(batch[-2], 0)[0].item() - batch = tuple(t.detach().cpu().numpy() \ - if not isinstance(t, np.ndarray) else t \ - for t in batch) - if model_type == 'bert': + batch_seq_length = max_seq_length if not dynamic_length else torch.max(batch[-2], 0)[0].item() + batch = tuple(t.detach().cpu().numpy() if not isinstance(t, np.ndarray) else t for t in batch) + if model_type == "bert": data = [ - batch[0][:,:batch_seq_length], - batch[1][:,:batch_seq_length], - batch[2][:,:batch_seq_length] + batch[0][:, :batch_seq_length], + batch[1][:, :batch_seq_length], + batch[2][:, :batch_seq_length], ] else: - data = [ - batch[0][:,:batch_seq_length], - batch[1][:,:batch_seq_length] - ] + data = [batch[0][:, :batch_seq_length], batch[1][:, :batch_seq_length]] label = batch[-1] yield data, label except StopIteration: return -class ONNXRTDataLoader(BaseDataLoader): # pragma: no cover + +class ONNXRTDataLoader(BaseDataLoader): # pragma: no cover """Built-in dataloader for onnxruntime framework backends.""" - def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, - sampler, batch_sampler, num_workers, pin_memory, - shuffle, distributed): + def _generate_dataloader( + self, + dataset, + batch_size, + last_batch, + collate_fn, + sampler, + batch_sampler, + num_workers, + pin_memory, + shuffle, + distributed, + ): if shuffle: - logging.warning('Shuffle is not supported yet in ONNXRTDataLoader, ' \ - 'ignoring shuffle keyword.') + logging.warning("Shuffle is not supported yet in ONNXRTDataLoader, " "ignoring shuffle keyword.") if isinstance(dataset, ONNXRTBertDataset): - return ONNXRTBertDataLoader(dataset, batch_size, last_batch, collate_fn, - sampler, batch_sampler, num_workers, pin_memory, - shuffle, distributed) + return ONNXRTBertDataLoader( + dataset, + batch_size, + last_batch, + collate_fn, + sampler, + batch_sampler, + num_workers, + pin_memory, + shuffle, + distributed, + ) else: - return DefaultDataLoader(dataset, batch_size, last_batch, collate_fn, - sampler, batch_sampler, num_workers, pin_memory, - shuffle, distributed) + return DefaultDataLoader( + dataset, + batch_size, + last_batch, + collate_fn, + sampler, + batch_sampler, + num_workers, + pin_memory, + shuffle, + distributed, + ) diff --git a/neural_compressor/data/dataloaders/pytorch_dataloader.py b/neural_compressor/data/dataloaders/pytorch_dataloader.py index 
301519a8acf..85d4c9a4be8 100644 --- a/neural_compressor/data/dataloaders/pytorch_dataloader.py +++ b/neural_compressor/data/dataloaders/pytorch_dataloader.py @@ -14,21 +14,34 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Initialize the DATASETS class.""" import numpy as np + from neural_compressor.utils.utility import LazyImport + from .base_dataloader import BaseDataLoader -torch = LazyImport('torch') -hvd = LazyImport('horovod.torch') -class PyTorchDataLoader(BaseDataLoader): # pragma: no cover +torch = LazyImport("torch") +hvd = LazyImport("horovod.torch") + + +class PyTorchDataLoader(BaseDataLoader): # pragma: no cover """PyTorchDataLoader inherits from BaseDataLoader.""" - def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, - sampler, batch_sampler, num_workers, pin_memory, - shuffle, distributed): + def _generate_dataloader( + self, + dataset, + batch_size, + last_batch, + collate_fn, + sampler, + batch_sampler, + num_workers, + pin_memory, + shuffle, + distributed, + ): """Generate PyTorch dataloader. Args: @@ -46,15 +59,15 @@ def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, Returns: _type_: _description_ """ - drop_last = False if last_batch == 'rollover' else True - assert len(dataset) != 0, \ - "Warning: Dataset is empty, Please check dataset path!" + drop_last = False if last_batch == "rollover" else True + assert len(dataset) != 0, "Warning: Dataset is empty, Please check dataset path!" if distributed and sampler is None: # TODO: lazy init here hvd.init() # sampler option is mutually exclusive with shuffle pytorch self.sampler = sampler = torch.utils.data.distributed.DistributedSampler( - dataset, num_replicas=hvd.size(), rank=hvd.rank()) + dataset, num_replicas=hvd.size(), rank=hvd.rank() + ) return torch.utils.data.DataLoader( dataset, @@ -65,5 +78,5 @@ def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, num_workers=num_workers, pin_memory=pin_memory, sampler=sampler, - batch_sampler=batch_sampler) - + batch_sampler=batch_sampler, + ) diff --git a/neural_compressor/data/dataloaders/sampler.py b/neural_compressor/data/dataloaders/sampler.py index a383e6d9891..210fb7f0bfd 100644 --- a/neural_compressor/data/dataloaders/sampler.py +++ b/neural_compressor/data/dataloaders/sampler.py @@ -19,9 +19,10 @@ from abc import abstractmethod -class Sampler(object): # pragma: no cover + +class Sampler(object): # pragma: no cover """Base class for all Samplers. - + __iter__ is needed no matter whether you use IterableSampler or Squential sampler, if you want implement your own sampler, make clear what the type is your Dataset, if IterableDataset(method __iter__ implemented), try to use IterableSampler, @@ -39,9 +40,9 @@ def __iter__(self): raise NotImplementedError -class IterableSampler(Sampler): # pragma: no cover +class IterableSampler(Sampler): # pragma: no cover """Interally samples elements. - + Used for datasets retrieved element by interator. Yield None to act as a placeholder for each iteration. 
""" @@ -63,7 +64,8 @@ def __len__(self): """Return the length of dataset.""" raise NotImplementedError("'__len__' for IterableDataset object has not defined") -class SequentialSampler(Sampler): # pragma: no cover + +class SequentialSampler(Sampler): # pragma: no cover """Sequentially samples elements, used for datasets retrieved element by index.""" def __init__(self, dataset, distributed): @@ -78,26 +80,30 @@ def __init__(self, dataset, distributed): def __iter__(self): """Yield data in iterative order.""" - self.process_rank = 0 # The default rank is 0, which represents the main process - self.process_size = 1 # By default, process_size=1, only the main process is running + self.process_rank = 0 # The default rank is 0, which represents the main process + self.process_size = 1 # By default, process_size=1, only the main process is running if self.distributed: import horovod.tensorflow as hvd + hvd.init() self.process_rank = hvd.rank() self.process_size = hvd.size() if self.process_size < 2: - raise EnvironmentError("The program is now trying to traverse" \ - " the distributed TensorFlow DefaultDataLoader in only one process." \ - " If you do not want to use distributed DataLoader, please set" \ - " 'distributed: False'. Or If you want to use distributed DataLoader," \ - " please set 'distributed: True' and launch multiple processes.") + raise EnvironmentError( + "The program is now trying to traverse" + " the distributed TensorFlow DefaultDataLoader in only one process." + " If you do not want to use distributed DataLoader, please set" + " 'distributed: False'. Or If you want to use distributed DataLoader," + " please set 'distributed: True' and launch multiple processes." + ) return iter(range(self.process_rank, len(self.whole_dataset), self.process_size)) def __len__(self): """Return the length of dataset.""" return len(self.whole_dataset) -class BatchSampler(Sampler): # pragma: no cover + +class BatchSampler(Sampler): # pragma: no cover """Yield a batch of indices and number of batches.""" def __init__(self, sampler, batch_size, drop_last=True): diff --git a/neural_compressor/data/dataloaders/tensorflow_dataloader.py b/neural_compressor/data/dataloaders/tensorflow_dataloader.py index 57f76faa528..51db0ce8bbc 100644 --- a/neural_compressor/data/dataloaders/tensorflow_dataloader.py +++ b/neural_compressor/data/dataloaders/tensorflow_dataloader.py @@ -16,45 +16,48 @@ # limitations under the License. 
"""TensorFlow Dataloader implementation.""" -from neural_compressor.utils.utility import LazyImport -from abc import abstractmethod import collections -import numpy as np +import logging import sys +from abc import abstractmethod from math import ceil, floor -from .sampler import IterableSampler, SequentialSampler, BatchSampler -from .fetcher import FETCHERS -from .default_dataloader import default_collate -from .default_dataloader import DefaultDataLoader + +import numpy as np + +from neural_compressor.utils.utility import LazyImport + from ..datasets.bert_dataset import TensorflowBertDataset, TensorflowModelZooBertDataset from .base_dataloader import BaseDataLoader -import logging +from .default_dataloader import DefaultDataLoader, default_collate +from .fetcher import FETCHERS +from .sampler import BatchSampler, IterableSampler, SequentialSampler + +tf = LazyImport("tensorflow") +neural_compressor = LazyImport("neural_compressor") -tf = LazyImport('tensorflow') -neural_compressor = LazyImport('neural_compressor') -class TFDataDataLoader(BaseDataLoader): # pragma: no cover +class TFDataDataLoader(BaseDataLoader): # pragma: no cover """Tensorflow dataloader class. - + In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict method to do session run, this dataloader is designed to satisfy the usage of feed dict in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch. - + Args: dataset: obj. wrapper of needed data. batch_size: int. batch size """ - def __init__(self, dataset, batch_size=1, last_batch='rollover'): + def __init__(self, dataset, batch_size=1, last_batch="rollover"): """Initialize `TFDataDataLoader` class.""" self.dataset = dataset self.last_batch = last_batch self._batch_size = batch_size dataset = dataset.batch(batch_size) - def batch(self, batch_size, last_batch='rollover'): + def batch(self, batch_size, last_batch="rollover"): """Dataset return data per batch.""" - drop_last = False if last_batch == 'rollover' else True + drop_last = False if last_batch == "rollover" else True self._batch_size = batch_size self.dataset = self.dataset.batch(batch_size, drop_last) @@ -63,17 +66,26 @@ def __iter__(self): return self._generate_dataloader( self.dataset, batch_size=self.batch_size, - last_batch=self.last_batch,) + last_batch=self.last_batch, + ) - def _generate_dataloader(self, dataset, batch_size=1, last_batch='rollover', \ - collate_fn=None, sampler=None, batch_sampler=None, \ - num_workers=None, pin_memory=None, shuffle=False, \ - distributed=False): + def _generate_dataloader( + self, + dataset, + batch_size=1, + last_batch="rollover", + collate_fn=None, + sampler=None, + batch_sampler=None, + num_workers=None, + pin_memory=None, + shuffle=False, + distributed=False, + ): """Yield data.""" - drop_last = False if last_batch == 'rollover' else True + drop_last = False if last_batch == "rollover" else True if shuffle: - logging.warning('Shuffle is not supported yet in TFDataLoader, ' \ - 'ignoring shuffle keyword.') + logging.warning("Shuffle is not supported yet in TFDataLoader, " "ignoring shuffle keyword.") def check_dynamic_shape(element_spec): if isinstance(element_spec, collections.abc.Sequence): @@ -81,7 +93,7 @@ def check_dynamic_shape(element_spec): elif isinstance(element_spec, tf.TensorSpec): return True if element_spec.shape.num_elements() is None else False else: - raise ValueError('unrecognized element spec...') + raise ValueError("unrecognized element spec...") def squeeze_output(output): if isinstance(output, 
collections.abc.Sequence): @@ -89,19 +101,19 @@ def squeeze_output(output): elif isinstance(output, np.ndarray): return np.squeeze(output, axis=0) else: - raise ValueError('not supported output format....') + raise ValueError("not supported output format....") if tf.executing_eagerly(): index = 0 outputs = [] for iter_tensors in dataset: samples = [] - iter_inputs, iter_labels = iter_tensors[0],iter_tensors[1] + iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1] if isinstance(iter_inputs, tf.Tensor): samples.append(iter_inputs.numpy()) else: samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs)) - if isinstance(iter_labels,tf.Tensor): + if isinstance(iter_labels, tf.Tensor): samples.append(iter_labels.numpy()) else: samples.append([np.array(l) for l in iter_labels]) @@ -127,6 +139,7 @@ def squeeze_output(output): data_sess = tf.compat.v1.Session(config=data_config) # pylint: disable=no-name-in-module from tensorflow.python.framework.errors_impl import OutOfRangeError + while True: if not try_single_batch: try: @@ -152,27 +165,37 @@ def squeeze_output(output): data_sess.close() return -class TensorflowBertDataLoader(DefaultDataLoader): # pragma: no cover + +class TensorflowBertDataLoader(DefaultDataLoader): # pragma: no cover """Subclass of DefaultDataLoader. - + this dataloader is designed to satisfy the usage of Bert models. """ - def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, - sampler, batch_sampler, num_workers, pin_memory, shuffle, - distributed): - + def _generate_dataloader( + self, + dataset, + batch_size, + last_batch, + collate_fn, + sampler, + batch_sampler, + num_workers, + pin_memory, + shuffle, + distributed, + ): if shuffle: - logging.warning('Shuffle is not supported yet in TensorflowBertDataLoader, ' \ - 'ignoring shuffle keyword.') + logging.warning("Shuffle is not supported yet in TensorflowBertDataLoader, " "ignoring shuffle keyword.") + def bert_collate_fn(batch): elem = batch[0] return elem - drop_last = False if last_batch == 'rollover' else True + + drop_last = False if last_batch == "rollover" else True sampler = self._generate_sampler(dataset, distributed) self.batch_sampler = BatchSampler(sampler, batch_size, drop_last) - self.fetcher = FETCHERS[self.dataset_type]\ - (dataset, bert_collate_fn, drop_last, distributed) + self.fetcher = FETCHERS[self.dataset_type](dataset, bert_collate_fn, drop_last, distributed) for batched_indices in self.batch_sampler: try: @@ -181,19 +204,29 @@ def bert_collate_fn(batch): except StopIteration: return -class TensorflowModelZooBertDataLoader(DefaultDataLoader): # pragma: no cover + +class TensorflowModelZooBertDataLoader(DefaultDataLoader): # pragma: no cover """Subclass of DefaultDataLoader. - + this dataloader is designed to satisfy the usage of Model Zoo Bert models. 
""" - def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, - sampler, batch_sampler, num_workers, pin_memory, shuffle, - distributed): - + def _generate_dataloader( + self, + dataset, + batch_size, + last_batch, + collate_fn, + sampler, + batch_sampler, + num_workers, + pin_memory, + shuffle, + distributed, + ): if shuffle: - logging.warning('Shuffle is not supported yet in TensorflowBertDataLoader, ' \ - 'ignoring shuffle keyword.') + logging.warning("Shuffle is not supported yet in TensorflowBertDataLoader, " "ignoring shuffle keyword.") + def bert_collate_fn(batch): input_ids = [] input_mask = [] @@ -204,11 +237,11 @@ def bert_collate_fn(batch): segment_ids.append(elem[0][2][0]) inputs = [input_ids, input_mask, segment_ids] return inputs, batch[0][1] - drop_last = False if last_batch == 'rollover' else True + + drop_last = False if last_batch == "rollover" else True sampler = self._generate_sampler(dataset, distributed) self.batch_sampler = BatchSampler(sampler, batch_size, drop_last) - self.fetcher = FETCHERS[self.dataset_type]\ - (dataset, bert_collate_fn, drop_last, distributed) + self.fetcher = FETCHERS[self.dataset_type](dataset, bert_collate_fn, drop_last, distributed) inputs = [] for batched_indices in self.batch_sampler: @@ -218,65 +251,109 @@ def bert_collate_fn(batch): except StopIteration: return -class TensorflowDataLoader(BaseDataLoader): # pragma: no cover + +class TensorflowDataLoader(BaseDataLoader): # pragma: no cover """DataLoader for framework Tensorflow. - - if it's a tf.data.Dataset we will directly use the dataloader in the other case + + if it's a tf.data.Dataset we will directly use the dataloader in the other case will use DefaultDataLoader instead. """ - def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, \ - sampler, batch_sampler, num_workers, pin_memory, shuffle, distributed): - + def _generate_dataloader( + self, + dataset, + batch_size, + last_batch, + collate_fn, + sampler, + batch_sampler, + num_workers, + pin_memory, + shuffle, + distributed, + ): if shuffle: - logging.warning('Shuffle is not supported yet in TensorflowDataLoader, ' \ - 'ignoring shuffle keyword.') + logging.warning("Shuffle is not supported yet in TensorflowDataLoader, " "ignoring shuffle keyword.") if isinstance(dataset, tf.data.Dataset): if int(tf.__version__[0]) > 1: - has_batch = hasattr(dataset, '_batch_size') + has_batch = hasattr(dataset, "_batch_size") else: - has_batch = hasattr(dataset._dataset, '_batch_size') + has_batch = hasattr(dataset._dataset, "_batch_size") if has_batch: - raise TypeError(f"Parameter 'batch_size={batch_size}'" \ - " conflicts with 'tf.data.Dataset'," \ - f" because {dataset} is already a BatchDataset." \ - f" Please pass in 'tf.data.Dataset' without batch attributes.") - process_rank = 0 # The default rank is 0, which represents the main process - process_size = 1 # By default, process_size=1, only the main process is running + raise TypeError( + f"Parameter 'batch_size={batch_size}'" + " conflicts with 'tf.data.Dataset'," + f" because {dataset} is already a BatchDataset." + f" Please pass in 'tf.data.Dataset' without batch attributes." 
+ ) + process_rank = 0 # The default rank is 0, which represents the main process + process_size = 1 # By default, process_size=1, only the main process is running if self.distributed: import horovod.tensorflow as hvd + hvd.init() process_rank = hvd.rank() process_size = hvd.size() if process_size < 2: - raise EnvironmentError("The program is now trying to generate" \ - " the distributed TensorflowDataLoader in only one process." \ - " If you do not want to use distributed DataLoader, please set" \ - " 'distributed: False'. Or If you want to use distributed DataLoader," \ - " please set 'distributed: True' and launch multiple processes.") + raise EnvironmentError( + "The program is now trying to generate" + " the distributed TensorflowDataLoader in only one process." + " If you do not want to use distributed DataLoader, please set" + " 'distributed: False'. Or If you want to use distributed DataLoader," + " please set 'distributed: True' and launch multiple processes." + ) dataset = dataset.shard(process_size, process_rank) tf_dataloader = TFDataDataLoader(dataset, batch_size, last_batch=last_batch) return tf_dataloader elif isinstance(dataset, TensorflowBertDataset): if distributed: - raise NotImplementedError("Distributed TensorflowBertDataLoader" \ - " is not yet supported, please set 'distributed: False'") - tf_bert_dataloader = TensorflowBertDataLoader(dataset, batch_size, \ - last_batch, collate_fn, sampler, batch_sampler, \ - num_workers, pin_memory, shuffle, distributed) + raise NotImplementedError( + "Distributed TensorflowBertDataLoader" " is not yet supported, please set 'distributed: False'" + ) + tf_bert_dataloader = TensorflowBertDataLoader( + dataset, + batch_size, + last_batch, + collate_fn, + sampler, + batch_sampler, + num_workers, + pin_memory, + shuffle, + distributed, + ) return tf_bert_dataloader elif isinstance(dataset, TensorflowModelZooBertDataset): if distributed: - raise NotImplementedError("Distributed TensorflowBertDataLoader" \ - " is not yet supported, please set 'distributed: False'") - tf_bert_dataloader = TensorflowModelZooBertDataLoader(dataset, batch_size, \ - last_batch, collate_fn, sampler, batch_sampler, \ - num_workers, pin_memory, shuffle, distributed) + raise NotImplementedError( + "Distributed TensorflowBertDataLoader" " is not yet supported, please set 'distributed: False'" + ) + tf_bert_dataloader = TensorflowModelZooBertDataLoader( + dataset, + batch_size, + last_batch, + collate_fn, + sampler, + batch_sampler, + num_workers, + pin_memory, + shuffle, + distributed, + ) return tf_bert_dataloader else: - return DefaultDataLoader(dataset, batch_size, last_batch, collate_fn, - sampler, batch_sampler, num_workers, - pin_memory, shuffle, distributed) + return DefaultDataLoader( + dataset, + batch_size, + last_batch, + collate_fn, + sampler, + batch_sampler, + num_workers, + pin_memory, + shuffle, + distributed, + ) def __bool__(self): """Judgement if the dataloader exists.""" @@ -293,28 +370,34 @@ def __len__(self): dataset_len = 0 for _ in self.dataset: dataset_len += 1 - except RuntimeError: return sum([1 for _ in self]) + except RuntimeError: + return sum([1 for _ in self]) except Exception: - raise ValueError(f"{self.dataset} is invalid, {self.dataset}" \ - " does not support calculating the length of its dataloader") - process_rank = 0 # The default rank is 0, which represents the main process - process_size = 1 # By default, process_size=1, only the main process is running + raise ValueError( + f"{self.dataset} is invalid, {self.dataset}" + " 
does not support calculating the length of its dataloader" + ) + process_rank = 0 # The default rank is 0, which represents the main process + process_size = 1 # By default, process_size=1, only the main process is running if self.distributed: import horovod.tensorflow as hvd + hvd.init() process_rank = hvd.rank() process_size = hvd.size() if process_size < 2: - raise EnvironmentError("The program is now trying to get length of" \ - " the distributed TensorflowDataLoader in only one process." \ - " If you do not want to use distributed DataLoader, please set" \ - " 'distributed: False'. Or If you want to use distributed DataLoader," \ - " please set 'distributed: True' and launch multiple processes.") + raise EnvironmentError( + "The program is now trying to get length of" + " the distributed TensorflowDataLoader in only one process." + " If you do not want to use distributed DataLoader, please set" + " 'distributed: False'. Or If you want to use distributed DataLoader," + " please set 'distributed: True' and launch multiple processes." + ) if process_rank < (dataset_len % process_size): self.dis_dataset_len = dataset_len // process_size + 1 else: self.dis_dataset_len = dataset_len // process_size - if self.drop_last == False: + if self.drop_last is False: dataloader_len = ceil(self.dis_dataset_len / self.batch_size) else: dataloader_len = floor(self.dis_dataset_len / self.batch_size) diff --git a/neural_compressor/data/datasets/__init__.py b/neural_compressor/data/datasets/__init__.py index 44253088a64..56d73afe728 100644 --- a/neural_compressor/data/datasets/__init__.py +++ b/neural_compressor/data/datasets/__init__.py @@ -14,7 +14,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Built-in datasets class for multiple framework backends.""" from .coco_dataset import COCORecordDataset @@ -25,9 +24,8 @@ modules = glob.glob(join(dirname(__file__), "*.py")) for f in modules: - if isfile(f) and not f.startswith('__') and not f.endswith('__init__.py'): + if isfile(f) and not f.startswith("__") and not f.endswith("__init__.py"): __import__(basename(f)[:-3], globals(), locals(), level=1) -__all__ = ["Datasets", "Dataset", "IterableDataset", "dataset_registry", "TensorflowImageRecord", - "COCORecordDataset"] +__all__ = ["Datasets", "Dataset", "IterableDataset", "dataset_registry", "TensorflowImageRecord", "COCORecordDataset"] diff --git a/neural_compressor/data/datasets/bert_dataset.py b/neural_compressor/data/datasets/bert_dataset.py index 6402691a551..8b33c6efb48 100644 --- a/neural_compressor/data/datasets/bert_dataset.py +++ b/neural_compressor/data/datasets/bert_dataset.py @@ -14,26 +14,29 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
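As a concrete check of the distributed __len__ arithmetic in TensorflowDataLoader above: with 10 samples sharded across 3 Horovod processes, rank 0 owns 4 samples and ranks 1 and 2 own 3 each, so with batch_size=2 and last_batch='rollover' every rank reports 2 batches. The sketch below reproduces that arithmetic with illustrative numbers only.

from math import ceil

dataset_len, process_size, batch_size = 10, 3, 2
for process_rank in range(process_size):
    # a rank gets one extra sample while the remainder lasts, mirroring __len__ above
    dis_dataset_len = dataset_len // process_size + (1 if process_rank < dataset_len % process_size else 0)
    print(process_rank, dis_dataset_len, ceil(dis_dataset_len / batch_size))  # 0 4 2, 1 3 2, 2 3 2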
- """Built-in BERT datasets class for multiple framework backends.""" -import os -import logging -import json import dataclasses +import json +import logging +import os from dataclasses import dataclass from typing import List, Optional, Union + from neural_compressor.utils.utility import LazyImport -from .dataset import dataset_registry, Dataset -torch = LazyImport('torch') -transformers = LazyImport('transformers') + +from .dataset import Dataset, dataset_registry + +torch = LazyImport("torch") +transformers = LazyImport("transformers") logger = logging.getLogger("neural_compressor") -@dataset_registry(dataset_type="bert", framework="pytorch", dataset_format='') -class PytorchBertDataset(Dataset): # pragma: no cover + +@dataset_registry(dataset_type="bert", framework="pytorch", dataset_format="") +class PytorchBertDataset(Dataset): # pragma: no cover """PyTorch dataset used for model Bert. - + This Dataset is to construct from the Bert TensorDataset and not a full implementation from yaml config. The original repo link is: https://github.com/huggingface/transformers. When you want use this Dataset, you should add it before you initialize your DataLoader. @@ -66,7 +69,7 @@ class PytorchBertDataset(Dataset): # pragma: no cover transform=preprocess, filter=filter) """ - def __init__(self, dataset, task, model_type='bert', transform=None, filter=None): + def __init__(self, dataset, task, model_type="bert", transform=None, filter=None): """Initialize the attributes of class.""" self.dataset = dataset assert task in ("classifier", "squad"), "Bert task support only classifier squad" @@ -86,36 +89,37 @@ def __getitem__(self, index): sample = self.dataset[index] if self.transform is not None: sample = self.transform(sample) - if self.task == 'classifier': - inputs = { - 'input_ids': sample[0], - 'attention_mask': sample[1], - 'labels': sample[3]} + if self.task == "classifier": + inputs = {"input_ids": sample[0], "attention_mask": sample[1], "labels": sample[3]} - if self.model_type != 'distilbert': + if self.model_type != "distilbert": # XLM, DistilBERT and RoBERTa don't use segment_ids - if self.model_type in ['bert', 'xlnet']: - inputs['token_type_ids'] = sample[2] - sample = (inputs, inputs['labels']) + if self.model_type in ["bert", "xlnet"]: + inputs["token_type_ids"] = sample[2] + sample = (inputs, inputs["labels"]) - elif self.task == 'squad': + elif self.task == "squad": inputs = { - 'input_ids': sample[0], - 'attention_mask': sample[1], } - if self.model_type != 'distilbert': + "input_ids": sample[0], + "attention_mask": sample[1], + } + if self.model_type != "distilbert": # XLM, DistilBERT and RoBERTa don't use segment_ids - inputs['token_type_ids'] = sample[2] if self.model_type in [ - 'bert', 'xlnet'] else None - if self.model_type in ['xlnet', 'xlm']: - inputs.update({'cls_index': sample[4], 'p_mask': sample[5]}) + inputs["token_type_ids"] = sample[2] if self.model_type in ["bert", "xlnet"] else None + if self.model_type in ["xlnet", "xlm"]: + inputs.update({"cls_index": sample[4], "p_mask": sample[5]}) example_indices = sample[3] sample = (inputs, example_indices) return sample -@dataset_registry(dataset_type="GLUE", framework="onnxrt_qlinearops, \ - onnxrt_integerops", dataset_format='') -class ONNXRTBertDataset(Dataset): # pragma: no cover +@dataset_registry( + dataset_type="GLUE", + framework="onnxrt_qlinearops, \ + onnxrt_integerops", + dataset_format="", +) +class ONNXRTBertDataset(Dataset): # pragma: no cover """ONNXRT dataset used for model Bert. 
Args: data_dir (str): The input data dir. @@ -141,24 +145,39 @@ class ONNXRTBertDataset(Dataset): # pragma: no cover dataset = ONNXRTBertDataset(data_dir=data_dir, model_name_or_path='bert-base-uncase', transform=preprocess, filter=filter) """ - def __init__(self, data_dir, model_name_or_path, max_seq_length=128,\ - do_lower_case=True, task='mrpc', model_type='bert', dynamic_length=False,\ - evaluate=True, transform=None, filter=None): + + def __init__( + self, + data_dir, + model_name_or_path, + max_seq_length=128, + do_lower_case=True, + task="mrpc", + model_type="bert", + dynamic_length=False, + evaluate=True, + transform=None, + filter=None, + ): """Initialize the attributes of class.""" task = task.lower() model_type = model_type.lower() - assert task in ['mrpc', 'qqp', 'qnli', 'rte', 'sts-b', 'cola', \ - 'mnli', 'wnli'], 'Unsupported task type' - assert model_type in ['distilbert', 'bert', 'mobilebert', 'roberta'], 'Unsupported \ - model type' + assert task in ["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", "mnli", "wnli"], "Unsupported task type" + assert model_type in [ + "distilbert", + "bert", + "mobilebert", + "roberta", + ], "Unsupported \ + model type" self.dynamic_length = dynamic_length self.model_type = model_type self.max_seq_length = max_seq_length - tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path, - do_lower_case=do_lower_case) - self.dataset = load_and_cache_examples(data_dir, model_name_or_path, \ - max_seq_length, task, model_type, tokenizer, evaluate) + tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path, do_lower_case=do_lower_case) + self.dataset = load_and_cache_examples( + data_dir, model_name_or_path, max_seq_length, task, model_type, tokenizer, evaluate + ) def __len__(self): """Length of the dataset.""" @@ -172,8 +191,9 @@ def __getitem__(self, index): return self.dataset[index] -def load_and_cache_examples(data_dir, model_name_or_path, max_seq_length, task, \ - model_type, tokenizer, evaluate): # pragma: no cover +def load_and_cache_examples( + data_dir, model_name_or_path, max_seq_length, task, model_type, tokenizer, evaluate +): # pragma: no cover """Load and cache the examples. Helper Function for ONNXRTBertDataset. 
@@ -185,28 +205,32 @@ def load_and_cache_examples(data_dir, model_name_or_path, max_seq_length, task, # Load data features from cache or dataset file if not os.path.exists("./dataset_cached"): os.makedirs("./dataset_cached") - cached_features_file = os.path.join("./dataset_cached", 'cached_{}_{}_{}_{}'.format( - 'dev' if evaluate else 'train', - list(filter(None, model_name_or_path.split('/'))).pop(), - str(max_seq_length), - str(task))) + cached_features_file = os.path.join( + "./dataset_cached", + "cached_{}_{}_{}_{}".format( + "dev" if evaluate else "train", + list(filter(None, model_name_or_path.split("/"))).pop(), + str(max_seq_length), + str(task), + ), + ) if os.path.exists(cached_features_file): logger.info("Load features from cached file {}.".format(cached_features_file)) features = torch.load(cached_features_file) else: logger.info("Create features from dataset file at {}.".format(data_dir)) label_list = processor.get_labels() - if task in ['mnli', 'mnli-mm'] and model_type in ['roberta']: + if task in ["mnli", "mnli-mm"] and model_type in ["roberta"]: # HACK(label indices are swapped in RoBERTa pretrained model) label_list[1], label_list[2] = label_list[2], label_list[1] - examples = processor.get_dev_examples(data_dir) if evaluate else \ - processor.get_train_examples(data_dir) - features = convert_examples_to_features(examples, - tokenizer, - task=task, - label_list=label_list, - max_length=max_seq_length, - output_mode=output_mode, + examples = processor.get_dev_examples(data_dir) if evaluate else processor.get_train_examples(data_dir) + features = convert_examples_to_features( + examples, + tokenizer, + task=task, + label_list=label_list, + max_length=max_seq_length, + output_mode=output_mode, ) logger.info("Save features into cached file {}.".format(cached_features_file)) torch.save(features, cached_features_file) @@ -219,8 +243,7 @@ def load_and_cache_examples(data_dir, model_name_or_path, max_seq_length, task, all_labels = torch.tensor([f.label for f in features], dtype=torch.long) elif output_mode == "regression": all_labels = torch.tensor([f.label for f in features], dtype=torch.float) - dataset = TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids, \ - all_seq_lengths, all_labels) + dataset = TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids, all_seq_lengths, all_labels) return dataset @@ -234,7 +257,7 @@ def convert_examples_to_features( pad_token=0, pad_token_segment_id=0, mask_padding_with_zero=True, - ): # pragma: no cover +): # pragma: no cover """Convert examples to features. Helper function for load_and_cache_examples. 
@@ -245,7 +268,7 @@ def convert_examples_to_features( logger.info("Use label list {} for task {}.".format(label_list, task)) label_map = {label: i for i, label in enumerate(label_list)} features = [] - for (ex_index, example) in enumerate(examples): + for ex_index, example in enumerate(examples): inputs = tokenizer.encode_plus( example.text_a, example.text_b, @@ -264,19 +287,14 @@ def convert_examples_to_features( padding_length = max_length - len(input_ids) input_ids = input_ids + ([pad_token] * padding_length) - attention_mask = attention_mask + \ - ([0 if mask_padding_with_zero else 1] * padding_length) + attention_mask = attention_mask + ([0 if mask_padding_with_zero else 1] * padding_length) token_type_ids = token_type_ids + ([pad_token_segment_id] * padding_length) - assert len(input_ids) == max_length, \ - "Error with input_ids length {} vs {}".format( - len(input_ids), max_length) - assert len(attention_mask) == max_length, \ - "Error with attention_mask length {} vs {}".format( + assert len(input_ids) == max_length, "Error with input_ids length {} vs {}".format(len(input_ids), max_length) + assert len(attention_mask) == max_length, "Error with attention_mask length {} vs {}".format( len(attention_mask), max_length ) - assert len(token_type_ids) == max_length, \ - "Error with token_type_ids length {} vs {}".format( + assert len(token_type_ids) == max_length, "Error with token_type_ids length {} vs {}".format( len(token_type_ids), max_length ) if output_mode == "classification": @@ -298,7 +316,7 @@ def convert_examples_to_features( @dataclass(frozen=True) -class InputFeatures: # pragma: no cover +class InputFeatures: # pragma: no cover """Single set of features of data. Property names are the same names as the corresponding inputs to a model. @@ -326,8 +344,8 @@ def to_json_string(self): return json.dumps(dataclasses.asdict(self)) + "\n" -@dataset_registry(dataset_type="bert", framework="tensorflow, tensorflow_itex", dataset_format='') -class TensorflowBertDataset(Dataset): # pragma: no cover +@dataset_registry(dataset_type="bert", framework="tensorflow, tensorflow_itex", dataset_format="") +class TensorflowBertDataset(Dataset): # pragma: no cover """Tensorflow dataset used for model Bert. This dataset supports tfrecord data, please refer to Guide to create tfrecord file first. @@ -341,14 +359,14 @@ class TensorflowBertDataset(Dataset): # pragma: no cover to specific conditions """ - def __init__(self, root, label_file, task='squad', - model_type='bert', transform=None, filter=None): + def __init__(self, root, label_file, task="squad", model_type="bert", transform=None, filter=None): """Initialize the attributes of class.""" import json + with open(label_file) as lf: label_json = json.load(lf) - assert label_json['version'] == '1.1', 'only support squad 1.1' - self.label = label_json['data'] + assert label_json["version"] == "1.1", "only support squad 1.1" + self.label = label_json["data"] self.root = root self.transform = transform self.filter = filter @@ -365,7 +383,7 @@ def __len__(self): return 1 -class ParseDecodeBert(): # pragma: no cover +class ParseDecodeBert: # pragma: no cover """Helper function for TensorflowModelZooBertDataset. Parse the features from sample. @@ -378,26 +396,25 @@ def __call__(self, sample): sample: Data to be parsed. """ import tensorflow as tf + # Dense features in Example proto. 
feature_map = { - 'input_ids': - tf.compat.v1.VarLenFeature(dtype=tf.int64), - 'input_mask': - tf.compat.v1.VarLenFeature(dtype=tf.int64), - 'segment_ids': - tf.compat.v1.VarLenFeature(dtype=tf.int64), + "input_ids": tf.compat.v1.VarLenFeature(dtype=tf.int64), + "input_mask": tf.compat.v1.VarLenFeature(dtype=tf.int64), + "segment_ids": tf.compat.v1.VarLenFeature(dtype=tf.int64), } features = tf.io.parse_single_example(sample, feature_map) - input_ids = features['input_ids'].values - input_mask = features['input_mask'].values - segment_ids = features['segment_ids'].values + input_ids = features["input_ids"].values + input_mask = features["input_mask"].values + segment_ids = features["segment_ids"].values return (input_ids, input_mask, segment_ids) -@dataset_registry(dataset_type="mzbert", framework="tensorflow, tensorflow_itex", dataset_format='') -class TensorflowModelZooBertDataset(Dataset): # pragma: no cover + +@dataset_registry(dataset_type="mzbert", framework="tensorflow, tensorflow_itex", dataset_format="") +class TensorflowModelZooBertDataset(Dataset): # pragma: no cover """Tensorflow dataset for three-input Bert in tf record format. Root is a full path to tfrecord file, which contains the file name. @@ -410,37 +427,43 @@ class TensorflowModelZooBertDataset(Dataset): # pragma: no cover filter (Filter objects, default=None): filter out examples according. """ - def __init__(self, root, label_file, task='squad', - model_type='bert', transform=None, filter=None, num_cores=28): + def __init__(self, root, label_file, task="squad", model_type="bert", transform=None, filter=None, num_cores=28): """Initialize the attributes of class.""" import json + with open(label_file) as lf: label_json = json.load(lf) - assert label_json['version'] == '1.1', 'only support squad 1.1' - self.label = label_json['data'] + assert label_json["version"] == "1.1", "only support squad 1.1" + self.label = label_json["data"] import tensorflow as tf + record_iterator = tf.compat.v1.python_io.tf_record_iterator(root) example = tf.train.SequenceExample() for element in record_iterator: example.ParseFromString(element) break feature = example.context.feature - if len(feature['input_ids'].int64_list.value) == 0 \ - and len(feature['input_mask'].int64_list.value) == 0: - raise ValueError("Tfrecord format is incorrect, please refer\ + if len(feature["input_ids"].int64_list.value) == 0 and len(feature["input_mask"].int64_list.value) == 0: + raise ValueError( + "Tfrecord format is incorrect, please refer\ 'https://github.com/tensorflow/models/blob/master/research/\ - object_detection/dataset_tools/' to create correct tfrecord") + object_detection/dataset_tools/' to create correct tfrecord" + ) # pylint: disable=no-name-in-module from tensorflow.python.data.experimental import parallel_interleave + tfrecord_paths = [root] ds = tf.data.TFRecordDataset.list_files(tfrecord_paths) ds = ds.apply( - parallel_interleave(tf.data.TFRecordDataset, - cycle_length=num_cores, - block_length=5, - sloppy=True, - buffer_output_elements=10000, - prefetch_input_elements=10000)) + parallel_interleave( + tf.data.TFRecordDataset, + cycle_length=num_cores, + block_length=5, + sloppy=True, + buffer_output_elements=10000, + prefetch_input_elements=10000, + ) + ) if transform is not None: transform.transform_list.insert(0, ParseDecodeBert()) else: @@ -450,6 +473,7 @@ def __init__(self, root, label_file, task='squad', ds = ds.filter(filter) ds = ds.prefetch(buffer_size=1000) from ..dataloaders.tensorflow_dataloader import TFDataDataLoader + ds = 
TFDataDataLoader(ds) self.root = [] for inputs in ds: diff --git a/neural_compressor/data/datasets/coco_dataset.py b/neural_compressor/data/datasets/coco_dataset.py index b4988973e7f..b90e881ce2d 100644 --- a/neural_compressor/data/datasets/coco_dataset.py +++ b/neural_compressor/data/datasets/coco_dataset.py @@ -29,18 +29,20 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== - """Built-in COCO datasets class for multiple framework backends.""" from PIL import Image + from neural_compressor.utils.utility import LazyImport -from .dataset import dataset_registry, IterableDataset, Dataset -tf = LazyImport('tensorflow') -mx = LazyImport('mxnet') -torch = LazyImport('torch') +from .dataset import Dataset, IterableDataset, dataset_registry + +tf = LazyImport("tensorflow") +mx = LazyImport("mxnet") +torch = LazyImport("torch") + -class ParseDecodeCoco(): # pragma: no cover +class ParseDecodeCoco: # pragma: no cover """Helper function for TensorflowModelZooBertDataset. Parse the features from sample. @@ -54,30 +56,31 @@ def __call__(self, sample): """ # Dense features in Example proto. feature_map = { - 'image/encoded': - tf.compat.v1.FixedLenFeature([], dtype=tf.string, default_value=''), - 'image/object/class/text': - tf.compat.v1.VarLenFeature(dtype=tf.string), - 'image/object/class/label': - tf.compat.v1.VarLenFeature(dtype=tf.int64), - 'image/source_id':tf.compat.v1.FixedLenFeature([], dtype=tf.string, default_value=''), + "image/encoded": tf.compat.v1.FixedLenFeature([], dtype=tf.string, default_value=""), + "image/object/class/text": tf.compat.v1.VarLenFeature(dtype=tf.string), + "image/object/class/label": tf.compat.v1.VarLenFeature(dtype=tf.int64), + "image/source_id": tf.compat.v1.FixedLenFeature([], dtype=tf.string, default_value=""), } sparse_float32 = tf.compat.v1.VarLenFeature(dtype=tf.float32) # Sparse features in Example proto. 
- feature_map.update({ - k: sparse_float32 - for k in [ - 'image/object/bbox/xmin', 'image/object/bbox/ymin', - 'image/object/bbox/xmax', 'image/object/bbox/ymax' - ] - }) + feature_map.update( + { + k: sparse_float32 + for k in [ + "image/object/bbox/xmin", + "image/object/bbox/ymin", + "image/object/bbox/xmax", + "image/object/bbox/ymax", + ] + } + ) features = tf.io.parse_single_example(sample, feature_map) - xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) - ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0) - xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0) - ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0) + xmin = tf.expand_dims(features["image/object/bbox/xmin"].values, 0) + ymin = tf.expand_dims(features["image/object/bbox/ymin"].values, 0) + xmax = tf.expand_dims(features["image/object/bbox/xmax"].values, 0) + ymax = tf.expand_dims(features["image/object/bbox/ymax"].values, 0) bbox = tf.concat([ymin, xmin, ymax, xmax], 0) # Force the variable number of bounding boxes into the shape @@ -85,18 +88,19 @@ def __call__(self, sample): bbox = tf.expand_dims(bbox, 0) bbox = tf.transpose(bbox, [0, 2, 1]) - encoded_image = features['image/encoded'] + encoded_image = features["image/encoded"] image_tensor = tf.image.decode_image(encoded_image, channels=3) image_tensor.set_shape([None, None, 3]) - str_label = features['image/object/class/text'].values - int_label = features['image/object/class/label'].values - image_id = features['image/source_id'] + str_label = features["image/object/class/text"].values + int_label = features["image/object/class/label"].values + image_id = features["image/source_id"] return image_tensor, (bbox[0], str_label, int_label, image_id) -@dataset_registry(dataset_type="COCORecord", framework="tensorflow, tensorflow_itex", dataset_format='') -class COCORecordDataset(IterableDataset): # pragma: no cover + +@dataset_registry(dataset_type="COCORecord", framework="tensorflow, tensorflow_itex", dataset_format="") +class COCORecordDataset(IterableDataset): # pragma: no cover """Tensorflow COCO dataset in tf record format. Root is a full path to tfrecord file, which contains the file name. @@ -105,7 +109,7 @@ class COCORecordDataset(IterableDataset): # pragma: no cover Args: root (str): Root directory of dataset. num_cores (int, default=28):The number of input Datasets to interleave from in parallel. transform (transform object, default=None): transform to process input data. - filter (Filter objects, default=None): filter out examples according + filter (Filter objects, default=None): filter out examples according to specific conditions. 
""" @@ -117,23 +121,31 @@ def __new__(cls, root, num_cores=28, transform=None, filter=filter): example.ParseFromString(element) break feature = example.context.feature - if len(feature['image/object/class/text'].bytes_list.value) == 0 \ - and len(feature['image/object/class/label'].int64_list.value) == 0: - raise ValueError("Tfrecord format is incorrect, please refer\ + if ( + len(feature["image/object/class/text"].bytes_list.value) == 0 + and len(feature["image/object/class/label"].int64_list.value) == 0 + ): + raise ValueError( + "Tfrecord format is incorrect, please refer\ 'https://github.com/tensorflow/models/blob/master/research/\ object_detection/dataset_tools/create_coco_tf_record.py' to\ - create correct tfrecord") + create correct tfrecord" + ) # pylint: disable=no-name-in-module from tensorflow.python.data.experimental import parallel_interleave + tfrecord_paths = [root] ds = tf.data.TFRecordDataset.list_files(tfrecord_paths) ds = ds.apply( - parallel_interleave(tf.data.TFRecordDataset, - cycle_length=num_cores, - block_length=5, - sloppy=True, - buffer_output_elements=10000, - prefetch_input_elements=10000)) + parallel_interleave( + tf.data.TFRecordDataset, + cycle_length=num_cores, + block_length=5, + sloppy=True, + buffer_output_elements=10000, + prefetch_input_elements=10000, + ) + ) if transform is not None: transform.transform_list.insert(0, ParseDecodeCoco()) else: @@ -144,10 +156,15 @@ def __new__(cls, root, num_cores=28, transform=None, filter=filter): ds = ds.prefetch(buffer_size=1000) return ds -@dataset_registry(dataset_type="COCORaw", framework="onnxrt_qlinearops, \ + +@dataset_registry( + dataset_type="COCORaw", + framework="onnxrt_qlinearops, \ onnxrt_integerops, pytorch, mxnet, tensorflow, \ - tensorflow_itex", dataset_format='') -class COCORaw(Dataset): # pragma: no cover + tensorflow_itex", + dataset_format="", +) +class COCORaw(Dataset): # pragma: no cover """Coco raw dataset. Please arrange data in this way: @@ -162,17 +179,20 @@ class COCORaw(Dataset): # pragma: no cover img_dir (str, default='val2017'): image file directory. anno_dir (str, default='annotations/instances_val2017.json'): annotation file directory. transform (transform object, default=None): transform to process input data. - filter (Filter objects, default=None): filter out examples according + filter (Filter objects, default=None): filter out examples according to specific conditions. 
""" - def __init__(self, root, img_dir='val2017', \ - anno_dir='annotations/instances_val2017.json', transform=None, filter=filter): + def __init__( + self, root, img_dir="val2017", anno_dir="annotations/instances_val2017.json", transform=None, filter=filter + ): """Initialize the attributes of class.""" import json import os + import numpy as np from pycocotools.coco import COCO + self.image_list = [] self.transform = transform img_path = os.path.join(root, img_dir) @@ -186,21 +206,25 @@ def __init__(self, root, img_dir='val2017', \ labels = [] ids = [] img_detail = coco.loadImgs(img_id)[0] - ids.append(img_detail['file_name'].encode('utf-8')) - pic_height = img_detail['height'] - pic_width = img_detail['width'] + ids.append(img_detail["file_name"].encode("utf-8")) + pic_height = img_detail["height"] + pic_width = img_detail["width"] - ann_ids = coco.getAnnIds(imgIds=img_id,catIds=cat_ids) + ann_ids = coco.getAnnIds(imgIds=img_id, catIds=cat_ids) anns = coco.loadAnns(ann_ids) for ann in anns: - bbox = ann['bbox'] + bbox = ann["bbox"] if len(bbox) == 0: continue - bbox = [bbox[0]/float(pic_width), bbox[1]/float(pic_height),\ - bbox[2]/float(pic_width), bbox[3]/float(pic_height)] - bboxes.append([bbox[1], bbox[0], bbox[1]+bbox[3], bbox[0]+bbox[2]]) - labels.append(coco.cats[ann['category_id']]['name'].encode('utf8')) - img_file = os.path.join(img_path, img_detail['file_name']) + bbox = [ + bbox[0] / float(pic_width), + bbox[1] / float(pic_height), + bbox[2] / float(pic_width), + bbox[3] / float(pic_height), + ] + bboxes.append([bbox[1], bbox[0], bbox[1] + bbox[3], bbox[0] + bbox[2]]) + labels.append(coco.cats[ann["category_id"]]["name"].encode("utf8")) + img_file = os.path.join(img_path, img_detail["file_name"]) if not os.path.exists(img_file) or len(bboxes) == 0: continue @@ -208,10 +232,18 @@ def __init__(self, root, img_dir='val2017', \ continue with Image.open(img_file) as image: - image = np.array(image.convert('RGB')) + image = np.array(image.convert("RGB")) self.image_list.append( - (image, [np.array(bboxes), np.array(labels), np.array([]),\ - np.array(img_detail['file_name'].encode('utf-8'))])) + ( + image, + [ + np.array(bboxes), + np.array(labels), + np.array([]), + np.array(img_detail["file_name"].encode("utf-8")), + ], + ) + ) def __len__(self): """Length of the dataset.""" @@ -224,13 +256,18 @@ def __getitem__(self, index): """ sample = self.image_list[index] if self.transform is not None: - sample= self.transform(sample) + sample = self.transform(sample) return sample -@dataset_registry(dataset_type="COCONpy", framework="onnxrt_qlinearops, \ + +@dataset_registry( + dataset_type="COCONpy", + framework="onnxrt_qlinearops, \ onnxrt_integerops, pytorch, mxnet, tensorflow, \ - tensorflow_itex", dataset_format='') -class COCONpy(Dataset): # pragma: no cover + tensorflow_itex", + dataset_format="", +) +class COCONpy(Dataset): # pragma: no cover """COCO npy dataset. Please arrange data in this way: @@ -244,17 +281,20 @@ class COCONpy(Dataset): # pragma: no cover npy_dir (str, default='val2017'): npy file directory. anno_dir (str, default='annotations/instances_val2017.json'): annotation file directory. transform (transform object, default=None): transform to process input data. - filter (Filter objects, default=None): filter out examples according + filter (Filter objects, default=None): filter out examples according to specific conditions. 
""" - def __init__(self, root, npy_dir='val2017', \ - anno_dir='annotations/instances_val2017.json', transform=None, filter=None): + def __init__( + self, root, npy_dir="val2017", anno_dir="annotations/instances_val2017.json", transform=None, filter=None + ): """Initialize the attributes of class.""" import json import os + import numpy as np from pycocotools.coco import COCO + self.image_list = [] npy_path = os.path.join(root, npy_dir) anno_path = os.path.join(root, anno_dir) @@ -266,26 +306,25 @@ def __init__(self, root, npy_dir='val2017', \ labels = [] ids = [] img_detail = coco.loadImgs(img_id)[0] - ids.append(img_detail['file_name'].encode('utf-8')) - pic_height = img_detail['height'] - pic_width = img_detail['width'] + ids.append(img_detail["file_name"].encode("utf-8")) + pic_height = img_detail["height"] + pic_width = img_detail["width"] - ann_ids = coco.getAnnIds(imgIds=img_id,catIds=cat_ids) + ann_ids = coco.getAnnIds(imgIds=img_id, catIds=cat_ids) anns = coco.loadAnns(ann_ids) for ann in anns: - bbox = ann['bbox'] - category_id = ann['category_id'] + bbox = ann["bbox"] + category_id = ann["category_id"] if len(bbox) == 0: continue labels.append((np.array(category_id), np.array(bbox))) - npy_file = os.path.join(npy_path, img_detail['file_name']) + npy_file = os.path.join(npy_path, img_detail["file_name"]) npy_file = npy_file + ".npy" if not os.path.exists(npy_file): continue image = np.load(npy_file) - self.image_list.append( - (image, labels)) + self.image_list.append((image, labels)) def __len__(self): """Length of the dataset.""" diff --git a/neural_compressor/data/datasets/dataset.py b/neural_compressor/data/datasets/dataset.py index 6116c91c631..1d57ca162eb 100644 --- a/neural_compressor/data/datasets/dataset.py +++ b/neural_compressor/data/datasets/dataset.py @@ -14,28 +14,30 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- """This is the base class for each framework.""" -from abc import abstractmethod import os -from neural_compressor.utils.utility import LazyImport, singleton +from abc import abstractmethod + from PIL import Image -torch = LazyImport('torch') -torchvision = LazyImport('torchvision') -tf = LazyImport('tensorflow') -mx = LazyImport('mxnet') -np = LazyImport('numpy') -hashlib = LazyImport('hashlib') -gzip = LazyImport('gzip') -tarfile = LazyImport('tarfile') -zipfile = LazyImport('zipfile') -pickle = LazyImport('pickle') -glob = LazyImport('glob') + +from neural_compressor.utils.utility import LazyImport, singleton + +torch = LazyImport("torch") +torchvision = LazyImport("torchvision") +tf = LazyImport("tensorflow") +mx = LazyImport("mxnet") +np = LazyImport("numpy") +hashlib = LazyImport("hashlib") +gzip = LazyImport("gzip") +tarfile = LazyImport("tarfile") +zipfile = LazyImport("zipfile") +pickle = LazyImport("pickle") +glob = LazyImport("glob") @singleton -class TensorflowDatasets(object): # pragma: no cover +class TensorflowDatasets(object): # pragma: no cover """The base class of Tensorflow datasets class.""" def __init__(self): @@ -45,20 +47,19 @@ def __init__(self): @singleton -class PyTorchDatasets(object): # pragma: no cover +class PyTorchDatasets(object): # pragma: no cover """The base class of PyTorch datasets class.""" def __init__(self): """Initialize the attributes of class.""" self.datasets = { - 'ImageFolder': PytorchMxnetWrapDataset( - torchvision.datasets.ImageFolder), + "ImageFolder": PytorchMxnetWrapDataset(torchvision.datasets.ImageFolder), } self.datasets.update(PYTORCH_DATASETS) @singleton -class MXNetDatasets(object): # pragma: no cover +class MXNetDatasets(object): # pragma: no cover """The base class of MXNet datasets class.""" def __init__(self): @@ -68,7 +69,7 @@ def __init__(self): @singleton -class ONNXRTQLDatasets(object): # pragma: no cover +class ONNXRTQLDatasets(object): # pragma: no cover """The base class of ONNXRT QLinear datasets class.""" def __init__(self): @@ -78,7 +79,7 @@ def __init__(self): @singleton -class ONNXRTITDatasets(object): # pragma: no cover +class ONNXRTITDatasets(object): # pragma: no cover """The base class of ONNXRT IT datasets class.""" def __init__(self): @@ -87,7 +88,7 @@ def __init__(self): self.datasets.update(ONNXRTIT_DATASETS) -class PytorchMxnetWrapDataset(): # pragma: no cover +class PytorchMxnetWrapDataset: # pragma: no cover """The base class for PyTorch and MXNet frameworks. Args: @@ -100,11 +101,10 @@ def __init__(self, datafunc): def __call__(self, transform=None, filter=None, *args, **kwargs): """Wrap the dataset for PyTorch and MXNet framework.""" - return PytorchMxnetWrapFunction(self.datafunc, transform=transform, \ - filter=filter, *args, **kwargs) + return PytorchMxnetWrapFunction(self.datafunc, transform=transform, filter=filter, *args, **kwargs) -class PytorchMxnetWrapFunction(): # pragma: no cover +class PytorchMxnetWrapFunction: # pragma: no cover """The Helper class for PytorchMxnetWrapDataset. 
Args: @@ -135,30 +135,30 @@ def __getitem__(self, index): return sample -framework_datasets = {"tensorflow": TensorflowDatasets, - "tensorflow_itex": TensorflowDatasets, - "mxnet": MXNetDatasets, - "pytorch": PyTorchDatasets, - "pytorch_ipex": PyTorchDatasets, - "pytorch_fx": PyTorchDatasets, - "onnxrt_qdq": ONNXRTQLDatasets, - "onnxrt_qlinearops": ONNXRTQLDatasets, - "onnxruntime": ONNXRTQLDatasets, - "onnxrt_integerops": ONNXRTITDatasets, - } - +framework_datasets = { + "tensorflow": TensorflowDatasets, + "tensorflow_itex": TensorflowDatasets, + "mxnet": MXNetDatasets, + "pytorch": PyTorchDatasets, + "pytorch_ipex": PyTorchDatasets, + "pytorch_fx": PyTorchDatasets, + "onnxrt_qdq": ONNXRTQLDatasets, + "onnxrt_qlinearops": ONNXRTQLDatasets, + "onnxruntime": ONNXRTQLDatasets, + "onnxrt_integerops": ONNXRTITDatasets, +} """The datasets supported by neural_compressor, it's model specific and can be configured by yaml file. - User could add new datasets by implementing new Dataset subclass under this directory. - The naming convention of new dataset subclass should be something like ImageClassifier, user - could choose this dataset by setting "imageclassifier" string in tuning.strategy field of yaml. +User could add new datasets by implementing new Dataset subclass under this directory. +The naming convention of new dataset subclass should be something like ImageClassifier, user +could choose this dataset by setting "imageclassifier" string in tuning.strategy field of yaml. - Datasets variable is used to store all implemented Dataset subclasses to support - model specific dataset. +Datasets variable is used to store all implemented Dataset subclasses to support +model specific dataset. """ -class Datasets(object): # pragma: no cover +class Datasets(object): # pragma: no cover """A base class for all framework datasets. 
Args: @@ -169,10 +169,19 @@ class Datasets(object): # pragma: no cover def __init__(self, framework): """Initialize the attributes of class.""" - assert framework in ["tensorflow", "tensorflow_itex", "keras",\ - "mxnet", "onnxrt_qdq", "onnxrt_qlinearops", "onnxrt_integerops", \ - "pytorch", "pytorch_ipex", "pytorch_fx", "onnxruntime"], \ - "framework support tensorflow pytorch mxnet onnxrt" + assert framework in [ + "tensorflow", + "tensorflow_itex", + "keras", + "mxnet", + "onnxrt_qdq", + "onnxrt_qlinearops", + "onnxrt_integerops", + "pytorch", + "pytorch_ipex", + "pytorch_fx", + "onnxruntime", + ], "framework support tensorflow pytorch mxnet onnxrt" self.datasets = framework_datasets[framework]().datasets def __getitem__(self, dataset_type): @@ -180,8 +189,7 @@ def __getitem__(self, dataset_type): x[i] is roughly equivalent to type(x).__getitem__(x, index) """ - assert dataset_type in self.datasets.keys(), "dataset type only support {}".\ - format(self.datasets.keys()) + assert dataset_type in self.datasets.keys(), "dataset type only support {}".format(self.datasets.keys()) return self.datasets[dataset_type] @@ -195,20 +203,21 @@ def __getitem__(self, dataset_type): ONNXRTQL_DATASETS = {} ONNXRTIT_DATASETS = {} -registry_datasets = {"tensorflow": TENSORFLOW_DATASETS, - "tensorflow_itex": TENSORFLOWITEX_DATASETS, - "mxnet": MXNET_DATASETS, - "pytorch": PYTORCH_DATASETS, - "pytorch_ipex": PYTORCHIPEX_DATASETS, - "pytorch_fx": PYTORCHFX_DATASETS, - "onnxrt_integerops": ONNXRTIT_DATASETS, - "onnxrt_qdq": ONNXRTQL_DATASETS, - "onnxruntime": ONNXRTQL_DATASETS, - "onnxrt_qlinearops": ONNXRTQL_DATASETS, - } - - -def dataset_registry(dataset_type, framework, dataset_format=''): # pragma: no cover +registry_datasets = { + "tensorflow": TENSORFLOW_DATASETS, + "tensorflow_itex": TENSORFLOWITEX_DATASETS, + "mxnet": MXNET_DATASETS, + "pytorch": PYTORCH_DATASETS, + "pytorch_ipex": PYTORCHIPEX_DATASETS, + "pytorch_fx": PYTORCHFX_DATASETS, + "onnxrt_integerops": ONNXRTIT_DATASETS, + "onnxrt_qdq": ONNXRTQL_DATASETS, + "onnxruntime": ONNXRTQL_DATASETS, + "onnxrt_qlinearops": ONNXRTQL_DATASETS, +} + + +def dataset_registry(dataset_type, framework, dataset_format=""): # pragma: no cover """Register dataset subclasses. Args: @@ -220,8 +229,9 @@ def dataset_registry(dataset_type, framework, dataset_format=''): # pragma: n Returns: cls: The class of register. """ + def decorator_dataset(cls): - for single_framework in [fwk.strip() for fwk in framework.split(',')]: + for single_framework in [fwk.strip() for fwk in framework.split(",")]: assert single_framework in [ "tensorflow", "tensorflow_itex", @@ -236,13 +246,14 @@ def decorator_dataset(cls): ], "The framework support tensorflow mxnet pytorch onnxrt" dataset_name = dataset_type + dataset_format if dataset_name in registry_datasets[single_framework].keys(): - raise ValueError('Cannot have two datasets with the same name') + raise ValueError("Cannot have two datasets with the same name") registry_datasets[single_framework][dataset_name] = cls return cls + return decorator_dataset -class Dataset(object): # pragma: no cover +class Dataset(object): # pragma: no cover """The base class of dataset. Subclass datasets should overwrite two methods: @@ -263,7 +274,7 @@ def __getitem__(self, index): # raise NotImplementedError -class IterableDataset(object): # pragma: no cover +class IterableDataset(object): # pragma: no cover """An iterable Dataset. 
Subclass iterable dataset should also implement a method: @@ -289,6 +300,7 @@ def download_url(url, root, filename=None, md5=None): # pragma: no cover md5 (str): the md5 string. """ import urllib + root = os.path.expanduser(root) if not filename: filename = os.path.basename(url) @@ -297,32 +309,26 @@ def download_url(url, root, filename=None, md5=None): # pragma: no cover os.makedirs(root, exist_ok=True) if check_integrity(fpath, md5): - print('Using downloaded and verified file: ' + fpath) + print("Using downloaded and verified file: " + fpath) else: try: - print('Downloading ' + url + ' to ' + fpath) - urllib.request.urlretrieve( - url, fpath, - reporthook=gen_bar_updater() - ) + print("Downloading " + url + " to " + fpath) + urllib.request.urlretrieve(url, fpath, reporthook=gen_bar_updater()) except (urllib.error.URLError, IOError) as e: - if url[:5] == 'https': - url = url.replace('https:', 'http:') - print('Failed download. Trying https -> http instead.' - ' Downloading ' + url + ' to ' + fpath) - urllib.request.urlretrieve( - url, fpath, - reporthook=gen_bar_updater() - ) + if url[:5] == "https": + url = url.replace("https:", "http:") + print("Failed download. Trying https -> http instead." " Downloading " + url + " to " + fpath) + urllib.request.urlretrieve(url, fpath, reporthook=gen_bar_updater()) else: raise e if not check_integrity(fpath, md5): raise RuntimeError("File not found or corrupted.") -def gen_bar_updater(): # pragma: no cover +def gen_bar_updater(): # pragma: no cover """Generate progress bar.""" from tqdm import tqdm + pbar = tqdm(total=None) def bar_update(count, block_size, total_size): @@ -331,10 +337,11 @@ def bar_update(count, block_size, total_size): pbar.total = total_size progress_bytes = count * block_size pbar.update(progress_bytes - pbar.n) + return bar_update -def check_integrity(fpath, md5): # pragma: no cover +def check_integrity(fpath, md5): # pragma: no cover """Check MD5 checksum.""" if not os.path.isfile(fpath): return False @@ -343,17 +350,22 @@ def check_integrity(fpath, md5): # pragma: no cover return md5 == calculate_md5(fpath) -def calculate_md5(fpath, chunk_size=1024*1024): # pragma: no cover +def calculate_md5(fpath, chunk_size=1024 * 1024): # pragma: no cover """Generate MD5 checksum for a file.""" md5 = hashlib.md5() - with open(fpath, 'rb') as f: - for chunk in iter(lambda: f.read(chunk_size), b''): + with open(fpath, "rb") as f: + for chunk in iter(lambda: f.read(chunk_size), b""): md5.update(chunk) return md5.hexdigest() -@dataset_registry(dataset_type="CIFAR10", framework="onnxrt_qlinearops, \ - onnxrt_integerops", dataset_format='') -class CIFAR10(Dataset): # pragma: no cover + +@dataset_registry( + dataset_type="CIFAR10", + framework="onnxrt_qlinearops, \ + onnxrt_integerops", + dataset_format="", +) +class CIFAR10(Dataset): # pragma: no cover """The CIFAR10 and CIFAR100 database. 
For CIFAR10: If download is True, it will download dataset to root/ and extract it @@ -379,39 +391,33 @@ class CIFAR10(Dataset): # pragma: no cover url = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz" filename = "cifar-10-python.tar.gz" - tgz_md5 = 'c58f30108f718f92721af3b95e74349a' + tgz_md5 = "c58f30108f718f92721af3b95e74349a" train_list = [ - ['data_batch_1', 'c99cafc152244af753f735de768cd75f'], - ['data_batch_2', 'd4bba439e000b95fd0a9bffe97cbabec'], - ['data_batch_3', '54ebc095f3ab1f0389bbae665268c751'], - ['data_batch_4', '634d18415352ddfa80567beed471001a'], - ['data_batch_5', '482c414d41f54cd18b22e5b47cb7c3cb'], + ["data_batch_1", "c99cafc152244af753f735de768cd75f"], + ["data_batch_2", "d4bba439e000b95fd0a9bffe97cbabec"], + ["data_batch_3", "54ebc095f3ab1f0389bbae665268c751"], + ["data_batch_4", "634d18415352ddfa80567beed471001a"], + ["data_batch_5", "482c414d41f54cd18b22e5b47cb7c3cb"], ] test_list = [ - ['test_batch', '40351d587109b95175f43aff81a1287e'], + ["test_batch", "40351d587109b95175f43aff81a1287e"], ] meta = { - 'filename': 'batches.meta', - 'key': 'label_names', - 'md5': '5ff9c542aee3614f3951f8cda6e48888', + "filename": "batches.meta", + "key": "label_names", + "md5": "5ff9c542aee3614f3951f8cda6e48888", } - def __init__(self, - root, - train=False, - transform=None, - filter=None, - download=True): # pragma: no cover + def __init__(self, root, train=False, transform=None, filter=None, download=True): # pragma: no cover """Initialize the attributes of class.""" self.root = root if download: self.download() if not self._check_integrity(): - raise RuntimeError( - 'Dataset not found or corrupted. You can use download=True to download it') + raise RuntimeError("Dataset not found or corrupted. You can use download=True to download it") if train: downloaded_list = self.train_list else: @@ -421,13 +427,13 @@ def __init__(self, self.targets = [] for file_name, checksum in downloaded_list: file_path = os.path.join(self.root, file_name) - with open(file_path, 'rb') as f: - entry = pickle.load(f, encoding='latin1') - self.data.append(entry['data']) - if 'labels' in entry: - self.targets.extend(entry['labels']) + with open(file_path, "rb") as f: + entry = pickle.load(f, encoding="latin1") + self.data.append(entry["data"]) + if "labels" in entry: + self.targets.extend(entry["labels"]) else: - self.targets.extend(entry['fine_labels']) + self.targets.extend(entry["fine_labels"]) self.data = np.vstack(self.data).reshape(-1, 3, 32, 32) self.data = self.data.transpose((0, 2, 3, 1)) # convert to HWC @@ -436,13 +442,14 @@ def __init__(self, def load_meta(self): # pragma: no cover """Load meta.""" - path = os.path.join(self.root, self.meta['filename']) - if not check_integrity(path, self.meta['md5']): - raise RuntimeError('Dataset metadata file not found or corrupted.' + - ' You can use download=True to download it') - with open(path, 'rb') as infile: - data = pickle.load(infile, encoding='latin1') - self.classes = data[self.meta['key']] + path = os.path.join(self.root, self.meta["filename"]) + if not check_integrity(path, self.meta["md5"]): + raise RuntimeError( + "Dataset metadata file not found or corrupted." 
+ " You can use download=True to download it" + ) + with open(path, "rb") as infile: + data = pickle.load(infile, encoding="latin1") + self.classes = data[self.meta["key"]] self.class_to_idx = {_class: i for i, _class in enumerate(self.classes)} def __getitem__(self, index): # pragma: no cover @@ -462,20 +469,20 @@ def __len__(self): # pragma: no cover def download(self): # pragma: no cover """Download a file.""" if self._check_integrity(): - print('Files already downloaded and verified') + print("Files already downloaded and verified") return download_root = os.path.expanduser(self.root) filename = os.path.basename(self.url) download_url(self.url, download_root, filename, self.tgz_md5) archive = os.path.join(download_root, filename) print("Extracting {} to {}".format(archive, download_root)) - with tarfile.open(archive, 'r:gz') as tar: + with tarfile.open(archive, "r:gz") as tar: tar.extractall(path=download_root) def _check_integrity(self): # pragma: no cover """Check MD5 checksum.""" root = self.root - for fentry in (self.train_list + self.test_list): + for fentry in self.train_list + self.test_list: filename, md5 = fentry[0], fentry[1] fpath = os.path.join(root, filename) if not check_integrity(fpath, md5): @@ -483,8 +490,8 @@ def _check_integrity(self): # pragma: no cover return True -@dataset_registry(dataset_type="CIFAR10", framework="pytorch", dataset_format='') -class PytorchCIFAR10(CIFAR10): +@dataset_registry(dataset_type="CIFAR10", framework="pytorch", dataset_format="") +class PytorchCIFAR10(CIFAR10): """The PyTorch datasets for CIFAR10.""" def __getitem__(self, index): # pragma: no cover @@ -499,7 +506,7 @@ def __getitem__(self, index): # pragma: no cover return (image, label) -@dataset_registry(dataset_type="CIFAR10", framework="mxnet", dataset_format='') +@dataset_registry(dataset_type="CIFAR10", framework="mxnet", dataset_format="") class MXNetCIFAR10(CIFAR10): """The MXNet datasets for CIFAR10.""" @@ -515,7 +522,7 @@ def __getitem__(self, index): # pragma: no cover return (image, label) -@dataset_registry(dataset_type="CIFAR10", framework="tensorflow, tensorflow_itex", dataset_format='') +@dataset_registry(dataset_type="CIFAR10", framework="tensorflow, tensorflow_itex", dataset_format="") class TensorflowCIFAR10(CIFAR10): """The Tensorflow datasets for CIFAR10.""" @@ -527,16 +534,20 @@ def __getitem__(self, index): # pragma: no cover image, label = self.data[index], self.targets[index] if self.transform is not None: image, label = self.transform((image, label)) - if type(image).__name__ == 'Tensor': + if type(image).__name__ == "Tensor": with tf.compat.v1.Session() as sess: image = sess.run(image) - elif type(image).__name__ == 'EagerTensor': + elif type(image).__name__ == "EagerTensor": image = image.numpy() return (image, label) -@dataset_registry(dataset_type="CIFAR100", framework="onnxrt_qlinearops, \ - onnxrt_integerops", dataset_format='') +@dataset_registry( + dataset_type="CIFAR100", + framework="onnxrt_qlinearops, \ + onnxrt_integerops", + dataset_format="", +) class CIFAR100(CIFAR10): """CIFAR100 database. 
@@ -559,21 +570,21 @@ class CIFAR100(CIFAR10): url = "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz" filename = "cifar-100-python.tar.gz" - tgz_md5 = 'eb9058c3a382ffc7106e4002c42a8d85' + tgz_md5 = "eb9058c3a382ffc7106e4002c42a8d85" train_list = [ - ['train', '16019d7e3df5f24257cddd939b257f8d'], + ["train", "16019d7e3df5f24257cddd939b257f8d"], ] test_list = [ - ['test', 'f0ef6b0ae62326f3e7ffdfab6717acfc'], + ["test", "f0ef6b0ae62326f3e7ffdfab6717acfc"], ] meta = { - 'filename': 'meta', - 'key': 'fine_label_names', - 'md5': '7973b15100ade9c7d40fb424638fde48', + "filename": "meta", + "key": "fine_label_names", + "md5": "7973b15100ade9c7d40fb424638fde48", } -@dataset_registry(dataset_type="CIFAR100", framework="pytorch", dataset_format='') +@dataset_registry(dataset_type="CIFAR100", framework="pytorch", dataset_format="") class PytorchCIFAR100(CIFAR100): """The PyTorch datasets for CIFAR100.""" @@ -590,7 +601,7 @@ def __getitem__(self, index): # pragma: no cover return (image, label) -@dataset_registry(dataset_type="CIFAR100", framework="mxnet", dataset_format='') +@dataset_registry(dataset_type="CIFAR100", framework="mxnet", dataset_format="") class MXNetCIFAR100(CIFAR100): """The MXNet datasets for CIFAR100.""" @@ -606,11 +617,11 @@ def __getitem__(self, index): # pragma: no cover return (image, label) -@dataset_registry(dataset_type="CIFAR100", framework="tensorflow, tensorflow_itex", dataset_format='') +@dataset_registry(dataset_type="CIFAR100", framework="tensorflow, tensorflow_itex", dataset_format="") class TensorflowCIFAR100(CIFAR100): """The Tensorflow datasets for CIFAR100.""" - def __getitem__(self, index): # pragma: no cover + def __getitem__(self, index): # pragma: no cover """Magic method. x[i] is roughly equivalent to type(x).__getitem__(x, index) @@ -618,16 +629,21 @@ def __getitem__(self, index): # pragma: no cover image, label = self.data[index], self.targets[index] if self.transform is not None: image, label = self.transform((image, label)) - if type(image).__name__ == 'Tensor': + if type(image).__name__ == "Tensor": with tf.compat.v1.Session() as sess: image = sess.run(image) - elif type(image).__name__ == 'EagerTensor': + elif type(image).__name__ == "EagerTensor": image = image.numpy() return (image, label) -@dataset_registry(dataset_type="MNIST", framework="onnxrt_qlinearops, \ - onnxrt_integerops", dataset_format='') -class MNIST(Dataset): # pragma: no cover + +@dataset_registry( + dataset_type="MNIST", + framework="onnxrt_qlinearops, \ + onnxrt_integerops", + dataset_format="", +) +class MNIST(Dataset): # pragma: no cover """Modified National Institute of Standards and Technology database and FashionMNIST database. For MNIST: If download is True, it will download dataset to root/MNIST/, otherwise user @@ -649,11 +665,20 @@ class MNIST(Dataset): # pragma: no cover downloaded, it is not downloaded again. 
""" - classes = ['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four', - '5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine'] + classes = [ + "0 - zero", + "1 - one", + "2 - two", + "3 - three", + "4 - four", + "5 - five", + "6 - six", + "7 - seven", + "8 - eight", + "9 - nine", + ] resource = [ - ('https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz', - '8a61469f7ea1b51cbae51d4f78837e45') + ("https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz", "8a61469f7ea1b51cbae51d4f78837e45") ] def __init__(self, root, train=False, transform=None, filter=None, download=True): @@ -671,13 +696,12 @@ def read_data(self): for file_name, checksum in self.resource: file_path = os.path.join(self.root, os.path.basename(file_name)) if not os.path.exists(file_path): - raise RuntimeError( - 'Dataset not found. You can use download=True to download it') + raise RuntimeError("Dataset not found. You can use download=True to download it") with np.load(file_path, allow_pickle=True) as f: if self.train: - self.data, self.targets = f['x_train'], f['y_train'] + self.data, self.targets = f["x_train"], f["y_train"] else: - self.data, self.targets = f['x_test'], f['y_test'] + self.data, self.targets = f["x_test"], f["y_test"] def __len__(self): """Length of the dataset.""" @@ -706,12 +730,11 @@ def download(self): if os.path.exists(os.path.join(self.root, filename)): continue else: - download_url(url, root=self.root, - filename=filename, md5=md5) + download_url(url, root=self.root, filename=filename, md5=md5) -@dataset_registry(dataset_type="MNIST", framework="pytorch", dataset_format='') -class PytorchMNIST(MNIST): # pragma: no cover +@dataset_registry(dataset_type="MNIST", framework="pytorch", dataset_format="") +class PytorchMNIST(MNIST): # pragma: no cover """The PyTorch datasets for MNIST.""" def __getitem__(self, index): @@ -720,15 +743,15 @@ def __getitem__(self, index): x[i] is roughly equivalent to type(x).__getitem__(x, index) """ image, label = self.data[index], int(self.targets[index]) - image = Image.fromarray(image, mode='L') + image = Image.fromarray(image, mode="L") if self.transform is not None: image, label = self.transform((image, label)) image = np.array(image) return (image, label) -@dataset_registry(dataset_type="MNIST", framework="mxnet", dataset_format='') -class MXNetMNIST(MNIST): # pragma: no cover +@dataset_registry(dataset_type="MNIST", framework="mxnet", dataset_format="") +class MXNetMNIST(MNIST): # pragma: no cover """The MXNet datasets for MNIST.""" def __getitem__(self, index): @@ -744,8 +767,8 @@ def __getitem__(self, index): return (image, label) -@dataset_registry(dataset_type="MNIST", framework="tensorflow, tensorflow_itex", dataset_format='') -class TensorflowMNIST(MNIST): # pragma: no cover +@dataset_registry(dataset_type="MNIST", framework="tensorflow, tensorflow_itex", dataset_format="") +class TensorflowMNIST(MNIST): # pragma: no cover """The Tensorflow datasets for MNIST.""" def __getitem__(self, index): @@ -757,17 +780,21 @@ def __getitem__(self, index): image = np.expand_dims(image, -1) if self.transform is not None: image, label = self.transform((image, label)) - if type(image).__name__ == 'Tensor': + if type(image).__name__ == "Tensor": with tf.compat.v1.Session() as sess: image = sess.run(image) - elif type(image).__name__ == 'EagerTensor': + elif type(image).__name__ == "EagerTensor": image = image.numpy() return (image, label) -@dataset_registry(dataset_type="FashionMNIST", framework="onnxrt_qlinearops, \ - 
onnxrt_integerops", dataset_format='') -class FashionMNIST(MNIST): # pragma: no cover +@dataset_registry( + dataset_type="FashionMNIST", + framework="onnxrt_qlinearops, \ + onnxrt_integerops", + dataset_format="", +) +class FashionMNIST(MNIST): # pragma: no cover """FashionMNIST database. For FashionMNIST: If download is True, it will download dataset to root/FashionMNIST/, @@ -788,36 +815,38 @@ class FashionMNIST(MNIST): # pragma: no cover """ resource = [ - ('https://storage.googleapis.com/tensorflow/tf-keras-datasets/' + file_name, None) + ("https://storage.googleapis.com/tensorflow/tf-keras-datasets/" + file_name, None) for file_name in [ - 'train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz', - 't10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz' - ] + "train-labels-idx1-ubyte.gz", + "train-images-idx3-ubyte.gz", + "t10k-labels-idx1-ubyte.gz", + "t10k-images-idx3-ubyte.gz", + ] ] - classes = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', - 'Shirt', 'Sneaker', 'Bag', 'Ankle boot'] + classes = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat", "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"] def read_data(self): """Read data from a file.""" import struct + if self.train: - label_path = os.path.join(self.root, 'train-labels-idx1-ubyte.gz') - image_path = os.path.join(self.root, 'train-images-idx3-ubyte.gz') + label_path = os.path.join(self.root, "train-labels-idx1-ubyte.gz") + image_path = os.path.join(self.root, "train-images-idx3-ubyte.gz") else: - label_path = os.path.join(self.root, 't10k-labels-idx1-ubyte.gz') - image_path = os.path.join(self.root, 't10k-images-idx3-ubyte.gz') - with gzip.open(label_path, 'rb') as f: + label_path = os.path.join(self.root, "t10k-labels-idx1-ubyte.gz") + image_path = os.path.join(self.root, "t10k-images-idx3-ubyte.gz") + with gzip.open(label_path, "rb") as f: struct.unpack(">II", f.read(8)) self.targets = np.frombuffer(f.read(), dtype=np.uint8).astype(np.int32) - with gzip.open(image_path, 'rb') as f: + with gzip.open(image_path, "rb") as f: struct.unpack(">IIII", f.read(16)) data = np.frombuffer(f.read(), dtype=np.uint8) self.data = data.reshape(len(self.targets), 28, 28) -@dataset_registry(dataset_type="FashionMNIST", framework="pytorch", dataset_format='') -class PytorchFashionMNIST(FashionMNIST): # pragma: no cover +@dataset_registry(dataset_type="FashionMNIST", framework="pytorch", dataset_format="") +class PytorchFashionMNIST(FashionMNIST): # pragma: no cover """The PyTorch datasets for FashionMNIST.""" def __getitem__(self, index): @@ -826,15 +855,15 @@ def __getitem__(self, index): x[i] is roughly equivalent to type(x).__getitem__(x, index) """ image, label = self.data[index], int(self.targets[index]) - image = Image.fromarray(image, mode='L') + image = Image.fromarray(image, mode="L") if self.transform is not None: image, label = self.transform((image, label)) image = np.array(image) return (image, label) -@dataset_registry(dataset_type="FashionMNIST", framework="mxnet", dataset_format='') -class MXNetFashionMNIST(FashionMNIST): # pragma: no cover +@dataset_registry(dataset_type="FashionMNIST", framework="mxnet", dataset_format="") +class MXNetFashionMNIST(FashionMNIST): # pragma: no cover """The MXNet Dataset for FashionMNIST.""" def __getitem__(self, index): @@ -850,8 +879,8 @@ def __getitem__(self, index): return (image, label) -@dataset_registry(dataset_type="FashionMNIST", framework="tensorflow, tensorflow_itex", dataset_format='') -class TensorflowFashionMNIST(FashionMNIST): # pragma: no cover 
+@dataset_registry(dataset_type="FashionMNIST", framework="tensorflow, tensorflow_itex", dataset_format="") +class TensorflowFashionMNIST(FashionMNIST): # pragma: no cover """The Tensorflow Dataset for FashionMNIST.""" def __getitem__(self, index): @@ -863,17 +892,21 @@ def __getitem__(self, index): image = np.expand_dims(image, -1) if self.transform is not None: image, label = self.transform((image, label)) - if type(image).__name__ == 'Tensor': + if type(image).__name__ == "Tensor": with tf.compat.v1.Session() as sess: image = sess.run(image) - elif type(image).__name__ == 'EagerTensor': + elif type(image).__name__ == "EagerTensor": image = image.numpy() return (image, label) -@dataset_registry(dataset_type="ImageFolder", framework="onnxrt_qlinearops, \ - onnxrt_integerops", dataset_format='') -class ImageFolder(Dataset): # pragma: no cover +@dataset_registry( + dataset_type="ImageFolder", + framework="onnxrt_qlinearops, \ + onnxrt_integerops", + dataset_format="", +) +class ImageFolder(Dataset): # pragma: no cover """The base class for ImageFolder. Expects the data folder to contain subfolders representing the classes to which @@ -902,10 +935,10 @@ def __init__(self, root, transform=None, filter=None): self.transform = transform self.image_list = [] - files = glob.glob(os.path.join(self.root, '*')) + files = glob.glob(os.path.join(self.root, "*")) files.sort() for idx, file in enumerate(files): - imgs = glob.glob(os.path.join(file, '*')) + imgs = glob.glob(os.path.join(file, "*")) imgs.sort() for img in imgs: self.image_list.append((img, idx)) @@ -928,8 +961,8 @@ def __getitem__(self, index): return (image, label) -@dataset_registry(dataset_type="ImageFolder", framework="mxnet", dataset_format='') -class MXNetImageFolder(ImageFolder): # pragma: no cover +@dataset_registry(dataset_type="ImageFolder", framework="mxnet", dataset_format="") +class MXNetImageFolder(ImageFolder): # pragma: no cover """The MXNet Dataset for image folder. Expects the data folder to contain subfolders representing the classes to which @@ -964,8 +997,8 @@ def __getitem__(self, index): return (image, label) -@dataset_registry(dataset_type="ImageFolder", framework="tensorflow, tensorflow_itex", dataset_format='') -class Tensorflow(ImageFolder): # pragma: no cover +@dataset_registry(dataset_type="ImageFolder", framework="tensorflow, tensorflow_itex", dataset_format="") +class Tensorflow(ImageFolder): # pragma: no cover """The Tensorflow Dataset for image folder. 
Expects the data folder to contain subfolders representing the classes to which @@ -995,21 +1028,21 @@ def __getitem__(self, index): sample = self.image_list[index] label = sample[1] with Image.open(sample[0]) as image: - if image.mode != 'RGB': - image = image.convert('RGB') + if image.mode != "RGB": + image = image.convert("RGB") image = np.array(image) if self.transform is not None: image, label = self.transform((image, label)) - if type(image).__name__ == 'Tensor': + if type(image).__name__ == "Tensor": with tf.compat.v1.Session() as sess: image = sess.run(image) - elif type(image).__name__ == 'EagerTensor': + elif type(image).__name__ == "EagerTensor": image = image.numpy() return (image, label) -@dataset_registry(dataset_type="TFRecordDataset", framework="tensorflow, tensorflow_itex", dataset_format='') -class TensorflowTFRecordDataset(IterableDataset): # pragma: no cover +@dataset_registry(dataset_type="TFRecordDataset", framework="tensorflow, tensorflow_itex", dataset_format="") +class TensorflowTFRecordDataset(IterableDataset): # pragma: no cover """The Tensorflow TFRecord Dataset. Root is a full path to tfrecord file, which contains the file name. @@ -1025,18 +1058,18 @@ def __new__(cls, root, transform=None, filter=None): # pylint: disable=no-name-in-module from tensorflow.python.data.experimental import parallel_interleave from tensorflow.python.platform import gfile + file_names = gfile.Glob(root) ds = tf.data.Dataset.from_tensor_slices(file_names) - ds = ds.apply(parallel_interleave( - tf.data.TFRecordDataset, cycle_length=len(file_names))) + ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=len(file_names))) if transform is not None: ds = ds.map(transform, num_parallel_calls=None) ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned return ds -@dataset_registry(dataset_type="ImageRecord", framework="tensorflow, tensorflow_itex", dataset_format='') -class TensorflowImageRecord(IterableDataset): # pragma: no cover +@dataset_registry(dataset_type="ImageRecord", framework="tensorflow, tensorflow_itex", dataset_format="") +class TensorflowImageRecord(IterableDataset): # pragma: no cover """Tensorflow imageNet database in tf record format. 
Please arrange data in this way: @@ -1053,20 +1086,23 @@ class TensorflowImageRecord(IterableDataset): # pragma: no cover """ """Configuration for Imagenet dataset.""" + def __new__(cls, root, transform=None, filter=None): """Build a new object of TensorflowImageRecord class.""" from tensorflow.python.platform import gfile # pylint: disable=no-name-in-module - glob_pattern = os.path.join(root, '*-*-of-*') + + glob_pattern = os.path.join(root, "*-*-of-*") file_names = gfile.Glob(glob_pattern) if not file_names: - raise ValueError('Found no files in --root matching: {}'.format(glob_pattern)) + raise ValueError("Found no files in --root matching: {}".format(glob_pattern)) # pylint: disable=no-name-in-module from tensorflow.python.data.experimental import parallel_interleave + from neural_compressor.data.transforms.imagenet_transform import ParseDecodeImagenet + ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False) - ds = ds.apply(parallel_interleave( - tf.data.TFRecordDataset, cycle_length=len(file_names))) + ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=len(file_names))) if transform is not None: transform.transform_list.insert(0, ParseDecodeImagenet()) @@ -1077,8 +1113,8 @@ def __new__(cls, root, transform=None, filter=None): return ds -@dataset_registry(dataset_type="VOCRecord", framework="tensorflow, tensorflow_itex", dataset_format='') -class TensorflowVOCRecord(IterableDataset): # pragma: no cover +@dataset_registry(dataset_type="VOCRecord", framework="tensorflow, tensorflow_itex", dataset_format="") +class TensorflowVOCRecord(IterableDataset): # pragma: no cover """The Tensorflow PASCAL VOC 2012 database in tf record format. Please arrange data in this way: @@ -1097,16 +1133,17 @@ class TensorflowVOCRecord(IterableDataset): # pragma: no cover def __new__(cls, root, transform=None, filter=None): """Build a new object of TensorflowVOCRecord class.""" from tensorflow.python.platform import gfile # pylint: disable=no-name-in-module - glob_pattern = os.path.join(root, '%s-*' % 'val') + + glob_pattern = os.path.join(root, "%s-*" % "val") file_names = gfile.Glob(glob_pattern) if not file_names: - raise ValueError('Found no files in --root matching: {}'.format(glob_pattern)) + raise ValueError("Found no files in --root matching: {}".format(glob_pattern)) # pylint: disable=no-name-in-module from tensorflow.python.data.experimental import parallel_interleave + ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False) - ds = ds.apply(parallel_interleave( - tf.data.TFRecordDataset, cycle_length=len(file_names))) + ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=len(file_names))) if transform is not None: ds = ds.map(transform, num_parallel_calls=None) diff --git a/neural_compressor/data/datasets/dummy_dataset.py b/neural_compressor/data/datasets/dummy_dataset.py index 8b54296015d..8b04a56533f 100644 --- a/neural_compressor/data/datasets/dummy_dataset.py +++ b/neural_compressor/data/datasets/dummy_dataset.py @@ -17,22 +17,29 @@ # ============================================================================== """Dummy dataset for dummy data generation on multiple framework backends.""" -from .dataset import dataset_registry, Dataset +import logging + import numpy as np + from neural_compressor.utils.utility import LazyImport -import logging -mx = LazyImport('mxnet') -torch = LazyImport('torch') +from .dataset import Dataset, dataset_registry + +mx = LazyImport("mxnet") +torch = LazyImport("torch") logger = 
logging.getLogger("neural_compressor") -@dataset_registry(dataset_type="dummy", framework="tensorflow, tensorflow_itex, \ + +@dataset_registry( + dataset_type="dummy", + framework="tensorflow, tensorflow_itex, \ onnxrt_qlinearops, onnxrt_integerops, \ pytorch, pytorch_ipex, pytorch_fx, \ mxnet", - dataset_format='') -class DummyDataset(Dataset): # pragma: no cover + dataset_format="", +) +class DummyDataset(Dataset): # pragma: no cover """Dataset used for dummy data generation. This Dataset is to construct a dataset from a specific shape. @@ -40,8 +47,7 @@ class DummyDataset(Dataset): # pragma: no cover (TODO) construct dummy data from real dataset or iteration of data. """ - def __init__(self, shape, low=-128., high=127., dtype='float32', label=True, \ - transform=None, filter=None): + def __init__(self, shape, low=-128.0, high=127.0, dtype="float32", label=True, transform=None, filter=None): """Initialize `DummyDataset` class. Args: @@ -59,37 +65,47 @@ def __init__(self, shape, low=-128., high=127., dtype='float32', label=True, \ If transform is not None, it will ignore it. filter (Filter objects, default=None): Filter out examples according to specific conditions. """ - dtype_map = {'float32':np.float32, 'float16':np.float16, 'uint8':np.uint8, \ - 'int8': np.int8, 'int32':np.int32, 'int64':np.int64, 'bool':bool,\ - 'string': str} + dtype_map = { + "float32": np.float32, + "float16": np.float16, + "uint8": np.uint8, + "int8": np.int8, + "int32": np.int32, + "int64": np.int64, + "bool": bool, + "string": str, + } np.random.seed(9527) self.transform = transform self.label = label - if len(shape)==0: + if len(shape) == 0: logger.info("No data in the dummy dataset.") elif isinstance(shape, list): # list tensor should same first demension n n = shape[0][0] - assert all(isinstance(elem, tuple) and elem[0] == n for elem in shape), \ - 'each tensor shape should be tuple and same fisrt demension' + assert all( + isinstance(elem, tuple) and elem[0] == n for elem in shape + ), "each tensor shape should be tuple and same fisrt demension" if isinstance(low, list): - assert len(low) == len(shape) and all(isinstance(elem, float) for elem in low), \ - 'low list should have same length with shape with element data type float' + assert len(low) == len(shape) and all( + isinstance(elem, float) for elem in low + ), "low list should have same length with shape with element data type float" else: low = (low * np.ones(len(shape))).astype(float) if isinstance(high, list): - assert len(high) == len(shape) and all(isinstance(elem, float) for elem in high), \ - 'high list should have same length with shape with element data type float' + assert len(high) == len(shape) and all( + isinstance(elem, float) for elem in high + ), "high list should have same length with shape with element data type float" else: high = (high * np.ones(len(shape))).astype(float) if isinstance(dtype, list): - assert len(dtype) == len(shape) and \ - all(elem in dtype_map.keys() for elem in dtype), \ - 'high list should have same length with shape with element data type float' + assert len(dtype) == len(shape) and all( + elem in dtype_map.keys() for elem in dtype + ), "high list should have same length with shape with element data type float" else: dtype = [dtype for i in range(0, len(shape))] @@ -98,22 +114,24 @@ def __init__(self, shape, low=-128., high=127., dtype='float32', label=True, \ if isinstance(low, float): low = [low] else: - assert isinstance(low, list) and len(low) == 1 and isinstance(low[0], float), \ - 'low should be float or 
list of float with length 1' + assert ( + isinstance(low, list) and len(low) == 1 and isinstance(low[0], float) + ), "low should be float or list of float with length 1" if isinstance(high, float): high = [high] else: - assert isinstance(high, list) and len(high) == 1 and isinstance(high[0], float), \ - 'high should be float or list of float with length 1' + assert ( + isinstance(high, list) and len(high) == 1 and isinstance(high[0], float) + ), "high should be float or list of float with length 1" if isinstance(dtype, str): - assert dtype in dtype_map.keys(), 'dtype only support {}'.format(dtype_map.keys()) + assert dtype in dtype_map.keys(), "dtype only support {}".format(dtype_map.keys()) dtype = [dtype] else: - assert isinstance(dtype, list) and \ - len(dtype) == 1 and dtype[0] in dtype_map.keys(), \ - 'dtype should be str or list of str in supported dtypes' + assert ( + isinstance(dtype, list) and len(dtype) == 1 and dtype[0] in dtype_map.keys() + ), "dtype should be str or list of str in supported dtypes" self.dataset = [] for idx in range(0, len(shape)): @@ -126,7 +144,6 @@ def __init__(self, shape, low=-128., high=127., dtype='float32', label=True, \ else: self.dataset = [elem for elem in zip(*self.dataset)] - def __len__(self): """Return the length of dataset.""" return len(self.dataset) diff --git a/neural_compressor/data/datasets/dummy_dataset_v2.py b/neural_compressor/data/datasets/dummy_dataset_v2.py index 86699b7f57d..c61df3b4691 100644 --- a/neural_compressor/data/datasets/dummy_dataset_v2.py +++ b/neural_compressor/data/datasets/dummy_dataset_v2.py @@ -18,27 +18,35 @@ """Dummy dataset for dummy_v2/sparse_dummy_v2 data generation on multiple framework backends.""" import sys -from .dataset import dataset_registry, IterableDataset +from functools import reduce + import numpy as np + from neural_compressor.utils.utility import LazyImport -from functools import reduce -mx = LazyImport('mxnet') -torch = LazyImport('torch') +from .dataset import IterableDataset, dataset_registry + +mx = LazyImport("mxnet") +torch = LazyImport("torch") -@dataset_registry(dataset_type="dummy_v2", framework="tensorflow, tensorflow_itex, \ + +@dataset_registry( + dataset_type="dummy_v2", + framework="tensorflow, tensorflow_itex, \ onnxrt_qlinearops, onnxrt_integerops, \ pytorch, pytorch_ipex, pytorch_fx, mxnet", - dataset_format='') -class DummyDataset(IterableDataset): # pragma: no cover + dataset_format="", +) +class DummyDataset(IterableDataset): # pragma: no cover """Dataset used for dummy_v2 data generation. This Dataset is to construct a dataset from a input shape and label shape. The value range is calculated from: low * stand_normal(0, 1) + high. """ - def __init__(self, input_shape, label_shape=None, low=-128., high=127., \ - dtype='float32', transform=None, filter=None): + def __init__( + self, input_shape, label_shape=None, low=-128.0, high=127.0, dtype="float32", transform=None, filter=None + ): """Initialize `DummyDataset` class. Args: @@ -61,8 +69,15 @@ def __init__(self, input_shape, label_shape=None, low=-128., high=127., \ If transform is not None, it will ignore it. filter (Filter objects, default=None): Filter out examples according to specific conditions. 
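The argument validation reflowed in this hunk only changes line wrapping; the semantics are unchanged. For orientation, a minimal sketch of driving the dummy_v2 generator, assuming the Datasets and DataLoader factories exported by neural_compressor.data; the shapes and the one-batch probe are illustrative:

    from neural_compressor.data import DataLoader, Datasets

    # dummy_v2 yields (input, label) pairs indefinitely from the given shapes
    dataset = Datasets("tensorflow")["dummy_v2"](input_shape=(224, 224, 3), label_shape=(1,))
    dataloader = DataLoader(framework="tensorflow", dataset=dataset, batch_size=2)
    for inputs, labels in dataloader:
        print(inputs.shape, labels.shape)
        break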
""" - self.dtype_map = {'float32':np.float32, 'float16':np.float16, 'uint8':np.uint8, \ - 'int8':np.int8, 'int32':np.int32, 'int64':np.int64, 'bool':bool} + self.dtype_map = { + "float32": np.float32, + "float16": np.float16, + "uint8": np.uint8, + "int8": np.int8, + "int32": np.int32, + "int64": np.int64, + "bool": bool, + } np.random.seed(9527) self.transform = transform @@ -83,23 +98,23 @@ def __init__(self, input_shape, label_shape=None, low=-128., high=127., \ self.total_dim = self.input_dim + self.label_dim if isinstance(high, list): - assert len(high) == self.total_dim and \ - all(isinstance(elem, float) for elem in high),\ - 'high value list length should same with label dim + input_dim' + assert len(high) == self.total_dim and all( + isinstance(elem, float) for elem in high + ), "high value list length should same with label dim + input_dim" else: self.high = (high * np.ones(self.total_dim)).astype(np.float32) if isinstance(low, list): - assert len(low) == self.total_dim and \ - all(isinstance(elem, float) for elem in low), \ - 'low value list length should same with label dim + input_dim' + assert len(low) == self.total_dim and all( + isinstance(elem, float) for elem in low + ), "low value list length should same with label dim + input_dim" else: self.low = (low * np.ones(self.total_dim)).astype(np.float32) if isinstance(dtype, list): - assert len(dtype) == self.total_dim and \ - all(elem in self.dtype_map.keys() for elem in dtype), \ - 'dtype list length should same with label dim + input_dim' + assert len(dtype) == self.total_dim and all( + elem in self.dtype_map.keys() for elem in dtype + ), "dtype list length should same with label dim + input_dim" else: self.dtype = [self.dtype for i in range(0, self.total_dim)] @@ -114,48 +129,60 @@ def __iter__(self): while True: input_data = [] for idx in range(0, self.input_dim): - tensor = np.random.uniform(\ - low=self.low[idx], high=self.high[idx], size=self.input_shape[idx]) + tensor = np.random.uniform(low=self.low[idx], high=self.high[idx], size=self.input_shape[idx]) tensor = tensor.astype(self.dtype_map[self.dtype[idx]]) input_data.append(tensor) label = [] for idx in range(0, self.label_dim): - shift_idx = self.input_dim + idx - tensor = np.random.uniform(low=self.low[shift_idx], - high=self.high[shift_idx], - size=self.label_shape[idx]) + shift_idx = self.input_dim + idx + tensor = np.random.uniform( + low=self.low[shift_idx], high=self.high[shift_idx], size=self.label_shape[idx] + ) tensor = tensor.astype(self.dtype_map[self.dtype[shift_idx]]) label.append(tensor) if len(input_data) == 1: - input_data = input_data[0] + input_data = input_data[0] if len(label) == 1: - label = label[0] + label = label[0] if len(label) > 0: yield input_data, label else: yield input_data - + def __len__(self): """Return the length of dataset.""" return sys.maxsize -@dataset_registry(dataset_type="sparse_dummy_v2", framework="tensorflow, tensorflow_itex, \ + +@dataset_registry( + dataset_type="sparse_dummy_v2", + framework="tensorflow, tensorflow_itex, \ onnxrt_qlinearops, onnxrt_integerops, \ pytorch, pytorch_ipex, pytorch_fx, mxnet", - dataset_format='') -class SparseDummyDataset(IterableDataset): # pragma: no cover + dataset_format="", +) +class SparseDummyDataset(IterableDataset): # pragma: no cover """Dataset used for sparse_dummy_v2 data generation. This Dataset is to construct a dataset from a input shape and label shape. The value range is calculated from: low * stand_normal(0, 1) + high. 
""" - def __init__(self, dense_shape, label_shape=None, sparse_ratio=0.5, low=-128., high=127., \ - dtype='float32', transform=None, filter=None): + def __init__( + self, + dense_shape, + label_shape=None, + sparse_ratio=0.5, + low=-128.0, + high=127.0, + dtype="float32", + transform=None, + filter=None, + ): """Initialize `SparseDummyDataset` class. Args: @@ -178,8 +205,15 @@ def __init__(self, dense_shape, label_shape=None, sparse_ratio=0.5, low=-128., h If transform is not None, it will ignore it. filter (Filter objects, default=None): Filter out examples according to specific conditions. """ - self.dtype_map = {'float32':np.float32, 'float16':np.float16, 'uint8':np.uint8, \ - 'int8':np.int8, 'int32':np.int32, 'int64':np.int64, 'bool':bool} + self.dtype_map = { + "float32": np.float32, + "float16": np.float16, + "uint8": np.uint8, + "int8": np.int8, + "int32": np.int32, + "int64": np.int64, + "bool": bool, + } np.random.seed(9527) self.transform = transform @@ -200,39 +234,40 @@ def __init__(self, dense_shape, label_shape=None, sparse_ratio=0.5, low=-128., h self.label_shape = [label_shape] if len(self.label_shape) == 1 and len(self.label_shape) != len(self.dense_shape): self.label_shape = len(self.dense_shape) * self.label_shape - assert len(self.label_shape) == len(self.dense_shape), \ - 'length of dense_shape should be euqal to length of label_shape' + assert len(self.label_shape) == len( + self.dense_shape + ), "length of dense_shape should be euqal to length of label_shape" self.label_dim = len(self.label_shape) self.input_dim = 1 if isinstance(dense_shape, tuple) else len(dense_shape) self.total_dim = self.input_dim + self.label_dim if isinstance(sparse_ratio, list): - assert len(sparse_ratio) == self.input_dim and \ - all(isinstance(elem, float) for elem in sparse_ratio),\ - 'sparse_ratio list length should same with input_dim' + assert len(sparse_ratio) == self.input_dim and all( + isinstance(elem, float) for elem in sparse_ratio + ), "sparse_ratio list length should same with input_dim" else: self.sparse_ratio = (sparse_ratio * np.ones(self.input_dim)).astype(np.float32) - assert all([0 <= i <= 1 for i in self.sparse_ratio]), 'sparse_ratio should be in [0,1]' + assert all([0 <= i <= 1 for i in self.sparse_ratio]), "sparse_ratio should be in [0,1]" if isinstance(high, list): - assert len(high) == self.total_dim and \ - all(isinstance(elem, float) for elem in high),\ - 'high value list length should same with label dim + input_dim' + assert len(high) == self.total_dim and all( + isinstance(elem, float) for elem in high + ), "high value list length should same with label dim + input_dim" else: self.high = (high * np.ones(self.total_dim)).astype(np.float32) if isinstance(low, list): - assert len(low) == self.total_dim and \ - all(isinstance(elem, float) for elem in low), \ - 'low value list length should same with label dim + input_dim' + assert len(low) == self.total_dim and all( + isinstance(elem, float) for elem in low + ), "low value list length should same with label dim + input_dim" else: self.low = (low * np.ones(self.total_dim)).astype(np.float32) if isinstance(dtype, list): - assert len(dtype) == self.total_dim and \ - all(elem in self.dtype_map.keys() for elem in dtype), \ - 'dtype list length should same with label dim + input_dim' + assert len(dtype) == self.total_dim and all( + elem in self.dtype_map.keys() for elem in dtype + ), "dtype list length should same with label dim + input_dim" else: self.dtype = [self.dtype for i in range(0, self.total_dim)] @@ -242,34 
+277,32 @@ def __iter__(self): input_data = [] for idx, shape in enumerate(self.dense_shape): dim = len(shape) - total = reduce(lambda x, y: x*y, shape) + total = reduce(lambda x, y: x * y, shape) sparse_num = round(total * (1 - self.sparse_ratio[idx])) - val = np.random.uniform(\ - low=self.low[idx], high=self.high[idx], size=sparse_num) + val = np.random.uniform(low=self.low[idx], high=self.high[idx], size=sparse_num) val = val.astype(self.dtype_map[self.dtype[idx]]) nums = np.arange(sparse_num) indices = [] - dim_shape = [reduce(lambda x, y: x*y, shape[i:])/shape[i] \ - for i in range(len(shape))] + dim_shape = [reduce(lambda x, y: x * y, shape[i:]) / shape[i] for i in range(len(shape))] for num in nums: indice = [] for item in dim_shape: - indice.append(num//item) + indice.append(num // item) num = num - indice[-1] * item if num - indice[-1] * item > 0 else num indices.append(indice) if self.label_dim > 0: - shift_idx = self.input_dim + idx - tensor = np.random.uniform(low=self.low[shift_idx], - high=self.high[shift_idx], - size=self.label_shape[idx]) + shift_idx = self.input_dim + idx + tensor = np.random.uniform( + low=self.low[shift_idx], high=self.high[shift_idx], size=self.label_shape[idx] + ) tensor = tensor.astype(self.dtype_map[self.dtype[shift_idx]]) input_data.append([(np.array(indices), val), tensor]) else: input_data.append((np.array(indices), val)) yield input_data - + def __len__(self): """Return the length of dataset.""" return sys.maxsize diff --git a/neural_compressor/data/datasets/imagenet_dataset.py b/neural_compressor/data/datasets/imagenet_dataset.py index 9d0d7daf2d1..b05a8420fd7 100644 --- a/neural_compressor/data/datasets/imagenet_dataset.py +++ b/neural_compressor/data/datasets/imagenet_dataset.py @@ -33,26 +33,35 @@ import os import re + import numpy as np from PIL import Image -from neural_compressor.utils.utility import LazyImport + from neural_compressor.utils import logger -from .dataset import dataset_registry, IterableDataset, Dataset -tf = LazyImport('tensorflow') -mx = LazyImport('mxnet') -torch = LazyImport('torch') - -@dataset_registry(dataset_type="ImagenetRaw", framework="onnxrt_qlinearops, \ - onnxrt_integerops", dataset_format='') -class ImagenetRaw(Dataset): # pragma: no cover +from neural_compressor.utils.utility import LazyImport + +from .dataset import Dataset, IterableDataset, dataset_registry + +tf = LazyImport("tensorflow") +mx = LazyImport("mxnet") +torch = LazyImport("torch") + + +@dataset_registry( + dataset_type="ImagenetRaw", + framework="onnxrt_qlinearops, \ + onnxrt_integerops", + dataset_format="", +) +class ImagenetRaw(Dataset): # pragma: no cover """Configuration for ImageNet raw dataset. - Please arrange data in this way: - data_path/img1.jpg - data_path/img2.jpg - ... - data_path/imgx.jpg - dataset will read name and label of each image from image_list file, + Please arrange data in this way: + data_path/img1.jpg + data_path/img2.jpg + ... + data_path/imgx.jpg + dataset will read name and label of each image from image_list file, if user set image_list to None, it will read from data_path/val_map.txt automatically. 
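ImagenetRaw resolves each "<image name> <label>" pair from the list file relative to data_path. A minimal sketch with placeholder paths, passing an explicit list file instead of relying on the default lookup:

    from neural_compressor.data.datasets.imagenet_dataset import ImagenetRaw

    # each line of the list file: "<image name> <label>", whitespace separated
    dataset = ImagenetRaw(data_path="/data/imagenet/val", image_list="/data/imagenet/val_map.txt")
    image, label = dataset[0]  # image is an RGB numpy array, label an integer class id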
""" @@ -74,7 +83,7 @@ def __init__(self, data_path, image_list, transform=None, filter=None): # by default look for val.txt image_list = os.path.join(data_path, "val.txt") - with open(image_list, 'r') as f: + with open(image_list, "r") as f: for s in f: image_name, label = re.split(r"\s+", s.strip()) src = os.path.join(data_path, image_name) @@ -94,7 +103,7 @@ def __getitem__(self, index): """Return the item of dataset according to the given index.""" image_path, label = self.image_list[index], self.label_list[index] with Image.open(image_path) as image: - image = np.array(image.convert('RGB')) + image = np.array(image.convert("RGB")) if self.transform is not None: image, label = self.transform((image, label)) return (image, label) @@ -103,22 +112,24 @@ def __len__(self): """Return the length of dataset.""" return len(self.image_list) -@dataset_registry(dataset_type="ImagenetRaw", framework="pytorch", dataset_format='') -class PytorchImagenetRaw(ImagenetRaw): # pragma: no cover + +@dataset_registry(dataset_type="ImagenetRaw", framework="pytorch", dataset_format="") +class PytorchImagenetRaw(ImagenetRaw): # pragma: no cover """Dataset for ImageNet data generation on pytorch backend.""" def __getitem__(self, index): """Return the item of dataset according to the given index.""" image_path, label = self.image_list[index], self.label_list[index] with Image.open(image_path) as image: - image = image.convert('RGB') + image = image.convert("RGB") if self.transform is not None: image, label = self.transform((image, label)) image = np.array(image) return (image, label) -@dataset_registry(dataset_type="ImagenetRaw", framework="mxnet", dataset_format='') -class MXNetImagenetRaw(ImagenetRaw): # pragma: no cover + +@dataset_registry(dataset_type="ImagenetRaw", framework="mxnet", dataset_format="") +class MXNetImagenetRaw(ImagenetRaw): # pragma: no cover """Dataset for ImageNet data generation on mxnet backend.""" def __getitem__(self, index): @@ -129,48 +140,53 @@ def __getitem__(self, index): image, label = self.transform((image, label)) return (image, label) -@dataset_registry(dataset_type="ImagenetRaw", framework="tensorflow, \ - tensorflow_itex", dataset_format='') -class TensorflowImagenetRaw(ImagenetRaw): # pragma: no cover + +@dataset_registry( + dataset_type="ImagenetRaw", + framework="tensorflow, \ + tensorflow_itex", + dataset_format="", +) +class TensorflowImagenetRaw(ImagenetRaw): # pragma: no cover """Dataset for ImageNet data generation on tensorflow/inteltensorflow/tensorflow_itex backend.""" def __getitem__(self, index): """Return the item of dataset according to the given index.""" image_path, label = self.image_list[index], self.label_list[index] with Image.open(image_path) as image: - image = np.array(image.convert('RGB')) + image = np.array(image.convert("RGB")) if self.transform is not None: image, label = self.transform((image, label)) - if type(image).__name__ == 'Tensor': + if type(image).__name__ == "Tensor": with tf.compat.v1.Session() as sess: image = sess.run(image) - elif type(image).__name__ == 'EagerTensor': + elif type(image).__name__ == "EagerTensor": image = image.numpy() return (image, label) -@dataset_registry(dataset_type="Imagenet", framework="tensorflow", dataset_format='') -class TensorflowImagenetDataset(IterableDataset): # pragma: no cover + +@dataset_registry(dataset_type="Imagenet", framework="tensorflow", dataset_format="") +class TensorflowImagenetDataset(IterableDataset): # pragma: no cover """Configuration for Imagenet dataset.""" - def __new__(cls, root, 
subset='validation', num_cores=28, transform=None, filter=None): + def __new__(cls, root, subset="validation", num_cores=28, transform=None, filter=None): """New a imagenet dataset for tensorflow.""" - assert subset in ('validation', 'train'), \ - 'only support subset (validation, train)' - logger.warning("This api is going to be deprecated, " - "please use ImageRecord instead.") + assert subset in ("validation", "train"), "only support subset (validation, train)" + logger.warning("This api is going to be deprecated, " "please use ImageRecord instead.") from tensorflow.python.platform import gfile - glob_pattern = os.path.join(root, '%s-*-of-*' % subset) + + glob_pattern = os.path.join(root, "%s-*-of-*" % subset) file_names = gfile.Glob(glob_pattern) if not file_names: - raise ValueError('Found no files in --root matching: {}'.format(glob_pattern)) + raise ValueError("Found no files in --root matching: {}".format(glob_pattern)) from tensorflow.python.data.experimental import parallel_interleave + from neural_compressor.data.transforms.imagenet_transform import ParseDecodeImagenet + ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False) - ds = ds.apply( - parallel_interleave( - tf.data.TFRecordDataset, cycle_length=num_cores)) + ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=num_cores)) if transform is not None: transform.transform_list.insert(0, ParseDecodeImagenet()) @@ -181,26 +197,32 @@ def __new__(cls, root, subset='validation', num_cores=28, transform=None, filter ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned return ds -@dataset_registry(dataset_type="Imagenet", framework="onnxrt_qlinearops, \ - onnxrt_integerops", dataset_format='') -class ONNXRTImagenetDataset(Dataset): # pragma: no cover + +@dataset_registry( + dataset_type="Imagenet", + framework="onnxrt_qlinearops, \ + onnxrt_integerops", + dataset_format="", +) +class ONNXRTImagenetDataset(Dataset): # pragma: no cover """Configuration for Imagenet dataset.""" - def __init__(self, root, subset='val', num_cores=28, transform=None, filter=None): + def __init__(self, root, subset="val", num_cores=28, transform=None, filter=None): """Initialize `ONNXRTImagenetDataset` class.""" self.val_dir = os.path.join(root, subset) - assert os.path.exists(self.val_dir), "find no val dir in {}".format(root) + \ - "please make sure there are train/val subfolders" + assert os.path.exists(self.val_dir), ( + "find no val dir in {}".format(root) + "please make sure there are train/val subfolders" + ) import glob - logger.warning("This api is going to be deprecated, " - "please use ImageRecord instead.") + + logger.warning("This api is going to be deprecated, " "please use ImageRecord instead.") self.transform = transform self.image_list = [] - files = glob.glob(os.path.join(self.val_dir, '*')) + files = glob.glob(os.path.join(self.val_dir, "*")) files.sort() for idx, file in enumerate(files): - imgs = glob.glob(os.path.join(file, '*')) + imgs = glob.glob(os.path.join(file, "*")) for img in imgs: self.image_list.append((img, idx)) @@ -211,9 +233,9 @@ def __len__(self): def __getitem__(self, index): """Return the item of dataset according to the given index.""" from PIL import Image + sample = self.image_list[index] image = Image.open(sample[0]) if self.transform is not None: image, label = self.transform((image, sample[1])) return (image, label) - diff --git a/neural_compressor/data/datasets/style_transfer_dataset.py b/neural_compressor/data/datasets/style_transfer_dataset.py 
index 8f6f6ff332f..7afdb0542b5 100644 --- a/neural_compressor/data/datasets/style_transfer_dataset.py +++ b/neural_compressor/data/datasets/style_transfer_dataset.py @@ -17,23 +17,37 @@ # ============================================================================== """Dataset used for style transfer task on multiple framework backends.""" +import glob import os + import numpy as np -import glob -from .dataset import dataset_registry, Dataset +from .dataset import Dataset, dataset_registry -@dataset_registry(dataset_type="style_transfer", framework="tensorflow, \ - tensorflow_itex", dataset_format='') -class StyleTransferDataset(Dataset): # pragma: no cover + +@dataset_registry( + dataset_type="style_transfer", + framework="tensorflow, \ + tensorflow_itex", + dataset_format="", +) +class StyleTransferDataset(Dataset): # pragma: no cover """Dataset used for style transfer task on tensorflow/inteltensorflow/tensorflow_itex backend. This Dataset is to construct a dataset from two specific image holders representing content image folder and style image folder. """ - def __init__(self, content_folder, style_folder, crop_ratio=0.1, - resize_shape=(256, 256), image_format='jpg', transform=None, filter=None): + def __init__( + self, + content_folder, + style_folder, + crop_ratio=0.1, + resize_shape=(256, 256), + image_format="jpg", + transform=None, + filter=None, + ): """Initialize `StyleTransferDataset` class. Args: @@ -50,8 +64,8 @@ def __init__(self, content_folder, style_folder, crop_ratio=0.1, self.style_folder = style_folder self.resize_shape = resize_shape self.crop_ratio = crop_ratio - self.content_images = glob.glob(os.path.join(content_folder, '*' + image_format)) - self.style_images = glob.glob(os.path.join(style_folder, '*' + image_format)) + self.content_images = glob.glob(os.path.join(content_folder, "*" + image_format)) + self.style_images = glob.glob(os.path.join(style_folder, "*" + image_format)) self.image_list = [] for content in self.content_images: for style in self.style_images: @@ -64,21 +78,18 @@ def __len__(self): def __getitem__(self, index): """Return the item of dataset according to the given index.""" from PIL import Image + content_image, style_image = self.image_list[index] content_image = Image.open(content_image) style_image = Image.open(style_image) width, height = style_image.size crop_ratio = self.crop_ratio - crop_box = ( - crop_ratio * height, - crop_ratio * width, - (1 - crop_ratio) * height, - (1 - crop_ratio) * width) + crop_box = (crop_ratio * height, crop_ratio * width, (1 - crop_ratio) * height, (1 - crop_ratio) * width) content_image = np.asarray(content_image.resize(self.resize_shape)) style_image = np.asarray(style_image.resize(self.resize_shape)) if content_image.max() > 1.0: - content_image = content_image / 255. + content_image = content_image / 255.0 if style_image.max() > 1.0: - style_image = style_image / 255. + style_image = style_image / 255.0 return (content_image, style_image), 0 diff --git a/neural_compressor/data/filters/__init__.py b/neural_compressor/data/filters/__init__.py index 9acc0976d3b..d686dd0a47c 100644 --- a/neural_compressor/data/filters/__init__.py +++ b/neural_compressor/data/filters/__init__.py @@ -14,7 +14,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- """Built-in filter.""" from .coco_filter import LabelBalanceCOCORecordFilter @@ -25,7 +24,7 @@ modules = glob.glob(join(dirname(__file__), "*.py")) for f in modules: - if isfile(f) and not f.startswith('__') and not f.endswith('__init__.py'): + if isfile(f) and not f.startswith("__") and not f.endswith("__init__.py"): __import__(basename(f)[:-3], globals(), locals(), level=1) diff --git a/neural_compressor/data/filters/coco_filter.py b/neural_compressor/data/filters/coco_filter.py index 3f9431185ab..c1455f07dbf 100644 --- a/neural_compressor/data/filters/coco_filter.py +++ b/neural_compressor/data/filters/coco_filter.py @@ -14,16 +14,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Built-in COCO filter.""" from neural_compressor.utils.utility import LazyImport + from .filter import Filter, filter_registry -tf = LazyImport('tensorflow') + +tf = LazyImport("tensorflow") @filter_registry(filter_type="LabelBalanceCOCORecord", framework="tensorflow, tensorflow_itex") -class LabelBalanceCOCORecordFilter(Filter): # pragma: no cover +class LabelBalanceCOCORecordFilter(Filter): # pragma: no cover """The label balance filter for COCO Record.""" def __init__(self, size=1): @@ -40,9 +41,12 @@ def __call__(self, image, label): return tf.math.equal(len(label[0]), self.size) -@filter_registry(filter_type="LabelBalanceCOCORaw", framework="tensorflow, \ - tensorflow_itex, pytorch, mxnet, onnxrt_qlinearops, onnxrt_integerops") -class LabelBalanceCOCORawFilter(Filter): # pragma: no cover +@filter_registry( + filter_type="LabelBalanceCOCORaw", + framework="tensorflow, \ + tensorflow_itex, pytorch, mxnet, onnxrt_qlinearops, onnxrt_integerops", +) +class LabelBalanceCOCORawFilter(Filter): # pragma: no cover """The label balance filter for COCO raw data.""" def __init__(self, size=1): @@ -57,4 +61,3 @@ def __call__(self, image, label): label: label of a sample. """ return len(label) == self.size - diff --git a/neural_compressor/data/filters/filter.py b/neural_compressor/data/filters/filter.py index a904de31123..5206282f833 100644 --- a/neural_compressor/data/filters/filter.py +++ b/neural_compressor/data/filters/filter.py @@ -14,15 +14,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- """The base filter class for all frameworks.""" from abc import abstractmethod + from neural_compressor.utils.utility import singleton @singleton -class TensorflowFilters(object): # pragma: no cover +class TensorflowFilters(object): # pragma: no cover """The base filter class for Tensorflow framework.""" def __init__(self): @@ -32,7 +32,7 @@ def __init__(self): @singleton -class ONNXRTQLFilters(object): # pragma: no cover +class ONNXRTQLFilters(object): # pragma: no cover """The base filter class for ONNXRT framework QLinear mode.""" def __init__(self): @@ -42,7 +42,7 @@ def __init__(self): @singleton -class ONNXRTITFilters(object): # pragma: no cover +class ONNXRTITFilters(object): # pragma: no cover """The base filter class for ONNXRT framework IT mode.""" def __init__(self): @@ -52,7 +52,7 @@ def __init__(self): @singleton -class PyTorchFilters(object): # pragma: no cover +class PyTorchFilters(object): # pragma: no cover """The base filter class for PyTorch framework.""" def __init__(self): @@ -62,7 +62,7 @@ def __init__(self): @singleton -class MXNetFilters(object): # pragma: no cover +class MXNetFilters(object): # pragma: no cover """The base filter class for MXNet framework.""" def __init__(self): @@ -78,31 +78,34 @@ def __init__(self): PYTORCH_FILTERS = {} MXNET_FILTERS = {} -framework_filters = {"tensorflow": TensorflowFilters, - "tensorflow_itex": TensorflowFilters, - "pytorch": PyTorchFilters, - "pytorch_ipex": PyTorchFilters, - "pytorch_fx": PyTorchFilters, - "mxnet": MXNetFilters, - "onnxrt_qlinearops": ONNXRTQLFilters, - "onnxrt_qdq": ONNXRTQLFilters, - "onnxruntime": ONNXRTQLFilters, - "onnxrt_integerops": ONNXRTITFilters, - } - -registry_filters = {"tensorflow": TENSORFLOW_FILTERS, - "tensorflow_itex": TENSORFLOW_ITEX_FILTERS, - "pytorch": PYTORCH_FILTERS, - "pytorch_ipex": PYTORCH_FILTERS, - "pytorch_fx": PYTORCH_FILTERS, - "mxnet": MXNET_FILTERS, - "onnxrt_integerops": ONNXRT_IT_FILTERS, - "onnxrt_qdq": ONNXRT_QL_FILTERS, - "onnxruntime": ONNXRT_QL_FILTERS, - "onnxrt_qlinearops": ONNXRT_QL_FILTERS} - - -class FILTERS(object): # pragma: no cover +framework_filters = { + "tensorflow": TensorflowFilters, + "tensorflow_itex": TensorflowFilters, + "pytorch": PyTorchFilters, + "pytorch_ipex": PyTorchFilters, + "pytorch_fx": PyTorchFilters, + "mxnet": MXNetFilters, + "onnxrt_qlinearops": ONNXRTQLFilters, + "onnxrt_qdq": ONNXRTQLFilters, + "onnxruntime": ONNXRTQLFilters, + "onnxrt_integerops": ONNXRTITFilters, +} + +registry_filters = { + "tensorflow": TENSORFLOW_FILTERS, + "tensorflow_itex": TENSORFLOW_ITEX_FILTERS, + "pytorch": PYTORCH_FILTERS, + "pytorch_ipex": PYTORCH_FILTERS, + "pytorch_fx": PYTORCH_FILTERS, + "mxnet": MXNET_FILTERS, + "onnxrt_integerops": ONNXRT_IT_FILTERS, + "onnxrt_qdq": ONNXRT_QL_FILTERS, + "onnxruntime": ONNXRT_QL_FILTERS, + "onnxrt_qlinearops": ONNXRT_QL_FILTERS, +} + + +class FILTERS(object): # pragma: no cover """The filter register for all frameworks. 
Args: @@ -114,10 +117,19 @@ class FILTERS(object): # pragma: no cover def __init__(self, framework): """Initialize the attribute of class.""" - assert framework in ["tensorflow", "tensorflow_itex", "keras", - "mxnet", "onnxrt_qdq", "pytorch", "pytorch_ipex", "pytorch_fx", - "onnxrt_integerops", "onnxrt_qlinearops", "onnxruntime"], \ - "framework support tensorflow pytorch mxnet onnxrt" + assert framework in [ + "tensorflow", + "tensorflow_itex", + "keras", + "mxnet", + "onnxrt_qdq", + "pytorch", + "pytorch_ipex", + "pytorch_fx", + "onnxrt_integerops", + "onnxrt_qlinearops", + "onnxruntime", + ], "framework support tensorflow pytorch mxnet onnxrt" self.filters = framework_filters[framework]().filters self.framework = framework @@ -126,12 +138,11 @@ def __getitem__(self, filter_type): x[i] is roughly equivalent to type(x).__getitem__(x, index) """ - assert filter_type in self.filters.keys(), "filter support {}".\ - format(self.filters.keys()) + assert filter_type in self.filters.keys(), "filter support {}".format(self.filters.keys()) return self.filters[filter_type] -def filter_registry(filter_type, framework): # pragma: no cover +def filter_registry(filter_type, framework): # pragma: no cover """Register all transform subclasses. Args: @@ -142,11 +153,12 @@ def filter_registry(filter_type, framework): # pragma: no cover Returns: cls: The class of register. """ + def decorator_transform(cls): """Decorate a class.""" - for single_framework in [fwk.strip() for fwk in framework.split(',')]: + for single_framework in [fwk.strip() for fwk in framework.split(",")]: assert single_framework in [ - "tensorflow", + "tensorflow", "tensorflow_itex", "pytorch", "pytorch_ipex", @@ -155,20 +167,20 @@ def decorator_transform(cls): "onnxrt_integerops", "onnxrt_qdq", "onnxrt_qlinearops", - "onnxruntime" + "onnxruntime", ], "The framework support tensorflow mxnet pytorch onnxrt" if filter_type in registry_filters[single_framework].keys(): - raise ValueError('Cannot have two transforms with the same name') + raise ValueError("Cannot have two transforms with the same name") registry_filters[single_framework][filter_type] = cls return cls + return decorator_transform -class Filter(object): # pragma: no cover +class Filter(object): # pragma: no cover """The base class for transform. __call__ method is needed when write user specific transform. 
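filter_registry, reformatted in this hunk, still validates the framework list and rejects duplicate filter names before adding the class to the per-framework table. A minimal sketch of registering a user filter under these rules; the class name and predicate are illustrative, not part of the patch:

    from neural_compressor.data.filters.filter import Filter, filter_registry

    @filter_registry(filter_type="MyLabelFilter", framework="tensorflow")
    class MyLabelFilter(Filter):
        """Illustrative filter: keep only samples whose label matches a target."""

        def __init__(self, target=0):
            self.target = target

        def __call__(self, image, label):
            return label == self.target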
- """ @abstractmethod diff --git a/neural_compressor/data/transforms/__init__.py b/neural_compressor/data/transforms/__init__.py index baf1581fc6f..e4b80676a4d 100644 --- a/neural_compressor/data/transforms/__init__.py +++ b/neural_compressor/data/transforms/__init__.py @@ -17,8 +17,16 @@ # ============================================================================== """Neural Compressor Built-in transforms for multiple framework backends.""" -from .transform import TRANSFORMS, BaseTransform, ComposeTransform, transform_registry, \ -ResizeTFTransform, TensorflowResizeWithRatio, RescaleTFTransform, NormalizeTFTransform +from .transform import ( + TRANSFORMS, + BaseTransform, + ComposeTransform, + transform_registry, + ResizeTFTransform, + TensorflowResizeWithRatio, + RescaleTFTransform, + NormalizeTFTransform, +) from .transform import TFSquadV1PostTransform, TFSquadV1ModelZooPostTransform from .coco_transform import ParseDecodeCocoTransform from .postprocess import Postprocess @@ -30,12 +38,25 @@ modules = glob.glob(join(dirname(__file__), "*.py")) for f in modules: - if isfile(f) and not f.startswith('__') and not f.endswith('__init__.py'): + if isfile(f) and not f.startswith("__") and not f.endswith("__init__.py"): __import__(basename(f)[:-3], globals(), locals(), level=1) -__all__ = ["TRANSFORMS", "BaseTransform", "ComposeTransform", "transform_registry", "ResizeTFTransform", - "Postprocess", "LabelShift", "BilinearImagenetTransform", "TensorflowResizeCropImagenetTransform", - "RescaleTFTransform", "NormalizeTFTransform", "ParseDecodeCocoTransform", - "TensorflowResizeWithRatio", "TFSquadV1PostTransform", "TFSquadV1ModelZooPostTransform", - "TensorflowShiftRescale"] +__all__ = [ + "TRANSFORMS", + "BaseTransform", + "ComposeTransform", + "transform_registry", + "ResizeTFTransform", + "Postprocess", + "LabelShift", + "BilinearImagenetTransform", + "TensorflowResizeCropImagenetTransform", + "RescaleTFTransform", + "NormalizeTFTransform", + "ParseDecodeCocoTransform", + "TensorflowResizeWithRatio", + "TFSquadV1PostTransform", + "TFSquadV1ModelZooPostTransform", + "TensorflowShiftRescale", +] diff --git a/neural_compressor/data/transforms/coco_transform.py b/neural_compressor/data/transforms/coco_transform.py index b9524205cf6..fd2c2915a98 100644 --- a/neural_compressor/data/transforms/coco_transform.py +++ b/neural_compressor/data/transforms/coco_transform.py @@ -30,17 +30,19 @@ # limitations under the License. # ============================================================================== +from neural_compressor.data.transforms import BaseTransform, transform_registry from neural_compressor.utils import logger -from neural_compressor.data.transforms import transform_registry, BaseTransform + # BELOW IS TO BE DEPRECATED! -@transform_registry(transform_type="ParseDecodeCoco", \ - process="preprocess", framework="tensorflow") -class ParseDecodeCocoTransform(BaseTransform): # pragma: no cover - """Coco decoding will be performed automatically from Neural Compressor v1.4. 
- """ +@transform_registry(transform_type="ParseDecodeCoco", process="preprocess", framework="tensorflow") +class ParseDecodeCocoTransform(BaseTransform): # pragma: no cover + """Coco decoding will be performed automatically from Neural Compressor v1.4.""" + def __call__(self, sample): """Convert `ParseDecodeCocoTransform` feature.""" - logger.warning("This transform is going to be deprecated, " \ - "coco decoding will be performed automatically from Neural Compressor v1.4.") + logger.warning( + "This transform is going to be deprecated, " + "coco decoding will be performed automatically from Neural Compressor v1.4." + ) return sample diff --git a/neural_compressor/data/transforms/imagenet_transform.py b/neural_compressor/data/transforms/imagenet_transform.py index 287bcd677dc..c86e15d673b 100644 --- a/neural_compressor/data/transforms/imagenet_transform.py +++ b/neural_compressor/data/transforms/imagenet_transform.py @@ -32,15 +32,18 @@ """Neural Compressor built-in imagenet transforms.""" import numpy as np -from neural_compressor.utils.utility import LazyImport + from neural_compressor.utils import logger -from .transform import transform_registry, BaseTransform -tf = LazyImport('tensorflow') -cv2 = LazyImport('cv2') +from neural_compressor.utils.utility import LazyImport + +from .transform import BaseTransform, transform_registry + +tf = LazyImport("tensorflow") +cv2 = LazyImport("cv2") + -@transform_registry(transform_type="QuantizedInput", \ - process="preprocess", framework="tensorflow, tensorflow_itex") -class QuantizedInput(BaseTransform): # pragma: no cover +@transform_registry(transform_type="QuantizedInput", process="preprocess", framework="tensorflow, tensorflow_itex") +class QuantizedInput(BaseTransform): # pragma: no cover """Convert the dtype of input to quantize it. Args: @@ -53,9 +56,8 @@ class QuantizedInput(BaseTransform): # pragma: no cover def __init__(self, dtype, scale=None): """Initialize `QuantizedInput` class.""" - self.dtype_map = {'uint8': tf.uint8, 'int8': tf.int8} - assert dtype in self.dtype_map.keys(), \ - 'only support cast dtype {}'.format(self.dtype_map.keys()) + self.dtype_map = {"uint8": tf.uint8, "int8": tf.int8} + assert dtype in self.dtype_map.keys(), "only support cast dtype {}".format(self.dtype_map.keys()) self.dtype = dtype self.scale = scale @@ -63,19 +65,23 @@ def __call__(self, sample): """Convert the dtype of input.""" # scale is not know when tuning, in this case this transform # do nothing, it's only used when scale is set - if self.scale == None: + if self.scale is None: return sample image, label = sample image = image * self.scale - if self.dtype == 'uint8': + if self.dtype == "uint8": image = image + 128 image = tf.dtypes.cast(image, dtype=self.dtype_map[self.dtype]) return image, label -@transform_registry(transform_type="LabelShift", \ - process="postprocess", framework="pytorch, tensorflow, tensorflow_itex,\ - onnxrt_qlinearops, onnxrt_integerops") -class LabelShift(BaseTransform): # pragma: no cover + +@transform_registry( + transform_type="LabelShift", + process="postprocess", + framework="pytorch, tensorflow, tensorflow_itex,\ + onnxrt_qlinearops, onnxrt_integerops", +) +class LabelShift(BaseTransform): # pragma: no cover """Convert label to label - label_shift. Args: @@ -106,7 +112,8 @@ def __call__(self, sample): labels = np.array(labels) - self.label_shift return images, labels -class ParseDecodeImagenet(): # pragma: no cover + +class ParseDecodeImagenet: # pragma: no cover """Parse features in Example proto. 
Returns: @@ -117,27 +124,33 @@ def __call__(self, sample): """Parse features in example.""" # Dense features in Example proto. feature_map = { - 'image/encoded': tf.io.FixedLenFeature([], dtype=tf.string, default_value=''), - 'image/class/label': tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1)} + "image/encoded": tf.io.FixedLenFeature([], dtype=tf.string, default_value=""), + "image/class/label": tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1), + } sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) # Sparse features in Example proto. feature_map.update( - {k: sparse_float32 for k in ['image/object/bbox/xmin', - 'image/object/bbox/ymin', - 'image/object/bbox/xmax', - 'image/object/bbox/ymax']}) + { + k: sparse_float32 + for k in [ + "image/object/bbox/xmin", + "image/object/bbox/ymin", + "image/object/bbox/xmax", + "image/object/bbox/ymax", + ] + } + ) features = tf.io.parse_single_example(serialized=sample, features=feature_map) - label = tf.cast(features['image/class/label'], dtype=tf.int32) - image = features['image/encoded'] - image = tf.image.decode_jpeg( - image, channels=3, fancy_upscaling=False, dct_method='INTEGER_FAST') + label = tf.cast(features["image/class/label"], dtype=tf.int32) + image = features["image/encoded"] + image = tf.image.decode_jpeg(image, channels=3, fancy_upscaling=False, dct_method="INTEGER_FAST") return (image, label) -@transform_registry(transform_type="ParseDecodeImagenet", \ - process="preprocess", framework="tensorflow") -class ParseDecodeImagenetTransform(BaseTransform): # pragma: no cover + +@transform_registry(transform_type="ParseDecodeImagenet", process="preprocess", framework="tensorflow") +class ParseDecodeImagenetTransform(BaseTransform): # pragma: no cover """Imagenet decoding will be performed automatically from Neural Compressor v1.4. Returns: @@ -146,38 +159,44 @@ class ParseDecodeImagenetTransform(BaseTransform): # pragma: no cover def __call__(self, sample): """Convert `ParseDecodeImagenetTransform` feature.""" - logger.warning("This transform is going to be deprecated, " \ - "imagenet decoding will be performed automatically from Neural Compressor v1.4.") + logger.warning( + "This transform is going to be deprecated, " + "imagenet decoding will be performed automatically from Neural Compressor v1.4." + ) return sample + @transform_registry(transform_type="TransposeLastChannel", process="preprocess", framework="tensorflow") class TensorflowTransposeLastChannel(BaseTransform): - """Transpose NHWC to NCHW + """Transpose NHWC to NCHW. Returns: tuple of processed image and label """ + def __call__(self, sample): image, label = sample - image = tf.transpose(image, perm=[2,0,1]) + image = tf.transpose(image, perm=[2, 0, 1]) return (image, label) + @transform_registry(transform_type="ShiftRescale", process="postprocess", framework="tensorflow") class TensorflowShiftRescale(BaseTransform): - """label shift by 1 and rescale + """Label shift by 1 and rescale. 
- Returns: - tuple of processed image and label + Returns: + tuple of processed image and label """ + def __call__(self, sample): image, label = sample label -= 1 image = (image - 127.5) / 127.5 return (image, label) -@transform_registry(transform_type="ResizeCropImagenet", \ - process="preprocess", framework="tensorflow") -class TensorflowResizeCropImagenetTransform(BaseTransform): # pragma: no cover + +@transform_registry(transform_type="ResizeCropImagenet", process="preprocess", framework="tensorflow") +class TensorflowResizeCropImagenetTransform(BaseTransform): # pragma: no cover """Combination of a series of transforms which is applicable to images in Imagenet. Args: @@ -193,10 +212,19 @@ class TensorflowResizeCropImagenetTransform(BaseTransform): # pragma: no cove tuple of processed image and label """ - def __init__(self, height, width, random_crop=False, resize_side=256, \ - resize_method='bilinear', random_flip_left_right=False, \ - mean_value=[0.0,0.0,0.0], scale=1.0, \ - data_format='channels_last', subpixels='RGB'): + def __init__( + self, + height, + width, + random_crop=False, + resize_side=256, + resize_method="bilinear", + random_flip_left_right=False, + mean_value=[0.0, 0.0, 0.0], + scale=1.0, + data_format="channels_last", + subpixels="RGB", + ): """Initialize `TensorflowResizeCropImagenetTransform` class.""" self.height = height self.width = width @@ -214,37 +242,44 @@ def __call__(self, sample): """Convert `TensorflowResizeCropImagenetTransform` feature.""" image, label = sample shape = tf.shape(input=image) - - height = tf.cast(shape[0], dtype=tf.float32) \ - if self.data_format=="channels_last" else tf.cast(shape[1], dtype=tf.float32) - width = tf.cast(shape[1], dtype=tf.float32) \ - if self.data_format=="channels_last" else tf.cast(shape[2], dtype=tf.float32) - scale = tf.cond(pred=tf.greater(height, width), \ - true_fn=lambda: self.resize_side / width, - false_fn=lambda: self.resize_side / height,) + + height = ( + tf.cast(shape[0], dtype=tf.float32) + if self.data_format == "channels_last" + else tf.cast(shape[1], dtype=tf.float32) + ) + width = ( + tf.cast(shape[1], dtype=tf.float32) + if self.data_format == "channels_last" + else tf.cast(shape[2], dtype=tf.float32) + ) + scale = tf.cond( + pred=tf.greater(height, width), + true_fn=lambda: self.resize_side / width, + false_fn=lambda: self.resize_side / height, + ) scale = tf.cast(scale, dtype=tf.float32) - new_height = tf.cast(tf.math.rint(height*scale), dtype=tf.int32) - new_width = tf.cast(tf.math.rint(width*scale), dtype=tf.int32) + new_height = tf.cast(tf.math.rint(height * scale), dtype=tf.int32) + new_width = tf.cast(tf.math.rint(width * scale), dtype=tf.int32) - if self.subpixels=='BGR' and self.data_format=='channels_first': + if self.subpixels == "BGR" and self.data_format == "channels_first": # 'RGB'->'BGR' - image = tf.cond(tf.equal(tf.rank(image), 3), - lambda: tf.experimental.numpy.moveaxis(image[::-1, ...], 0, -1), - lambda: tf.experimental.numpy.moveaxis(image[:, ::-1, ...], 1, -1)) - elif self.subpixels=='BGR': + image = tf.cond( + tf.equal(tf.rank(image), 3), + lambda: tf.experimental.numpy.moveaxis(image[::-1, ...], 0, -1), + lambda: tf.experimental.numpy.moveaxis(image[:, ::-1, ...], 1, -1), + ) + elif self.subpixels == "BGR": # 'RGB'->'BGR' image = image[..., ::-1] image = tf.expand_dims(image, 0) - image = tf.image.resize(image, [new_height, new_width], - method=self.resize_method) - image = tf.squeeze(image) + image = tf.image.resize(image, [new_height, new_width], method=self.resize_method) 
+ image = tf.squeeze(image) shape = tf.shape(input=image) if self.random_crop: - y0 = tf.random.uniform(shape=[], minval=0, maxval=(shape[0] - self.height +1), - dtype=tf.dtypes.int32) - x0 = tf.random.uniform(shape=[], minval=0, maxval=(shape[1] - self.width +1), - dtype=tf.dtypes.int32) + y0 = tf.random.uniform(shape=[], minval=0, maxval=(shape[0] - self.height + 1), dtype=tf.dtypes.int32) + x0 = tf.random.uniform(shape=[], minval=0, maxval=(shape[1] - self.width + 1), dtype=tf.dtypes.int32) else: y0 = (shape[0] - self.height) // 2 x0 = (shape[1] - self.width) // 2 @@ -257,14 +292,14 @@ def __call__(self, sample): image = (image - means) * self.scale return (image, label) -@transform_registry(transform_type="BilinearImagenet", \ - process="preprocess", framework="tensorflow") -class BilinearImagenetTransform(BaseTransform): # pragma: no cover + +@transform_registry(transform_type="BilinearImagenet", process="preprocess", framework="tensorflow") +class BilinearImagenetTransform(BaseTransform): # pragma: no cover """Combination of a series of transforms which is applicable to images in Imagenet. Args: height: Height of the result - width:Width of the result + width:Width of the result central_fraction(float, default=0.875):fraction of size to crop mean_value(list, default=[0.0,0.0,0.0]):means for each channel scale(float, default=1.0):std value @@ -273,8 +308,7 @@ class BilinearImagenetTransform(BaseTransform): # pragma: no cover tuple of processed image and label """ - def __init__(self, height, width, central_fraction=0.875, - mean_value=[0.0,0.0,0.0], scale=1.0): + def __init__(self, height, width, central_fraction=0.875, mean_value=[0.0, 0.0, 0.0], scale=1.0): """Initialize `BilinearImagenetTransform` class.""" self.height = height self.width = width @@ -295,8 +329,7 @@ def __call__(self, sample): if self.height and self.width: # Resize the image to the specified height and width. image = tf.expand_dims(image, 0) - image = tf.image.resize(image, [self.height, self.width], \ - method=tf.image.ResizeMethod.BILINEAR) + image = tf.image.resize(image, [self.height, self.width], method=tf.image.ResizeMethod.BILINEAR) image = tf.squeeze(image, [0]) image = tf.subtract(image, 0.5) @@ -305,14 +338,16 @@ def __call__(self, sample): image = (image - means) * self.scale return (image, label) -@transform_registry(transform_type="BilinearImagenet", process="preprocess", \ - framework="onnxrt_qlinearops, onnxrt_integerops") -class OnnxBilinearImagenetTransform(BaseTransform): # pragma: no cover + +@transform_registry( + transform_type="BilinearImagenet", process="preprocess", framework="onnxrt_qlinearops, onnxrt_integerops" +) +class OnnxBilinearImagenetTransform(BaseTransform): # pragma: no cover """Combination of a series of transforms which is applicable to images in Imagenet. 
Args: height: Height of the result - width:Width of the result + width:Width of the result central_fraction(float, default=0.875):fraction of size to crop mean_value(list, default=[0.0,0.0,0.0]):means for each channel scale(float, default=1.0):std value @@ -321,8 +356,7 @@ class OnnxBilinearImagenetTransform(BaseTransform): # pragma: no cover tuple of processed image and label """ - def __init__(self, height, width, central_fraction=0.875, - mean_value=[0.0,0.0,0.0], scale=1.0): + def __init__(self, height, width, central_fraction=0.875, mean_value=[0.0, 0.0, 0.0], scale=1.0): """Initialize `OnnxBilinearImagenetTransform` class.""" self.height = height self.width = width @@ -334,7 +368,7 @@ def __call__(self, sample): """Convert `OnnxBilinearImagenetTransform` feature.""" image, label = sample if isinstance(image, np.ndarray): - image = image.astype('float32') / 255. + image = image.astype("float32") / 255.0 img_shape = image.shape depth = img_shape[2] img_hd = float(img_shape[0]) @@ -345,11 +379,11 @@ def __call__(self, sample): bbox_h_size = img_shape[0] - bbox_h_start * 2 bbox_w_size = img_shape[1] - bbox_w_start * 2 - image = image[bbox_h_start:bbox_h_start+bbox_h_size, bbox_w_start:bbox_w_start+bbox_w_size] + image = image[bbox_h_start : bbox_h_start + bbox_h_size, bbox_w_start : bbox_w_start + bbox_w_size] if self.height and self.width: image = cv2.resize(image, (self.width, self.height), interpolation=cv2.INTER_LINEAR) - + image = np.subtract(image, 0.5) image = np.multiply(image, 2.0) means = np.broadcast_to(self.mean_value, image.shape) @@ -357,14 +391,16 @@ def __call__(self, sample): image = image.astype(np.float32) return (image, label) -@transform_registry(transform_type="ResizeCropImagenet", process="preprocess", \ - framework="onnxrt_qlinearops, onnxrt_integerops") -class ONNXResizeCropImagenetTransform(BaseTransform): # pragma: no cover + +@transform_registry( + transform_type="ResizeCropImagenet", process="preprocess", framework="onnxrt_qlinearops, onnxrt_integerops" +) +class ONNXResizeCropImagenetTransform(BaseTransform): # pragma: no cover """Combination of a series of transforms which is applicable to images in Imagenet. Args: height: Height of the result - width:Width of the result + width:Width of the result central_fraction(float, default=0.875):fraction of size to crop mean_value(list, default=[0.0,0.0,0.0]):means for each channel scale(float, default=1.0):std value @@ -373,9 +409,18 @@ class ONNXResizeCropImagenetTransform(BaseTransform): # pragma: no cover tuple of processed image and label """ - def __init__(self, height, width, random_crop=False, resize_side=256, \ - mean_value=[0.0,0.0,0.0], std_value=[0.229, 0.224, 0.225], \ - resize_method='bilinear', data_format='channels_last', subpixels='RGB'): + def __init__( + self, + height, + width, + random_crop=False, + resize_side=256, + mean_value=[0.0, 0.0, 0.0], + std_value=[0.229, 0.224, 0.225], + resize_method="bilinear", + data_format="channels_last", + subpixels="RGB", + ): """Initialize `ONNXResizeCropImagenetTransform` class.""" self.height = height self.width = width @@ -394,14 +439,14 @@ def __call__(self, sample): image, label = sample height, width = image.shape[0], image.shape[1] scale = self.resize_side / width if height > width else self.resize_side / height - new_height = int(height*scale) - new_width = int(width*scale) + new_height = int(height * scale) + new_width = int(width * scale) image = cv2.resize(image, (new_height, new_width)) - image = image / 255. 
+ image = image / 255.0 shape = image.shape if self.random_crop: - y0 = np.random.randint(low=0, high=(shape[0] - self.height +1)) - x0 = np.random.randint(low=0, high=(shape[1] - self.width +1)) + y0 = np.random.randint(low=0, high=(shape[0] - self.height + 1)) + x0 = np.random.randint(low=0, high=(shape[1] - self.width + 1)) else: y0 = (shape[0] - self.height) // 2 x0 = (shape[1] - self.width) // 2 @@ -409,13 +454,15 @@ def __call__(self, sample): image = np.array([image]) image = np.repeat(image, 3, axis=0) image = image.transpose(1, 2, 0) - image = image[y0:y0+self.height, x0:x0+self.width, :] - image = ((image - self.mean_value)/self.std_value).astype(np.float32) + image = image[y0 : y0 + self.height, x0 : x0 + self.width, :] + image = ((image - self.mean_value) / self.std_value).astype(np.float32) return (image.transpose(2, 0, 1), label) -@transform_registry(transform_type="ResizeWithAspectRatio", process="preprocess", \ - framework="onnxrt_qlinearops, onnxrt_integerops") -class ResizeWithAspectRatio(BaseTransform): # pragma: no cover + +@transform_registry( + transform_type="ResizeWithAspectRatio", process="preprocess", framework="onnxrt_qlinearops, onnxrt_integerops" +) +class ResizeWithAspectRatio(BaseTransform): # pragma: no cover """Resize the image with aspect ratio. Returns: @@ -434,8 +481,8 @@ def __call__(self, sample): (img, label) = sample assert len(img.shape) == 3 height, width, _ = img.shape - new_height = int(100. * self.height / self.scale) - new_width = int(100. * self.width / self.scale) + new_height = int(100.0 * self.height / self.scale) + new_width = int(100.0 * self.width / self.scale) if height > width: w = new_width h = int(new_height * height / width) diff --git a/neural_compressor/data/transforms/postprocess.py b/neural_compressor/data/transforms/postprocess.py index 605417a73ab..3521df4fb29 100644 --- a/neural_compressor/data/transforms/postprocess.py +++ b/neural_compressor/data/transforms/postprocess.py @@ -16,11 +16,12 @@ # limitations under the License. 
"""Common Postprocess.""" + class Postprocess(object): -# class Transform(object): + # class Transform(object): """Just collect the infos to construct a Postprocess.""" - def __init__(self, postprocess_cls, name='user_postprocess', **kwargs): + def __init__(self, postprocess_cls, name="user_postprocess", **kwargs): """Initialize `Postprocess` class.""" self.postprocess_cls = postprocess_cls self.name = name diff --git a/neural_compressor/data/transforms/tokenization.py b/neural_compressor/data/transforms/tokenization.py index b16160800bc..53814ad5bdc 100644 --- a/neural_compressor/data/transforms/tokenization.py +++ b/neural_compressor/data/transforms/tokenization.py @@ -32,17 +32,20 @@ # ============================================================================== """Tokenization helper classes.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from neural_compressor.utils.utility import LazyImport +from __future__ import absolute_import, division, print_function + import collections import re import unicodedata + import six -tf = LazyImport('tensorflow') -def convert_to_unicode(text): # pragma: no cover +from neural_compressor.utils.utility import LazyImport + +tf = LazyImport("tensorflow") + + +def convert_to_unicode(text): # pragma: no cover """Convert `text` to Unicode (if it's not already), assuming utf-8 input.""" if six.PY3: if isinstance(text, str): @@ -54,13 +57,14 @@ def convert_to_unicode(text): # pragma: no cover elif six.PY2: if isinstance(text, str): return text.decode("utf-8", "ignore") - elif isinstance(text, unicode): # pylint: disable=undefined-variable # noqa: F821 + elif isinstance(text, unicode): # pylint: disable=undefined-variable # noqa: F821 return text else: raise ValueError("Unsupported string type: %s" % (type(text))) else: raise ValueError("Not running on Python2 or Python 3?") + def load_vocab(vocab_file): """Load a vocabulary file into a dictionary.""" vocab = collections.OrderedDict() @@ -75,6 +79,7 @@ def load_vocab(vocab_file): index += 1 return vocab + def convert_by_vocab(vocab, items): """Convert a sequence of [tokens|ids] using the vocab.""" output = [] @@ -82,6 +87,7 @@ def convert_by_vocab(vocab, items): output.append(vocab[item]) return output + def whitespace_tokenize(text): """Run basic whitespace cleaning and splitting on a piece of text.""" text = text.strip() @@ -195,7 +201,7 @@ def _tokenize_chinese_chars(self, text): output = [] for char in text: cp = ord(char) - if self._is_chinese_char(cp): # pragma: no cover + if self._is_chinese_char(cp): # pragma: no cover output.append(" ") output.append(char) output.append(" ") @@ -213,14 +219,16 @@ def _is_chinese_char(self, cp): # as is Japanese Hiragana and Katakana. Those alphabets are used to write # space-separated words, so they are not treated specially and handled # like the all of the other languages. 
- if ((cp >= 0x4E00 and cp <= 0x9FFF) or # - (cp >= 0x3400 and cp <= 0x4DBF) or # - (cp >= 0x20000 and cp <= 0x2A6DF) or # - (cp >= 0x2A700 and cp <= 0x2B73F) or # - (cp >= 0x2B740 and cp <= 0x2B81F) or # - (cp >= 0x2B820 and cp <= 0x2CEAF) or - (cp >= 0xF900 and cp <= 0xFAFF) or # - (cp >= 0x2F800 and cp <= 0x2FA1F)): # + if ( + (cp >= 0x4E00 and cp <= 0x9FFF) + or (cp >= 0x3400 and cp <= 0x4DBF) # + or (cp >= 0x20000 and cp <= 0x2A6DF) # + or (cp >= 0x2A700 and cp <= 0x2B73F) # + or (cp >= 0x2B740 and cp <= 0x2B81F) # + or (cp >= 0x2B820 and cp <= 0x2CEAF) # + or (cp >= 0xF900 and cp <= 0xFAFF) + or (cp >= 0x2F800 and cp <= 0x2FA1F) # + ): # return True return False @@ -230,7 +238,7 @@ def _clean_text(self, text): output = [] for char in text: cp = ord(char) - if cp == 0 or cp == 0xfffd or _is_control(char): + if cp == 0 or cp == 0xFFFD or _is_control(char): continue if _is_whitespace(char): output.append(" ") @@ -274,7 +282,7 @@ def tokenize(self, text): output_tokens = [] for token in whitespace_tokenize(text): chars = list(token) - if len(chars) > self.max_input_chars_per_word: # pragma: no cover + if len(chars) > self.max_input_chars_per_word: # pragma: no cover output_tokens.append(self.unk_token) continue @@ -304,6 +312,7 @@ def tokenize(self, text): output_tokens.extend(sub_tokens) return output_tokens + def _is_whitespace(char): """Check whether `chars` is a whitespace character.""" # \t, \n, and \r are technically contorl characters but we treat them @@ -311,11 +320,12 @@ def _is_whitespace(char): if char == " " or char == "\t" or char == "\n" or char == "\r": return True cat = unicodedata.category(char) - if cat == "Zs": # pragma: no cover + if cat == "Zs": # pragma: no cover return True return False -def _is_control(char): # pragma: no cover + +def _is_control(char): # pragma: no cover """Check whether `chars` is a control character.""" # These are technically control characters but we count them as whitespace # characters. @@ -326,15 +336,15 @@ def _is_control(char): # pragma: no cover return True return False -def _is_punctuation(char): # pragma: no cover + +def _is_punctuation(char): # pragma: no cover """Check whether `chars` is a punctuation character.""" cp = ord(char) # We treat all non-letter/number ASCII as punctuation. # Characters such as "^", "$", and "`" are not in the Unicode # Punctuation class but we treat them as punctuation anyways, for # consistency. 
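For readers following the WordpieceTokenizer changes above: tokenize() applies greedy longest-match-first matching inside each whitespace-delimited token. The snippet below is a standalone toy reproduction of that matching loop, not the class itself, and the three-piece vocabulary is made up purely for illustration:

def toy_wordpiece(token, vocab, unk="[UNK]"):
    """Greedy longest-match-first split of one token, mirroring WordpieceTokenizer.tokenize."""
    pieces, start = [], 0
    while start < len(token):
        end, cur = len(token), None
        while start < end:  # shrink the candidate substring from the right
            piece = token[start:end]
            if start > 0:
                piece = "##" + piece  # continuation pieces carry the ## prefix
            if piece in vocab:
                cur = piece
                break
            end -= 1
        if cur is None:  # nothing matched, so the whole token becomes the unknown token
            return [unk]
        pieces.append(cur)
        start = end
    return pieces


print(toy_wordpiece("unaffable", {"un", "##aff", "##able"}))  # ['un', '##aff', '##able']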
- if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or - (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)): + if (cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126): return True cat = unicodedata.category(char) if cat.startswith("P"): diff --git a/neural_compressor/data/transforms/transform.py b/neural_compressor/data/transforms/transform.py index e62a35d2eb9..3827babb603 100644 --- a/neural_compressor/data/transforms/transform.py +++ b/neural_compressor/data/transforms/transform.py @@ -17,17 +17,20 @@ # ============================================================================== """Neural Compressor built-in Transforms on multiple framework backends.""" -import numpy as np import collections from abc import abstractmethod -from neural_compressor.utils.utility import LazyImport, singleton + +import numpy as np + from neural_compressor.utils import logger +from neural_compressor.utils.utility import LazyImport, singleton + +torchvision = LazyImport("torchvision") +torch = LazyImport("torch") +tf = LazyImport("tensorflow") +mx = LazyImport("mxnet") +cv2 = LazyImport("cv2") -torchvision = LazyImport('torchvision') -torch = LazyImport('torch') -tf = LazyImport('tensorflow') -mx = LazyImport('mxnet') -cv2 = LazyImport('cv2') class Transforms(object): """INC supports built-in preprocessing, postprocessing and general methods on different framework backends. @@ -44,12 +47,14 @@ def __init__(self, process, concat_general=True): concat_general (Boolean): users can use general transform in both preprocess or postprocess if set True """ - transform_map = {"preprocess": self._get_preprocess, - "postprocess": self._get_postprocess, - "general": self._get_general, } + transform_map = { + "preprocess": self._get_preprocess, + "postprocess": self._get_postprocess, + "general": self._get_general, + } self.transforms = transform_map[process]() if concat_general: - self.transforms.update(transform_map['general']()) + self.transforms.update(transform_map["general"]()) @abstractmethod def _get_preprocess(self): @@ -115,14 +120,10 @@ def _get_preprocess(self): preprocess: a dict including all the registered preprocess methods """ preprocess = { - 'ToTensor': PytorchMxnetWrapFunction( - mx.gluon.data.vision.transforms.ToTensor), - 'CenterCrop': PytorchMxnetWrapFunction( - mx.gluon.data.vision.transforms.CenterCrop), - 'RandomHorizontalFlip': PytorchMxnetWrapFunction( - mx.gluon.data.vision.transforms.RandomFlipLeftRight), - 'RandomVerticalFlip': PytorchMxnetWrapFunction( - mx.gluon.data.vision.transforms.RandomFlipTopBottom), + "ToTensor": PytorchMxnetWrapFunction(mx.gluon.data.vision.transforms.ToTensor), + "CenterCrop": PytorchMxnetWrapFunction(mx.gluon.data.vision.transforms.CenterCrop), + "RandomHorizontalFlip": PytorchMxnetWrapFunction(mx.gluon.data.vision.transforms.RandomFlipLeftRight), + "RandomVerticalFlip": PytorchMxnetWrapFunction(mx.gluon.data.vision.transforms.RandomFlipTopBottom), } preprocess.update(MXNET_TRANSFORMS["preprocess"]) return preprocess @@ -144,9 +145,8 @@ def _get_general(self): general: a dict including all the registered general methods """ general = { - 'Compose': mx.gluon.data.vision.transforms.Compose, - 'Cast': PytorchMxnetWrapFunction( - mx.gluon.data.vision.transforms.Cast), + "Compose": mx.gluon.data.vision.transforms.Compose, + "Cast": PytorchMxnetWrapFunction(mx.gluon.data.vision.transforms.Cast), } general.update(MXNET_TRANSFORMS["general"]) return general @@ -162,22 +162,14 @@ def _get_preprocess(self): 
preprocess: a dict including all the registered preprocess methods """ preprocess = { - "ToTensor": PytorchMxnetWrapFunction( - torchvision.transforms.ToTensor), - "ToPILImage": PytorchMxnetWrapFunction( - torchvision.transforms.ToPILImage), - "CenterCrop": PytorchMxnetWrapFunction( - torchvision.transforms.CenterCrop), - "RandomCrop": PytorchMxnetWrapFunction( - torchvision.transforms.RandomCrop), - "RandomHorizontalFlip": PytorchMxnetWrapFunction( - torchvision.transforms.RandomHorizontalFlip), - "RandomVerticalFlip": PytorchMxnetWrapFunction( - torchvision.transforms.RandomVerticalFlip), - "Pad": PytorchMxnetWrapFunction( - torchvision.transforms.Pad), - "ColorJitter": PytorchMxnetWrapFunction( - torchvision.transforms.ColorJitter), + "ToTensor": PytorchMxnetWrapFunction(torchvision.transforms.ToTensor), + "ToPILImage": PytorchMxnetWrapFunction(torchvision.transforms.ToPILImage), + "CenterCrop": PytorchMxnetWrapFunction(torchvision.transforms.CenterCrop), + "RandomCrop": PytorchMxnetWrapFunction(torchvision.transforms.RandomCrop), + "RandomHorizontalFlip": PytorchMxnetWrapFunction(torchvision.transforms.RandomHorizontalFlip), + "RandomVerticalFlip": PytorchMxnetWrapFunction(torchvision.transforms.RandomVerticalFlip), + "Pad": PytorchMxnetWrapFunction(torchvision.transforms.Pad), + "ColorJitter": PytorchMxnetWrapFunction(torchvision.transforms.ColorJitter), } preprocess.update(PYTORCH_TRANSFORMS["preprocess"]) return preprocess @@ -204,6 +196,7 @@ def _get_general(self): general.update(PYTORCH_TRANSFORMS["general"]) return general + class ONNXRTQLTransforms(Transforms): """Onnxrt_qlinearops Transforms subclass.""" @@ -237,6 +230,7 @@ def _get_general(self): general.update(ONNXRT_QL_TRANSFORMS["general"]) return general + class ONNXRTITTransforms(Transforms): """Onnxrt_integerops Transforms subclass.""" @@ -271,16 +265,18 @@ def _get_general(self): return general -framework_transforms = {"tensorflow": TensorflowTransforms, - "tensorflow_itex": TensorflowTransforms, - "mxnet": MXNetTransforms, - "pytorch": PyTorchTransforms, - "pytorch_ipex": PyTorchTransforms, - "pytorch_fx": PyTorchTransforms, - "onnxrt_qlinearops": ONNXRTQLTransforms, - "onnxrt_integerops": ONNXRTITTransforms, - "onnxruntime": ONNXRTQLTransforms, - "onnxrt_qdq": ONNXRTQLTransforms} +framework_transforms = { + "tensorflow": TensorflowTransforms, + "tensorflow_itex": TensorflowTransforms, + "mxnet": MXNetTransforms, + "pytorch": PyTorchTransforms, + "pytorch_ipex": PyTorchTransforms, + "pytorch_fx": PyTorchTransforms, + "onnxrt_qlinearops": ONNXRTQLTransforms, + "onnxrt_integerops": ONNXRTITTransforms, + "onnxruntime": ONNXRTQLTransforms, + "onnxrt_qdq": ONNXRTQLTransforms, +} # transform registry will register transforms into these dicts TENSORFLOW_TRANSFORMS = {"preprocess": {}, "postprocess": {}, "general": {}} @@ -290,17 +286,19 @@ def _get_general(self): ONNXRT_QL_TRANSFORMS = {"preprocess": {}, "postprocess": {}, "general": {}} ONNXRT_IT_TRANSFORMS = {"preprocess": {}, "postprocess": {}, "general": {}} -registry_transforms = {"tensorflow": TENSORFLOW_TRANSFORMS, - "tensorflow_itex": TENSORFLOW_ITEX_TRANSFORMS, - "mxnet": MXNET_TRANSFORMS, - "pytorch": PYTORCH_TRANSFORMS, - "pytorch_ipex": PYTORCH_TRANSFORMS, - "pytorch_fx": PYTORCH_TRANSFORMS, - "onnxrt_qlinearops": ONNXRT_QL_TRANSFORMS, - "onnxrt_qdq": ONNXRT_QL_TRANSFORMS, - "onnxruntime": ONNXRT_QL_TRANSFORMS, - "onnxrt_integerops": ONNXRT_IT_TRANSFORMS, - } +registry_transforms = { + "tensorflow": TENSORFLOW_TRANSFORMS, + "tensorflow_itex": 
TENSORFLOW_ITEX_TRANSFORMS, + "mxnet": MXNET_TRANSFORMS, + "pytorch": PYTORCH_TRANSFORMS, + "pytorch_ipex": PYTORCH_TRANSFORMS, + "pytorch_fx": PYTORCH_TRANSFORMS, + "onnxrt_qlinearops": ONNXRT_QL_TRANSFORMS, + "onnxrt_qdq": ONNXRT_QL_TRANSFORMS, + "onnxruntime": ONNXRT_QL_TRANSFORMS, + "onnxrt_integerops": ONNXRT_IT_TRANSFORMS, +} + class TRANSFORMS(object): """Transforms collection class. @@ -316,12 +314,20 @@ def __init__(self, framework, process): framework (str): different framework type like tensorflow, pytorch and so on process (str): process type, the value can be preprocess, postprocess or general """ - assert framework in ("tensorflow", "tensorflow_itex", "keras", "onnxruntime", \ - "pytorch", "pytorch_ipex", "pytorch_fx", "onnxrt_qdq", \ - "onnxrt_qlinearops", "onnxrt_integerops", "mxnet"), \ - "framework support tensorflow pytorch mxnet onnxrt" - assert process in ("preprocess", "postprocess", - "general"), "process support preprocess postprocess, general" + assert framework in ( + "tensorflow", + "tensorflow_itex", + "keras", + "onnxruntime", + "pytorch", + "pytorch_ipex", + "pytorch_fx", + "onnxrt_qdq", + "onnxrt_qlinearops", + "onnxrt_integerops", + "mxnet", + ), "framework support tensorflow pytorch mxnet onnxrt" + assert process in ("preprocess", "postprocess", "general"), "process support preprocess postprocess, general" self.transforms = framework_transforms[framework](process).transforms self.framework = framework self.process = process @@ -335,8 +341,7 @@ def __getitem__(self, transform_type): Returns: Transforms: the registered Transforms """ - assert transform_type in self.transforms.keys(), "transform support {}".\ - format(self.transforms.keys()) + assert transform_type in self.transforms.keys(), "transform support {}".format(self.transforms.keys()) return self.transforms[transform_type] def register(self, name, transform_cls): @@ -346,8 +351,9 @@ def register(self, name, transform_cls): name (str): process name transform_cls (class): process function wrapper class """ - assert name not in registry_transforms[self.framework][self.process].keys(), \ - 'register transform name already exists.' + assert ( + name not in registry_transforms[self.framework][self.process].keys() + ), "register transform name already exists." registry_transforms[self.framework][self.process].update({name: transform_cls}) @@ -363,8 +369,9 @@ def transform_registry(transform_type, process, framework): Returns: cls: The class of register. 
""" + def decorator_transform(cls): - for single_framework in [fwk.strip() for fwk in framework.split(',')]: + for single_framework in [fwk.strip() for fwk in framework.split(",")]: assert single_framework in [ "tensorflow", "tensorflow_itex", @@ -378,9 +385,10 @@ def decorator_transform(cls): "onnxruntime", ], "The framework support tensorflow mxnet pytorch onnxrt" if transform_type in registry_transforms[single_framework][process].keys(): - raise ValueError('Cannot have two transforms with the same name') + raise ValueError("Cannot have two transforms with the same name") registry_transforms[single_framework][process][transform_type] = cls return cls + return decorator_transform @@ -412,6 +420,7 @@ def __call__(self, **kwargs): """ return TensorflowTransform(self.transform_func, **kwargs) + class TensorflowTransform(BaseTransform): """Tensorflow transform class, the subclass of BaseTransform.""" @@ -434,6 +443,7 @@ def __call__(self, sample): image = self.transform_func(image, **self.kwargs) return (image, label) + class PytorchMxnetWrapFunction(object): """Pytorch and MXNet wrapper function class.""" @@ -453,6 +463,7 @@ def __call__(self, **args): """ return PytorchMxnetTransform(self.transform_func(**args)) + class PytorchMxnetTransform(BaseTransform): """Pytorch and Mxnet transform class, the subclass of BaseTransform.""" @@ -474,40 +485,47 @@ def __call__(self, sample): image = self.transform_func(image) return (image, label) + interpolation_map = { - 'nearest': cv2.INTER_NEAREST, - 'bilinear': cv2.INTER_LINEAR, - 'bicubic': cv2.INTER_CUBIC, + "nearest": cv2.INTER_NEAREST, + "bilinear": cv2.INTER_LINEAR, + "bicubic": cv2.INTER_CUBIC, } interpolation_pytorch_map = { - 'nearest': 0, - 'bilinear': 2, - 'bicubic': 3, + "nearest": 0, + "bilinear": 2, + "bicubic": 3, } interpolation_mxnet_map = { - 'nearest': 0, - 'bilinear': 1, - 'bicubic': 2, + "nearest": 0, + "bilinear": 1, + "bicubic": 2, } + def get_torchvision_map(interpolation): """Get torchvision interpolation map.""" try: from torchvision.transforms.functional import InterpolationMode + interpolation_torchvision_map = { 0: InterpolationMode.NEAREST, 2: InterpolationMode.BILINEAR, 3: InterpolationMode.BICUBIC, } return interpolation_torchvision_map[interpolation] - except: # pragma: no cover + except: # pragma: no cover return interpolation -@transform_registry(transform_type="Compose", process="general", \ - framework="onnxrt_qlinearops, onnxrt_integerops, tensorflow, \ - tensorflow_itex") + +@transform_registry( + transform_type="Compose", + process="general", + framework="onnxrt_qlinearops, onnxrt_integerops, tensorflow, \ + tensorflow_itex", +) class ComposeTransform(BaseTransform): """Composes several transforms together. @@ -528,8 +546,8 @@ def __call__(self, sample): sample = transform(sample) return sample -@transform_registry(transform_type="CropToBoundingBox", process="preprocess", \ - framework="pytorch") + +@transform_registry(transform_type="CropToBoundingBox", process="preprocess", framework="pytorch") class CropToBoundingBox(BaseTransform): """Crops an image to a specified bounding box. 
@@ -554,15 +572,12 @@ def __call__(self, sample): """Call torchvision.transforms.functional.crop.""" image, label = sample image = torchvision.transforms.functional.crop( - image, - self.offset_height, - self.offset_width, - self.target_height, - self.target_width) + image, self.offset_height, self.offset_width, self.target_height, self.target_width + ) return (image, label) -@transform_registry(transform_type="CropToBoundingBox", process="preprocess", \ - framework="mxnet") + +@transform_registry(transform_type="CropToBoundingBox", process="preprocess", framework="mxnet") class MXNetCropToBoundingBox(CropToBoundingBox): """Crops an image to a specified bounding box. @@ -579,16 +594,13 @@ class MXNetCropToBoundingBox(CropToBoundingBox): def __call__(self, sample): """Call mx.image.fixed_crop.""" image, label = sample - image = mx.image.fixed_crop( - image, - self.offset_height, - self.offset_width, - self.target_height, - self.target_width) + image = mx.image.fixed_crop(image, self.offset_height, self.offset_width, self.target_height, self.target_width) return (image, label) -@transform_registry(transform_type="CropToBoundingBox", process="preprocess", \ - framework="onnxrt_qlinearops, onnxrt_integerops") + +@transform_registry( + transform_type="CropToBoundingBox", process="preprocess", framework="onnxrt_qlinearops, onnxrt_integerops" +) class ONNXRTCropToBoundingBox(CropToBoundingBox): """Crops an image to a specified bounding box. @@ -605,12 +617,15 @@ class ONNXRTCropToBoundingBox(CropToBoundingBox): def __call__(self, sample): """Crop the image in sample.""" image, label = sample - image = image[self.offset_height : self.offset_height+self.target_height, - self.offset_width : self.offset_width+self.target_width, :] + image = image[ + self.offset_height : self.offset_height + self.target_height, + self.offset_width : self.offset_width + self.target_width, + :, + ] return (image, label) -@transform_registry(transform_type="CropToBoundingBox", process="preprocess", \ - framework="tensorflow, tensorflow_itex") + +@transform_registry(transform_type="CropToBoundingBox", process="preprocess", framework="tensorflow, tensorflow_itex") class TensorflowCropToBoundingBox(CropToBoundingBox): """Crops an image to a specified bounding box. @@ -628,15 +643,23 @@ def __call__(self, sample): """Crop the image in sample.""" image, label = sample if isinstance(image, tf.Tensor): - image = tf.image.crop_to_bounding_box(image, self.offset_height, - self.offset_width, self.target_height, self.target_width) + image = tf.image.crop_to_bounding_box( + image, self.offset_height, self.offset_width, self.target_height, self.target_width + ) else: - image = image[self.offset_height : self.offset_height+self.target_height, - self.offset_width : self.offset_width+self.target_width, :] + image = image[ + self.offset_height : self.offset_height + self.target_height, + self.offset_width : self.offset_width + self.target_width, + :, + ] return (image, label) -@transform_registry(transform_type="ResizeWithRatio", process="preprocess", \ - framework="onnxrt_qlinearops, onnxrt_integerops, pytorch, mxnet") + +@transform_registry( + transform_type="ResizeWithRatio", + process="preprocess", + framework="onnxrt_qlinearops, onnxrt_integerops, pytorch, mxnet", +) class ResizeWithRatio(BaseTransform): """Resize image with aspect ratio and pad it to max shape(optional). 
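ResizeWithRatio, registered above, picks a single scale factor (at least 1.0, large enough to bring the shorter side to min_dim, but reduced so the longer side stays within max_dim) and, when padding is enabled, centers the result on a max_dim by max_dim canvas. A numeric walk-through with hypothetical sizes, mirroring the arithmetic in the hunks that follow:

height, width = 600, 800  # hypothetical input image size
min_dim, max_dim = 800, 1333  # hypothetical ResizeWithRatio settings

scale = max(1.0, min_dim / min(height, width))  # 800 / 600 -> ~1.333
if round(max(height, width) * scale) > max_dim:  # 800 * 1.333 ~= 1067 <= 1333, so the cap is not hit here
    scale = max_dim / max(height, width)

new_h, new_w = round(height * scale), round(width * scale)  # 800 x 1067 after resizing
pad_h, pad_w = max_dim - new_h, max_dim - new_w  # 533 and 266 pixels of padding to reach 1333
pad_top, pad_left = pad_h // 2, pad_w // 2  # 266 and 133, i.e. the (max_dim - h) // 2 terms below
print(scale, (new_h, new_w), (pad_top, pad_left))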
@@ -680,20 +703,26 @@ def __call__(self, sample): if self.padding: h, w = image.shape[:2] - pad_param = [[(self.max_dim-h)//2, self.max_dim-h-(self.max_dim-h)//2], - [(self.max_dim-w)//2, self.max_dim-w-(self.max_dim-w)//2], - [0, 0]] + pad_param = [ + [(self.max_dim - h) // 2, self.max_dim - h - (self.max_dim - h) // 2], + [(self.max_dim - w) // 2, self.max_dim - w - (self.max_dim - w) // 2], + [0, 0], + ] if not isinstance(bbox, np.ndarray): bbox = np.array(bbox) resized_box = bbox * [height, width, height, width] * scale - moved_box = (resized_box + [(self.max_dim-h)//2, (self.max_dim-w)//2, \ - (self.max_dim-h)//2, (self.max_dim-w)//2]) + moved_box = resized_box + [ + (self.max_dim - h) // 2, + (self.max_dim - w) // 2, + (self.max_dim - h) // 2, + (self.max_dim - w) // 2, + ] bbox = moved_box / [self.max_dim, self.max_dim, self.max_dim, self.max_dim] - image = np.pad(image, pad_param, mode='constant', constant_values=self.constant_value) + image = np.pad(image, pad_param, mode="constant", constant_values=self.constant_value) return image, (bbox, str_label, int_label, image_id) -@transform_registry(transform_type="ResizeWithRatio", process="preprocess", \ - framework="tensorflow, tensorflow_itex") + +@transform_registry(transform_type="ResizeWithRatio", process="preprocess", framework="tensorflow, tensorflow_itex") class TensorflowResizeWithRatio(BaseTransform): """Resize image with aspect ratio and pad it to max shape(optional). @@ -728,27 +757,33 @@ def __call__(self, sample): width = tf.cast(shape[1], dtype=tf.float32) scale = 1 if self.min_dim: - scale = tf.maximum(1., tf.cast(self.min_dim / tf.math.minimum(height, width),\ - dtype=tf.float32)) + scale = tf.maximum(1.0, tf.cast(self.min_dim / tf.math.minimum(height, width), dtype=tf.float32)) if self.max_dim: image_max = tf.cast(tf.maximum(height, width), dtype=tf.float32) - scale = tf.cond(pred=tf.greater(tf.math.round(image_max * scale), self.max_dim), \ - true_fn=lambda: self.max_dim / image_max, - false_fn=lambda: scale) - image = tf.image.resize(image, (tf.math.round(height * scale), \ - tf.math.round(width * scale))) + scale = tf.cond( + pred=tf.greater(tf.math.round(image_max * scale), self.max_dim), + true_fn=lambda: self.max_dim / image_max, + false_fn=lambda: scale, + ) + image = tf.image.resize(image, (tf.math.round(height * scale), tf.math.round(width * scale))) bbox, str_label, int_label, image_id = label if self.padding: shape = tf.shape(input=image) h = tf.cast(shape[0], dtype=tf.float32) w = tf.cast(shape[1], dtype=tf.float32) - pad_param = [[(self.max_dim-h)//2, self.max_dim-h-(self.max_dim-h)//2], - [(self.max_dim-w)//2, self.max_dim-w-(self.max_dim-w)//2], - [0, 0]] + pad_param = [ + [(self.max_dim - h) // 2, self.max_dim - h - (self.max_dim - h) // 2], + [(self.max_dim - w) // 2, self.max_dim - w - (self.max_dim - w) // 2], + [0, 0], + ] resized_box = bbox * [height, width, height, width] * scale - moved_box = (resized_box + [(self.max_dim-h)//2, (self.max_dim-w)//2, \ - (self.max_dim-h)//2, (self.max_dim-w)//2]) + moved_box = resized_box + [ + (self.max_dim - h) // 2, + (self.max_dim - w) // 2, + (self.max_dim - h) // 2, + (self.max_dim - w) // 2, + ] bbox = moved_box / [self.max_dim, self.max_dim, self.max_dim, self.max_dim] image = tf.pad(image, pad_param, constant_values=self.constant_value) else: @@ -756,8 +791,8 @@ def __call__(self, sample): image, (bbox, str_label, int_label, image_id) = transform(sample) return image, (bbox, str_label, int_label, image_id) -@transform_registry(transform_type="Transpose", 
process="preprocess", \ - framework="onnxrt_qlinearops, onnxrt_integerops") + +@transform_registry(transform_type="Transpose", process="preprocess", framework="onnxrt_qlinearops, onnxrt_integerops") class Transpose(BaseTransform): """Transpose image according to perm. @@ -779,8 +814,8 @@ def __call__(self, sample): image = np.transpose(image, axes=self.perm) return (image, label) -@transform_registry(transform_type="Transpose", process="preprocess", \ - framework="tensorflow, tensorflow_itex") + +@transform_registry(transform_type="Transpose", process="preprocess", framework="tensorflow, tensorflow_itex") class TensorflowTranspose(Transpose): """Transpose image according to perm. @@ -801,6 +836,7 @@ def __call__(self, sample): image = np.transpose(image, axes=self.perm) return (image, label) + @transform_registry(transform_type="Transpose", process="preprocess", framework="mxnet") class MXNetTranspose(Transpose): """Transpose image according to perm. @@ -819,6 +855,7 @@ def __call__(self, sample): image = mx.ndarray.transpose(image, self.perm) return (image, label) + @transform_registry(transform_type="Transpose", process="preprocess", framework="pytorch") class PyTorchTranspose(Transpose): """Transpose image according to perm. @@ -837,8 +874,10 @@ def __call__(self, sample): image = image.permute(self.perm) return (image, label) -@transform_registry(transform_type="RandomVerticalFlip", process="preprocess", \ - framework="onnxrt_qlinearops, onnxrt_integerops") + +@transform_registry( + transform_type="RandomVerticalFlip", process="preprocess", framework="onnxrt_qlinearops, onnxrt_integerops" +) class RandomVerticalFlip(BaseTransform): """Vertically flip the given image randomly. @@ -853,8 +892,8 @@ def __call__(self, sample): image = np.flipud(image) return (image, label) -@transform_registry(transform_type="RandomVerticalFlip", process="preprocess", \ - framework="tensorflow, tensorflow_itex") + +@transform_registry(transform_type="RandomVerticalFlip", process="preprocess", framework="tensorflow, tensorflow_itex") class TensorflowRandomVerticalFlip(BaseTransform): """Vertically flip the given image randomly. @@ -872,8 +911,10 @@ def __call__(self, sample): image = np.flipud(image) return (image, label) -@transform_registry(transform_type="RandomHorizontalFlip", process="preprocess", \ - framework="onnxrt_qlinearops, onnxrt_integerops") + +@transform_registry( + transform_type="RandomHorizontalFlip", process="preprocess", framework="onnxrt_qlinearops, onnxrt_integerops" +) class RandomHorizontalFlip(BaseTransform): """Horizontally flip the given image randomly. @@ -888,8 +929,10 @@ def __call__(self, sample): image = np.fliplr(image) return (image, label) -@transform_registry(transform_type="RandomHorizontalFlip", process="preprocess", \ - framework="tensorflow, tensorflow_itex") + +@transform_registry( + transform_type="RandomHorizontalFlip", process="preprocess", framework="tensorflow, tensorflow_itex" +) class TensorflowRandomHorizontalFlip(BaseTransform): """Horizontally flip the given image randomly. 
@@ -907,9 +950,13 @@ def __call__(self, sample): image = np.fliplr(image) return (image, label) -@transform_registry(transform_type="ToArray", process="preprocess", \ - framework="onnxrt_qlinearops, onnxrt_integerops, tensorflow, \ - tensorflow_itex, pytorch, mxnet") + +@transform_registry( + transform_type="ToArray", + process="preprocess", + framework="onnxrt_qlinearops, onnxrt_integerops, tensorflow, \ + tensorflow_itex, pytorch, mxnet", +) class ToArray(BaseTransform): """Convert PIL Image or NDArray to numpy array. @@ -920,23 +967,37 @@ class ToArray(BaseTransform): def __call__(self, sample): """Convert image in sample to numpy array.""" from PIL import Image + image, label = sample if isinstance(image, Image.Image): image = np.array(image) - elif isinstance(image, mx.ndarray.NDArray): # pylint: disable=no-member + elif isinstance(image, mx.ndarray.NDArray): # pylint: disable=no-member image = image.asnumpy() else: raise ValueError("Unknown image type!") return (image, label) -np_dtype_map = {'int8': np.int8, 'uint8': np.uint8, 'complex64': np.complex64, - 'uint16': np.uint16, 'int32': np.int32, 'uint32': np.uint32, - 'int64': np.int64, 'uint64': np.uint64, 'float32': np.float32, - 'float16': np.float16, 'float64': np.float64, 'bool': bool, - 'string': str, 'complex128': np.complex128, 'int16': np.int16} -@transform_registry(transform_type="Cast", process="general", \ - framework="tensorflow, tensorflow_itex") +np_dtype_map = { + "int8": np.int8, + "uint8": np.uint8, + "complex64": np.complex64, + "uint16": np.uint16, + "int32": np.int32, + "uint32": np.uint32, + "int64": np.int64, + "uint64": np.uint64, + "float32": np.float32, + "float16": np.float16, + "float64": np.float64, + "bool": bool, + "string": str, + "complex128": np.complex128, + "int16": np.int16, +} + + +@transform_registry(transform_type="Cast", process="general", framework="tensorflow, tensorflow_itex") class CastTFTransform(BaseTransform): """Convert image to given dtype. @@ -947,15 +1008,27 @@ class CastTFTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, dtype='float32'): + def __init__(self, dtype="float32"): """Initialize `CastTFTransform` class.""" - self.tf_dtype_map = {'int16': tf.int16, 'uint8': tf.uint8, 'uint16': tf.uint16, - 'uint32':tf.uint32, 'uint64': tf.uint64, 'complex64': tf.complex64, - 'int32': tf.int32, 'int64':tf.int64, 'float32': tf.float32, - 'float16': tf.float16, 'float64':tf.float64, 'bool': tf.bool, - 'string': tf.string, 'int8': tf.int8, 'complex128': tf.complex128} + self.tf_dtype_map = { + "int16": tf.int16, + "uint8": tf.uint8, + "uint16": tf.uint16, + "uint32": tf.uint32, + "uint64": tf.uint64, + "complex64": tf.complex64, + "int32": tf.int32, + "int64": tf.int64, + "float32": tf.float32, + "float16": tf.float16, + "float64": tf.float64, + "bool": tf.bool, + "string": tf.string, + "int8": tf.int8, + "complex128": tf.complex128, + } - assert dtype in self.tf_dtype_map.keys(), 'Unknown dtype' + assert dtype in self.tf_dtype_map.keys(), "Unknown dtype" self.dtype = dtype def __call__(self, sample): @@ -967,8 +1040,8 @@ def __call__(self, sample): image = image.astype(np_dtype_map[self.dtype]) return (image, label) -@transform_registry(transform_type="Cast", process="general", - framework="onnxrt_qlinearops, onnxrt_integerops") + +@transform_registry(transform_type="Cast", process="general", framework="onnxrt_qlinearops, onnxrt_integerops") class CastONNXTransform(BaseTransform): """Convert image to given dtype. 
@@ -979,9 +1052,9 @@ class CastONNXTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, dtype='float32'): + def __init__(self, dtype="float32"): """Initialize `CastONNXTransform` class.""" - assert dtype in np_dtype_map.keys(), 'Unknown dtype' + assert dtype in np_dtype_map.keys(), "Unknown dtype" self.dtype = dtype def __call__(self, sample): @@ -990,6 +1063,7 @@ def __call__(self, sample): image = image.astype(np_dtype_map[self.dtype]) return (image, label) + @transform_registry(transform_type="Cast", process="general", framework="pytorch") class CastPyTorchTransform(BaseTransform): """Convert image to given dtype. @@ -1001,13 +1075,23 @@ class CastPyTorchTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, dtype='float32'): + def __init__(self, dtype="float32"): """Initialize `CastPyTorchTransform` class.""" - dtype_map = {'int8': torch.int8, 'uint8': torch.uint8, 'complex128': torch.complex128, - 'int32':torch.int32, 'int64':torch.int64, 'complex64': torch.complex64, - 'bfloat16':torch.bfloat16, 'float64':torch.float64, 'bool': torch.bool, - 'float16':torch.float16, 'int16':torch.int16, 'float32': torch.float32} - assert dtype in dtype_map.keys(), 'Unknown dtype' + dtype_map = { + "int8": torch.int8, + "uint8": torch.uint8, + "complex128": torch.complex128, + "int32": torch.int32, + "int64": torch.int64, + "complex64": torch.complex64, + "bfloat16": torch.bfloat16, + "float64": torch.float64, + "bool": torch.bool, + "float16": torch.float16, + "int16": torch.int16, + "float32": torch.float32, + } + assert dtype in dtype_map.keys(), "Unknown dtype" self.dtype = dtype_map[dtype] def __call__(self, sample): @@ -1016,8 +1100,8 @@ def __call__(self, sample): image = image.type(self.dtype) return (image, label) -@transform_registry(transform_type="CenterCrop", process="preprocess", \ - framework="tensorflow, tensorflow_itex") + +@transform_registry(transform_type="CenterCrop", process="preprocess", framework="tensorflow, tensorflow_itex") class CenterCropTFTransform(BaseTransform): """Crops the given image at the center to the given size. @@ -1058,8 +1142,8 @@ def __call__(self, sample): image, label = transform(sample) return (image, label) -@transform_registry(transform_type="PaddedCenterCrop", process="preprocess", \ - framework="tensorflow, tensorflow_itex") + +@transform_registry(transform_type="PaddedCenterCrop", process="preprocess", framework="tensorflow, tensorflow_itex") class PaddedCenterCropTransform(BaseTransform): """Crops the given image at the center to the given size with padding. @@ -1089,16 +1173,15 @@ def __call__(self, sample): image, label = sample h, w = image.shape[0], image.shape[1] - padded_center_crop_size = \ - int((self.image_size / (self.image_size + self.crop_padding)) * min(h, w)) + padded_center_crop_size = int((self.image_size / (self.image_size + self.crop_padding)) * min(h, w)) y0 = (h - padded_center_crop_size + 1) // 2 x0 = (w - padded_center_crop_size + 1) // 2 - image = image[y0:y0 + padded_center_crop_size, x0:x0 + padded_center_crop_size, :] + image = image[y0 : y0 + padded_center_crop_size, x0 : x0 + padded_center_crop_size, :] return (image, label) -@transform_registry(transform_type="Resize", process="preprocess", \ - framework="tensorflow, tensorflow_itex") + +@transform_registry(transform_type="Resize", process="preprocess", framework="tensorflow, tensorflow_itex") class ResizeTFTransform(BaseTransform): """Resize the input image to the given size. 
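The PaddedCenterCrop transform above derives its crop window from the ratio image_size / (image_size + crop_padding). A worked example of that arithmetic with hypothetical settings (image_size=224, crop_padding=32, a 256x300 input):

h, w = 256, 300
image_size, crop_padding = 224, 32

padded_center_crop_size = int((image_size / (image_size + crop_padding)) * min(h, w))  # int(0.875 * 256) = 224
y0 = (h - padded_center_crop_size + 1) // 2  # (256 - 224 + 1) // 2 = 16
x0 = (w - padded_center_crop_size + 1) // 2  # (300 - 224 + 1) // 2 = 38
print(padded_center_crop_size, y0, x0)  # the crop window is image[16:240, 38:262, :]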
@@ -1111,7 +1194,7 @@ class ResizeTFTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, size, interpolation='bilinear'): + def __init__(self, size, interpolation="bilinear"): """Initialize `ResizeTFTransform` class.""" if isinstance(size, int): self.size = size, size @@ -1122,8 +1205,8 @@ def __init__(self, size, interpolation='bilinear'): self.size = size[0], size[1] self.interpolation = interpolation - if self.interpolation not in ['bilinear', 'nearest', 'bicubic']: - raise ValueError('Unsupported interpolation type!') + if self.interpolation not in ["bilinear", "nearest", "bicubic"]: + raise ValueError("Unsupported interpolation type!") def __call__(self, sample): """Resize the input image in sample to the given size.""" @@ -1131,12 +1214,11 @@ def __call__(self, sample): if isinstance(image, tf.Tensor): image = tf.image.resize(image, self.size, method=self.interpolation) else: - image = cv2.resize(image, self.size, - interpolation=interpolation_map[self.interpolation]) + image = cv2.resize(image, self.size, interpolation=interpolation_map[self.interpolation]) return (image, label) -@transform_registry(transform_type="Resize", process="preprocess", \ - framework="pytorch") + +@transform_registry(transform_type="Resize", process="preprocess", framework="pytorch") class ResizePytorchTransform(BaseTransform): """Resize the input image to the given size. @@ -1149,7 +1231,7 @@ class ResizePytorchTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, size, interpolation='bilinear'): + def __init__(self, size, interpolation="bilinear"): """Initialize `ResizePytorchTransform` class.""" self.size = size if interpolation in interpolation_pytorch_map.keys(): @@ -1160,12 +1242,11 @@ def __init__(self, size, interpolation='bilinear'): def __call__(self, sample): """Resize the input image in sample to the given size.""" image, label = sample - transformer = torchvision.transforms.Resize(size=self.size, - interpolation=self.interpolation) + transformer = torchvision.transforms.Resize(size=self.size, interpolation=self.interpolation) return (transformer(image), label) -@transform_registry(transform_type="RandomCrop", process="preprocess", \ - framework="tensorflow, tensorflow_itex") + +@transform_registry(transform_type="RandomCrop", process="preprocess", framework="tensorflow, tensorflow_itex") class RandomCropTFTransform(BaseTransform): """Crop the image at a random location to the given size. 
@@ -1196,7 +1277,7 @@ def __call__(self, sample): height, width = image.shape[1:3] if self.size[0] > height or self.size[1] > width: - raise ValueError('Crop size must be smaller than image size') + raise ValueError("Crop size must be smaller than image size") if self.size[0] == height and self.size[1] == width: return (image, label) @@ -1208,15 +1289,14 @@ def __call__(self, sample): offset_height = tf.cast(offset_height, dtype=tf.int32) offset_width = tf.cast(offset_width, dtype=tf.int32) - image = tf.image.crop_to_bounding_box(image, offset_height, - offset_width, self.size[0], self.size[1]) + image = tf.image.crop_to_bounding_box(image, offset_height, offset_width, self.size[0], self.size[1]) else: transform = RandomCropTransform(self.size) image, label = transform(sample) return (image, label) -@transform_registry(transform_type="RandomResizedCrop", process="preprocess", \ - framework="pytorch") + +@transform_registry(transform_type="RandomResizedCrop", process="preprocess", framework="pytorch") class RandomResizedCropPytorchTransform(BaseTransform): """Crop the given image to random size and aspect ratio. @@ -1234,8 +1314,7 @@ class RandomResizedCropPytorchTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), - interpolation='bilinear'): + def __init__(self, size, scale=(0.08, 1.0), ratio=(3.0 / 4.0, 4.0 / 3.0), interpolation="bilinear"): """Initialize `RandomResizedCropPytorchTransform` class.""" self.size = size self.scale = scale @@ -1252,12 +1331,13 @@ def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), def __call__(self, sample): """Crop the image in sample to the random size.""" image, label = sample - transformer = torchvision.transforms.RandomResizedCrop(size=self.size, - scale=self.scale, ratio=self.ratio, interpolation=self.interpolation) + transformer = torchvision.transforms.RandomResizedCrop( + size=self.size, scale=self.scale, ratio=self.ratio, interpolation=self.interpolation + ) return (transformer(image), label) -@transform_registry(transform_type="RandomResizedCrop", process="preprocess", \ - framework="mxnet") + +@transform_registry(transform_type="RandomResizedCrop", process="preprocess", framework="mxnet") class RandomResizedCropMXNetTransform(BaseTransform): """Crop the given image to random size and aspect ratio. @@ -1275,8 +1355,7 @@ class RandomResizedCropMXNetTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), - interpolation='bilinear'): + def __init__(self, size, scale=(0.08, 1.0), ratio=(3.0 / 4.0, 4.0 / 3.0), interpolation="bilinear"): """Initialize `RandomResizedCropMXNetTransform` class.""" if isinstance(size, int): self.size = size, size @@ -1299,13 +1378,13 @@ def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. 
/ 3.), def __call__(self, sample): """Crop the image in sample to the random size.""" image, label = sample - transformer = mx.gluon.data.vision.transforms.RandomResizedCrop(size=self.size, - scale=self.scale, ratio=self.ratio, interpolation=self.interpolation) + transformer = mx.gluon.data.vision.transforms.RandomResizedCrop( + size=self.size, scale=self.scale, ratio=self.ratio, interpolation=self.interpolation + ) return (transformer(image), label) -@transform_registry(transform_type="RandomResizedCrop", process="preprocess", \ - framework="tensorflow, tensorflow_itex") +@transform_registry(transform_type="RandomResizedCrop", process="preprocess", framework="tensorflow, tensorflow_itex") class RandomResizedCropTFTransform(BaseTransform): """Crop the given image to random size and aspect ratio. @@ -1323,8 +1402,7 @@ class RandomResizedCropTFTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, size, scale=(0.08, 1.0), ratio=( - 3. / 4., 4. / 3.), interpolation='bilinear'): + def __init__(self, size, scale=(0.08, 1.0), ratio=(3.0 / 4.0, 4.0 / 3.0), interpolation="bilinear"): """Initialize `RandomResizedCropTFTransform` class.""" if isinstance(size, int): self.size = size, size @@ -1337,8 +1415,8 @@ def __init__(self, size, scale=(0.08, 1.0), ratio=( self.scale = scale self.ratio = ratio self.interpolation = interpolation - if self.interpolation not in ['bilinear', 'nearest']: - raise ValueError('Unsupported interpolation type!') + if self.interpolation not in ["bilinear", "nearest"]: + raise ValueError("Unsupported interpolation type!") if scale[0] > scale[1] or ratio[0] > ratio[1]: raise ValueError("Scale and ratio should be of kind (min, max)") @@ -1354,29 +1432,32 @@ def get_params(self, image, scale, ratio): log_ratio = (np.log(ratio[0]), np.log(ratio[1])) new_ratio = np.exp(np.random.uniform(log_ratio[0], log_ratio[1])) - new_w = tf.math.round( - tf.math.sqrt(tf.math.multiply(target_area, new_ratio))) - new_h = tf.math.round( - tf.math.sqrt(tf.math.divide(target_area, new_ratio))) + new_w = tf.math.round(tf.math.sqrt(tf.math.multiply(target_area, new_ratio))) + new_h = tf.math.round(tf.math.sqrt(tf.math.divide(target_area, new_ratio))) x0, y0 = tf.case( - [(tf.math.logical_and( - tf.math.greater(width, new_w), tf.math.greater(height, new_h)), - lambda: (tf.random.uniform( - shape=[], maxval=tf.math.subtract(width, new_w)), - tf.random.uniform( - shape=[], maxval=tf.math.subtract(height, new_h))) - )], - default=lambda: (-1.0, -1.0)) + [ + ( + tf.math.logical_and(tf.math.greater(width, new_w), tf.math.greater(height, new_h)), + lambda: ( + tf.random.uniform(shape=[], maxval=tf.math.subtract(width, new_w)), + tf.random.uniform(shape=[], maxval=tf.math.subtract(height, new_h)), + ), + ) + ], + default=lambda: (-1.0, -1.0), + ) if x0 != -1.0 and y0 != -1.0: return y0, x0, new_h, new_w in_ratio = width / height - new_w, new_h = tf.case([(tf.math.greater(min(ratio), in_ratio), - lambda: (width, tf.math.round(width / min(ratio)))), - (tf.math.greater(in_ratio, max(ratio)), - lambda: (height, tf.math.round(height * max(ratio))))], - default=lambda: (width, height)) + new_w, new_h = tf.case( + [ + (tf.math.greater(min(ratio), in_ratio), lambda: (width, tf.math.round(width / min(ratio)))), + (tf.math.greater(in_ratio, max(ratio)), lambda: (height, tf.math.round(height * max(ratio)))), + ], + default=lambda: (width, height), + ) y0 = (height - new_h) / 2 x0 = (width - new_w) / 2 @@ -1395,20 +1476,18 @@ def __call__(self, sample): height = tf.cast(height, 
dtype=tf.float32) width = tf.cast(width, dtype=tf.float32) box_indices = tf.range(0, image.shape[0], dtype=tf.int32) - boxes = [y0/height, x0/width, (y0+h)/height, (x0+w)/width] + boxes = [y0 / height, x0 / width, (y0 + h) / height, (x0 + w) / width] boxes = tf.broadcast_to(boxes, [image.shape[0], 4]) - image = tf.image.crop_and_resize(image, boxes, box_indices, - self.size, self.interpolation) + image = tf.image.crop_and_resize(image, boxes, box_indices, self.size, self.interpolation) if squeeze: image = tf.squeeze(image, axis=0) else: - transform = RandomResizedCropTransform(self.size, self.scale, - self.ratio, self.interpolation) + transform = RandomResizedCropTransform(self.size, self.scale, self.ratio, self.interpolation) image, label = transform(sample) return (image, label) -@transform_registry(transform_type="Normalize", process="preprocess", \ - framework="tensorflow, tensorflow_itex") + +@transform_registry(transform_type="Normalize", process="preprocess", framework="tensorflow, tensorflow_itex") class NormalizeTFTransform(BaseTransform): """Normalize a image with mean and standard deviation. @@ -1449,11 +1528,11 @@ def __call__(self, sample): image, label = transform(sample) if self.rescale: image /= self.rescale[0] - image -= self.rescale[1] + image -= self.rescale[1] return (image, label) -@transform_registry(transform_type='KerasRescale', process="preprocess", \ - framework='tensorflow, tensorflow_itex') + +@transform_registry(transform_type="KerasRescale", process="preprocess", framework="tensorflow, tensorflow_itex") class RescaleKerasPretrainTransform(BaseTransform): """Scale the values of image to [0,1]. @@ -1468,15 +1547,15 @@ def __init__(self, rescale=None): def __call__(self, sample): """Scale the values of the image in sample.""" image, label = sample - if image.dtype == np.dtype('uint8'): - self.rescale = np.array(self.rescale).astype('uint8') + if image.dtype == np.dtype("uint8"): + self.rescale = np.array(self.rescale).astype("uint8") if len(self.rescale) == 2: image = image / self.rescale[0] - image = image - self.rescale[1] + image = image - self.rescale[1] return (image, label) -@transform_registry(transform_type='Rescale', process="preprocess", \ - framework='tensorflow, tensorflow_itex') + +@transform_registry(transform_type="Rescale", process="preprocess", framework="tensorflow, tensorflow_itex") class RescaleTFTransform(BaseTransform): """Scale the values of image to [0,1]. @@ -1488,13 +1567,13 @@ def __call__(self, sample): """Scale the values of the image in sample.""" image, label = sample if isinstance(image, tf.Tensor): - image = tf.cast(image, tf.float32) / 255. + image = tf.cast(image, tf.float32) / 255.0 else: - image = image.astype('float32') / 255. + image = image.astype("float32") / 255.0 return (image, label) -@transform_registry(transform_type='Rescale', process="preprocess", \ - framework='onnxrt_qlinearops, onnxrt_integerops') + +@transform_registry(transform_type="Rescale", process="preprocess", framework="onnxrt_qlinearops, onnxrt_integerops") class RescaleTransform(BaseTransform): """Scale the values of image to [0,1]. @@ -1506,12 +1585,16 @@ def __call__(self, sample): """Scale the values of the image in sample.""" image, label = sample if isinstance(image, np.ndarray): - image = image.astype('float32') / 255. 
+ image = image.astype("float32") / 255.0 return (image, label) -@transform_registry(transform_type='AlignImageChannel', process="preprocess", \ - framework='tensorflow, tensorflow_itex, \ - onnxrt_qlinearops, onnxrt_integerops, mxnet') + +@transform_registry( + transform_type="AlignImageChannel", + process="preprocess", + framework="tensorflow, tensorflow_itex, \ + onnxrt_qlinearops, onnxrt_integerops, mxnet", +) class AlignImageChannelTransform(BaseTransform): """Align image channel, now just support [H,W]->[H,W,dim], [H,W,4]->[H,W,3] and [H,W,3]->[H,W]. @@ -1525,14 +1608,14 @@ def __init__(self, dim=3): """Initialize `AlignImageChannelTransform` class.""" logger.warning("This transform is going to be deprecated") if dim < 1 or dim > 4: - raise ValueError('Unsupport image dim!') + raise ValueError("Unsupport image dim!") self.dim = dim def __call__(self, sample): """Align channel of the image in sample.""" image, label = sample if len(image.shape) == 2: - image = np.dstack([image]*self.dim) + image = np.dstack([image] * self.dim) if isinstance(image, np.ndarray) and image.shape[-1] != self.dim: if image.shape[-1] == 4 and self.dim == 3: image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB) @@ -1540,11 +1623,11 @@ def __call__(self, sample): image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) image = np.expand_dims(image, axis=-1) else: - raise ValueError('Unsupport conversion!') + raise ValueError("Unsupport conversion!") return (image, label) -@transform_registry(transform_type='AlignImageChannel', process="preprocess", \ - framework='pytorch') + +@transform_registry(transform_type="AlignImageChannel", process="preprocess", framework="pytorch") class PyTorchAlignImageChannel(BaseTransform): """Align image channel, now just support [H,W,4]->[H,W,3] and [H,W,3]->[H,W]. @@ -1558,24 +1641,25 @@ def __init__(self, dim=3): """Initialize `PyTorchAlignImageChannel` class.""" logger.warning("This transform is going to be deprecated") if dim != 1 and dim != 3: - raise ValueError('Unsupport image dim!') + raise ValueError("Unsupport image dim!") self.dim = dim def __call__(self, sample): """Align channel of the image in sample.""" from PIL import Image + image, label = sample - assert isinstance(image, Image.Image), 'Input image must be PIL Image' + assert isinstance(image, Image.Image), "Input image must be PIL Image" if self.dim == 3: - image = image.convert('RGB') + image = image.convert("RGB") elif self.dim == 1: - image = image.convert('L') + image = image.convert("L") else: - raise ValueError('Unsupport conversion!') + raise ValueError("Unsupport conversion!") return (image, label) -@transform_registry(transform_type="ToNDArray", process="preprocess", \ - framework="mxnet") + +@transform_registry(transform_type="ToNDArray", process="preprocess", framework="mxnet") class ToNDArrayTransform(BaseTransform): """Convert np.array to NDArray. @@ -1589,6 +1673,7 @@ def __call__(self, sample): image = mx.nd.array(image) return image, label + @transform_registry(transform_type="Resize", process="preprocess", framework="mxnet") class ResizeMXNetTransform(BaseTransform): """Resize the input image to the given size. 
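As a usage note for the rescale and channel-alignment transforms above, the sketch below converts a single-channel uint8 image into a 3-channel float image in [0, 1]. It assumes the neural_compressor package is importable; the input array is synthetic:

import numpy as np

from neural_compressor.data.transforms.transform import AlignImageChannelTransform, RescaleTransform

gray = np.random.randint(0, 256, size=(32, 32), dtype=np.uint8)  # single-channel [H, W] input
image, label = AlignImageChannelTransform(dim=3)((gray, 0))  # stacked to [H, W, 3]
image, label = RescaleTransform()((image, 0))  # cast to float32 and scaled into [0, 1]
print(image.shape, image.dtype, image.max() <= 1.0)  # (32, 32, 3) float32 True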
@@ -1602,7 +1687,7 @@ class ResizeMXNetTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, size, interpolation='bilinear'): + def __init__(self, size, interpolation="bilinear"): """Initialize `ResizeMXNetTransform` class.""" if isinstance(size, int): self.size = size, size @@ -1620,13 +1705,11 @@ def __init__(self, size, interpolation='bilinear'): def __call__(self, sample): """Resize the input image in sample to the given size.""" image, label = sample - transformer = mx.gluon.data.vision.transforms.Resize(size=self.size, - interpolation=self.interpolation) + transformer = mx.gluon.data.vision.transforms.Resize(size=self.size, interpolation=self.interpolation) return (transformer(image), label) -@transform_registry(transform_type="Resize", process="preprocess", \ - framework="onnxrt_qlinearops, onnxrt_integerops") +@transform_registry(transform_type="Resize", process="preprocess", framework="onnxrt_qlinearops, onnxrt_integerops") class ResizeTransform(BaseTransform): """Resize the input image to the given size. @@ -1639,7 +1722,7 @@ class ResizeTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, size, interpolation='bilinear'): + def __init__(self, size, interpolation="bilinear"): """Initialize `ResizeTransform` class.""" if isinstance(size, int): self.size = size, size @@ -1662,8 +1745,8 @@ def __call__(self, sample): image = np.expand_dims(image, -1) return (image, label) -@transform_registry(transform_type="CropResize", process="preprocess", \ - framework="tensorflow, tensorflow_itex") + +@transform_registry(transform_type="CropResize", process="preprocess", framework="tensorflow, tensorflow_itex") class CropResizeTFTransform(BaseTransform): """Crop the input image with given location and resize it. @@ -1680,10 +1763,10 @@ class CropResizeTFTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, x, y, width, height, size, interpolation='bilinear'): + def __init__(self, x, y, width, height, size, interpolation="bilinear"): """Initialize `CropResizeTFTransform` class.""" - if interpolation not in ['bilinear', 'nearest', 'bicubic']: - raise ValueError('Unsupported interpolation type!') + if interpolation not in ["bilinear", "nearest", "bicubic"]: + raise ValueError("Unsupported interpolation type!") self.interpolation = interpolation self.x = x self.y = y @@ -1701,15 +1784,14 @@ def __call__(self, sample): """Resize the input image in sample with given location.""" image, label = sample if isinstance(image, tf.Tensor): - image = tf.image.crop_to_bounding_box( - image, self.y, self.x, self.height, self.width) + image = tf.image.crop_to_bounding_box(image, self.y, self.x, self.height, self.width) image = tf.image.resize(image, self.size, method=self.interpolation) else: - transform = CropResizeTransform(self.x, self.y, self.width, - self.height, self.size, self.interpolation) + transform = CropResizeTransform(self.x, self.y, self.width, self.height, self.size, self.interpolation) image, label = transform(sample) return (image, label) + @transform_registry(transform_type="CropResize", process="preprocess", framework="pytorch") class PyTorchCropResizeTransform(BaseTransform): """Crop the input image with given location and resize it. 
@@ -1727,7 +1809,7 @@ class PyTorchCropResizeTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, x, y, width, height, size, interpolation='bilinear'): + def __init__(self, x, y, width, height, size, interpolation="bilinear"): """Initialize `PyTorchCropResizeTransform` class.""" if interpolation in interpolation_pytorch_map.keys(): self.interpolation = get_torchvision_map(interpolation_pytorch_map[interpolation]) @@ -1743,10 +1825,10 @@ def __call__(self, sample): """Resize the input image in sample with given location.""" image, label = sample image = image.crop((self.x, self.y, self.x + self.width, self.y + self.height)) - transformer = torchvision.transforms.Resize(size=self.size, - interpolation=self.interpolation) + transformer = torchvision.transforms.Resize(size=self.size, interpolation=self.interpolation) return (transformer(image), label) + @transform_registry(transform_type="CropResize", process="preprocess", framework="mxnet") class MXNetCropResizeTransform(BaseTransform): """Crop the input image with given location and resize it. @@ -1764,7 +1846,7 @@ class MXNetCropResizeTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, x, y, width, height, size, interpolation='bilinear'): + def __init__(self, x, y, width, height, size, interpolation="bilinear"): """Initialize `MXNetCropResizeTransform` class.""" if interpolation in interpolation_mxnet_map.keys(): self.interpolation = interpolation_mxnet_map[interpolation] @@ -1779,12 +1861,13 @@ def __init__(self, x, y, width, height, size, interpolation='bilinear'): def __call__(self, sample): """Resize the input image in sample with given location.""" image, label = sample - transformer = mx.gluon.data.vision.transforms.CropResize(self.x, self.y, self.width, - self.height, self.size, self.interpolation) + transformer = mx.gluon.data.vision.transforms.CropResize( + self.x, self.y, self.width, self.height, self.size, self.interpolation + ) return (transformer(image), label) -@transform_registry(transform_type="CropResize", process="preprocess", \ - framework="onnxrt_qlinearops, onnxrt_integerops") + +@transform_registry(transform_type="CropResize", process="preprocess", framework="onnxrt_qlinearops, onnxrt_integerops") class CropResizeTransform(BaseTransform): """Crop the input image with given location and resize it. 
@@ -1801,7 +1884,7 @@ class CropResizeTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, x, y, width, height, size, interpolation='bilinear'): + def __init__(self, x, y, width, height, size, interpolation="bilinear"): """Initialize `CropResizeTransform` class.""" if interpolation in interpolation_map.keys(): self.interpolation = interpolation_map[interpolation] @@ -1822,12 +1905,12 @@ def __init__(self, x, y, width, height, size, interpolation='bilinear'): def __call__(self, sample): """Crop the input image in sample with given location.""" image, label = sample - image = image[self.y:self.y+self.height, self.x:self.x+self.width, :] + image = image[self.y : self.y + self.height, self.x : self.x + self.width, :] image = cv2.resize(image, self.size, interpolation=self.interpolation) return (image, label) -@transform_registry(transform_type="CenterCrop", process="preprocess", \ - framework="onnxrt_qlinearops, onnxrt_integerops") + +@transform_registry(transform_type="CenterCrop", process="preprocess", framework="onnxrt_qlinearops, onnxrt_integerops") class CenterCropTransform(BaseTransform): """Crops the given image at the center to the given size. @@ -1854,17 +1937,18 @@ def __call__(self, sample): h, w = image.shape[0], image.shape[1] if h + 1 < self.height or w + 1 < self.width: raise ValueError( - "Required crop size {} is larger then input image size {}".format( - (self.height, self.width), (h, w))) + "Required crop size {} is larger then input image size {}".format((self.height, self.width), (h, w)) + ) if self.height == h and self.width == w: return (image, label) y0 = (h - self.height) // 2 x0 = (w - self.width) // 2 - image = image[y0:y0 + self.height, x0:x0 + self.width, :] + image = image[y0 : y0 + self.height, x0 : x0 + self.width, :] return (image, label) + @transform_registry(transform_type="Normalize", process="preprocess", framework="mxnet") class MXNetNormalizeTransform(BaseTransform): """Normalize a image with mean and standard deviation. @@ -1893,9 +1977,9 @@ def __call__(self, sample): """Normalize the image in sample.""" image, label = sample axes = [len(image.shape) - 1] - axes.extend(list(np.arange(len(image.shape)-1))) + axes.extend(list(np.arange(len(image.shape) - 1))) image = mx.ndarray.transpose(image, axes) - assert len(self.mean) == image.shape[0], 'Mean channel must match image channel' + assert len(self.mean) == image.shape[0], "Mean channel must match image channel" transformer = mx.gluon.data.vision.transforms.Normalize(self.mean, self.std) image = transformer(image) axes = list(np.arange(1, len(image.shape))) @@ -1903,6 +1987,7 @@ def __call__(self, sample): image = mx.ndarray.transpose(image, axes) return (image, label) + @transform_registry(transform_type="Normalize", process="preprocess", framework="pytorch") class PyTorchNormalizeTransform(MXNetNormalizeTransform): """Normalize a image with mean and standard deviation. @@ -1926,8 +2011,8 @@ def __call__(self, sample): image = transformer(image) return (image, label) -@transform_registry(transform_type="Normalize", process="preprocess", \ - framework="onnxrt_qlinearops, onnxrt_integerops") + +@transform_registry(transform_type="Normalize", process="preprocess", framework="onnxrt_qlinearops, onnxrt_integerops") class NormalizeTransform(BaseTransform): """Normalize a image with mean and standard deviation. 
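Putting several of these ONNX Runtime transforms together: ComposeTransform (defined earlier in this file) chains callables over the (image, label) pair. The sketch below assumes numpy and opencv-python are installed (ResizeTransform resizes through cv2), passes the transform list positionally, and uses illustrative resize and normalization values; NormalizeTransform is the class registered just below:

import numpy as np

from neural_compressor.data.transforms.transform import (
    ComposeTransform,
    NormalizeTransform,
    ResizeTransform,
    Transpose,
)

pipeline = ComposeTransform(
    [
        ResizeTransform(size=224, interpolation="bilinear"),
        NormalizeTransform(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        Transpose([2, 0, 1]),  # HWC -> CHW
    ]
)

image = np.random.rand(256, 256, 3).astype("float32")
image, label = pipeline((image, 0))
print(image.shape)  # (3, 224, 224)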
@@ -1954,12 +2039,14 @@ def __init__(self, mean=[0.0], std=[1.0]): def __call__(self, sample): """Normalize the image in sample.""" image, label = sample - assert len(self.mean) == image.shape[-1], 'Mean channel must match image channel' + assert len(self.mean) == image.shape[-1], "Mean channel must match image channel" image = (image - self.mean) / self.std return (image, label) -@transform_registry(transform_type="RandomCrop", process="preprocess", \ - framework="mxnet, onnxrt_qlinearops, onnxrt_integerops") + +@transform_registry( + transform_type="RandomCrop", process="preprocess", framework="mxnet, onnxrt_qlinearops, onnxrt_integerops" +) class RandomCropTransform(BaseTransform): """Crop the image at a random location to the given size. @@ -1986,8 +2073,8 @@ def __call__(self, sample): h, w = image.shape[0], image.shape[1] if h + 1 < self.height or w + 1 < self.width: raise ValueError( - "Required crop size {} is larger then input image size {}".format( - (self.height, self.width), (h, w))) + "Required crop size {} is larger then input image size {}".format((self.height, self.width), (h, w)) + ) if self.height == h and self.width == w: return (image, label) @@ -1995,13 +2082,15 @@ def __call__(self, sample): rand_h = np.random.randint(0, h - self.height + 1) rand_w = np.random.randint(0, w - self.width + 1) if len(image.shape) == 2: - image = image[rand_h:rand_h + self.height, rand_w:rand_w + self.width] + image = image[rand_h : rand_h + self.height, rand_w : rand_w + self.width] else: - image = image[rand_h:rand_h + self.height, rand_w:rand_w + self.width, :] + image = image[rand_h : rand_h + self.height, rand_w : rand_w + self.width, :] return (image, label) -@transform_registry(transform_type="RandomResizedCrop", process="preprocess", \ - framework="onnxrt_qlinearops, onnxrt_integerops") + +@transform_registry( + transform_type="RandomResizedCrop", process="preprocess", framework="onnxrt_qlinearops, onnxrt_integerops" +) class RandomResizedCropTransform(BaseTransform): """Crop the given image to random size and aspect ratio. @@ -2019,8 +2108,7 @@ class RandomResizedCropTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, size, scale=(0.08, 1.0), ratio=( - 3. / 4., 4. / 3.), interpolation='bilinear'): + def __init__(self, size, scale=(0.08, 1.0), ratio=(3.0 / 4.0, 4.0 / 3.0), interpolation="bilinear"): """Initialize `RandomResizedCropTransform` class.""" if isinstance(size, int): self.size = size, size @@ -2077,13 +2165,15 @@ def __call__(self, sample): """Crop the image in sample to random size.""" image, label = sample y0, x0, h, w = self.get_params(image, self.scale, self.ratio) - crop_img = image[y0:y0 + h, x0:x0 + w, :] + crop_img = image[y0 : y0 + h, x0 : x0 + w, :] image = cv2.resize(crop_img, self.size, interpolation=self.interpolation) return (image, label) + def _compute_softmax(scores): """Compute softmax probability over raw logits.""" import math + if not scores: return [] @@ -2104,6 +2194,7 @@ def _compute_softmax(scores): probs.append(score / total_sum) return probs + def _get_best_indexes(logits, n_best_size): """Get the n-best logits from a list.""" index_and_score = sorted(enumerate(logits), key=lambda x: x[1], reverse=True) @@ -2115,14 +2206,17 @@ def _get_best_indexes(logits, n_best_size): best_indexes.append(index_and_score[i][0]) return best_indexes + def get_final_text(pred_text, orig_text, do_lower_case): """Project the tokenized prediction back to the original text.""" import six + from . 
import tokenization + def _strip_spaces(text): ns_chars = [] ns_to_s_map = collections.OrderedDict() - for (i, c) in enumerate(text): + for i, c in enumerate(text): if c == " ": continue ns_to_s_map[len(ns_chars)] = i @@ -2144,7 +2238,7 @@ def _strip_spaces(text): return orig_text tok_s_to_ns_map = {} - for (i, tok_index) in six.iteritems(tok_ns_to_s_map): + for i, tok_index in six.iteritems(tok_ns_to_s_map): tok_s_to_ns_map[tok_index] = i orig_start_position = None @@ -2154,7 +2248,7 @@ def _strip_spaces(text): orig_start_position = orig_ns_to_s_map[ns_start_position] if orig_start_position is None: - return orig_text + return orig_text orig_end_position = None if end_position in tok_s_to_ns_map: @@ -2165,23 +2259,26 @@ def _strip_spaces(text): if orig_end_position is None: return orig_text - output_text = orig_text[orig_start_position:(orig_end_position + 1)] + output_text = orig_text[orig_start_position : (orig_end_position + 1)] return output_text + class SquadExample(object): """A single training/test example for simple sequence classification. For examples without an answer, the start and end position are -1. """ - def __init__(self, + def __init__( + self, qas_id, question_text, doc_tokens, orig_answer_text=None, start_position=None, end_position=None, - is_impossible=False): + is_impossible=False, + ): """Initialize `SquadExample` class.""" self.qas_id = qas_id self.question_text = question_text @@ -2191,22 +2288,25 @@ def __init__(self, self.end_position = end_position self.is_impossible = is_impossible + class InputFeatures(object): """A single set of features of data.""" - def __init__(self, - unique_id, - example_index, - doc_span_index, - tokens, - token_to_orig_map, - token_is_max_context, - input_ids, - input_mask, - segment_ids, - start_position=None, - end_position=None, - is_impossible=None): + def __init__( + self, + unique_id, + example_index, + doc_span_index, + tokens, + token_to_orig_map, + token_is_max_context, + input_ids, + input_mask, + segment_ids, + start_position=None, + end_position=None, + is_impossible=None, + ): """Initialize `InputFeatures` class.""" self.unique_id = unique_id self.example_index = example_index @@ -2221,9 +2321,11 @@ def __init__(self, self.end_position = end_position self.is_impossible = is_impossible + def read_squad_examples(input_file): """Read a SQuAD json file into a list of SquadExample.""" import json + with tf.io.gfile.GFile(input_file, "r") as reader: input_data = json.load(reader)["data"] @@ -2264,15 +2366,17 @@ def is_whitespace(c): orig_answer_text=orig_answer_text, start_position=start_position, end_position=end_position, - is_impossible=is_impossible) + is_impossible=is_impossible, + ) examples.append(example) return examples + def _check_is_max_context(doc_spans, cur_span_index, position): """Check if this is the 'max context' doc span for the token.""" best_score = None best_span_index = None - for (span_index, doc_span) in enumerate(doc_spans): + for span_index, doc_span in enumerate(doc_spans): end = doc_span.start + doc_span.length - 1 if position < doc_span.start: continue @@ -2287,11 +2391,11 @@ def _check_is_max_context(doc_spans, cur_span_index, position): return cur_span_index == best_span_index -def convert_examples_to_features(examples, tokenizer, max_seq_length, - doc_stride, max_query_length, output_fn): + +def convert_examples_to_features(examples, tokenizer, max_seq_length, doc_stride, max_query_length, output_fn): """Load a data file into a list of `InputBatch`s.""" unique_id = 1000000000 - for 
(example_index, example) in enumerate(examples): + for example_index, example in enumerate(examples): query_tokens = tokenizer.tokenize(example.question_text) if len(query_tokens) > max_query_length: query_tokens = query_tokens[0:max_query_length] @@ -2299,7 +2403,7 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length, tok_to_orig_index = [] orig_to_tok_index = [] all_doc_tokens = [] - for (i, token) in enumerate(example.doc_tokens): + for i, token in enumerate(example.doc_tokens): orig_to_tok_index.append(len(all_doc_tokens)) sub_tokens = tokenizer.tokenize(token) for sub_token in sub_tokens: @@ -2315,8 +2419,7 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length, # We can have documents that are longer than the maximum sequence length. # To deal with this we do a sliding window approach, where we take chunks # of the up to our max length with a stride of `doc_stride`. - _DocSpan = collections.namedtuple( # pylint: disable=invalid-name - "DocSpan", ["start", "length"]) + _DocSpan = collections.namedtuple("DocSpan", ["start", "length"]) # pylint: disable=invalid-name doc_spans = [] start_offset = 0 while start_offset < len(all_doc_tokens): @@ -2327,7 +2430,7 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length, if start_offset + length == len(all_doc_tokens): break start_offset += min(length, doc_stride) - for (doc_span_index, doc_span) in enumerate(doc_spans): + for doc_span_index, doc_span in enumerate(doc_spans): tokens = [] token_to_orig_map = {} token_is_max_context = {} @@ -2344,8 +2447,7 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length, split_token_index = doc_span.start + i token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index] - is_max_context = _check_is_max_context(doc_spans, doc_span_index, - split_token_index) + is_max_context = _check_is_max_context(doc_spans, doc_span_index, split_token_index) token_is_max_context[len(tokens)] = is_max_context tokens.append(all_doc_tokens[split_token_index]) segment_ids.append(1) @@ -2383,13 +2485,14 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length, segment_ids=segment_ids, start_position=start_position, end_position=end_position, - is_impossible=example.is_impossible) + is_impossible=example.is_impossible, + ) # Run callback output_fn(feature) unique_id += 1 -@transform_registry(transform_type="Collect", \ - process="postprocess", framework="tensorflow") + +@transform_registry(transform_type="Collect", process="postprocess", framework="tensorflow") class CollectTransform(BaseTransform): """Postprocess the predictions, collect data.""" @@ -2408,7 +2511,7 @@ def __call__(self, sample): result_list = [np.expand_dims(result, 0) for result in all_results] for result in result_list: if len(self.unique_id) < self.length: - result = result.transpose(2,0,1) + result = result.transpose(2, 0, 1) self.unique_id.append(self.idx) self.start_logits.append(result[0]) self.end_logits.append(result[1]) @@ -2417,8 +2520,8 @@ def __call__(self, sample): self.all_sample = ([self.unique_id, self.start_logits, self.end_logits], label) return self.all_sample -@transform_registry(transform_type="SquadV1", process="postprocess", \ - framework="tensorflow, tensorflow_itex") + +@transform_registry(transform_type="SquadV1", process="postprocess", framework="tensorflow, tensorflow_itex") class TFSquadV1PostTransform(BaseTransform): """Postprocess the predictions of bert on SQuAD. 
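The sliding-window span generation reformatted above can be exercised on its own. A standalone sketch of how doc_stride yields overlapping DocSpans (the token count, window size, and stride below are made-up example values):

import collections

DocSpan = collections.namedtuple("DocSpan", ["start", "length"])

def make_doc_spans(num_doc_tokens, max_tokens_for_doc, doc_stride):
    """Cover the document with overlapping windows, mirroring the loop above."""
    doc_spans, start_offset = [], 0
    while start_offset < num_doc_tokens:
        length = min(num_doc_tokens - start_offset, max_tokens_for_doc)
        doc_spans.append(DocSpan(start=start_offset, length=length))
        if start_offset + length == num_doc_tokens:
            break
        start_offset += min(length, doc_stride)
    return doc_spans

# 500 doc tokens, 384-token windows, stride 128 -> two overlapping spans starting at 0 and 128.
print(make_doc_spans(500, 384, 128))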
@@ -2447,15 +2550,25 @@ class TFSquadV1PostTransform(BaseTransform): tuple of processed prediction and label """ - def __init__(self, label_file, vocab_file, n_best_size=20, max_seq_length=384, \ - max_query_length=64, max_answer_length=30, do_lower_case=True, doc_stride=128): + def __init__( + self, + label_file, + vocab_file, + n_best_size=20, + max_seq_length=384, + max_query_length=64, + max_answer_length=30, + do_lower_case=True, + doc_stride=128, + ): """Initialize `TFSquadV1PostTransform` class.""" from . import tokenization + self.eval_examples = read_squad_examples(label_file) - tokenizer = tokenization.FullTokenizer( - vocab_file=vocab_file, do_lower_case=do_lower_case) + tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=do_lower_case) self.eval_features = [] + def append_feature(feature): self.eval_features.append(feature) @@ -2465,13 +2578,13 @@ def append_feature(feature): max_seq_length=max_seq_length, doc_stride=doc_stride, max_query_length=max_query_length, - output_fn=append_feature) + output_fn=append_feature, + ) self.n_best_size = n_best_size self.max_answer_length = max_answer_length self.do_lower_case = do_lower_case - self.RawResult = collections.namedtuple("RawResult", - ["unique_id", "start_logits", "end_logits"]) + self.RawResult = collections.namedtuple("RawResult", ["unique_id", "start_logits", "end_logits"]) def process_result(self, results): """Get the processed results.""" @@ -2482,7 +2595,9 @@ def process_result(self, results): self.RawResult( unique_id=int(unique_id), start_logits=[float(x) for x in start_logits.flat], - end_logits=[float(x) for x in end_logits.flat])) + end_logits=[float(x) for x in end_logits.flat], + ) + ) return processed_results @@ -2501,11 +2616,11 @@ def get_postprocess_result(self, sample): unique_id_to_result[result.unique_id] = result _PrelimPrediction = collections.namedtuple( # pylint: disable=invalid-name - "PrelimPrediction", - ["feature_index", "start_index", "end_index", "start_logit", "end_logit"]) + "PrelimPrediction", ["feature_index", "start_index", "end_index", "start_logit", "end_logit"] + ) all_predictions = collections.OrderedDict() - for (example_index, example) in enumerate(self.eval_examples): + for example_index, example in enumerate(self.eval_examples): features = example_index_to_features[example_index] prelim_predictions = [] @@ -2514,9 +2629,9 @@ def get_postprocess_result(self, sample): min_null_feature_index = 0 # the paragraph slice with min mull score null_start_logit = 0 # the start logit at the slice with min null score null_end_logit = 0 # the end logit at the slice with min null score - for (feature_index, feature) in enumerate(features): + for feature_index, feature in enumerate(features): # skip the case that is not predicted - if not feature.unique_id in unique_id_to_result: + if feature.unique_id not in unique_id_to_result: all_predictions[example.qas_id] = "*#skip this example#*" continue result = unique_id_to_result[feature.unique_id] @@ -2549,14 +2664,16 @@ def get_postprocess_result(self, sample): start_index=start_index, end_index=end_index, start_logit=result.start_logits[start_index], - end_logit=result.end_logits[end_index])) + end_logit=result.end_logits[end_index], + ) + ) prelim_predictions = sorted( - prelim_predictions, - key=lambda x: (x.start_logit + x.end_logit), - reverse=True) + prelim_predictions, key=lambda x: (x.start_logit + x.end_logit), reverse=True + ) _NbestPrediction = collections.namedtuple( # pylint: disable=invalid-name - "NbestPrediction", 
["text", "start_logit", "end_logit"]) + "NbestPrediction", ["text", "start_logit", "end_logit"] + ) seen_predictions = {} nbest = [] @@ -2565,10 +2682,10 @@ def get_postprocess_result(self, sample): break feature = features[pred.feature_index] if pred.start_index > 0: # this is a non-null prediction - tok_tokens = feature.tokens[pred.start_index:(pred.end_index + 1)] + tok_tokens = feature.tokens[pred.start_index : (pred.end_index + 1)] orig_doc_start = feature.token_to_orig_map[pred.start_index] orig_doc_end = feature.token_to_orig_map[pred.end_index] - orig_tokens = example.doc_tokens[orig_doc_start:(orig_doc_end + 1)] + orig_tokens = example.doc_tokens[orig_doc_start : (orig_doc_end + 1)] tok_text = " ".join(tok_tokens) # De-tokenize WordPieces that have been split off. @@ -2590,16 +2707,13 @@ def get_postprocess_result(self, sample): seen_predictions[final_text] = True nbest.append( - _NbestPrediction( - text=final_text, - start_logit=pred.start_logit, - end_logit=pred.end_logit)) + _NbestPrediction(text=final_text, start_logit=pred.start_logit, end_logit=pred.end_logit) + ) # In very rare edge cases we could have no valid predictions. So we # just create a nonce prediction in this case to avoid failure. if not nbest: - nbest.append( - _NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0)) + nbest.append(_NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0)) assert len(nbest) >= 1 @@ -2613,7 +2727,7 @@ def get_postprocess_result(self, sample): probs = _compute_softmax(total_scores) nbest_json = [] - for (i, entry) in enumerate(nbest): + for i, entry in enumerate(nbest): output = collections.OrderedDict() output["text"] = entry.text output["probability"] = probs[i] @@ -2630,8 +2744,7 @@ def __call__(self, sample): return self.get_postprocess_result(sample) -@transform_registry(transform_type="ModelZooCollect", \ - process="postprocess", framework="tensorflow, tensorflow_itex") +@transform_registry(transform_type="ModelZooCollect", process="postprocess", framework="tensorflow, tensorflow_itex") class TFModelZooCollectTransform(CollectTransform): """Postprocess the predictions of model zoo, collect data.""" @@ -2651,20 +2764,41 @@ def __call__(self, sample): self.all_sample = ([self.unique_id, self.start_logits, self.end_logits], label) return self.all_sample -@transform_registry(transform_type="SquadV1ModelZoo", \ - process="postprocess", framework="tensorflow, \ - tensorflow_itex") + +@transform_registry( + transform_type="SquadV1ModelZoo", + process="postprocess", + framework="tensorflow, \ + tensorflow_itex", +) class TFSquadV1ModelZooPostTransform(TFSquadV1PostTransform): """Postprocess the predictions of bert on SQuADV1.1. 
See class TFSquadV1PostTransform for more details """ - def __init__(self, label_file, vocab_file, n_best_size=20, max_seq_length=384, \ - max_query_length=64, max_answer_length=30, do_lower_case=True, doc_stride=128): + def __init__( + self, + label_file, + vocab_file, + n_best_size=20, + max_seq_length=384, + max_query_length=64, + max_answer_length=30, + do_lower_case=True, + doc_stride=128, + ): """Initialize `TFSquadV1ModelZooPostTransform` class.""" - super().__init__(label_file, vocab_file, n_best_size, max_seq_length, \ - max_query_length, max_answer_length, do_lower_case, doc_stride) + super().__init__( + label_file, + vocab_file, + n_best_size, + max_seq_length, + max_query_length, + max_answer_length, + do_lower_case, + doc_stride, + ) self.length = len(self.eval_features) self.collect_data = TFModelZooCollectTransform(length=self.length) @@ -2673,8 +2807,8 @@ def __call__(self, sample): sample = self.collect_data(sample) return self.get_postprocess_result(sample) -@transform_registry(transform_type="ParseDecodeVoc", process="preprocess", \ - framework="tensorflow, tensorflow_itex") + +@transform_registry(transform_type="ParseDecodeVoc", process="preprocess", framework="tensorflow, tensorflow_itex") class ParseDecodeVocTransform(BaseTransform): """Parse features in Example proto. @@ -2684,6 +2818,7 @@ class ParseDecodeVocTransform(BaseTransform): def __call__(self, sample): """Parse decode voc.""" + # Currently only supports jpeg and png. # Need to use this logic because the shape is not known for # tf.image.decode_image and we rely on this info to @@ -2693,39 +2828,32 @@ def _decode_image(content, channels): return tf.cond( tf.image.is_jpeg(content), lambda: tf.image.decode_jpeg(content, channels), - lambda: tf.image.decode_png(content, channels)) + lambda: tf.image.decode_png(content, channels), + ) features = { - 'image/encoded': - tf.compat.v1.FixedLenFeature((), tf.string, default_value=''), - 'image/filename': - tf.compat.v1.FixedLenFeature((), tf.string, default_value=''), - 'image/format': - tf.compat.v1.FixedLenFeature((), tf.string, default_value='jpeg'), - 'image/height': - tf.compat.v1.FixedLenFeature((), tf.int64, default_value=0), - 'image/width': - tf.compat.v1.FixedLenFeature((), tf.int64, default_value=0), - 'image/segmentation/class/encoded': - tf.compat.v1.FixedLenFeature((), tf.string, default_value=''), - 'image/segmentation/class/format': - tf.compat.v1.FixedLenFeature((), tf.string, default_value='png'), + "image/encoded": tf.compat.v1.FixedLenFeature((), tf.string, default_value=""), + "image/filename": tf.compat.v1.FixedLenFeature((), tf.string, default_value=""), + "image/format": tf.compat.v1.FixedLenFeature((), tf.string, default_value="jpeg"), + "image/height": tf.compat.v1.FixedLenFeature((), tf.int64, default_value=0), + "image/width": tf.compat.v1.FixedLenFeature((), tf.int64, default_value=0), + "image/segmentation/class/encoded": tf.compat.v1.FixedLenFeature((), tf.string, default_value=""), + "image/segmentation/class/format": tf.compat.v1.FixedLenFeature((), tf.string, default_value="png"), } parsed_features = tf.compat.v1.parse_single_example(sample, features) - image = _decode_image(parsed_features['image/encoded'], channels=3) + image = _decode_image(parsed_features["image/encoded"], channels=3) label = None - label = _decode_image( - parsed_features['image/segmentation/class/encoded'], channels=1) + label = _decode_image(parsed_features["image/segmentation/class/encoded"], channels=1) sample = { - 'image': image, + "image": image, } 
label.set_shape([None, None, 1]) - sample['labels_class'] = label + sample["labels_class"] = label - return sample['image'], sample['labels_class'] + return sample["image"], sample["labels_class"] diff --git a/neural_compressor/experimental/__init__.py b/neural_compressor/experimental/__init__.py index 6b1e78ac6ac..fbf460df2a9 100644 --- a/neural_compressor/experimental/__init__.py +++ b/neural_compressor/experimental/__init__.py @@ -14,8 +14,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - - """Intel® Neural Compressor: An open-source Python library supporting popular model compression techniques.""" from .component import Component @@ -30,6 +28,16 @@ from . import export from .contrib import * -__all__ = ['Component', 'Quantization', 'Pruning', 'Benchmark', 'Graph_Optimization', \ - 'GraphOptimization', 'ModelConversion', 'Distillation', 'NAS', 'MixedPrecision', \ - 'export'] +__all__ = [ + "Component", + "Quantization", + "Pruning", + "Benchmark", + "Graph_Optimization", + "GraphOptimization", + "ModelConversion", + "Distillation", + "NAS", + "MixedPrecision", + "export", +] diff --git a/neural_compressor/experimental/benchmark.py b/neural_compressor/experimental/benchmark.py index f434e92b6c2..b1668ea4910 100644 --- a/neural_compressor/experimental/benchmark.py +++ b/neural_compressor/experimental/benchmark.py @@ -14,33 +14,34 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Benchmarking: measure the model performance with the objective settings.""" import os import re +import signal +import subprocess import sys +from threading import Thread + import numpy as np -import subprocess -import signal import psutil -from threading import Thread + from ..adaptor import FRAMEWORKS -from ..objective import MultiObjective from ..conf.config import BenchmarkConf -from ..conf.dotdict import DotDict +from ..conf.dotdict import DotDict, deep_get, deep_set +from ..conf.pythonic_config import Config +from ..model import BaseModel +from ..model.model import get_model_fwk_name +from ..objective import MultiObjective from ..utils import logger +from ..utils.create_obj_from_config import create_dataloader, create_eval_func from ..utils.utility import GLOBAL_STATE, MODE -from ..utils.create_obj_from_config import create_eval_func, create_dataloader -from ..conf.dotdict import deep_get, deep_set -from ..model import BaseModel -from .metric import METRICS -from .common import Model as NCModel from .common import Metric as NCMetric +from .common import Model as NCModel from .common import Postprocess as NCPostprocess from .common import _generate_common_dataloader -from ..model.model import get_model_fwk_name -from ..conf.pythonic_config import Config +from .metric import METRICS + def set_env_var(env_var, value, overwrite_existing=False): """Set the specified environment variable. @@ -52,6 +53,7 @@ def set_env_var(env_var, value, overwrite_existing=False): if overwrite_existing or not os.environ.get(env_var): os.environ[env_var] = str(value) + def set_all_env_var(conf, overwrite_existing=False): """Set all the environment variables with the configuration dict. 
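For context, the set_env_var helper above only writes a variable when it is unset or when the caller forces the overwrite. A tiny standalone sketch of that contract (the variable name and values are illustrative):

import os

def set_env_var(env_var, value, overwrite_existing=False):
    # Same behavior as the helper above: keep existing values unless forced.
    if overwrite_existing or not os.environ.get(env_var):
        os.environ[env_var] = str(value)

os.environ.pop("CORES_PER_INSTANCE", None)
set_env_var("CORES_PER_INSTANCE", 4)                           # unset -> "4"
set_env_var("CORES_PER_INSTANCE", 8)                           # already set -> kept at "4"
set_env_var("CORES_PER_INSTANCE", 8, overwrite_existing=True)  # forced -> "8"
print(os.environ["CORES_PER_INSTANCE"])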
@@ -60,68 +62,75 @@ def set_all_env_var(conf, overwrite_existing=False): cpu_counts = psutil.cpu_count(logical=False) if not conf: conf = {} - conf['num_of_instance'] = 1 - conf['cores_per_instance'] = cpu_counts - if 'cores_per_instance' in conf: - assert conf['cores_per_instance'] * conf['num_of_instance'] <= cpu_counts,\ - 'num_of_instance * cores_per_instance should <= cpu physical cores' + conf["num_of_instance"] = 1 + conf["cores_per_instance"] = cpu_counts + if "cores_per_instance" in conf: + assert ( + conf["cores_per_instance"] * conf["num_of_instance"] <= cpu_counts + ), "num_of_instance * cores_per_instance should <= cpu physical cores" else: - assert conf['num_of_instance'] <= cpu_counts, 'num_of_instance should <= cpu counts' - conf['cores_per_instance'] = int(cpu_counts / conf['num_of_instance']) + assert conf["num_of_instance"] <= cpu_counts, "num_of_instance should <= cpu counts" + conf["cores_per_instance"] = int(cpu_counts / conf["num_of_instance"]) for var, value in conf.items(): set_env_var(var.upper(), value, overwrite_existing) + def get_architecture(): """Get the architecture name of the system.""" p1 = subprocess.Popen("lscpu", stdout=subprocess.PIPE, stderr=subprocess.STDOUT) p2 = subprocess.Popen(["grep", "Architecture"], stdin=p1.stdout, stdout=subprocess.PIPE) p3 = subprocess.Popen(["cut", "-d", ":", "-f2"], stdin=p2.stdout, stdout=subprocess.PIPE) - res=None - for line in iter(p3.stdout.readline, b''): - res=line.decode("utf-8").strip() + res = None + for line in iter(p3.stdout.readline, b""): + res = line.decode("utf-8").strip() return res + def get_threads_per_core(): """Get the threads per core.""" p1 = subprocess.Popen("lscpu", stdout=subprocess.PIPE, stderr=subprocess.STDOUT) p2 = subprocess.Popen(["grep", "Thread(s) per core"], stdin=p1.stdout, stdout=subprocess.PIPE) p3 = subprocess.Popen(["cut", "-d", ":", "-f2"], stdin=p2.stdout, stdout=subprocess.PIPE) res = None - for line in iter(p3.stdout.readline, b''): - res=line.decode("utf-8").strip() + for line in iter(p3.stdout.readline, b""): + res = line.decode("utf-8").strip() return res + def get_threads(): """Get the list of threads.""" - p1 = subprocess.Popen(["cat","/proc/cpuinfo"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + p1 = subprocess.Popen(["cat", "/proc/cpuinfo"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) p2 = subprocess.Popen(["grep", "processor"], stdin=p1.stdout, stdout=subprocess.PIPE) p3 = subprocess.Popen(["cut", "-d", ":", "-f2"], stdin=p2.stdout, stdout=subprocess.PIPE) res = [] - for line in iter(p3.stdout.readline, b''): + for line in iter(p3.stdout.readline, b""): res.append(line.decode("utf-8").strip()) return res + def get_physical_ids(): """Get the list of sockets.""" - p1 = subprocess.Popen(["cat","/proc/cpuinfo"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + p1 = subprocess.Popen(["cat", "/proc/cpuinfo"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) p2 = subprocess.Popen(["grep", "physical id"], stdin=p1.stdout, stdout=subprocess.PIPE) p3 = subprocess.Popen(["cut", "-d", ":", "-f2"], stdin=p2.stdout, stdout=subprocess.PIPE) res = [] - for line in iter(p3.stdout.readline, b''): + for line in iter(p3.stdout.readline, b""): res.append(line.decode("utf-8").strip()) return res + def get_core_ids(): """Get the ids list of the cores.""" - p1 = subprocess.Popen(["cat","/proc/cpuinfo"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + p1 = subprocess.Popen(["cat", "/proc/cpuinfo"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) p2 = 
subprocess.Popen(["grep", "core id"], stdin=p1.stdout, stdout=subprocess.PIPE) p3 = subprocess.Popen(["cut", "-d", ":", "-f2"], stdin=p2.stdout, stdout=subprocess.PIPE) res = [] - for line in iter(p3.stdout.readline, b''): + for line in iter(p3.stdout.readline, b""): res.append(line.decode("utf-8").strip()) return res + def get_bounded_threads(core_ids, threads, sockets): """Return the threads id list that we will bind instances to.""" res = [] @@ -133,6 +142,7 @@ def get_bounded_threads(core_ids, threads, sockets): existing_socket_core_list.append(socket_core) return res + class Benchmark(object): """Benchmark class is used to evaluate the model performance with the objective settings. @@ -163,10 +173,10 @@ def __init__(self, conf_fname_or_obj=None): self.conf.map_pyconfig_to_cfg(conf_fname_or_obj) else: self.conf = BenchmarkConf(conf_fname_or_obj) - if self.conf.usr_cfg.model.framework != 'NA': + if self.conf.usr_cfg.model.framework != "NA": self.framework = self.conf.usr_cfg.model.framework.lower() - def __call__(self, mode='performance'): + def __call__(self, mode="performance"): """Directly call a Benchmark object. Args: @@ -176,15 +186,15 @@ def __call__(self, mode='performance'): 'accuracy' mode runs benchmarking with full cores and returns model accuracy """ cfg = self.conf.usr_cfg - assert cfg.evaluation is not None, 'benchmark evaluation filed should not be None...' - assert sys.platform in ['linux', 'win32'], 'only support platform windows and linux...' - set_all_env_var(deep_get(cfg, 'evaluation.{}.configs'.format(mode))) + assert cfg.evaluation is not None, "benchmark evaluation filed should not be None..." + assert sys.platform in ["linux", "win32"], "only support platform windows and linux..." + set_all_env_var(deep_get(cfg, "evaluation.{}.configs".format(mode))) # disable multi-instance for accuracy mode or running bechmark on GPU device - if mode == "accuracy" or cfg.device == 'gpu': - set_env_var('NC_ENV_CONF', True, overwrite_existing=True) + if mode == "accuracy" or cfg.device == "gpu": + set_env_var("NC_ENV_CONF", True, overwrite_existing=True) logger.info("Start to run Benchmark.") - if os.environ.get('NC_ENV_CONF') == 'True': + if os.environ.get("NC_ENV_CONF") == "True": return self.run_instance(mode) else: self.config_instance() @@ -195,12 +205,12 @@ def __call__(self, mode='performance'): def summary_benchmark(self): """Get the summary of the benchmark.""" - num_of_instance = int(os.environ.get('NUM_OF_INSTANCE')) - cores_per_instance = int(os.environ.get('CORES_PER_INSTANCE')) + num_of_instance = int(os.environ.get("NUM_OF_INSTANCE")) + cores_per_instance = int(os.environ.get("CORES_PER_INSTANCE")) latency_l = [] throughput_l = [] for i in range(0, num_of_instance): - log = '{}_{}_{}.log'.format(num_of_instance, cores_per_instance, i) + log = "{}_{}_{}.log".format(num_of_instance, cores_per_instance, i) with open(log, "r") as f: for line in f: latency = re.search(r"[L,l]atency:\s+(\d+(\.\d+)?)", line) @@ -208,71 +218,74 @@ def summary_benchmark(self): throughput = re.search(r"[T,t]hroughput:\s+(\d+(\.\d+)?)", line) throughput_l.append(float(throughput.group(1))) if throughput and throughput.group(1) else None if throughput_l and latency_l: - assert len(latency_l)==len(throughput_l)==num_of_instance, \ - "Multiple instance benchmark failed with some instance!" + assert ( + len(latency_l) == len(throughput_l) == num_of_instance + ), "Multiple instance benchmark failed with some instance!" 
logger.info("\n\nMultiple instance benchmark summary: ") - logger.info("Latency average: {:.3f} ms".format(sum(latency_l)/len(latency_l))) + logger.info("Latency average: {:.3f} ms".format(sum(latency_l) / len(latency_l))) logger.info("Throughput sum: {:.3f} images/sec".format(sum(throughput_l))) def call_one(self, cmd, log_file): """Execute one command for one instance in one thread and dump the log (for Windows).""" - proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - shell=True) # nosec + proc = subprocess.Popen( + cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True + ) # nosec with open(log_file, "w", 1, encoding="utf-8") as log_file: log_file.write(f"[ COMMAND ] {cmd} \n") for line in proc.stdout: decoded_line = line.decode("utf-8", errors="ignore").strip() - logger.info(decoded_line) # redirect to terminal + logger.info(decoded_line) # redirect to terminal log_file.write(decoded_line + "\n") def config_instance(self): """Configure the multi-instance commands and trigger benchmark with sub process.""" - raw_cmd = sys.executable + ' ' + ' '.join(sys.argv) - multi_instance_cmd = '' - num_of_instance = int(os.environ.get('NUM_OF_INSTANCE')) - cores_per_instance = int(os.environ.get('CORES_PER_INSTANCE')) + raw_cmd = sys.executable + " " + " ".join(sys.argv) + multi_instance_cmd = "" + num_of_instance = int(os.environ.get("NUM_OF_INSTANCE")) + cores_per_instance = int(os.environ.get("CORES_PER_INSTANCE")) logger.info("num of instance: {}".format(num_of_instance)) logger.info("cores per instance: {}".format(cores_per_instance)) - if(sys.platform in ['linux'] and get_architecture() == 'aarch64' and int(get_threads_per_core()) > 1): - raise OSError('Currently no support on ARM with hyperthreads') - elif sys.platform in ['linux']: + if sys.platform in ["linux"] and get_architecture() == "aarch64" and int(get_threads_per_core()) > 1: + raise OSError("Currently no support on ARM with hyperthreads") + elif sys.platform in ["linux"]: bounded_threads = get_bounded_threads(get_core_ids(), get_threads(), get_physical_ids()) for i in range(0, num_of_instance): - if sys.platform in ['linux'] and get_architecture() == 'x86_64': + if sys.platform in ["linux"] and get_architecture() == "x86_64": core_list_idx = np.arange(0, cores_per_instance) + i * cores_per_instance core_list = np.array(bounded_threads)[core_list_idx] else: core_list = np.arange(0, cores_per_instance) + i * cores_per_instance # bind cores only allowed in linux/mac os with numactl enabled prefix = self.generate_prefix(core_list) - instance_cmd = '{} {}'.format(prefix, raw_cmd) - if sys.platform in ['linux']: - instance_log = '{}_{}_{}.log'.format(num_of_instance, cores_per_instance, i) - multi_instance_cmd += '{} 2>&1|tee {} & \\\n'.format( - instance_cmd, instance_log) + instance_cmd = "{} {}".format(prefix, raw_cmd) + if sys.platform in ["linux"]: + instance_log = "{}_{}_{}.log".format(num_of_instance, cores_per_instance, i) + multi_instance_cmd += "{} 2>&1|tee {} & \\\n".format(instance_cmd, instance_log) else: # pragma: no cover - multi_instance_cmd += '{} \n'.format(instance_cmd) + multi_instance_cmd += "{} \n".format(instance_cmd) - multi_instance_cmd += 'wait' if sys.platform in ['linux'] else '' + multi_instance_cmd += "wait" if sys.platform in ["linux"] else "" logger.info("Running command is\n{}".format(multi_instance_cmd)) # each instance will execute single instance - set_env_var('NC_ENV_CONF', True, overwrite_existing=True) - if 
sys.platform in ['linux']: - p = subprocess.Popen(multi_instance_cmd, preexec_fn=os.setsid, shell=True) # nosec - elif sys.platform in ['win32']: # pragma: no cover + set_env_var("NC_ENV_CONF", True, overwrite_existing=True) + if sys.platform in ["linux"]: + p = subprocess.Popen(multi_instance_cmd, preexec_fn=os.setsid, shell=True) # nosec + elif sys.platform in ["win32"]: # pragma: no cover cmd_list = multi_instance_cmd.split("\n")[:-1] threads = [] for idx, cmd in enumerate(cmd_list): # wrap each execution of windows bat file in one thread # write the log to the log file of the corresponding instance - logger.info('Will dump to {}_{}_{}.log'.format(num_of_instance, cores_per_instance, idx)) - threads.append(Thread(target=self.call_one, args=(cmd, - '{}_{}_{}.log'.format(num_of_instance, cores_per_instance, idx)))) + logger.info("Will dump to {}_{}_{}.log".format(num_of_instance, cores_per_instance, idx)) + threads.append( + Thread( + target=self.call_one, + args=(cmd, "{}_{}_{}.log".format(num_of_instance, cores_per_instance, idx)), + ) + ) for command_thread in threads: command_thread.start() logger.info("Worker threads start") @@ -292,23 +305,26 @@ def generate_prefix(self, core_list): Args: core_list: a list of core indexes bound with specific instances """ - if sys.platform in ['linux'] and os.system('numactl --show >/dev/null 2>&1') == 0: - return 'OMP_NUM_THREADS={} numactl --localalloc --physcpubind={}'.format(\ - len(core_list), ','.join(core_list.astype(str))) - elif sys.platform in ['win32']: # pragma: no cover + if sys.platform in ["linux"] and os.system("numactl --show >/dev/null 2>&1") == 0: + return "OMP_NUM_THREADS={} numactl --localalloc --physcpubind={}".format( + len(core_list), ",".join(core_list.astype(str)) + ) + elif sys.platform in ["win32"]: # pragma: no cover # (TODO) should we move the hw_info from ux? from neural_compressor.utils.utility import get_number_of_sockets + num_of_socket = int(get_number_of_sockets()) - cores_per_instance = int(os.environ.get('CORES_PER_INSTANCE')) + cores_per_instance = int(os.environ.get("CORES_PER_INSTANCE")) cores_per_socket = int(psutil.cpu_count(logical=False)) / num_of_socket socket_id = int(core_list[0] // cores_per_socket) # cores per socket should integral multiple of cores per instance, else not bind core if cores_per_socket % cores_per_instance == 0: from functools import reduce - hex_core = hex(reduce(lambda x, y : x | y, [1 << p for p in core_list])) - return 'start /b /WAIT /node {} /affinity {} CMD /c'.format(socket_id, hex_core) + + hex_core = hex(reduce(lambda x, y: x | y, [1 << p for p in core_list])) + return "start /b /WAIT /node {} /affinity {} CMD /c".format(socket_id, hex_core) else: - return '' + return "" def run_instance(self, mode): """Run the instance with the configuration. 
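The core-binding prefix built in generate_prefix above reduces to two formats: a numactl core list on Linux and a hexadecimal affinity bit mask on Windows. A standalone sketch with a made-up core list (the strings are only printed, nothing is launched):

from functools import reduce

core_list = [4, 5, 6, 7]  # illustrative core indexes for one instance

# Linux: numactl receives the literal core ids.
linux_prefix = "OMP_NUM_THREADS={} numactl --localalloc --physcpubind={}".format(
    len(core_list), ",".join(str(c) for c in core_list)
)

# Windows: 'start /affinity' receives a bit mask with one bit per bound core.
hex_core = hex(reduce(lambda x, y: x | y, [1 << p for p in core_list]))

print(linux_prefix)  # OMP_NUM_THREADS=4 numactl --localalloc --physcpubind=4,5,6,7
print(hex_core)      # 0xf0, i.e. cores 4-7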
@@ -321,67 +337,75 @@ def run_instance(self, mode): """ cfg = self.conf.usr_cfg GLOBAL_STATE.STATE = MODE.BENCHMARK - framework_specific_info = {'device': cfg.device, \ - 'approach': cfg.quantization.approach, \ - 'random_seed': cfg.tuning.random_seed, - 'backend': cfg.model.get('backend', 'default'), - 'domain': cfg.model.get('domain', 'auto'), - 'format': cfg.model.get('quant_format', 'default')} + framework_specific_info = { + "device": cfg.device, + "approach": cfg.quantization.approach, + "random_seed": cfg.tuning.random_seed, + "backend": cfg.model.get("backend", "default"), + "domain": cfg.model.get("domain", "auto"), + "format": cfg.model.get("quant_format", "default"), + } framework = cfg.model.framework.lower() - if 'tensorflow' in framework: - framework_specific_info.update({"inputs": cfg.model.inputs, \ - "outputs": cfg.model.outputs, \ - "recipes": cfg.model.recipes, \ - 'workspace_path': cfg.tuning.workspace.path}) - if framework == 'keras': - framework_specific_info.update({'workspace_path': cfg.tuning.workspace.path}) - if framework == 'mxnet': + if "tensorflow" in framework: + framework_specific_info.update( + { + "inputs": cfg.model.inputs, + "outputs": cfg.model.outputs, + "recipes": cfg.model.recipes, + "workspace_path": cfg.tuning.workspace.path, + } + ) + if framework == "keras": + framework_specific_info.update({"workspace_path": cfg.tuning.workspace.path}) + if framework == "mxnet": framework_specific_info.update({"b_dataloader": self._b_dataloader}) - if 'onnx' in framework.lower(): - framework_specific_info.update({'workspace_path': cfg.tuning.workspace.path, - 'recipes': cfg.quantization.get('recipes', {})}) - if framework == 'pytorch_ipex' or framework == 'pytorch' or framework == 'pytorch_fx': - framework_specific_info.update({"workspace_path": cfg.tuning.workspace.path, - "q_dataloader": None}) + if "onnx" in framework.lower(): + framework_specific_info.update( + {"workspace_path": cfg.tuning.workspace.path, "recipes": cfg.quantization.get("recipes", {})} + ) + if framework == "pytorch_ipex" or framework == "pytorch" or framework == "pytorch_fx": + framework_specific_info.update({"workspace_path": cfg.tuning.workspace.path, "q_dataloader": None}) - assert isinstance(self._model, BaseModel), 'need set neural_compressor Model for quantization....' + assert isinstance(self._model, BaseModel), "need set neural_compressor Model for quantization...." 
adaptor = FRAMEWORKS[framework](framework_specific_info) - if deep_get(cfg, 'evaluation.{}.iteration'.format(mode)) == -1 and 'dummy_v2' in \ - deep_get(cfg, 'evaluation.{}.dataloader.dataset'.format(mode), {}): - deep_set(cfg, 'evaluation.{}.iteration'.format(mode), 10) + if deep_get(cfg, "evaluation.{}.iteration".format(mode)) == -1 and "dummy_v2" in deep_get( + cfg, "evaluation.{}.dataloader.dataset".format(mode), {} + ): + deep_set(cfg, "evaluation.{}.iteration".format(mode), 10) - iteration = -1 if deep_get(cfg, 'evaluation.{}.iteration'.format(mode)) is None \ - else deep_get(cfg, 'evaluation.{}.iteration'.format(mode)) + iteration = ( + -1 + if deep_get(cfg, "evaluation.{}.iteration".format(mode)) is None + else deep_get(cfg, "evaluation.{}.iteration".format(mode)) + ) - metric = [self._metric] if self._metric else \ - deep_get(cfg, 'evaluation.{}.metric'.format(mode)) - b_postprocess_cfg = deep_get(cfg, 'evaluation.{}.postprocess'.format(mode)) + metric = [self._metric] if self._metric else deep_get(cfg, "evaluation.{}.metric".format(mode)) + b_postprocess_cfg = deep_get(cfg, "evaluation.{}.postprocess".format(mode)) if self._b_func is None and self._b_dataloader is None: - assert deep_get(cfg, 'evaluation.{}.dataloader'.format(mode)) is not None, \ - 'dataloader field of yaml file is missing' + assert ( + deep_get(cfg, "evaluation.{}.dataloader".format(mode)) is not None + ), "dataloader field of yaml file is missing" - b_dataloader_cfg = deep_get(cfg, 'evaluation.{}.dataloader'.format(mode)) + b_dataloader_cfg = deep_get(cfg, "evaluation.{}.dataloader".format(mode)) self._b_dataloader = create_dataloader(self.framework, b_dataloader_cfg) is_measure = False if self._b_func is None: is_measure = True - self._b_func = create_eval_func(self.framework, \ - self._b_dataloader, \ - adaptor, \ - metric, \ - b_postprocess_cfg, - iteration=iteration) + self._b_func = create_eval_func( + self.framework, self._b_dataloader, adaptor, metric, b_postprocess_cfg, iteration=iteration + ) else: self._custom_b_func = True - objectives = [i.lower() for i in cfg.tuning.multi_objectives.objective] if \ - deep_get(cfg, 'tuning.multi_objectives') else [cfg.tuning.objective] - assert len(objectives) == 1, 'benchmark supports one objective at a time' - self.objectives = MultiObjective(objectives, - cfg.tuning.accuracy_criterion, - is_measure=is_measure) + objectives = ( + [i.lower() for i in cfg.tuning.multi_objectives.objective] + if deep_get(cfg, "tuning.multi_objectives") + else [cfg.tuning.objective] + ) + assert len(objectives) == 1, "benchmark supports one objective at a time" + self.objectives = MultiObjective(objectives, cfg.tuning.accuracy_criterion, is_measure=is_measure) if self._custom_b_func: val = self.objectives.evaluate(self._b_func, self._model.model) return @@ -391,8 +415,11 @@ def run_instance(self, mode): # also measurer have result list among steps acc, _ = val batch_size = self._b_dataloader.batch_size - warmup = 0 if deep_get(cfg, 'evaluation.{}.warmup'.format(mode)) is None \ - else deep_get(cfg, 'evaluation.{}.warmup'.format(mode)) + warmup = ( + 0 + if deep_get(cfg, "evaluation.{}.warmup".format(mode)) is None + else deep_get(cfg, "evaluation.{}.warmup".format(mode)) + ) if len(self.objectives.objectives[0].result_list()) < warmup: if len(self.objectives.objectives[0].result_list()) > 1 and warmup != 0: @@ -407,16 +434,16 @@ def run_instance(self, mode): logger.info("\n{} mode benchmark result:".format(mode)) for i, res in enumerate(result_list): logger.debug("Iteration {} result 
{}:".format(i, res)) - if mode == 'accuracy': + if mode == "accuracy": logger.info("Batch size = {}".format(batch_size)) if isinstance(acc, list): logger.info("Accuracy is" + "".join([" {:.4f}".format(i) for i in acc])) else: logger.info("Accuracy is {:.4f}".format(acc)) - elif mode == 'performance': + elif mode == "performance": logger.info("Batch size = {}".format(batch_size)) logger.info("Latency: {:.3f} ms".format(latency * 1000)) - logger.info("Throughput: {:.3f} images/sec".format(1. / latency)) + logger.info("Throughput: {:.3f} images/sec".format(1.0 / latency)) @property def results(self): @@ -460,7 +487,7 @@ def b_dataloader(self, dataloader): @property def b_func(self): """Not support getting b_func.""" - assert False, 'Should not try to get the value of `b_func` attribute.' + assert False, "Should not try to get the value of `b_func` attribute." return None @b_func.setter @@ -500,14 +527,16 @@ def model(self, user_model): make sure the name is in the supported slim model list. """ cfg = self.conf.usr_cfg - if cfg.model.framework == 'NA': - assert not isinstance(user_model, BaseModel), \ - "Please pass an original framework model but not neural compressor model!" + if cfg.model.framework == "NA": + assert not isinstance( + user_model, BaseModel + ), "Please pass an original framework model but not neural compressor model!" self.framework = get_model_fwk_name(user_model) if self.framework == "tensorflow": from ..model.tensorflow_model import get_model_type - if get_model_type(user_model) == 'keras' and cfg.model.backend == 'itex': - self.framework = 'keras' + + if get_model_type(user_model) == "keras" and cfg.model.backend == "itex": + self.framework = "keras" if self.framework == "pytorch": if cfg.model.backend == "default": self.framework = "pytorch_fx" @@ -526,13 +555,14 @@ def model(self, user_model): # It is config of neural_compressor version < 2.0, no need in 2.0 if cfg.model.framework == "pytorch_ipex": from neural_compressor.model.torch_model import IPEXModel + if not isinstance(user_model, IPEXModel): self._model = NCModel(user_model.model, framework=cfg.model.framework) return self._model = user_model # (TODO) ugly to set these params, but tensorflow need - if 'tensorflow' in self.framework: + if "tensorflow" in self.framework: self._model.name = cfg.model.name self._model.output_tensor_names = cfg.model.outputs self._model.input_tensor_names = cfg.model.inputs @@ -541,7 +571,7 @@ def model(self, user_model): @property def metric(self): """Not support getting metric.""" - assert False, 'Should not try to get the value of `metric` attribute.' + assert False, "Should not try to get the value of `metric` attribute." return None @metric.setter @@ -562,25 +592,26 @@ def metric(self, user_metric): specific frameworks and initialized. """ if deep_get(self.conf.usr_cfg, "evaluation.accuracy.metric"): - logger.warning("Override the value of `metric` field defined in yaml file" \ - " as user defines the value of `metric` attribute by code.") - + logger.warning( + "Override the value of `metric` field defined in yaml file" + " as user defines the value of `metric` attribute by code." 
+ ) + if isinstance(user_metric, NCMetric): - metric_cfg = {user_metric.name : {**user_metric.kwargs}} + metric_cfg = {user_metric.name: {**user_metric.kwargs}} deep_set(self.conf.usr_cfg, "evaluation.accuracy.metric", metric_cfg) self.conf.usr_cfg = DotDict(self.conf.usr_cfg) metrics = METRICS(self.framework) metrics.register(user_metric.name, user_metric.metric_cls) else: - for i in ['reset', 'update', 'result']: - assert hasattr(user_metric, i), 'Please realize {} function' \ - 'in user defined metric'.format(i) + for i in ["reset", "update", "result"]: + assert hasattr(user_metric, i), "Please realize {} function" "in user defined metric".format(i) self._metric = user_metric @property def postprocess(self, user_postprocess): """Not support getting postprocess.""" - assert False, 'Should not try to get the value of `postprocess` attribute.' + assert False, "Should not try to get the value of `postprocess` attribute." return None @postprocess.setter @@ -597,17 +628,21 @@ def postprocess(self, user_postprocess): in this method the user_postprocess.postprocess_cls will be registered to specific frameworks and initialized. """ - assert isinstance(user_postprocess, NCPostprocess), \ - 'please initialize a neural_compressor.experimental.common.Postprocess and set....' - postprocess_cfg = {user_postprocess.name : {**user_postprocess.kwargs}} + assert isinstance( + user_postprocess, NCPostprocess + ), "please initialize a neural_compressor.experimental.common.Postprocess and set...." + postprocess_cfg = {user_postprocess.name: {**user_postprocess.kwargs}} if deep_get(self.conf.usr_cfg, "evaluation.accuracy.postprocess"): - logger.warning("Override the value of `postprocess` field defined in yaml file" \ - " as user defines the value of `postprocess` attribute by code.") + logger.warning( + "Override the value of `postprocess` field defined in yaml file" + " as user defines the value of `postprocess` attribute by code." + ) deep_set(self.conf.usr_cfg, "evaluation.accuracy.postprocess.transform", postprocess_cfg) from neural_compressor.data import TRANSFORMS - postprocesses = TRANSFORMS(self.framework, 'postprocess') + + postprocesses = TRANSFORMS(self.framework, "postprocess") postprocesses.register(user_postprocess.name, user_postprocess.postprocess_cls) def __repr__(self): """Get the object representation in string format.""" - return 'Benchmark' + return "Benchmark" diff --git a/neural_compressor/experimental/common/__init__.py b/neural_compressor/experimental/common/__init__.py index a5f07849745..3d3c255b221 100644 --- a/neural_compressor/experimental/common/__init__.py +++ b/neural_compressor/experimental/common/__init__.py @@ -23,6 +23,4 @@ from .criterion import Criterions from .optimizer import Optimizers -__all__ = ['Model', 'DataLoader', 'Postprocess', 'Metric', '_generate_common_dataloader'] - - +__all__ = ["Model", "DataLoader", "Postprocess", "Metric", "_generate_common_dataloader"] diff --git a/neural_compressor/experimental/common/criterion.py b/neural_compressor/experimental/common/criterion.py index b45e4c944f1..c68b4220a7b 100644 --- a/neural_compressor/experimental/common/criterion.py +++ b/neural_compressor/experimental/common/criterion.py @@ -14,24 +14,25 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Initialize critetion classes. 
Classes includes: - TensorFlowCrossEntropyLoss, PyTorchCrossEntropyLoss, - TensorflowKnowledgeDistillationLoss, PyTorchKnowledgeDistillationLoss, + TensorFlowCrossEntropyLoss, PyTorchCrossEntropyLoss, + TensorflowKnowledgeDistillationLoss, PyTorchKnowledgeDistillationLoss, PyTorchIntermediateLayersKnowledgeDistillationLoss. """ from collections import Counter -from neural_compressor.utils.utility import LazyImport, singleton -from neural_compressor.utils import logger -from neural_compressor.adaptor.pytorch import pytorch_forward_wrapper import numpy as np -torch = LazyImport('torch') -tf = LazyImport('tensorflow') +from neural_compressor.adaptor.pytorch import pytorch_forward_wrapper +from neural_compressor.utils import logger +from neural_compressor.utils.utility import LazyImport, singleton + +torch = LazyImport("torch") +tf = LazyImport("tensorflow") + @singleton class TensorflowCriterions(object): @@ -42,6 +43,7 @@ def __init__(self): self.criterions = {} self.criterions.update(TENSORFLOW_CRITERIONS) + @singleton class PyTorchCriterions(object): """Record criterions in PyTorchCriterions class.""" @@ -51,17 +53,23 @@ def __init__(self): self.criterions = {} self.criterions.update(PYTORCH_CRITERIONS) -framework_criterions = {"tensorflow": TensorflowCriterions, - "pytorch": PyTorchCriterions, - "pytorch_fx": PyTorchCriterions} + +framework_criterions = { + "tensorflow": TensorflowCriterions, + "pytorch": PyTorchCriterions, + "pytorch_fx": PyTorchCriterions, +} # user/model specific criterions will be registered here TENSORFLOW_CRITERIONS = {} -PYTORCH_CRITERIONS= {} +PYTORCH_CRITERIONS = {} + +registry_criterions = { + "tensorflow": TENSORFLOW_CRITERIONS, + "pytorch": PYTORCH_CRITERIONS, + "pytorch_fx": PYTORCH_CRITERIONS, +} -registry_criterions = {"tensorflow": TENSORFLOW_CRITERIONS, - "pytorch": PYTORCH_CRITERIONS, - "pytorch_fx": PYTORCH_CRITERIONS} class Criterions(object): """Integrate criterions of different framework.""" @@ -72,8 +80,7 @@ def __init__(self, framework): Args: framework (string): framework name. """ - assert framework in ("tensorflow", "pytorch", "pytorch_fx"), \ - "framework support tensorflow pytorch" + assert framework in ("tensorflow", "pytorch", "pytorch_fx"), "framework support tensorflow pytorch" self.criterions = framework_criterions[framework]().criterions def __getitem__(self, criterion_type): @@ -81,12 +88,11 @@ def __getitem__(self, criterion_type): Args: criterion_type (string): criterion type. - + Returns: cls: criterion class. """ - assert criterion_type in self.criterions.keys(), "only support criterions in {}".\ - format(self.criterions.keys()) + assert criterion_type in self.criterions.keys(), "only support criterions in {}".format(self.criterions.keys()) return self.criterions[criterion_type] @@ -97,15 +103,16 @@ def register(self, name, criterion_cls): name (string): criterion name/type. criterion_cls (string): criterion class. """ - assert name not in self.criterions.keys(), 'registered criterion name already exists.' + assert name not in self.criterions.keys(), "registered criterion name already exists." self.criterions.update({name: criterion_cls}) + def criterion_registry(criterion_type, framework): """Use to register criterion classes in registry_criterions. Args: criterion_type (str): The string of supported criterion. - framework (str): The string of supported framework. + framework (str): The string of supported framework. Returns: cls: The class of register. 
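The Criterions container above is a plain name-to-class registry. A minimal standalone sketch of the register/lookup round trip, using a hypothetical DummyLoss (not a real criterion in the library):

class MiniCriterions:
    """Stand-in mirroring Criterions above, with the framework lookup omitted."""

    def __init__(self):
        self.criterions = {}

    def __getitem__(self, criterion_type):
        assert criterion_type in self.criterions, "only support criterions in {}".format(self.criterions.keys())
        return self.criterions[criterion_type]

    def register(self, name, criterion_cls):
        assert name not in self.criterions, "registered criterion name already exists."
        self.criterions[name] = criterion_cls

class DummyLoss:
    def __init__(self, param_dict):
        self.param_dict = param_dict

criterions = MiniCriterions()
criterions.register("DummyLoss", DummyLoss)
loss_cls = criterions["DummyLoss"]
print(loss_cls({"reduction": "mean"}).param_dict)  # {'reduction': 'mean'}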
@@ -113,18 +120,18 @@ def criterion_registry(criterion_type, framework): def decorator_criterion(cls): """Decorate criterion class to check framework and criterion name.""" - for fw in [fwk.strip() for fwk in framework.split(',')]: - assert fw in [ - "tensorflow", - "pytorch"], "The framework support tensorflow pytorch" + for fw in [fwk.strip() for fwk in framework.split(",")]: + assert fw in ["tensorflow", "pytorch"], "The framework support tensorflow pytorch" if criterion_type in registry_criterions[fw].keys(): - raise ValueError('Cannot have two criterions with the same name') + raise ValueError("Cannot have two criterions with the same name") registry_criterions[fw][criterion_type] = cls return cls + return decorator_criterion -@criterion_registry('CrossEntropyLoss', 'tensorflow') + +@criterion_registry("CrossEntropyLoss", "tensorflow") class TensorFlowCrossEntropyLoss(object): """TensorFlow CrossEntropyLoss criterion.""" @@ -134,18 +141,21 @@ def __init__(self, param_dict): Args: param_dict (dict): The dict of parameters setting by user for CrossEntropyLoss criterion. """ - assert isinstance(param_dict, dict), 'This criterion constructor parameter must be a dict' + assert isinstance(param_dict, dict), "This criterion constructor parameter must be a dict" self._param_dict = param_dict def _mapping(self): - _param_map = {'reduction': 'reduction', - 'from_logits':'from_logits'} + _param_map = {"reduction": "reduction", "from_logits": "from_logits"} _dict = {} for key in self._param_dict: if key in _param_map: - if key == 'reduction': - assert self._param_dict[key] in ['auto', 'none', 'sum', 'sum_over_batch_size'], \ - 'Supported reduction value for tensorflow is auto, none, sum, sum_over_batch_size' + if key == "reduction": + assert self._param_dict[key] in [ + "auto", + "none", + "sum", + "sum_over_batch_size", + ], "Supported reduction value for tensorflow is auto, none, sum, sum_over_batch_size" _dict.update({_param_map[key]: self._param_dict[key]}) return _dict @@ -158,7 +168,8 @@ def __call__(self): """ return tf.keras.losses.CategoricalCrossentropy, self._mapping() -@criterion_registry('SparseCategoricalCrossentropy', 'tensorflow') + +@criterion_registry("SparseCategoricalCrossentropy", "tensorflow") class TensorFlowSparseCategoricalCrossentropy(object): """TensorFlow SparseCategoricalCrossentropyLoss criterion.""" @@ -168,18 +179,21 @@ def __init__(self, param_dict): Args: param_dict (string): param_dict. 
""" - assert isinstance(param_dict, dict), 'This criterion constructor parameter must be a dict' + assert isinstance(param_dict, dict), "This criterion constructor parameter must be a dict" self._param_dict = param_dict def _mapping(self): - _param_map = {'reduction': 'reduction', - 'from_logits':'from_logits'} + _param_map = {"reduction": "reduction", "from_logits": "from_logits"} _dict = {} for key in self._param_dict: if key in _param_map: - if key == 'reduction': - assert self._param_dict[key] in ['auto', 'none', 'sum', 'sum_over_batch_size'], \ - 'Supported reduction value for tensorflow is auto, none, sum, sum_over_batch_size' + if key == "reduction": + assert self._param_dict[key] in [ + "auto", + "none", + "sum", + "sum_over_batch_size", + ], "Supported reduction value for tensorflow is auto, none, sum, sum_over_batch_size" _dict.update({_param_map[key]: self._param_dict[key]}) return _dict @@ -192,7 +206,8 @@ def __call__(self): """ return tf.keras.losses.SparseCategoricalCrossentropy, self._mapping() -@criterion_registry('CrossEntropyLoss', 'pytorch') + +@criterion_registry("CrossEntropyLoss", "pytorch") class PyTorchCrossEntropyLoss(object): """PyTorch CrossEntropyLoss criterion.""" @@ -202,17 +217,20 @@ def __init__(self, param_dict): Args: param_dict (string): param_dict. """ - assert isinstance(param_dict, dict), 'This criterion constructor parameter must be a dict' + assert isinstance(param_dict, dict), "This criterion constructor parameter must be a dict" self._param_dict = param_dict def _mapping(self): - _param_map = {'reduction': 'reduction'} + _param_map = {"reduction": "reduction"} _dict = {} for key in self._param_dict: if key in _param_map: - if key == 'reduction': - assert self._param_dict[key] in ['none', 'mean', 'sum'], \ - 'Supported reduction value is none, mean, sum' + if key == "reduction": + assert self._param_dict[key] in [ + "none", + "mean", + "sum", + ], "Supported reduction value is none, mean, sum" _dict.update({_param_map[key]: self._param_dict[key]}) return _dict @@ -225,6 +243,7 @@ def __call__(self): """ return torch.nn.CrossEntropyLoss, self._mapping() + class KnowledgeDistillationFramework(object): """Knowledge Distillation Framework.""" @@ -258,31 +277,33 @@ def teacher_model(self, model): """Setter of teacher model.""" self._teacher_model = model + class KnowledgeDistillationLoss(KnowledgeDistillationFramework): """Initialize the KnowledgeDistillationLoss class.""" - def __init__(self, temperature=1.0, loss_types=['CE', 'CE'], - loss_weights=[0.5, 0.5], student_model=None, teacher_model=None): + def __init__( + self, temperature=1.0, loss_types=["CE", "CE"], loss_weights=[0.5, 0.5], student_model=None, teacher_model=None + ): """Initialize Knowledge Distillation Loss class. Args: - temperature (float, optional): Hyperparameters that control the entropy + temperature (float, optional): Hyperparameters that control the entropy of probability distributions. Defaults to 1.0. loss_types (list, optional): loss type. Defaults to ['CE', 'CE']. loss_weights (list, optional): loss weights. Defaults to [0.5, 0.5]. student_model (model, optional): student model. Defaults to None. teacher_model (model, optional): teacher model. Defaults to None. 
""" - super(KnowledgeDistillationLoss, self).__init__(student_model=student_model, - teacher_model=teacher_model) + super(KnowledgeDistillationLoss, self).__init__(student_model=student_model, teacher_model=teacher_model) self.teacher_outputs = None self.temperature = temperature self.loss_weights = loss_weights self.loss_types = loss_types self.teacher_student_loss = self.student_targets_loss = None - assert len(loss_weights) == len(loss_types) == 2, 'Wrong length for ' + \ - 'loss_weights or loss_types, should be 2.' - assert sum(loss_weights) == 1.0, 'Sum of loss_weights should be 1.0.' + assert len(loss_weights) == len(loss_types) == 2, ( + "Wrong length for " + "loss_weights or loss_types, should be 2." + ) + assert sum(loss_weights) == 1.0, "Sum of loss_weights should be 1.0." def teacher_model_forward(self, input, teacher_model=None): """Define parameters for teacher_model_forward function. @@ -294,8 +315,7 @@ def teacher_model_forward(self, input, teacher_model=None): Raises: NotImplementedError: NotImplementedError """ - raise NotImplementedError('Function teacher_model_forward ' - 'should be framework related.') + raise NotImplementedError("Function teacher_model_forward " "should be framework related.") def teacher_student_loss_cal(self, student_outputs, teacher_outputs): """Define parameters for teacher_student_loss_cal function. @@ -307,8 +327,7 @@ def teacher_student_loss_cal(self, student_outputs, teacher_outputs): Raises: NotImplementedError: NotImplementedError """ - raise NotImplementedError('Function teacher_student_loss_cal ' - 'should be framework related.') + raise NotImplementedError("Function teacher_student_loss_cal " "should be framework related.") def student_targets_loss_cal(self, student_outputs, targets): """Define parameters for student_targets_loss_cal function. @@ -320,8 +339,7 @@ def student_targets_loss_cal(self, student_outputs, targets): Raises: NotImplementedError: NotImplementedError """ - raise NotImplementedError('Function student_targets_loss_cal ' - 'should be framework related.') + raise NotImplementedError("Function student_targets_loss_cal " "should be framework related.") def loss_cal(self, student_outputs, targets): """Calculate loss of student model. @@ -355,12 +373,11 @@ def loss_cal_sloss(self, student_outputs, teacher_outputs, student_loss): student_out_ = student_outputs / self.temperature teacher_out_ = teacher_outputs / self.temperature distillation_loss = self.teacher_student_loss_cal(student_out_, teacher_out_) - distillation_loss *= self.temperature ** 2 + distillation_loss *= self.temperature**2 else: distillation_loss = 0 - self.loss = origin_loss * self.loss_weights[0] + \ - distillation_loss * self.loss_weights[1] + self.loss = origin_loss * self.loss_weights[0] + distillation_loss * self.loss_weights[1] return self.loss def __call__(self, student_outputs, targets): @@ -375,15 +392,17 @@ def __call__(self, student_outputs, targets): """ return self.loss_cal(student_outputs, targets) + class PyTorchKnowledgeDistillationLoss(KnowledgeDistillationLoss): """The PyTorchKnowledgeDistillationLoss class inherits from KnowledgeDistillationLoss.""" - def __init__(self, temperature=1.0, loss_types=['CE', 'CE'], - loss_weights=[0.5, 0.5], student_model=None, teacher_model=None): + def __init__( + self, temperature=1.0, loss_types=["CE", "CE"], loss_weights=[0.5, 0.5], student_model=None, teacher_model=None + ): """Initialize PyTorch Knowledge Distillation Loss class. 
Args: - temperature (float, optional): Hyperparameters that control the entropy + temperature (float, optional): Hyperparameters that control the entropy of probability distributions. Defaults to 1.0. loss_types (list, optional): loss types. Defaults to ['CE', 'CE']. loss_weights (list, optional): loss weights. Defaults to [0.5, 0.5]. @@ -394,33 +413,37 @@ def __init__(self, temperature=1.0, loss_types=['CE', 'CE'], NotImplementedError: NotImplementedError NotImplementedError: NotImplementedError """ - super(PyTorchKnowledgeDistillationLoss, self).__init__(temperature=temperature, - loss_types=loss_types, - loss_weights=loss_weights, - student_model=student_model, - teacher_model=teacher_model) + super(PyTorchKnowledgeDistillationLoss, self).__init__( + temperature=temperature, + loss_types=loss_types, + loss_weights=loss_weights, + student_model=student_model, + teacher_model=teacher_model, + ) if self.student_targets_loss is None: - if self.loss_types[0] == 'CE': + if self.loss_types[0] == "CE": self.student_targets_loss = torch.nn.CrossEntropyLoss() - elif self.loss_types[0] == 'MSE': + elif self.loss_types[0] == "MSE": self.student_targets_loss = torch.nn.MSELoss() else: - raise NotImplementedError('Now we only support CrossEntropyLoss and MSELoss ' - 'for loss of student model output with respect to targets.') - logger.info('student_targets_loss: {}, {}'.format(self.loss_types[0], \ - self.loss_weights[0])) + raise NotImplementedError( + "Now we only support CrossEntropyLoss and MSELoss " + "for loss of student model output with respect to targets." + ) + logger.info("student_targets_loss: {}, {}".format(self.loss_types[0], self.loss_weights[0])) if self.teacher_student_loss is None: - if self.loss_types[1] == 'CE': + if self.loss_types[1] == "CE": self.teacher_student_loss = self.SoftCrossEntropy - elif self.loss_types[1] == 'KL': + elif self.loss_types[1] == "KL": self.teacher_student_loss = self.KullbackLeiblerDivergence - elif self.loss_types[1] == 'MSE': + elif self.loss_types[1] == "MSE": self.teacher_student_loss = torch.nn.MSELoss() else: - raise NotImplementedError('Now we only support CrossEntropyLoss KL Divergence' - ' and MSELoss for loss of student model output with respect to teacher model ouput.') - logger.info('teacher_student_loss: {}, {}'.format(self.loss_types[1], \ - self.loss_weights[1])) + raise NotImplementedError( + "Now we only support CrossEntropyLoss KL Divergence" + " and MSELoss for loss of student model output with respect to teacher model ouput." + ) + logger.info("teacher_student_loss: {}, {}".format(self.loss_types[1], self.loss_weights[1])) def SoftCrossEntropy(self, logits, targets): """Return SoftCrossEntropy. @@ -434,7 +457,7 @@ def SoftCrossEntropy(self, logits, targets): """ log_prob = torch.nn.functional.log_softmax(logits, dim=-1) targets_prob = torch.nn.functional.softmax(targets, dim=-1) - return (- targets_prob * log_prob).sum(dim=-1).mean() + return (-targets_prob * log_prob).sum(dim=-1).mean() def KullbackLeiblerDivergence(self, logits, targets): """Return KullbackLeiblerDivergence. 
@@ -464,8 +487,9 @@ def teacher_model_forward(self, input, teacher_model=None, device=None): outputs = None if self.loss_weights[1] > 0: model = self.teacher_model if teacher_model is None else teacher_model - assert isinstance(model, torch.nn.Module), \ - 'Teacher model should be a torch Module instead of {}'.format(type(model)) + assert isinstance(model, torch.nn.Module), "Teacher model should be a torch Module instead of {}".format( + type(model) + ) model.eval() try: model_device = next(model.parameters()).device @@ -490,7 +514,7 @@ def teacher_student_loss_cal(self, student_outputs, teacher_outputs): Returns: tensor: loss """ - assert self.teacher_student_loss, 'teacher_student_loss not specified.' + assert self.teacher_student_loss, "teacher_student_loss not specified." return self.teacher_student_loss(student_outputs, teacher_outputs) def student_targets_loss_cal(self, student_outputs, targets): @@ -503,10 +527,11 @@ def student_targets_loss_cal(self, student_outputs, targets): Returns: tensor: loss """ - assert self.student_targets_loss, 'student_targets_loss not specified.' + assert self.student_targets_loss, "student_targets_loss not specified." return self.student_targets_loss(student_outputs, targets) -@criterion_registry('KnowledgeDistillationLoss', 'pytorch') + +@criterion_registry("KnowledgeDistillationLoss", "pytorch") class PyTorchKnowledgeDistillationLossWrapper(object): """PyTorchKnowledgeDistillationLossWrapper wraps PyTorchKnowledgeDistillationLoss.""" @@ -520,22 +545,22 @@ def __init__(self, param_dict): def _param_check(self): param_dict = self.param_dict - _params = ['temperature', 'loss_types', 'loss_weights'] - assert all(key in param_dict for key in _params),\ - 'Keys {} must be in input parameters.'.format(_params) - assert param_dict['temperature'] > 0.0,\ - 'Value of temperature must be positive.' - assert len(param_dict['loss_types']) == len(param_dict['loss_weights']),\ - 'Length of loss_types and loss_weights must be the same.' - assert all(type(param_dict[k]) in [list, tuple] \ - for k in ['loss_types', 'loss_weights']),\ - 'Type of loss_types and loss_weights must be list or tuple.' - assert all(any(isinstance(e, t) for t in [str, torch.nn.Module]) \ - for e in param_dict['loss_types']), \ - 'Type of loss_types element must be str or torch Module.' - assert all(0. <= e <= 1. for e in param_dict['loss_weights']) and \ - abs(sum(param_dict['loss_weights']) - 1.0) < 1e-9, \ - 'Element of loss_weights must be in interval [0, 1] and summed to 1.0.' + _params = ["temperature", "loss_types", "loss_weights"] + assert all(key in param_dict for key in _params), "Keys {} must be in input parameters.".format(_params) + assert param_dict["temperature"] > 0.0, "Value of temperature must be positive." + assert len(param_dict["loss_types"]) == len( + param_dict["loss_weights"] + ), "Length of loss_types and loss_weights must be the same." + assert all( + type(param_dict[k]) in [list, tuple] for k in ["loss_types", "loss_weights"] + ), "Type of loss_types and loss_weights must be list or tuple." + assert all( + any(isinstance(e, t) for t in [str, torch.nn.Module]) for e in param_dict["loss_types"] + ), "Type of loss_types element must be str or torch Module." + assert ( + all(0.0 <= e <= 1.0 for e in param_dict["loss_weights"]) + and abs(sum(param_dict["loss_weights"]) - 1.0) < 1e-9 + ), "Element of loss_weights must be in interval [0, 1] and summed to 1.0." 
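# The weighted objective that loss_cal_sloss assembles above can be written out in a few
# lines of plain PyTorch. This is an illustrative, self-contained sketch of the same math
# with hypothetical logits, targets, temperature and weights; it is not a call into
# neural_compressor itself.
import torch
import torch.nn.functional as F

T, weights = 2.0, (0.3, 0.7)                 # temperature and [student, distillation] weights
student_logits = torch.randn(8, 4)
teacher_logits = torch.randn(8, 4)
targets = torch.randint(0, 4, (8,))

origin_loss = F.cross_entropy(student_logits, targets)      # loss_types[0] == "CE"
soft_ce = (-F.softmax(teacher_logits / T, dim=-1)           # SoftCrossEntropy on T-scaled logits
           * F.log_softmax(student_logits / T, dim=-1)).sum(dim=-1).mean()
distillation_loss = soft_ce * T ** 2                         # scaled back by temperature**2
loss = weights[0] * origin_loss + weights[1] * distillation_loss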
new_dict = {} for k in _params: new_dict[k] = param_dict[k] @@ -550,15 +575,17 @@ def __call__(self, **kwargs): """ return PyTorchKnowledgeDistillationLoss, self._param_check() + class TensorflowKnowledgeDistillationLoss(KnowledgeDistillationLoss): """The TensorflowKnowledgeDistillationLoss class inherits from KnowledgeDistillationLoss.""" - def __init__(self, temperature=1.0, loss_types=['CE', 'CE'], - loss_weights=[0.5, 0.5], student_model=None, teacher_model=None): + def __init__( + self, temperature=1.0, loss_types=["CE", "CE"], loss_weights=[0.5, 0.5], student_model=None, teacher_model=None + ): """Initialize Tensorflow Knowledge Distillation Loss class. Args: - temperature (float, optional): Hyperparameters that control the entropy + temperature (float, optional): Hyperparameters that control the entropy of probability distributions. Defaults to 1.0. loss_types (list, optional): loss types. Defaults to ['CE', 'CE']. loss_weights (list, optional): loss weights. Defaults to [0.5, 0.5]. @@ -569,29 +596,33 @@ def __init__(self, temperature=1.0, loss_types=['CE', 'CE'], NotImplementedError: NotImplementedError NotImplementedError: NotImplementedError """ - super(TensorflowKnowledgeDistillationLoss, self).__init__(temperature=temperature, - loss_types=loss_types, - loss_weights=loss_weights, - student_model=student_model, - teacher_model=teacher_model) + super(TensorflowKnowledgeDistillationLoss, self).__init__( + temperature=temperature, + loss_types=loss_types, + loss_weights=loss_weights, + student_model=student_model, + teacher_model=teacher_model, + ) if self.student_targets_loss is None: - if self.loss_types[0] == 'CE': + if self.loss_types[0] == "CE": self.student_targets_loss = tf.keras.losses.SparseCategoricalCrossentropy() else: - raise NotImplementedError('Now we only support CrossEntropyLoss ' - 'for loss of student model output with respect to targets.') - logger.info('student_targets_loss: {}, {}'.format(self.loss_types[0], \ - self.loss_weights[0])) + raise NotImplementedError( + "Now we only support CrossEntropyLoss " "for loss of student model output with respect to targets." + ) + logger.info("student_targets_loss: {}, {}".format(self.loss_types[0], self.loss_weights[0])) if self.teacher_student_loss is None: - if self.loss_types[1] == 'CE': + if self.loss_types[1] == "CE": self.teacher_student_loss = self.SoftCrossEntropy - elif self.loss_types[1] == 'KL': + elif self.loss_types[1] == "KL": self.teacher_student_loss = tf.keras.losses.KLDivergence() else: - raise NotImplementedError('Now we only support CrossEntropyLoss' - ' for loss of student model output with respect to teacher model ouput.') - logger.info('teacher_student_loss: {}, {}'.format(self.loss_types[1], \ - self.loss_weights[1])) + raise NotImplementedError( + "Now we only support CrossEntropyLoss" + " for loss of student model output with respect to teacher model ouput." + ) + logger.info("teacher_student_loss: {}, {}".format(self.loss_types[1], self.loss_weights[1])) + def SoftCrossEntropy(self, targets, logits): """Return SoftCrossEntropy. @@ -604,7 +635,7 @@ def SoftCrossEntropy(self, targets, logits): """ log_prob = tf.math.log(logits) targets_prob = targets - return tf.math.reduce_mean(tf.math.reduce_sum(- targets_prob * log_prob, axis=-1), axis=-1) + return tf.math.reduce_mean(tf.math.reduce_sum(-targets_prob * log_prob, axis=-1), axis=-1) def teacher_model_forward(self, input, teacher_model=None): """Teacher model forward. 
@@ -639,7 +670,7 @@ def teacher_student_loss_cal(self, student_outputs, teacher_outputs): Returns: tensor: loss """ - assert self.teacher_student_loss, 'teacher_student_loss not specified.' + assert self.teacher_student_loss, "teacher_student_loss not specified." return self.teacher_student_loss(teacher_outputs, student_outputs) def student_targets_loss_cal(self, student_outputs, targets): @@ -652,7 +683,7 @@ def student_targets_loss_cal(self, student_outputs, targets): Returns: tensor: loss """ - assert self.student_targets_loss, 'student_targets_loss not specified.' + assert self.student_targets_loss, "student_targets_loss not specified." return self.student_targets_loss(targets, student_outputs) def __call__(self, student_outputs, targets): @@ -670,7 +701,8 @@ def __call__(self, student_outputs, targets): targets = tmp return self.loss_cal(student_outputs, targets) -@criterion_registry('KnowledgeDistillationLoss', 'tensorflow') + +@criterion_registry("KnowledgeDistillationLoss", "tensorflow") class TensorflowKnowledgeDistillationLossWrapper(object): """TensorflowKnowledgeDistillationLossWrapper wraps TensorflowKnowledgeDistillationLoss.""" @@ -684,22 +716,22 @@ def __init__(self, param_dict): def _param_check(self): param_dict = self.param_dict - _params = ['temperature', 'loss_types', 'loss_weights'] - assert all(key in param_dict for key in _params),\ - 'Keys {} must be in input parameters.'.format(_params) - assert param_dict['temperature'] > 0.0,\ - 'Value of temperature must be positive.' - assert len(param_dict['loss_types']) == len(param_dict['loss_weights']),\ - 'Length of loss_types and loss_weights must be the same.' - assert all(type(param_dict[k]) in [list, tuple] \ - for k in ['loss_types', 'loss_weights']),\ - 'Type of loss_types and loss_weights must be list or tuple.' - assert all(any(isinstance(e, t) for t in [str, tf.keras]) \ - for e in param_dict['loss_types']), \ - 'Type of loss_types element must be str or torch Module.' - assert all(0. <= e <= 1. for e in param_dict['loss_weights']) and \ - abs(sum(param_dict['loss_weights']) - 1.0) < 1e-9, \ - 'Element of loss_weights must be in interval [0, 1] and summed to 1.0.' + _params = ["temperature", "loss_types", "loss_weights"] + assert all(key in param_dict for key in _params), "Keys {} must be in input parameters.".format(_params) + assert param_dict["temperature"] > 0.0, "Value of temperature must be positive." + assert len(param_dict["loss_types"]) == len( + param_dict["loss_weights"] + ), "Length of loss_types and loss_weights must be the same." + assert all( + type(param_dict[k]) in [list, tuple] for k in ["loss_types", "loss_weights"] + ), "Type of loss_types and loss_weights must be list or tuple." + assert all( + any(isinstance(e, t) for t in [str, tf.keras]) for e in param_dict["loss_types"] + ), "Type of loss_types element must be str or torch Module." + assert ( + all(0.0 <= e <= 1.0 for e in param_dict["loss_weights"]) + and abs(sum(param_dict["loss_weights"]) - 1.0) < 1e-9 + ), "Element of loss_weights must be in interval [0, 1] and summed to 1.0." 
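# A hedged sketch of driving the wrapper registered above by hand. The import path and the
# tiny tf.keras models are assumptions made for illustration; inside neural_compressor the
# wrapper is normally resolved through criterion_registry rather than imported directly.
import tensorflow as tf
from neural_compressor.experimental.common.criterion import (
    TensorflowKnowledgeDistillationLossWrapper,
)

params = {"temperature": 2.0, "loss_types": ["CE", "KL"], "loss_weights": [0.4, 0.6]}
loss_cls, checked = TensorflowKnowledgeDistillationLossWrapper(params)()
criterion = loss_cls(student_model=tf.keras.Sequential([tf.keras.layers.Dense(4)]),
                     teacher_model=tf.keras.Sequential([tf.keras.layers.Dense(4)]),
                     **checked)
# "CE" in loss_types[0] maps to SparseCategoricalCrossentropy against the targets, while
# "KL" in loss_types[1] maps to tf.keras.losses.KLDivergence between teacher and student outputs.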
new_dict = {} for k in _params: new_dict[k] = param_dict[k] @@ -714,15 +746,17 @@ def __call__(self, **kwargs): """ return TensorflowKnowledgeDistillationLoss, self._param_check() + class TensorflowKnowledgeDistillationLossExternal(KnowledgeDistillationLoss): """TensorflowKnowledgeDistillationLossExternal inherits from KnowledgeDistillationLoss.""" - def __init__(self, temperature=1.0, loss_types=['CE', 'CE'], - loss_weights=[0.5, 0.5], student_model=None, teacher_model=None): + def __init__( + self, temperature=1.0, loss_types=["CE", "CE"], loss_weights=[0.5, 0.5], student_model=None, teacher_model=None + ): """Initialize Tensorflow Knowledge Distillation Loss class. Args: - temperature (float, optional): Hyperparameters that control the entropy + temperature (float, optional): Hyperparameters that control the entropy of probability distributions. Defaults to 1.0. loss_types (list, optional): loss types. Defaults to ['CE', 'CE']. loss_weights (list, optional): loss weights. Defaults to [0.5, 0.5]. @@ -734,29 +768,31 @@ def __init__(self, temperature=1.0, loss_types=['CE', 'CE'], NotImplementedError: NotImplementedError """ super(TensorflowKnowledgeDistillationLossExternal, self).__init__( - temperature=temperature, - loss_types=loss_types, - loss_weights=loss_weights, - student_model=student_model, - teacher_model=teacher_model) + temperature=temperature, + loss_types=loss_types, + loss_weights=loss_weights, + student_model=student_model, + teacher_model=teacher_model, + ) if self.student_targets_loss is None: - if self.loss_types[0] == 'CE': + if self.loss_types[0] == "CE": self.student_targets_loss = tf.keras.losses.CategoricalCrossentropy() else: - raise NotImplementedError('Now we only support CrossEntropyLoss ' - 'for loss of student model output with respect to targets.') - logger.info('student_targets_loss: {}, {}'.format(self.loss_types[0], \ - self.loss_weights[0])) + raise NotImplementedError( + "Now we only support CrossEntropyLoss " "for loss of student model output with respect to targets." + ) + logger.info("student_targets_loss: {}, {}".format(self.loss_types[0], self.loss_weights[0])) if self.teacher_student_loss is None: - if self.loss_types[1] == 'CE': + if self.loss_types[1] == "CE": self.teacher_student_loss = tf.keras.losses.CategoricalCrossentropy() - elif self.loss_types[1] == 'KL': + elif self.loss_types[1] == "KL": self.teacher_student_loss = tf.keras.losses.KLDivergence() else: - raise NotImplementedError('Now we only support CrossEntropyLoss' - ' for loss of student model output with respect to teacher model ouput.') - logger.info('teacher_student_loss: {}, {}'.format(self.loss_types[1], \ - self.loss_weights[1])) + raise NotImplementedError( + "Now we only support CrossEntropyLoss" + " for loss of student model output with respect to teacher model ouput." + ) + logger.info("teacher_student_loss: {}, {}".format(self.loss_types[1], self.loss_weights[1])) def teacher_model_forward(self, input, teacher_model=None): """Teacher model forward. @@ -791,7 +827,7 @@ def teacher_student_loss_cal(self, student_outputs, teacher_outputs): Returns: tensor: loss """ - assert self.teacher_student_loss, 'teacher_student_loss not specified.' + assert self.teacher_student_loss, "teacher_student_loss not specified." 
return self.teacher_student_loss(teacher_outputs, student_outputs) def student_targets_loss_cal(self, student_outputs, targets): @@ -804,19 +840,26 @@ def student_targets_loss_cal(self, student_outputs, targets): Returns: tensor: loss """ - assert self.student_targets_loss, 'student_targets_loss not specified.' + assert self.student_targets_loss, "student_targets_loss not specified." return self.student_targets_loss(targets, student_outputs) class IntermediateLayersKnowledgeDistillationLoss(KnowledgeDistillationFramework): """The IntermediateLayersKnowledgeDistillationLoss class inherits from KnowledgeDistillationLoss.""" - def __init__(self, layer_mappings=[], loss_types=None, loss_weights=None, - add_origin_loss=False, student_model=None, teacher_model=None): + def __init__( + self, + layer_mappings=[], + loss_types=None, + loss_weights=None, + add_origin_loss=False, + student_model=None, + teacher_model=None, + ): """Initialize PyTorch Knowledge Distillation Loss class. Args: - temperature (float, optional): Hyperparameters that control the entropy + temperature (float, optional): Hyperparameters that control the entropy of probability distributions. Defaults to 1.0. loss_types (list, optional): loss types. Defaults to ['CE', 'CE']. loss_weights (list, optional): loss weights. Defaults to [0.5, 0.5]. @@ -828,41 +871,42 @@ def __init__(self, layer_mappings=[], loss_types=None, loss_weights=None, NotImplementedError: NotImplementedError """ super(IntermediateLayersKnowledgeDistillationLoss, self).__init__( - student_model=student_model, - teacher_model=teacher_model - ) + student_model=student_model, teacher_model=teacher_model + ) self.student_features = {} self.teacher_features = {} self.layer_mappings = [] self.layer_output_process = [] for item in layer_mappings: - assert len(item) == 1 or len(item) == 2, 'Each item in layer_mappings ' + \ - 'should be a list or tuple containing 1 list or 2 lists, with format ' + \ - '[(layer_name, )] or [(student_layer_name, ), (teacher_layer_name, )], ' + \ - 'first one is the abbreviation for cases that student_layer_name and teacher_layer_name ' + \ - 'are the same. The length of tuples in the list could be either 1 like previous cases, ' + \ - 'or 2, like [(layer_name, layer_output_process)] or ' + \ - '[(student_layer_name, student_layer_output_process), ' + \ - '(teacher_layer_name, teacher_layer_output_process)].' + \ - 'For example, with 2 tuples of length 2, element looks like ' + \ - '[(\'student_model.layer1.attention\', \'1\'), (\'teacher_model.layer1.attention\', \'1\')], ' + \ - 'where \'student_model.layer1.attention\' and \'teacher_model.layer1.attention\' ' + \ - 'represent attention module on layer 1 of the student model and the ' + \ - 'teacher model respectively, two \'1\' represent the index to retrieve the ' + \ - 'desired output from the defined module\'s outputs, in this case, the above ' + \ - 'two module\'s outputs are lists, with desired output in index 1 of these ' + \ - 'lists, in cases of dict output, retrieving can be done by defining the ' + \ - 'corresponding key, in cases of module\'s output is the desired output, ' + \ - 'just adopt the format such as [(\'student_model.layer1.output' + \ - '.output\', ), (\'teacher_model.layer1.output\', )].' 
+ assert len(item) == 1 or len(item) == 2, ( + "Each item in layer_mappings " + + "should be a list or tuple containing 1 list or 2 lists, with format " + + "[(layer_name, )] or [(student_layer_name, ), (teacher_layer_name, )], " + + "first one is the abbreviation for cases that student_layer_name and teacher_layer_name " + + "are the same. The length of tuples in the list could be either 1 like previous cases, " + + "or 2, like [(layer_name, layer_output_process)] or " + + "[(student_layer_name, student_layer_output_process), " + + "(teacher_layer_name, teacher_layer_output_process)]." + + "For example, with 2 tuples of length 2, element looks like " + + "[('student_model.layer1.attention', '1'), ('teacher_model.layer1.attention', '1')], " + + "where 'student_model.layer1.attention' and 'teacher_model.layer1.attention' " + + "represent attention module on layer 1 of the student model and the " + + "teacher model respectively, two '1' represent the index to retrieve the " + + "desired output from the defined module's outputs, in this case, the above " + + "two module's outputs are lists, with desired output in index 1 of these " + + "lists, in cases of dict output, retrieving can be done by defining the " + + "corresponding key, in cases of module's output is the desired output, " + + "just adopt the format such as [('student_model.layer1.output" + + ".output', ), ('teacher_model.layer1.output', )]." + ) if len(item) == 1: item = [item[0], item[0]] for i in range(len(item)): if not isinstance(item[i], (list, tuple)): - item[i] = [item[i], ''] + item[i] = [item[i], ""] elif len(item[i]) == 1: - item[i] = [item[i][0], ''] + item[i] = [item[i][0], ""] else: assert len(item[i]) == 2, "Expect {} to be a tuple of length 1 or 2.".format(item[i]) self.layer_mappings.append((item[0][0], item[1][0])) @@ -871,18 +915,21 @@ def __init__(self, layer_mappings=[], loss_types=None, loss_weights=None, self.student_features[student_layer] = [] self.teacher_features[teacher_layer] = [] - self.loss_weights = [1.0 / len(layer_mappings)] * len(layer_mappings) \ - if (loss_weights is None or loss_weights == []) else loss_weights - self.loss_types = ['MSE'] * len(layer_mappings) \ - if (loss_types is None or loss_types == []) else loss_types + self.loss_weights = ( + [1.0 / len(layer_mappings)] * len(layer_mappings) + if (loss_weights is None or loss_weights == []) + else loss_weights + ) + self.loss_types = ["MSE"] * len(layer_mappings) if (loss_types is None or loss_types == []) else loss_types self.add_origin_loss = add_origin_loss self.loss_funcs = [] self.feature_matchers = None self.init_loss_funcs() - assert len(self.layer_mappings) == len(self.loss_weights) == len(self.loss_types), \ - f'Wrong length for layer_mappings:{self.layer_mappings}, ' + \ - f'loss_weights:{self.loss_weights} or loss_types:{self.loss_types}, ' + \ - 'all should be the same.' + assert len(self.layer_mappings) == len(self.loss_weights) == len(self.loss_types), ( + f"Wrong length for layer_mappings:{self.layer_mappings}, " + + f"loss_weights:{self.loss_weights} or loss_types:{self.loss_types}, " + + "all should be the same." + ) def init_loss_funcs(self): """Init loss funcs. 
@@ -890,8 +937,7 @@ def init_loss_funcs(self): Raises: NotImplementedError: NotImplementedError """ - raise NotImplementedError('Function init_loss_funcs ' - 'should be framework related.') + raise NotImplementedError("Function init_loss_funcs " "should be framework related.") def init_feature_matcher(self, student_feature, teacher_feature): """Init feature matcher. @@ -899,8 +945,7 @@ def init_feature_matcher(self, student_feature, teacher_feature): Raises: NotImplementedError: NotImplementedError """ - raise NotImplementedError('Function init_feature_matcher ' - 'should be framework related.') + raise NotImplementedError("Function init_feature_matcher " "should be framework related.") def teacher_model_forward(self, input, teacher_model=None): """Teacher model forward. @@ -908,8 +953,7 @@ def teacher_model_forward(self, input, teacher_model=None): Raises: NotImplementedError: NotImplementedError """ - raise NotImplementedError('Function teacher_model_forward ' - 'should be framework related.') + raise NotImplementedError("Function teacher_model_forward " "should be framework related.") def loss_cal(self): """Calculate loss. @@ -917,7 +961,7 @@ def loss_cal(self): Raises: NotImplementedError: NotImplementedError """ - raise NotImplementedError('Function loss_cal should be framework related.') + raise NotImplementedError("Function loss_cal should be framework related.") def loss_cal_sloss(self, student_outputs, teacher_outputs, student_loss): """Calculate all losses between student model and teacher model. @@ -944,17 +988,22 @@ def __call__(self, student_outputs, targets): return 0 -class PyTorchIntermediateLayersKnowledgeDistillationLoss( - IntermediateLayersKnowledgeDistillationLoss - ): +class PyTorchIntermediateLayersKnowledgeDistillationLoss(IntermediateLayersKnowledgeDistillationLoss): """PyTorch Intermediate Layers Knowledge Distillation Loss.""" - def __init__(self, layer_mappings=[], loss_types=None, loss_weights=None, - add_origin_loss=False, student_model=None, teacher_model=None): + def __init__( + self, + layer_mappings=[], + loss_types=None, + loss_weights=None, + add_origin_loss=False, + student_model=None, + teacher_model=None, + ): """Initialize PyTorch Knowledge Distillation Loss class. Args: - temperature (float, optional): Hyperparameters that control the entropy + temperature (float, optional): Hyperparameters that control the entropy of probability distributions. Defaults to 1.0. loss_types (list, optional): loss types. Defaults to ['CE', 'CE']. loss_weights (list, optional): loss weights. Defaults to [0.5, 0.5]. 
@@ -966,12 +1015,13 @@ def __init__(self, layer_mappings=[], loss_types=None, loss_weights=None, NotImplementedError: NotImplementedError """ super(PyTorchIntermediateLayersKnowledgeDistillationLoss, self).__init__( - layer_mappings=layer_mappings, - loss_types=loss_types, - loss_weights=loss_weights, - add_origin_loss=add_origin_loss, - student_model=student_model, - teacher_model=teacher_model) + layer_mappings=layer_mappings, + loss_types=loss_types, + loss_weights=loss_weights, + add_origin_loss=add_origin_loss, + student_model=student_model, + teacher_model=teacher_model, + ) self.register_hooks_for_models() def register_hooks_for_models(self): @@ -981,32 +1031,27 @@ def register_hooks_for_models(self): AttributeError: AttributeError """ from neural_compressor.experimental.common import torch_utils - def register_model_forward_hook(model, path, output_process='', student=False): - nodes = path.split('.') + + def register_model_forward_hook(model, path, output_process="", student=False): + nodes = path.split(".") module = model for node in nodes: try: module = module.__getattr__(node) except: - raise AttributeError('There is no path {} in the model.'.format(path)) - return module.register_forward_hook( - torch_utils.get_activation(path, output_process, student) - ) + raise AttributeError("There is no path {} in the model.".format(path)) + return module.register_forward_hook(torch_utils.get_activation(path, output_process, student)) - assert isinstance(self.student_model, torch.nn.Module) and \ - isinstance(self.teacher_model, torch.nn.Module), \ - 'Expect student_model and teacher_model to be an torch.nn.Module object, ' + \ - 'got student_model:{} and teacher_model:{}'.format( - type(self.student_model), type(self.teacher_model) - ) + assert isinstance(self.student_model, torch.nn.Module) and isinstance(self.teacher_model, torch.nn.Module), ( + "Expect student_model and teacher_model to be an torch.nn.Module object, " + + "got student_model:{} and teacher_model:{}".format(type(self.student_model), type(self.teacher_model)) + ) self.hook_handles = [] for idx in range(len(self.layer_mappings)): student_layer, teacher_layer = self.layer_mappings[idx] student_output_process, teacher_output_process = self.layer_output_process[idx] - st_handle = register_model_forward_hook(self.student_model, student_layer, - student_output_process, True) - te_handle = register_model_forward_hook(self.teacher_model, teacher_layer, - teacher_output_process) + st_handle = register_model_forward_hook(self.student_model, student_layer, student_output_process, True) + te_handle = register_model_forward_hook(self.teacher_model, teacher_layer, teacher_output_process) torch_utils.STUDENT_FEATURES = self.student_features torch_utils.TEACHER_FEATURES = self.teacher_features self.hook_handles.extend([st_handle, te_handle]) @@ -1019,16 +1064,18 @@ def remove_all_hooks(self): def init_loss_funcs(self): """Init loss funcs.""" for loss_type in self.loss_types: - if loss_type == 'MSE': + if loss_type == "MSE": loss_func = torch.nn.MSELoss() - elif loss_type == 'KL': + elif loss_type == "KL": loss_func = torch.nn.KLDivLoss() - elif loss_type == 'L1': + elif loss_type == "L1": loss_func = torch.nn.L1Loss() else: - raise NotImplementedError(f'Unsupported loss type {loss_type}, supported loss is ' \ - 'MSE for mean squared error, KL for Kullback-Leibler divergence and ' \ - 'L1 for L1 loss.') + raise NotImplementedError( + f"Unsupported loss type {loss_type}, supported loss is " + "MSE for mean squared error, KL for 
Kullback-Leibler divergence and " + "L1 for L1 loss." + ) self.loss_funcs.append(loss_func) def init_feature_matcher(self, student_feature, teacher_feature): @@ -1041,12 +1088,14 @@ def init_feature_matcher(self, student_feature, teacher_feature): Returns: pytorch_linear_feature_matcher """ + class pytorch_linear_feature_matcher(torch.nn.Module): def __init__(self, src_shape, dst_shape): super().__init__() shape_diff = [abs(i - j) for i, j in zip(dst_shape, src_shape)] - assert shape_diff.count(0) == len(shape_diff) - 1, 'Expect only one ' + \ - 'different dimension between student_feature and teacher_feature.' + assert shape_diff.count(0) == len(shape_diff) - 1, ( + "Expect only one " + "different dimension between student_feature and teacher_feature." + ) self.dim_idx = np.argmax(shape_diff) self.dense = torch.nn.Linear(src_shape[self.dim_idx], dst_shape[self.dim_idx]) @@ -1058,19 +1107,20 @@ def forward(self, input): output = torch.transpose(output, self.dim_idx, -1) return output - assert isinstance(student_feature, (torch.Tensor, np.ndarray)) and \ - isinstance(teacher_feature, (torch.Tensor, np.ndarray)), \ - 'Expect student_feature and teacher_feature to be torch.Tensor or np.ndarray ' + \ - 'objects, got student_feature a {st} object, teacher_feature a {tt} object.'.format( + assert isinstance(student_feature, (torch.Tensor, np.ndarray)) and isinstance( + teacher_feature, (torch.Tensor, np.ndarray) + ), ( + "Expect student_feature and teacher_feature to be torch.Tensor or np.ndarray " + + "objects, got student_feature a {st} object, teacher_feature a {tt} object.".format( st=type(student_feature), tt=type(teacher_feature) ) - assert len(student_feature.shape) == len(teacher_feature.shape), \ - 'Expect student_feature and teacher_feature to have the same length of shape, ' + \ - 'got student_feature of {}, teacher_feature of {}.'.format( - student_feature.shape, teacher_feature.shape - ) + ) + assert len(student_feature.shape) == len(teacher_feature.shape), ( + "Expect student_feature and teacher_feature to have the same length of shape, " + + "got student_feature of {}, teacher_feature of {}.".format(student_feature.shape, teacher_feature.shape) + ) if sum([abs(i - j) for i, j in zip(student_feature.shape, teacher_feature.shape)]) == 0: - return lambda x:x + return lambda x: x return pytorch_linear_feature_matcher(student_feature.shape, teacher_feature.shape) def teacher_model_forward(self, input, teacher_model=None, device=None): @@ -1084,8 +1134,9 @@ def teacher_model_forward(self, input, teacher_model=None, device=None): NotImplementedError: NotImplementedError """ model = self.teacher_model if teacher_model is None else teacher_model - assert isinstance(model, torch.nn.Module), \ - 'Teacher model should be a torch Module instead of {}'.format(type(model)) + assert isinstance(model, torch.nn.Module), "Teacher model should be a torch Module instead of {}".format( + type(model) + ) model.eval() try: model_device = next(model.parameters()).device @@ -1130,28 +1181,32 @@ def loss_cal(self): student_layer, teacher_layer = self.layer_mappings[idx] student_feature = self.student_features[student_layer] teacher_feature = self.teacher_features[teacher_layer] - assert len(student_feature) == len(teacher_feature) and len(student_feature) > 0, \ - 'Lengths of student_feature and teacher_feature should be the same and larger than 0, ' + \ - 'instead of {} and {}, '.format(len(student_feature), len(teacher_feature)) + \ - 'please run student and teacher model forward properly before 
calculating the loss.' + assert len(student_feature) == len(teacher_feature) and len(student_feature) > 0, ( + "Lengths of student_feature and teacher_feature should be the same and larger than 0, " + + "instead of {} and {}, ".format(len(student_feature), len(teacher_feature)) + + "please run student and teacher model forward properly before calculating the loss." + ) + def device2feature_gen(features): devices_count = Counter([f.device for f in features]) - assert [1] * len(devices_count) == [_ for _ in devices_count.values()], \ - 'Currently only support 1 feature tensor per device, ' + \ - 'got {}.'.format(devices_count) - return {feat.device:feat for feat in features} + assert [1] * len(devices_count) == [ + _ for _ in devices_count.values() + ], "Currently only support 1 feature tensor per device, " + "got {}.".format(devices_count) + return {feat.device: feat for feat in features} student_feature = device2feature_gen(student_feature) teacher_feature = device2feature_gen(teacher_feature) - assert student_feature.keys() == teacher_feature.keys(), \ - 'Features from student model have different devices with that of ' + \ - 'teacher model, got student: {}, teacher: {}.'.format(student_feature.keys(), - teacher_feature.keys()) - output_device = torch.device('cuda:0') \ - if torch.device('cuda:0') in student_feature.keys() else torch.device('cpu') + assert student_feature.keys() == teacher_feature.keys(), ( + "Features from student model have different devices with that of " + + "teacher model, got student: {}, teacher: {}.".format(student_feature.keys(), teacher_feature.keys()) + ) + output_device = ( + torch.device("cuda:0") if torch.device("cuda:0") in student_feature.keys() else torch.device("cpu") + ) if init_feature_matchers: - feature_matcher = self.init_feature_matcher(student_feature[output_device], - teacher_feature[output_device]) + feature_matcher = self.init_feature_matcher( + student_feature[output_device], teacher_feature[output_device] + ) self.feature_matchers[student_layer] = feature_matcher tmp_loss = 0 @@ -1160,15 +1215,14 @@ def device2feature_gen(features): teacher_feature[device] = teacher_feature[device].to(output_device) stfeat, tefeat = student_feature[device], teacher_feature[device] stfeat = self.feature_matchers[student_layer](stfeat) - if self.loss_types[idx] == 'KL': - check_is_not_prob = \ - lambda x:(torch.abs(x.sum(dim=-1) - 1.0) > 0.2).any().item() + if self.loss_types[idx] == "KL": + check_is_not_prob = lambda x: (torch.abs(x.sum(dim=-1) - 1.0) > 0.2).any().item() if isinstance(self.feature_matchers[student_layer], torch.nn.Module): stfeat = torch.nn.LogSoftmax(dim=-1)(stfeat) else: if check_is_not_prob(stfeat): stfeat = torch.softmax(stfeat, dim=-1) - stfeat = torch.log(stfeat+1e-9) + stfeat = torch.log(stfeat + 1e-9) if check_is_not_prob(tefeat): tefeat = torch.softmax(tefeat, dim=-1) tmp_loss += self.loss_funcs[idx](stfeat, tefeat) * self.loss_weights[idx] @@ -1176,7 +1230,8 @@ def device2feature_gen(features): self.clear_features() return self.loss -@criterion_registry('IntermediateLayersKnowledgeDistillationLoss', 'pytorch') + +@criterion_registry("IntermediateLayersKnowledgeDistillationLoss", "pytorch") class PyTorchIntermediateLayersKnowledgeDistillationLossWrapper(object): """PyTorch Intermediate Layers Knowledge Distillation Loss Wrapper.""" @@ -1190,49 +1245,50 @@ def __init__(self, param_dict): def _param_check(self): param_dict = self.param_dict - _params = ['layer_mappings', 'loss_types', 'loss_weights', 'add_origin_loss'] - layer_mappings 
= param_dict['layer_mappings'] - if 'loss_types' not in param_dict or param_dict['loss_types'] == []: - param_dict['loss_types'] = ['MSE'] * len(layer_mappings) - if 'loss_weights' not in param_dict or param_dict['loss_weights'] == []: - param_dict['loss_weights'] = [1.0 / len(layer_mappings)] * len(layer_mappings) - if 'add_origin_loss' not in param_dict: - param_dict['add_origin_loss'] = False - assert 'layer_mappings' in param_dict, \ - 'Key layer_mappings must be in input parameters.' - assert all(type(param_dict[k]) in [list, tuple] \ - for k in ['layer_mappings', 'loss_types', 'loss_weights']), \ - 'Type of loss_types and loss_weights must be list or tuple.' - assert isinstance(param_dict['add_origin_loss'], bool), \ - 'Type of add_origin_loss should be bool.' - assert len(param_dict['layer_mappings']) == \ - len(param_dict['loss_types']) == len(param_dict['loss_weights']),\ - 'Length of layer_mappings, loss_types and loss_weights must be the same.' - assert all(type(it) in [list, tuple] and (len(it) == 1 or len(it) == 2) \ - for it in param_dict['layer_mappings']), \ - 'Each item in layer_mappings should be a list containing 1 tuple or 2 tuples, with format ' + \ - '[(layer_name, )] or [(student_layer_name, ), (teacher_layer_name, )], ' + \ - 'first one is the abbreviation for cases that student_layer_name and teacher_layer_name ' + \ - 'are the same. The length of tuples in the list could be either 1 like previous cases, ' + \ - 'or 2, like [(layer_name, layer_output_process)] or ' + \ - '[(student_layer_name, student_layer_output_process), ' + \ - '(teacher_layer_name, teacher_layer_output_process)].' + \ - 'For example, with 2 tuples of length 2, element looks like ' + \ - '[(\'student_model.layer1.attention\', \'1\'), (\'teacher_model.layer1.attention\', \'1\')], ' + \ - 'where \'student_model.layer1.attention\' and \'teacher_model.layer1.attention\' ' + \ - 'represent attention module on layer 1 of the student model and the ' + \ - 'teacher model respectively, two \'1\' represent the index to retrieve the ' + \ - 'desired output from the defined module\'s outputs, in this case, the above ' + \ - 'two module\'s outputs are lists, with desired output in index 1 of these ' + \ - 'lists, in cases of dict output, retrieving can be done by defining the ' + \ - 'corresponding key, in cases of module\'s output is the desired output, ' + \ - 'just adopt the format such as [(\'student_model.layer1.output' + \ - '.output\', ), (\'teacher_model.layer1.output\', )].' - assert all(any(isinstance(e, t) for t in [str, torch.nn.Module]) \ - for e in param_dict['loss_types']), \ - 'Type of loss_types element must be str or torch Module.' - assert all(0. <= e <= 1. for e in param_dict['loss_weights']), \ - 'Element of loss_weights must be in interval [0, 1].' + _params = ["layer_mappings", "loss_types", "loss_weights", "add_origin_loss"] + layer_mappings = param_dict["layer_mappings"] + if "loss_types" not in param_dict or param_dict["loss_types"] == []: + param_dict["loss_types"] = ["MSE"] * len(layer_mappings) + if "loss_weights" not in param_dict or param_dict["loss_weights"] == []: + param_dict["loss_weights"] = [1.0 / len(layer_mappings)] * len(layer_mappings) + if "add_origin_loss" not in param_dict: + param_dict["add_origin_loss"] = False + assert "layer_mappings" in param_dict, "Key layer_mappings must be in input parameters." 
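# For reference, a param_dict that satisfies the checks in this method might look like the
# following; the module paths are hypothetical, and loss_types/loss_weights could be omitted
# to fall back to the MSE / uniform-weight defaults filled in above.
intermediate_kd_params = {
    "layer_mappings": [
        [("student_model.layer1.attention", "1"), ("teacher_model.layer1.attention", "1")],
        [("layer2.output",)],   # shorthand: same layer name used for student and teacher
    ],
    "loss_types": ["MSE", "MSE"],
    "loss_weights": [0.5, 0.5],
    "add_origin_loss": False,
}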
+ assert all( + type(param_dict[k]) in [list, tuple] for k in ["layer_mappings", "loss_types", "loss_weights"] + ), "Type of loss_types and loss_weights must be list or tuple." + assert isinstance(param_dict["add_origin_loss"], bool), "Type of add_origin_loss should be bool." + assert ( + len(param_dict["layer_mappings"]) == len(param_dict["loss_types"]) == len(param_dict["loss_weights"]) + ), "Length of layer_mappings, loss_types and loss_weights must be the same." + assert all( + type(it) in [list, tuple] and (len(it) == 1 or len(it) == 2) for it in param_dict["layer_mappings"] + ), ( + "Each item in layer_mappings should be a list containing 1 tuple or 2 tuples, with format " + + "[(layer_name, )] or [(student_layer_name, ), (teacher_layer_name, )], " + + "first one is the abbreviation for cases that student_layer_name and teacher_layer_name " + + "are the same. The length of tuples in the list could be either 1 like previous cases, " + + "or 2, like [(layer_name, layer_output_process)] or " + + "[(student_layer_name, student_layer_output_process), " + + "(teacher_layer_name, teacher_layer_output_process)]." + + "For example, with 2 tuples of length 2, element looks like " + + "[('student_model.layer1.attention', '1'), ('teacher_model.layer1.attention', '1')], " + + "where 'student_model.layer1.attention' and 'teacher_model.layer1.attention' " + + "represent attention module on layer 1 of the student model and the " + + "teacher model respectively, two '1' represent the index to retrieve the " + + "desired output from the defined module's outputs, in this case, the above " + + "two module's outputs are lists, with desired output in index 1 of these " + + "lists, in cases of dict output, retrieving can be done by defining the " + + "corresponding key, in cases of module's output is the desired output, " + + "just adopt the format such as [('student_model.layer1.output" + + ".output', ), ('teacher_model.layer1.output', )]." + ) + assert all( + any(isinstance(e, t) for t in [str, torch.nn.Module]) for e in param_dict["loss_types"] + ), "Type of loss_types element must be str or torch Module." + assert all( + 0.0 <= e <= 1.0 for e in param_dict["loss_weights"] + ), "Element of loss_weights must be in interval [0, 1]." new_dict = {} for k in _params: new_dict[k] = param_dict[k] @@ -1251,8 +1307,16 @@ def __call__(self, **kwargs): class SelfKnowledgeDistillationLoss(KnowledgeDistillationFramework): """SelfKnowledge Distillation Loss.""" - def __init__(self, layer_mappings=[], loss_types=None, loss_weights=None, temperature=1.0,add_origin_loss=False, - student_model=None, teacher_model=None): + def __init__( + self, + layer_mappings=[], + loss_types=None, + loss_weights=None, + temperature=1.0, + add_origin_loss=False, + student_model=None, + teacher_model=None, + ): """Initialize SelfKnowledge Distillation Loss class. Args: @@ -1267,28 +1331,30 @@ def __init__(self, layer_mappings=[], loss_types=None, loss_weights=None, temper student_model (optional): student model. Defaults to None. teacher_model (optional): teacher model. Defaults to None. 
""" - super(SelfKnowledgeDistillationLoss, self).__init__(student_model=student_model, - teacher_model=teacher_model) + super(SelfKnowledgeDistillationLoss, self).__init__(student_model=student_model, teacher_model=teacher_model) self.temperature = temperature self.layer_mappings = [] for items in layer_mappings: for value in items: - assert len(value) == 2, 'Each item in layer_mappings ' + \ - 'should be a list or tuple of length 2, with format ' + \ - '[student_layer_name, teacher_layer_name].' + assert len(value) == 2, ( + "Each item in layer_mappings " + + "should be a list or tuple of length 2, with format " + + "[student_layer_name, teacher_layer_name]." + ) self.layer_mappings.append(items) - self.loss_weights = [1.0 / len(self.layer_mappings)] * len(self.layer_mappings) \ - if loss_weights is None else loss_weights - self.loss_types = ['CE'] * len(self.layer_mappings) \ - if loss_types is None else loss_types + self.loss_weights = ( + [1.0 / len(self.layer_mappings)] * len(self.layer_mappings) if loss_weights is None else loss_weights + ) + self.loss_types = ["CE"] * len(self.layer_mappings) if loss_types is None else loss_types self.add_origin_loss = add_origin_loss self.loss_funcs = [] self.init_loss_funcs() - assert len(self.layer_mappings) == len(self.loss_weights) == len(self.loss_types), \ - f'Wrong length for layer_mappings:{self.layer_mappings}, ' + \ - f'loss_weights:{self.loss_weights} or loss_types:{self.loss_types}, ' + \ - 'all should be the same.' + assert len(self.layer_mappings) == len(self.loss_weights) == len(self.loss_types), ( + f"Wrong length for layer_mappings:{self.layer_mappings}, " + + f"loss_weights:{self.loss_weights} or loss_types:{self.loss_types}, " + + "all should be the same." + ) def init_loss_funcs(self): """Init loss funcs. @@ -1296,8 +1362,7 @@ def init_loss_funcs(self): Raises: NotImplementedError: NotImplementedError """ - raise NotImplementedError('Function init_loss_funcs ' - 'should be framework related.') + raise NotImplementedError("Function init_loss_funcs " "should be framework related.") def teacher_model_forward(self, input, teacher_model=None): """Teacher model forward. @@ -1305,8 +1370,7 @@ def teacher_model_forward(self, input, teacher_model=None): Raises: NotImplementedError: NotImplementedError """ - raise NotImplementedError('Function teacher_model_forward ' - 'should be framework related.') + raise NotImplementedError("Function teacher_model_forward " "should be framework related.") def loss_cal(self, student_outputs): """Calculate loss. @@ -1314,8 +1378,7 @@ def loss_cal(self, student_outputs): Raises: NotImplementedError: NotImplementedError """ - raise NotImplementedError( - 'Function loss_cal should be framework related.') + raise NotImplementedError("Function loss_cal should be framework related.") def loss_cal_sloss(self, student_outputs, teacher_outputs, student_loss): """Calculate all losses between student model and teacher model. 
@@ -1338,12 +1401,19 @@ def __call__(self, student_outputs, targets): return 0 -class PyTorchSelfKnowledgeDistillationLoss( - SelfKnowledgeDistillationLoss -): +class PyTorchSelfKnowledgeDistillationLoss(SelfKnowledgeDistillationLoss): """PyTorch SelfKnowledge Distillation Loss.""" - def __init__(self, layer_mappings=[], loss_types=None, loss_weights=None, temperature=1.0,add_origin_loss=False, - student_model=None, teacher_model=None): + + def __init__( + self, + layer_mappings=[], + loss_types=None, + loss_weights=None, + temperature=1.0, + add_origin_loss=False, + student_model=None, + teacher_model=None, + ): """Initialize PyTorch SelfKnowledge Distillation Loss class. Args: @@ -1365,7 +1435,8 @@ def __init__(self, layer_mappings=[], loss_types=None, loss_weights=None, temper temperature=temperature, add_origin_loss=add_origin_loss, student_model=student_model, - teacher_model=teacher_model) + teacher_model=teacher_model, + ) def SoftCrossEntropy(self, logits, targets): """Return SoftCrossEntropy. @@ -1410,16 +1481,18 @@ def L2Divergence(self, feature1, feature2): def init_loss_funcs(self): """Init loss funcs.""" for loss_type in self.loss_types: - if loss_type == 'CE': + if loss_type == "CE": loss_func = self.SoftCrossEntropy - elif loss_type == 'KL': + elif loss_type == "KL": loss_func = self.KullbackLeiblerDivergence - elif loss_type == 'L2': + elif loss_type == "L2": loss_func = self.L2Divergence else: - raise NotImplementedError(f'Unsupported loss type {loss_type}, supported loss is ' \ - 'CE for software CE, KL for Kullback-Leibler divergence and ' \ - 'L2 for L2 distance.') + raise NotImplementedError( + f"Unsupported loss type {loss_type}, supported loss is " + "CE for software CE, KL for Kullback-Leibler divergence and " + "L2 for L2 distance." 
+ ) self.loss_funcs.append(loss_func) def loss_cal(self, student_outputs): @@ -1431,7 +1504,7 @@ def loss_cal(self, student_outputs): Returns: tensor: loss """ - self.loss = torch.FloatTensor([0.]) + self.loss = torch.FloatTensor([0.0]) tmp_loss = 0 temperature = self.temperature for loss_idx in range(len(self.layer_mappings)): @@ -1441,11 +1514,14 @@ def loss_cal(self, student_outputs): student_feature = student_outputs[student_layer] teacher_feature = student_outputs[teacher_layer] if loss_idx == 1: # soft logit - tmp_loss += self.loss_funcs[loss_idx]( - student_feature/temperature, teacher_feature/temperature) * self.loss_weights[loss_idx] + tmp_loss += ( + self.loss_funcs[loss_idx](student_feature / temperature, teacher_feature / temperature) + * self.loss_weights[loss_idx] + ) else: # feature learning - tmp_loss += self.loss_funcs[loss_idx]( - student_feature, teacher_feature) * self.loss_weights[loss_idx] + tmp_loss += ( + self.loss_funcs[loss_idx](student_feature, teacher_feature) * self.loss_weights[loss_idx] + ) if tmp_loss.device != self.loss.device: self.loss = self.loss.to(tmp_loss.device) self.loss += tmp_loss @@ -1465,8 +1541,9 @@ def teacher_model_forward(self, input, teacher_model=None, device=None): outputs = None if self.loss_weights[1] > 0: model = self.teacher_model if teacher_model is None else teacher_model - assert isinstance(model, torch.nn.Module), \ - 'Teacher model should be a torch Module instead of {}'.format(type(model)) + assert isinstance(model, torch.nn.Module), "Teacher model should be a torch Module instead of {}".format( + type(model) + ) model.eval() try: model_device = next(model.parameters()).device @@ -1482,9 +1559,10 @@ def teacher_model_forward(self, input, teacher_model=None, device=None): return outputs -@criterion_registry('SelfKnowledgeDistillationLoss', 'pytorch') +@criterion_registry("SelfKnowledgeDistillationLoss", "pytorch") class PyTorchSelfKnowledgeDistillationLossWrapper(object): """PyTorch SelfKnowledge Distillation Loss Wrapper.""" + def __init__(self, param_dict): """Initialize PyTorchSelfKnowledgeDistillationLossWrapper class. @@ -1495,44 +1573,40 @@ def __init__(self, param_dict): def _param_check(self): param_dict = self.param_dict - _params = ['temperature', 'layer_mappings', - 'loss_types', 'loss_weights', 'add_origin_loss'] - layer_mappings = param_dict['layer_mappings'] - if 'loss_types' not in param_dict: - param_dict['loss_types'] = ['CE'] * len(layer_mappings) - if 'loss_weights' not in param_dict: - param_dict['loss_weights'] = [ - 1.0 / len(layer_mappings)] * len(layer_mappings) - if 'add_origin_loss' not in param_dict: - param_dict['add_origin_loss'] = False - if 'temperature' not in param_dict: - param_dict['temperature'] = 1.0 - assert 'layer_mappings' in param_dict, \ - 'Key layer_mappings must be in input parameters.' - assert all(type(param_dict[k]) in [list, tuple] - for k in ['layer_mappings', 'loss_types', 'loss_weights']), \ - 'Type of loss_types and loss_weights must be list or tuple.' - assert isinstance(param_dict['add_origin_loss'], bool), \ - 'Type of add_origin_loss should be bool.' - assert len(param_dict['layer_mappings']) == \ - len(param_dict['loss_types']) == len(param_dict['loss_weights']),\ - 'Length of layer_mappings, loss_types and loss_weights must be the same.' - assert param_dict['temperature'] > 0.0,\ - 'Value of temperature must be positive.' 
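# For reference, a param_dict accepted by this wrapper's checks might look like the
# following; the layer names are hypothetical and follow the [student_layer, teacher_layer]
# pair format described in the assertion message.
self_kd_params = {
    "temperature": 3.0,
    "layer_mappings": [
        [["resblock.1.feature.output", "resblock.deepst.feature.output"]],
        [["resblock.2.feature.output", "resblock.deepst.feature.output"]],
    ],
    "loss_types": ["L2", "L2"],
    "loss_weights": [0.5, 0.5],
    "add_origin_loss": True,
}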
- for items in param_dict['layer_mappings']: - assert all(type(it) in [list, tuple] and (len(it) == 2) - for it in items), \ - 'Elements of layer_mappings must be list or tuple and with length of 2.' + \ - 'element looks like [\'resblock.1.feature.output,' + \ - '\'resblock.deepst.feature.output\'], where ' + \ - '\'resblock.1.feature.output\' and \'resblock.deepst.feature.output\' ' + \ - 'represent resblock feature output of the student model and feature output of the' + \ - 'teacher model respectively.' - assert all(any(isinstance(e, t) for t in [str]) - for e in param_dict['loss_types']), \ - 'Type of loss_types element must be str.' - assert all(0. <= e <= 1. for e in param_dict['loss_weights']), \ - 'Element of loss_weights must be in interval [0, 1].' + _params = ["temperature", "layer_mappings", "loss_types", "loss_weights", "add_origin_loss"] + layer_mappings = param_dict["layer_mappings"] + if "loss_types" not in param_dict: + param_dict["loss_types"] = ["CE"] * len(layer_mappings) + if "loss_weights" not in param_dict: + param_dict["loss_weights"] = [1.0 / len(layer_mappings)] * len(layer_mappings) + if "add_origin_loss" not in param_dict: + param_dict["add_origin_loss"] = False + if "temperature" not in param_dict: + param_dict["temperature"] = 1.0 + assert "layer_mappings" in param_dict, "Key layer_mappings must be in input parameters." + assert all( + type(param_dict[k]) in [list, tuple] for k in ["layer_mappings", "loss_types", "loss_weights"] + ), "Type of loss_types and loss_weights must be list or tuple." + assert isinstance(param_dict["add_origin_loss"], bool), "Type of add_origin_loss should be bool." + assert ( + len(param_dict["layer_mappings"]) == len(param_dict["loss_types"]) == len(param_dict["loss_weights"]) + ), "Length of layer_mappings, loss_types and loss_weights must be the same." + assert param_dict["temperature"] > 0.0, "Value of temperature must be positive." + for items in param_dict["layer_mappings"]: + assert all(type(it) in [list, tuple] and (len(it) == 2) for it in items), ( + "Elements of layer_mappings must be list or tuple and with length of 2." + + "element looks like ['resblock.1.feature.output," + + "'resblock.deepst.feature.output'], where " + + "'resblock.1.feature.output' and 'resblock.deepst.feature.output' " + + "represent resblock feature output of the student model and feature output of the" + + "teacher model respectively." + ) + assert all( + any(isinstance(e, t) for t in [str]) for e in param_dict["loss_types"] + ), "Type of loss_types element must be str." + assert all( + 0.0 <= e <= 1.0 for e in param_dict["loss_weights"] + ), "Element of loss_weights must be in interval [0, 1]." new_dict = {} for k in _params: new_dict[k] = param_dict[k] diff --git a/neural_compressor/experimental/common/dataloader.py b/neural_compressor/experimental/common/dataloader.py index 138e3958cca..18c4c3b0d5b 100644 --- a/neural_compressor/experimental/common/dataloader.py +++ b/neural_compressor/experimental/common/dataloader.py @@ -14,51 +14,61 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -"""common DataLoader just collects the information to construct a dataloader.""" +"""Common DataLoader just collects the information to construct a dataloader.""" from ..data import DATALOADERS + class DataLoader(object): """A wrapper of the information needed to construct a dataloader. 
- This class can't yield batched data and only in this Quantization/Benchmark - object's setter method a 'real' calib_dataloader will be created, the reason + This class can't yield batched data and only in this Quantization/Benchmark + object's setter method a 'real' calib_dataloader will be created, the reason is we have to know the framework info and only after the Quantization/Benchmark object created then framework infomation can be known. Future we will support creating iterable dataloader from neural_compressor.experimental.common.DataLoader """ - def __init__(self, dataset, batch_size=1, collate_fn=None, - last_batch='rollover', sampler=None, batch_sampler=None, - num_workers=0, pin_memory=False, shuffle=False, distributed=False): + def __init__( + self, + dataset, + batch_size=1, + collate_fn=None, + last_batch="rollover", + sampler=None, + batch_sampler=None, + num_workers=0, + pin_memory=False, + shuffle=False, + distributed=False, + ): """Initialize a Dataloader with needed information. Args: - dataset (object): A dataset object from which to get data. Dataset must implement + dataset (object): A dataset object from which to get data. Dataset must implement __iter__ or __getitem__ method. batch_size (int, optional): How many samples per batch to load. Defaults to 1. - collate_fn (Callable, optional): Callable function that processes the batch you + collate_fn (Callable, optional): Callable function that processes the batch you want to return from your dataloader. Defaults to None. - last_batch (str, optional): How to handle the last batch if the batch size does - not evenly divide by the number of examples in the dataset. 'discard': throw + last_batch (str, optional): How to handle the last batch if the batch size does + not evenly divide by the number of examples in the dataset. 'discard': throw it away. 'rollover': insert the examples to the beginning of the next batch. Defaults to 'rollover'. sampler (Iterable, optional): Defines the strategy to draw samples from the dataset. Defaults to None. batch_sampler (Iterable, optional): Returns a batch of indices at a time. Defaults to None. - num_workers (int, optional): how many subprocesses to use for data loading. + num_workers (int, optional): how many subprocesses to use for data loading. 0 means that the data will be loaded in the main process. Defaults to 0. - pin_memory (bool, optional): If True, the data loader will copy Tensors into device + pin_memory (bool, optional): If True, the data loader will copy Tensors into device pinned memory before returning them. Defaults to False. shuffle (bool, optional): Set to ``True`` to have the data reshuffled at every epoch. Defaults to False. - distributed (bool, optional): Set to ``True`` to support distributed computing. + distributed (bool, optional): Set to ``True`` to support distributed computing. Defaults to False. """ - assert hasattr(dataset, '__iter__') or \ - hasattr(dataset, '__getitem__'), \ - "dataset must implement __iter__ or __getitem__ magic method!" + assert hasattr(dataset, "__iter__") or hasattr( + dataset, "__getitem__" + ), "dataset must implement __iter__ or __getitem__ magic method!" self.dataset = dataset self.batch_size = batch_size self.collate_fn = collate_fn @@ -70,24 +80,27 @@ def __init__(self, dataset, batch_size=1, collate_fn=None, self.shuffle = shuffle self.distributed = distributed + def _generate_common_dataloader(dataloader, framework, distributed=False): """Generate common dataloader. 
- + Args: - dataloader (generator): A dataloader which can yield tuple of (input, label)/(input, _) + dataloader (generator): A dataloader which can yield tuple of (input, label)/(input, _) batched data. framework (str): The string of supported framework. - distributed (bool, optional): Set to ``True`` to support distributed computing. + distributed (bool, optional): Set to ``True`` to support distributed computing. Defaults to False. - + Returns: BaseDataLoader: neural_compressor built-in dataloader """ if not isinstance(dataloader, DataLoader): - assert hasattr(dataloader, '__iter__') and \ - hasattr(dataloader, 'batch_size'), \ - 'dataloader must implement __iter__ method and batch_size attribute' - assert not distributed, "Please use \ + assert hasattr(dataloader, "__iter__") and hasattr( + dataloader, "batch_size" + ), "dataloader must implement __iter__ method and batch_size attribute" + assert ( + not distributed + ), "Please use \ neural_compressor.experimental.common.DataLoader to support distributed computing" return dataloader else: @@ -101,5 +114,5 @@ def _generate_common_dataloader(dataloader, framework, distributed=False): num_workers=dataloader.num_workers, pin_memory=dataloader.pin_memory, shuffle=dataloader.shuffle, - distributed=bool(dataloader.distributed or distributed)) - + distributed=bool(dataloader.distributed or distributed), + ) diff --git a/neural_compressor/experimental/common/metric.py b/neural_compressor/experimental/common/metric.py index 10136dd526a..df44809cc64 100644 --- a/neural_compressor/experimental/common/metric.py +++ b/neural_compressor/experimental/common/metric.py @@ -14,8 +14,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +"""Common Metric just collects the information to construct a Metric.""" -"""common Metric just collects the information to construct a Metric.""" class Metric(object): """A wrapper of the information needed to construct a Metric. @@ -24,12 +24,12 @@ class Metric(object): neural_compressor built-in metric always take (predictions, labels) as inputs, it's recommended to design metric_cls to take (predictions, labels) as inputs. """ - - def __init__(self, metric_cls, name='user_metric', **kwargs): + + def __init__(self, metric_cls, name="user_metric", **kwargs): """Initialize a Metric with needed information. - + Args: - metric_cls (cls): Should be a sub_class of neural_compressor.metric.BaseMetric, + metric_cls (cls): Should be a sub_class of neural_compressor.metric.BaseMetric, which takes (predictions, labels) as inputs name (str, optional): Name for metric. Defaults to 'user_metric'. """ diff --git a/neural_compressor/experimental/common/model.py b/neural_compressor/experimental/common/model.py index 0dd9c3bbc93..226954f3ba5 100644 --- a/neural_compressor/experimental/common/model.py +++ b/neural_compressor/experimental/common/model.py @@ -14,13 +14,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+"""Common Model just collects the information to construct a Model.""" -"""common Model just collects the information to construct a Model.""" - -from neural_compressor.model.model import get_model_fwk_name, MODELS +from neural_compressor.model.model import MODELS, get_model_fwk_name from neural_compressor.model.tensorflow_model import get_model_type from neural_compressor.utils import logger -BACKEND = 'default' + +BACKEND = "default" + class Model(object): """A wrapper of the information needed to construct a Model.""" @@ -29,12 +30,12 @@ def __new__(cls, root, **kwargs): """Create a new instance object of Model. Args: - root (object): raw model format. For Tensorflow model, could be path to frozen pb file, + root (object): raw model format. For Tensorflow model, could be path to frozen pb file, path to ckpt or savedmodel folder, loaded estimator/graph_def/graph/keras model object. - For PyTorch model, it's torch.nn.model instance. For MXNet model, it's mxnet.symbol.Symbol - or gluon.HybirdBlock instance. For ONNX model, it's path to onnx model or loaded ModelProto + For PyTorch model, it's torch.nn.model instance. For MXNet model, it's mxnet.symbol.Symbol + or gluon.HybirdBlock instance. For ONNX model, it's path to onnx model or loaded ModelProto model object. - + Returns: BaseModel: neural_compressor built-in model """ @@ -42,18 +43,18 @@ def __new__(cls, root, **kwargs): if framework == "NA": framework = get_model_fwk_name(root) - if 'tensorflow' in framework: - if 'modelType' in kwargs: - model_type = kwargs['modelType'] + if "tensorflow" in framework: + if "modelType" in kwargs: + model_type = kwargs["modelType"] else: model_type = get_model_type(root) - if model_type =='AutoTrackable': # pragma: no cover - model = MODELS['tensorflow']("keras", root, **kwargs) + if model_type == "AutoTrackable": # pragma: no cover + model = MODELS["tensorflow"]("keras", root, **kwargs) else: - model = MODELS['tensorflow'](model_type, root, **kwargs) - elif framework == 'keras': - model = MODELS['keras'](root, **kwargs) - elif framework == 'pytorch': + model = MODELS["tensorflow"](model_type, root, **kwargs) + elif framework == "keras": + model = MODELS["keras"](root, **kwargs) + elif framework == "pytorch": if BACKEND != "default": framework = BACKEND model = MODELS[framework](root, **kwargs) diff --git a/neural_compressor/experimental/common/optimizer.py b/neural_compressor/experimental/common/optimizer.py index 3ff2fdb86b5..877c02ed3dc 100644 --- a/neural_compressor/experimental/common/optimizer.py +++ b/neural_compressor/experimental/common/optimizer.py @@ -18,11 +18,13 @@ """Intel Neural Compressor built-in Optimizers on multiple framework backends.""" from abc import abstractmethod + from neural_compressor.utils.utility import LazyImport, singleton -torch = LazyImport('torch') -tf = LazyImport('tensorflow') -tfa = LazyImport('tensorflow_addons') +torch = LazyImport("torch") +tf = LazyImport("tensorflow") +tfa = LazyImport("tensorflow_addons") + @singleton class TensorflowOptimizers(object): @@ -33,6 +35,7 @@ def __init__(self): self.optimizers = {} self.optimizers.update(TENSORFLOW_OPTIMIZERS) + @singleton class PyTorchOptimizers(object): """Class to get all registered PyTorch Optimizers once only.""" @@ -42,39 +45,44 @@ def __init__(self): self.optimizers = {} self.optimizers.update(PYTORCH_OPTIMIZERS) -framework_optimizers = {"tensorflow": TensorflowOptimizers, - "pytorch": PyTorchOptimizers, - "pytorch_fx": PyTorchOptimizers} + +framework_optimizers = { + "tensorflow": 
TensorflowOptimizers, + "pytorch": PyTorchOptimizers, + "pytorch_fx": PyTorchOptimizers, +} # user/model specific optimizers will be registered here TENSORFLOW_OPTIMIZERS = {} -PYTORCH_OPTIMIZERS= {} +PYTORCH_OPTIMIZERS = {} + +registry_optimizers = { + "tensorflow": TENSORFLOW_OPTIMIZERS, + "pytorch": PYTORCH_OPTIMIZERS, + "pytorch_fx": PYTORCH_OPTIMIZERS, +} -registry_optimizers = {"tensorflow": TENSORFLOW_OPTIMIZERS, - "pytorch": PYTORCH_OPTIMIZERS, - "pytorch_fx": PYTORCH_OPTIMIZERS} class Optimizers(object): """Main entry to get the specific type of optimizer.""" def __init__(self, framework): """Initialize `Optimizers` class.""" - assert framework in ("tensorflow", "pytorch", "pytorch_fx"), \ - "framework support tensorflow pytorch" + assert framework in ("tensorflow", "pytorch", "pytorch_fx"), "framework support tensorflow pytorch" self.optimizers = framework_optimizers[framework]().optimizers def __getitem__(self, optimizer_type): """Return the specific type of optimizer object according to the given optimizer_type.""" - assert optimizer_type in self.optimizers.keys(), "only support optimizers in {}".\ - format(self.optimizers.keys()) + assert optimizer_type in self.optimizers.keys(), "only support optimizers in {}".format(self.optimizers.keys()) return self.optimizers[optimizer_type] def register(self, name, optimizer_cls): """Allow registration of non-built-in optimizers.""" - assert name not in self.optimizers.keys(), 'registered optimizer name already exists.' + assert name not in self.optimizers.keys(), "registered optimizer name already exists." self.optimizers.update({name: optimizer_cls}) + def optimizer_registry(optimizer_type, framework): """Class decorator used to register all Optimizer subclasses. @@ -87,19 +95,20 @@ def optimizer_registry(optimizer_type, framework): Returns: cls: The class of register. """ + def decorator_optimizer(cls): - for fw in [fwk.strip() for fwk in framework.split(',')]: - assert fw in [ - "tensorflow", - "pytorch"], "The framework support tensorflow pytorch" + for fw in [fwk.strip() for fwk in framework.split(",")]: + assert fw in ["tensorflow", "pytorch"], "The framework support tensorflow pytorch" - if optimizer_type in registry_optimizers[fw ].keys(): - raise ValueError('Cannot have two optimizers with the same name') + if optimizer_type in registry_optimizers[fw].keys(): + raise ValueError("Cannot have two optimizers with the same name") registry_optimizers[fw][optimizer_type] = cls return cls + return decorator_optimizer -@optimizer_registry('SGD', 'tensorflow') + +@optimizer_registry("SGD", "tensorflow") class TensorFlowSGD(object): """TensorFlow keras SGD optimizer. 
@@ -109,24 +118,23 @@ class TensorFlowSGD(object): def __init__(self, param_dict): """Initialize `TensorFlowSGD` class.""" - assert isinstance(param_dict, dict), 'This optimizer constructor parameter must be a dict' + assert isinstance(param_dict, dict), "This optimizer constructor parameter must be a dict" self._param_dict = param_dict def _mapping(self): - _param_map = {'learning_rate': 'learning_rate', - 'momentum': 'momentum', - 'nesterov': 'nesterov'} + _param_map = {"learning_rate": "learning_rate", "momentum": "momentum", "nesterov": "nesterov"} _dict = {} for key in self._param_dict: if key in _param_map: - _dict.update({_param_map[key] : self._param_dict[key]}) + _dict.update({_param_map[key]: self._param_dict[key]}) return _dict def __call__(self, **kwargs): """Call `TensorFlowSGD` object.""" return tf.keras.optimizers.SGD, self._mapping(**kwargs) -@optimizer_registry('AdamW', 'tensorflow') + +@optimizer_registry("AdamW", "tensorflow") class TensorFlowAdamW(object): """tensorflow_addons AdamW optimizer. @@ -136,29 +144,32 @@ class TensorFlowAdamW(object): def __init__(self, param_dict): """Initialize `TensorFlowAdamW` class.""" - assert isinstance(param_dict, dict), 'This optimizer constructor parameter must be a dict' + assert isinstance(param_dict, dict), "This optimizer constructor parameter must be a dict" self._param_dict = param_dict def _mapping(self): - _param_map = {'learning_rate': 'learning_rate', - 'weight_decay': 'weight_decay', - 'beta_1': 'beta_1', - 'beta_2': 'beta_2', - 'epsilon': 'epsilon', - 'amsgrad': 'amsgrad'} + _param_map = { + "learning_rate": "learning_rate", + "weight_decay": "weight_decay", + "beta_1": "beta_1", + "beta_2": "beta_2", + "epsilon": "epsilon", + "amsgrad": "amsgrad", + } _dict = {} for key in self._param_dict: if key in _param_map: - _dict.update({_param_map[key] : self._param_dict[key]}) + _dict.update({_param_map[key]: self._param_dict[key]}) return _dict def __call__(self, **kwargs): """Call `TensorFlowAdamW` object.""" return tfa.optimizers.AdamW, self._mapping(**kwargs) -@optimizer_registry('Adam', 'tensorflow') + +@optimizer_registry("Adam", "tensorflow") class TensorFlowAdam(object): - """tensorflow Adam optimizer. + """Tensorflow Adam optimizer. Args: param_dict (dict): The dict of parameters setting by user for Adam optimizer @@ -166,26 +177,29 @@ class TensorFlowAdam(object): def __init__(self, param_dict): """Initialize `TensorFlowAdam` class.""" - assert isinstance(param_dict, dict), 'This optimizer constructor parameter must be a dict' + assert isinstance(param_dict, dict), "This optimizer constructor parameter must be a dict" self._param_dict = param_dict def _mapping(self): - _param_map = {'learning_rate': 'learning_rate', - 'beta_1': 'beta_1', - 'beta_2': 'beta_2', - 'epsilon': 'epsilon', - 'amsgrad': 'amsgrad'} + _param_map = { + "learning_rate": "learning_rate", + "beta_1": "beta_1", + "beta_2": "beta_2", + "epsilon": "epsilon", + "amsgrad": "amsgrad", + } _dict = {} for key in self._param_dict: if key in _param_map: - _dict.update({_param_map[key] : self._param_dict[key]}) + _dict.update({_param_map[key]: self._param_dict[key]}) return _dict def __call__(self, **kwargs): """Call `TensorFlowAdam` object.""" return tf.keras.optimizers.Adam, self._mapping(**kwargs) -@optimizer_registry('SGD', 'pytorch') + +@optimizer_registry("SGD", "pytorch") class PyTorchSGD(object): """PyTorch SGD optimizer. 
@@ -195,18 +209,20 @@ class PyTorchSGD(object): def __init__(self, param_dict): """Initialize `PyTorchSGD` class.""" - assert isinstance(param_dict, dict), 'This optimizer constructor parameter must be a dict' + assert isinstance(param_dict, dict), "This optimizer constructor parameter must be a dict" self._param_dict = param_dict def _mapping(self): - _param_map = {'learning_rate': 'lr', - 'momentum': 'momentum', - 'nesterov': 'nesterov', - 'weight_decay': 'weight_decay'} + _param_map = { + "learning_rate": "lr", + "momentum": "momentum", + "nesterov": "nesterov", + "weight_decay": "weight_decay", + } _dict = {} for key in self._param_dict: if key in _param_map: - _dict.update({_param_map[key] : self._param_dict[key]}) + _dict.update({_param_map[key]: self._param_dict[key]}) return _dict def __call__(self, **kwargs): diff --git a/neural_compressor/experimental/common/postprocess.py b/neural_compressor/experimental/common/postprocess.py index 605417a73ab..3521df4fb29 100644 --- a/neural_compressor/experimental/common/postprocess.py +++ b/neural_compressor/experimental/common/postprocess.py @@ -16,11 +16,12 @@ # limitations under the License. """Common Postprocess.""" + class Postprocess(object): -# class Transform(object): + # class Transform(object): """Just collect the infos to construct a Postprocess.""" - def __init__(self, postprocess_cls, name='user_postprocess', **kwargs): + def __init__(self, postprocess_cls, name="user_postprocess", **kwargs): """Initialize `Postprocess` class.""" self.postprocess_cls = postprocess_cls self.name = name diff --git a/neural_compressor/experimental/common/torch_utils.py b/neural_compressor/experimental/common/torch_utils.py index 3f33e41276c..3042a564951 100644 --- a/neural_compressor/experimental/common/torch_utils.py +++ b/neural_compressor/experimental/common/torch_utils.py @@ -14,12 +14,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """This is an utility file for PyTorch distillation.""" from neural_compressor.utils.utility import LazyImport -torch = LazyImport('torch') +torch = LazyImport("torch") STUDENT_FEATURES = {} TEACHER_FEATURES = {} @@ -33,7 +32,7 @@ def record_output(output, name, output_process, student=False): It is a help function. """ recorded_output = output - if output_process != '': + if output_process != "": if isinstance(output, dict) and output_process in output: recorded_output = output[output_process] elif isinstance(output, (tuple, list)) and str.isnumeric(output_process): @@ -41,10 +40,12 @@ def record_output(output, name, output_process, student=False): elif callable(output_process): recorded_output = output_process(output) else: - raise NotImplementedError('Current only support get the data with ' + \ - 'integer index in case the output is tuple or list and only ' + \ - 'need one item or with key in case the output is dict, ' + \ - 'or output_process is a function.') + raise NotImplementedError( + "Current only support get the data with " + + "integer index in case the output is tuple or list and only " + + "need one item or with key in case the output is dict, " + + "or output_process is a function." 
+ ) if student: STUDENT_FEATURES[name].append(recorded_output) else: @@ -52,11 +53,13 @@ def record_output(output, name, output_process, student=False): return output -def get_activation(name, output_process='', student=False): +def get_activation(name, output_process="", student=False): """Get a hook for getting activation.""" + def hook(model, input, output): if model.training or not student: return record_output(output, name, output_process, student=student) else: return output + return hook diff --git a/neural_compressor/experimental/component.py b/neural_compressor/experimental/component.py index 47fcf05c2a0..6a62eab213f 100644 --- a/neural_compressor/experimental/component.py +++ b/neural_compressor/experimental/component.py @@ -14,20 +14,21 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """This is a module for Component class. The Component class will be inherited by the class 'Quantization', 'Pruning' and 'Distillation'. """ +import importlib + +from deprecated import deprecated + +from ..adaptor import FRAMEWORKS +from ..model.model import get_model_fwk_name from ..utils import logger +from ..utils.create_obj_from_config import create_dataloader, create_eval_func, create_train_func from ..utils.utility import required_libs -from ..utils.create_obj_from_config import create_dataloader, create_train_func, create_eval_func from .common import Model -from ..adaptor import FRAMEWORKS -from ..model.model import get_model_fwk_name -import importlib -from deprecated import deprecated class Component(object): @@ -59,49 +60,52 @@ def __init__(self, conf_fname_or_obj=None, combination=None): self.adaptor = None self._metric = None self.hooks = { - 'on_train_begin': self.on_train_begin, - 'on_train_end': self.on_train_end, - 'on_epoch_begin': self.on_epoch_begin, - 'on_epoch_end': self.on_epoch_end, - 'on_step_begin': self.on_step_begin, - 'on_step_end': self.on_step_end, - 'on_after_compute_loss': self.on_after_compute_loss, - 'on_before_optimizer_step': self.on_before_optimizer_step, - 'on_after_optimizer_step': self.on_after_optimizer_step, - 'on_before_eval': self.on_before_eval, - 'on_after_eval': self.on_after_eval + "on_train_begin": self.on_train_begin, + "on_train_end": self.on_train_end, + "on_epoch_begin": self.on_epoch_begin, + "on_epoch_end": self.on_epoch_end, + "on_step_begin": self.on_step_begin, + "on_step_end": self.on_step_end, + "on_after_compute_loss": self.on_after_compute_loss, + "on_before_optimizer_step": self.on_before_optimizer_step, + "on_after_optimizer_step": self.on_after_optimizer_step, + "on_before_eval": self.on_before_eval, + "on_after_eval": self.on_after_eval, } self.hooks_dict = { - 'on_train_begin': [], - 'on_train_end': [], - 'on_epoch_begin': [], - 'on_epoch_end': [], - 'on_step_begin': [], - 'on_step_end': [], - 'on_after_compute_loss': [], - 'on_before_optimizer_step': [], - 'on_after_optimizer_step': [], - 'on_before_eval': [], - 'on_after_eval': [] + "on_train_begin": [], + "on_train_end": [], + "on_epoch_begin": [], + "on_epoch_end": [], + "on_step_begin": [], + "on_step_end": [], + "on_after_compute_loss": [], + "on_before_optimizer_step": [], + "on_after_optimizer_step": [], + "on_before_eval": [], + "on_after_eval": [], } if conf_fname_or_obj is not None: # pragma: no cover from ..conf.config import Conf + if isinstance(conf_fname_or_obj, str): self.conf = Conf(conf_fname_or_obj) elif 
isinstance(conf_fname_or_obj, Conf): self.conf = conf_fname_or_obj else: - assert False, \ - "Please pass a YAML configuration file path and name or \ + assert ( + False + ), "Please pass a YAML configuration file path and name or \ Conf class to Component" self._init_with_conf() def _init_with_conf(self): """Initialize some attributers.""" self.cfg = self.conf.usr_cfg - if self.cfg.model.framework != 'NA': + if self.cfg.model.framework != "NA": self.framework = self.cfg.model.framework.lower() from neural_compressor.experimental.common.model import set_backend + set_backend(self.framework) if self.framework in required_libs: for lib in required_libs[self.framework]: @@ -109,49 +113,52 @@ def _init_with_conf(self): importlib.import_module(lib) except Exception as e: logger.error("{}.".format(e)) - raise RuntimeError("{} is not correctly installed. " \ - "Please check your environment".format(lib)) + raise RuntimeError( + "{} is not correctly installed. " "Please check your environment".format(lib) + ) def prepare(self): """Register Quantization Aware Training hooks.""" - if self.combination is not None and 'Quantization' in self.combination: + if self.combination is not None and "Quantization" in self.combination: if self.adaptor is None: - framework_specific_info = {'device': self.cfg.device, - 'approach': "post_training_static_quant", - 'random_seed': self.cfg.tuning.random_seed, - 'workspace_path': self.cfg.tuning.workspace.path, - 'q_dataloader': None} + framework_specific_info = { + "device": self.cfg.device, + "approach": "post_training_static_quant", + "random_seed": self.cfg.tuning.random_seed, + "workspace_path": self.cfg.tuning.workspace.path, + "q_dataloader": None, + } if self.cfg.quantization.approach is not None: - framework_specific_info['approach'] = self.cfg.quantization.approach + framework_specific_info["approach"] = self.cfg.quantization.approach - if 'tensorflow' in self.framework: - framework_specific_info.update( - {"inputs": self.cfg.model.inputs, "outputs": self.cfg.model.outputs}) + if "tensorflow" in self.framework: + framework_specific_info.update({"inputs": self.cfg.model.inputs, "outputs": self.cfg.model.outputs}) self.adaptor = FRAMEWORKS[self.framework](framework_specific_info) self.adaptor.model = self.model - self.register_hook('on_train_begin', self.adaptor._pre_hook_for_qat) - self.register_hook('on_train_end', self.adaptor._post_hook_for_qat) + self.register_hook("on_train_begin", self.adaptor._pre_hook_for_qat) + self.register_hook("on_train_end", self.adaptor._post_hook_for_qat) def prepare_qat(self): """Register Quantization Aware Training hooks.""" if self.adaptor is None: - framework_specific_info = {'device': self.cfg.device, - 'approach': "quant_aware_training", - 'random_seed': self.cfg.tuning.random_seed, - 'workspace_path': self.cfg.tuning.workspace.path, - 'q_dataloader': None, - 'backend': self.cfg.model.get('backend', 'default'), - 'format': self.cfg.model.get('quant_format', 'default')} + framework_specific_info = { + "device": self.cfg.device, + "approach": "quant_aware_training", + "random_seed": self.cfg.tuning.random_seed, + "workspace_path": self.cfg.tuning.workspace.path, + "q_dataloader": None, + "backend": self.cfg.model.get("backend", "default"), + "format": self.cfg.model.get("quant_format", "default"), + } if self.cfg.quantization.approach is not None: - framework_specific_info['approach'] = self.cfg.quantization.approach + framework_specific_info["approach"] = self.cfg.quantization.approach - if 'tensorflow' in self.framework: 
- framework_specific_info.update( - {"inputs": self.cfg.model.inputs, "outputs": self.cfg.model.outputs}) + if "tensorflow" in self.framework: + framework_specific_info.update({"inputs": self.cfg.model.inputs, "outputs": self.cfg.model.outputs}) self.adaptor = FRAMEWORKS[self.framework](framework_specific_info) self.adaptor.model = self.model - self.register_hook('on_train_begin', self.adaptor._pre_hook_for_qat) - self.register_hook('on_train_end', self.adaptor._post_hook_for_qat) + self.register_hook("on_train_begin", self.adaptor._pre_hook_for_qat) + self.register_hook("on_train_end", self.adaptor._post_hook_for_qat) def pre_process(self): """Initialize some attributes, such as the adaptor, the dataloader and train/eval functions from yaml config. @@ -162,16 +169,17 @@ def pre_process(self): """ if self.adaptor is None: # create adaptor - framework_specific_info = {'device': self.cfg.device, - 'random_seed': self.cfg.tuning.random_seed, - 'workspace_path': self.cfg.tuning.workspace.path, - 'q_dataloader': None} + framework_specific_info = { + "device": self.cfg.device, + "random_seed": self.cfg.tuning.random_seed, + "workspace_path": self.cfg.tuning.workspace.path, + "q_dataloader": None, + } if self.cfg.quantization.approach is not None: - framework_specific_info['approach'] = self.cfg.quantization.approach + framework_specific_info["approach"] = self.cfg.quantization.approach - if 'tensorflow' in self.framework: - framework_specific_info.update( - {"inputs": self.cfg.model.inputs, "outputs": self.cfg.model.outputs}) + if "tensorflow" in self.framework: + framework_specific_info.update({"inputs": self.cfg.model.inputs, "outputs": self.cfg.model.outputs}) self.adaptor = FRAMEWORKS[self.framework](framework_specific_info) self.adaptor.model = self.model @@ -179,41 +187,43 @@ def pre_process(self): # create dataloaders if self._train_dataloader is None and self._train_func is None: train_dataloader_cfg = self.cfg.train.dataloader - assert train_dataloader_cfg is not None, \ - 'No training dataloader setting in current component. Please check ' \ - 'dataloader field of train field in yaml file. Or manually pass ' \ - 'dataloader to component.' + assert train_dataloader_cfg is not None, ( + "No training dataloader setting in current component. Please check " + "dataloader field of train field in yaml file. Or manually pass " + "dataloader to component." + ) self._train_dataloader = create_dataloader(self.framework, train_dataloader_cfg) if self._eval_dataloader is None and self._eval_func is None: if self._eval_dataloader is None: eval_dataloader_cfg = self.cfg.evaluation.accuracy.dataloader - assert eval_dataloader_cfg is not None, \ - 'No evaluation dataloader setting in current component. Please check ' \ - 'dataloader field of evaluation field in yaml file. Or manually pass ' \ - 'dataloader to component.' + assert eval_dataloader_cfg is not None, ( + "No evaluation dataloader setting in current component. Please check " + "dataloader field of evaluation field in yaml file. Or manually pass " + "dataloader to component." 
+ ) self._eval_dataloader = create_dataloader(self.framework, eval_dataloader_cfg) # create functions if self._train_func is None: - self._train_func = create_train_func(self.framework, - self._train_dataloader, - self.adaptor, - self.cfg.train, - hooks=self.hooks) + self._train_func = create_train_func( + self.framework, self._train_dataloader, self.adaptor, self.cfg.train, hooks=self.hooks + ) if self._eval_func is None: metric = [self._metric] if self._metric else self.cfg.evaluation.accuracy.metric - self._eval_func = create_eval_func(self.framework, - self._eval_dataloader, - self.adaptor, - metric, - self.cfg.evaluation.accuracy.postprocess, - fp32_baseline=False) + self._eval_func = create_eval_func( + self.framework, + self._eval_dataloader, + self.adaptor, + metric, + self.cfg.evaluation.accuracy.postprocess, + fp32_baseline=False, + ) self.prepare() # strategy will be considered in future - if getattr(self.train_dataloader, 'distributed', False): - self.register_hook('on_train_begin', self.adaptor._pre_hook_for_hvd) + if getattr(self.train_dataloader, "distributed", False): + self.register_hook("on_train_begin", self.adaptor._pre_hook_for_hvd) def execute(self): """Execute the processing of this compressor. @@ -223,15 +233,15 @@ def execute(self): """ # TODO: consider strategy sync during combination if self._train_func is not None: - modified_model = self._train_func(self._model \ - if getattr(self._train_func, 'builtin', None) else self._model.model) + modified_model = self._train_func( + self._model if getattr(self._train_func, "builtin", None) else self._model.model + ) # for the cases that model is changed not inplaced during training, for example, # oneshot with torch_fx QAT interfaces. Needs to reset model afterwards. if modified_model is not None: self._model.model = modified_model if self._eval_func is not None: - score = self._eval_func(self._model \ - if getattr(self._eval_func, 'builtin', None) else self._model.model) + score = self._eval_func(self._model if getattr(self._eval_func, "builtin", None) else self._model.model) logger.info("Evaluated model score is {}.".format(str(score))) return self._model @@ -244,39 +254,39 @@ def post_process(self): def on_train_begin(self, dataloader=None): """Be called before the beginning of epochs.""" - for on_train_begin_hook in self.hooks_dict['on_train_begin']: + for on_train_begin_hook in self.hooks_dict["on_train_begin"]: on_train_begin_hook(dataloader) def on_train_end(self): """Be called after the end of epochs.""" - for on_train_end_hook in self.hooks_dict['on_train_end']: + for on_train_end_hook in self.hooks_dict["on_train_end"]: on_train_end_hook() - @deprecated(version='2.0', reason="please use `on_train_begin` instead") + @deprecated(version="2.0", reason="please use `on_train_begin` instead") def pre_epoch_begin(self, dataloader=None): """Be called before the beginning of epochs.""" - for on_train_begin_hook in self.hooks_dict['on_train_begin']: + for on_train_begin_hook in self.hooks_dict["on_train_begin"]: on_train_begin_hook(dataloader) - @deprecated(version='2.0', reason="please use `on_train_end` instead") + @deprecated(version="2.0", reason="please use `on_train_end` instead") def post_epoch_end(self): """Be called after the end of epochs.""" - for on_train_end_hook in self.hooks_dict['on_train_end']: + for on_train_end_hook in self.hooks_dict["on_train_end"]: on_train_end_hook() def on_epoch_begin(self, epoch): """Be called on the beginning of epochs.""" - for on_epoch_begin_hook in 
self.hooks_dict['on_epoch_begin']: + for on_epoch_begin_hook in self.hooks_dict["on_epoch_begin"]: on_epoch_begin_hook(epoch) def on_step_begin(self, batch_id): """Be called on the beginning of batches.""" res_list = [] - for on_step_begin_hook in self.hooks_dict['on_step_begin']: + for on_step_begin_hook in self.hooks_dict["on_step_begin"]: res_list.append(on_step_begin_hook(batch_id)) return res_list - @deprecated(version='2.0', reason="please use `on_step_begin` instead") + @deprecated(version="2.0", reason="please use `on_step_begin` instead") def on_batch_begin(self, batch_id): """Be called on the beginning of batches.""" return self.on_step_begin(batch_id) @@ -284,31 +294,31 @@ def on_batch_begin(self, batch_id): def on_after_compute_loss(self, input, student_output, student_loss, teacher_output=None): """Be called on the end of loss computation.""" loss = student_loss - for on_after_compute_loss_hook in self.hooks_dict['on_after_compute_loss']: + for on_after_compute_loss_hook in self.hooks_dict["on_after_compute_loss"]: loss = on_after_compute_loss_hook(input, student_output, loss, teacher_output) return loss def on_before_optimizer_step(self): """Be called before optimizer step.""" - for on_before_optimizer_step_hook in self.hooks_dict['on_before_optimizer_step']: + for on_before_optimizer_step_hook in self.hooks_dict["on_before_optimizer_step"]: on_before_optimizer_step_hook() def on_after_optimizer_step(self): """Be called after optimizer step.""" - for on_after_optimizer_step_hook in self.hooks_dict['on_after_optimizer_step']: + for on_after_optimizer_step_hook in self.hooks_dict["on_after_optimizer_step"]: on_after_optimizer_step_hook() def on_before_eval(self): """Be called before evaluation.""" - for on_before_eval_hook in self.hooks_dict['on_before_eval']: + for on_before_eval_hook in self.hooks_dict["on_before_eval"]: on_before_eval_hook() def on_after_eval(self): """Be called after evaluation.""" - for on_after_eval_hook in self.hooks_dict['on_after_eval']: + for on_after_eval_hook in self.hooks_dict["on_after_eval"]: on_after_eval_hook() - @deprecated(version='2.0', reason="please use `on_before_optimizer_step` instead") + @deprecated(version="2.0", reason="please use `on_before_optimizer_step` instead") def on_post_grad(self): """Be called before optimizer step.""" return self.on_before_optimizer_step() @@ -316,11 +326,11 @@ def on_post_grad(self): def on_step_end(self): """Be called on the end of batches.""" res_list = [] - for on_step_end_hook in self.hooks_dict['on_step_end']: + for on_step_end_hook in self.hooks_dict["on_step_end"]: res_list.append(on_step_end_hook()) return res_list - @deprecated(version='2.0', reason="please use `on_step_end` instead") + @deprecated(version="2.0", reason="please use `on_step_end` instead") def on_batch_end(self): """Be called on the end of batches.""" return self.on_step_end() @@ -329,7 +339,7 @@ def on_epoch_end(self): """Be called on the end of epochs.""" res_list = [] - for on_epoch_end_hook in self.hooks_dict['on_epoch_end']: + for on_epoch_end_hook in self.hooks_dict["on_epoch_end"]: res_list.append(on_epoch_end_hook()) return res_list @@ -355,14 +365,14 @@ def __call__(self): def __repr__(self): """Represent this class.""" if self.combination: - return 'Combination of ' + ','.join(self.combination) + return "Combination of " + ",".join(self.combination) else: - return 'Base Component' + return "Base Component" @property def train_func(self): """Not support get train_func.""" - assert False, 'Should not try to get the value 
of `train_func` attribute.' + assert False, "Should not try to get the value of `train_func` attribute." return None @train_func.setter @@ -383,7 +393,7 @@ def train_func(self, user_train_func): @property def eval_func(self): """Not support get eval_func.""" - assert False, 'Should not try to get the value of `eval_func` attribute.' + assert False, "Should not try to get the value of `eval_func` attribute." return None @eval_func.setter @@ -433,8 +443,8 @@ def train_dataloader(self, dataloader): from neural_compressor.experimental.common.DataLoader. """ from .common import _generate_common_dataloader - self._train_dataloader = _generate_common_dataloader( - dataloader, self.framework, self._train_distributed) + + self._train_dataloader = _generate_common_dataloader(dataloader, self.framework, self._train_distributed) @property def eval_dataloader(self): @@ -468,8 +478,8 @@ def eval_dataloader(self, dataloader): from neural_compressor.experimental.common.DataLoader. """ from .common import _generate_common_dataloader - self._eval_dataloader = _generate_common_dataloader( - dataloader, self.framework, self._evaluation_distributed) + + self._eval_dataloader = _generate_common_dataloader(dataloader, self.framework, self._evaluation_distributed) @property def model(self): @@ -492,17 +502,19 @@ def model(self, user_model): Another corner case is slim model of tensorflow, be careful of the name of model configured in yaml file, make sure the name is in supported slim model list. - """ from ..model import BaseModel - if self.cfg.model.framework == 'NA': - assert not isinstance(user_model, BaseModel), \ - "Please pass an original framework model but not neural compressor model!" + + if self.cfg.model.framework == "NA": + assert not isinstance( + user_model, BaseModel + ), "Please pass an original framework model but not neural compressor model!" self.framework = get_model_fwk_name(user_model) if self.framework == "tensorflow": from ..model.tensorflow_model import get_model_type - if get_model_type(user_model) == 'keras' and self.cfg.model.backend == 'itex': - self.framework = 'keras' + + if get_model_type(user_model) == "keras" and self.cfg.model.backend == "itex": + self.framework = "keras" if self.framework == "pytorch": if self.cfg.model.backend == "default": self.framework = "pytorch_fx" @@ -520,13 +532,14 @@ def model(self, user_model): # It is config of neural_compressor version < 2.0, no need in 2.0 if self.cfg.model.framework == "pytorch_ipex": from neural_compressor.model.torch_model import IPEXModel + if not isinstance(user_model, IPEXModel): self._model = Model(user_model.model, framework=self.cfg.model.framework) return self._model = user_model - if 'tensorflow' in self.framework: + if "tensorflow" in self.framework: self._model.name = self.cfg.model.name self._model.output_tensor_names = self.cfg.model.outputs self._model.input_tensor_names = self.cfg.model.inputs diff --git a/neural_compressor/experimental/contrib/__init__.py b/neural_compressor/experimental/contrib/__init__.py index c2b506951b7..a3ff38fab97 100644 --- a/neural_compressor/experimental/contrib/__init__.py +++ b/neural_compressor/experimental/contrib/__init__.py @@ -14,6 +14,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- """Built-in strategy for multiple framework backends.""" -from .strategy import * \ No newline at end of file +from .strategy import * diff --git a/neural_compressor/experimental/contrib/strategy/__init__.py b/neural_compressor/experimental/contrib/strategy/__init__.py index 807ff72c28d..abfb7c7bb67 100644 --- a/neural_compressor/experimental/contrib/strategy/__init__.py +++ b/neural_compressor/experimental/contrib/strategy/__init__.py @@ -14,13 +14,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Built-in strategy for multiple framework backends.""" from os.path import dirname, basename, isfile, join import glob modules = glob.glob(join(dirname(__file__), "*.py")) for f in modules: - if isfile(f) and not f.startswith('__') and not f.endswith('__init__.py'): + if isfile(f) and not f.startswith("__") and not f.endswith("__init__.py"): __import__(basename(f)[:-3], globals(), locals(), level=1) - diff --git a/neural_compressor/experimental/contrib/strategy/sigopt.py b/neural_compressor/experimental/contrib/strategy/sigopt.py index e6342d975ed..9178453783d 100644 --- a/neural_compressor/experimental/contrib/strategy/sigopt.py +++ b/neural_compressor/experimental/contrib/strategy/sigopt.py @@ -14,17 +14,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """The SigOpt Tuning Strategy provides support for the quantization process.""" import copy -from neural_compressor.utils import logger -from neural_compressor.utils.utility import LazyImport -from neural_compressor.experimental.strategy.strategy import strategy_registry, TuneStrategy from collections import OrderedDict + +from neural_compressor.experimental.strategy.strategy import TuneStrategy, strategy_registry from neural_compressor.experimental.strategy.utils.tuning_sampler import OpWiseTuningSampler from neural_compressor.experimental.strategy.utils.tuning_structs import OpTuningConfig +from neural_compressor.utils import logger +from neural_compressor.utils.utility import LazyImport + +sigopt = LazyImport("sigopt") -sigopt = LazyImport('sigopt') @strategy_registry class SigOptTuneStrategy(TuneStrategy): @@ -69,21 +70,13 @@ def eval_func(model): return accuracy dicts (dict, optional): The dict containing resume information. Defaults to None. - """ - def __init__(self, model, conf, q_dataloader, q_func=None, - eval_dataloader=None, eval_func=None, dicts=None, q_hooks=None): + def __init__( + self, model, conf, q_dataloader, q_func=None, eval_dataloader=None, eval_func=None, dicts=None, q_hooks=None + ): """Initialize the SigOpt tuning strategy if the user specified to use it.""" - super().__init__( - model, - conf, - q_dataloader, - q_func, - eval_dataloader, - eval_func, - dicts, - q_hooks) + super().__init__(model, conf, q_dataloader, q_func, eval_dataloader, eval_func, dicts, q_hooks) strategy_name = conf.usr_cfg.tuning.strategy.name if strategy_name.lower() == "sigopt": try: @@ -92,8 +85,9 @@ def __init__(self, model, conf, q_dataloader, q_func=None, try: import subprocess import sys + subprocess.check_call([sys.executable, "-m", "pip", "install", "sigopt"]) - import sigopt # pylint: disable=import-error + import sigopt # pylint: disable=import-error except: assert False, "Unable to import sigopt from the local environment." 
else: @@ -103,26 +97,32 @@ def __init__(self, model, conf, q_dataloader, q_func=None, self.project_id = conf.usr_cfg.tuning.strategy.sigopt_project_id self.experiment_name = conf.usr_cfg.tuning.strategy.sigopt_experiment_name try: - assert client_token != None - except(AssertionError): - logger.error("`sigopt_api_token` field in yaml file is required. " \ - "Please refer to details in /docs/sigopt_strategy.md.") + assert client_token is not None + except AssertionError: + logger.error( + "`sigopt_api_token` field in yaml file is required. " + "Please refer to details in /docs/sigopt_strategy.md." + ) exit(0) try: - assert self.project_id != None - logger.warning('Project id is {}, ' \ - 'Please check whether it is created in the sigopt account.'\ - .format(self.project_id)) - except(AssertionError): - logger.error("`sigopt_project_id` field in yaml file is required. " \ - "Please refer to details in /docs/sigopt_strategy.md.") + assert self.project_id is not None + logger.warning( + "Project id is {}, " "Please check whether it is created in the sigopt account.".format(self.project_id) + ) + except AssertionError: + logger.error( + "`sigopt_project_id` field in yaml file is required. " + "Please refer to details in /docs/sigopt_strategy.md." + ) exit(0) - if self.experiment_name == 'nc-tune': - logger.info("Default experiment name `nc-tune` is used, " \ - "Please refer to details in /docs/sigopt_strategy.md " \ - "if user wants to modify it.") + if self.experiment_name == "nc-tune": + logger.info( + "Default experiment name `nc-tune` is used, " + "Please refer to details in /docs/sigopt_strategy.md " + "if user wants to modify it." + ) else: - logger.info("Experiment name is {}.".format(self.experiment_name)) + logger.info("Experiment name is {}.".format(self.experiment_name)) self.conn = sigopt.Connection(client_token) self.experiment = None @@ -130,14 +130,14 @@ def __init__(self, model, conf, q_dataloader, q_func=None, def params_to_tune_configs(self, params): """Get the parameters of the tuning strategy.""" op_tuning_cfg = {} - calib_sampling_size_lst = self.tuning_space.root_item.get_option_by_name('calib_sampling_size').options + calib_sampling_size_lst = self.tuning_space.root_item.get_option_by_name("calib_sampling_size").options for op_name_type, configs in self.op_configs.items(): if len(configs) == 1: op_tuning_cfg[op_name_type] = configs[0] else: op_tuning_cfg[op_name_type] = configs[min(len(configs) - 1, int(params[op_name_type[0]]))] - calib_sampling_size = calib_sampling_size_lst[min(len(configs) - 1, int(params['calib_sampling_size']))] - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + calib_sampling_size = calib_sampling_size_lst[min(len(configs) - 1, int(params["calib_sampling_size"]))] + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size return op_tuning_cfg def next_tune_cfg(self): @@ -146,19 +146,19 @@ def next_tune_cfg(self): suggestion = self.conn.experiments(self.experiment.id).suggestions().create() yield self.params_to_tune_configs(suggestion.assignments) values = [ - dict(name='accuracy', value=self.last_tune_result[0]), - dict(name='latency', value=self.last_tune_result[1]) + dict(name="accuracy", value=self.last_tune_result[0]), + dict(name="latency", value=self.last_tune_result[1]), ] - obs = self.conn.experiments(self.experiment.id).observations().create( - suggestion=suggestion.id, values=values) - logger.debug("`suggestion_id` is {}, `observation_id` is {}.". 
- format(suggestion.id, obs.id)) + obs = ( + self.conn.experiments(self.experiment.id).observations().create(suggestion=suggestion.id, values=values) + ) + logger.debug("`suggestion_id` is {}, `observation_id` is {}.".format(suggestion.id, obs.id)) self.experiment = self.conn.experiments(self.experiment.id).fetch() def get_acc_target(self, base_acc): """Get the tuning target of the accuracy ceiterion.""" if self.cfg.tuning.accuracy_criterion.relative: - return base_acc * (1. - self.cfg.tuning.accuracy_criterion.relative) + return base_acc * (1.0 - self.cfg.tuning.accuracy_criterion.relative) else: return base_acc - self.cfg.tuning.accuracy_criterion.absolute @@ -169,28 +169,36 @@ def traverse(self): """ self._eval_baseline() - baseline_msg = '[Accuracy: {:.4f}'.format(self.baseline[0]) + \ - ''.join([', {}: {:.4f}'.format(x,y) for x,y in zip( \ - self.objectives.representation, self.baseline[1]) if x != 'Accuracy']) + ']' \ - if self.baseline else 'n/a' + baseline_msg = ( + "[Accuracy: {:.4f}".format(self.baseline[0]) + + "".join( + [ + ", {}: {:.4f}".format(x, y) + for x, y in zip(self.objectives.representation, self.baseline[1]) + if x != "Accuracy" + ] + ) + + "]" + if self.baseline + else "n/a" + ) logger.info("FP32 baseline is: {}".format(baseline_msg)) self.experiment = self.create_exp(acc_target=self.get_acc_target(self.baseline[0])) trials_count = 0 for tune_cfg in self.next_tune_cfg(): # add tune_cfg here as quantize use tune_cfg - tune_cfg['advance'] = self.cfg.quantization.advance + tune_cfg["advance"] = self.cfg.quantization.advance trials_count += 1 tuning_history = self._find_tuning_history(tune_cfg) if tuning_history and trials_count < self.cfg.tuning.exit_policy.max_trials: - self.last_tune_result = tuning_history['last_tune_result'] - self.best_tune_result = tuning_history['best_tune_result'] + self.last_tune_result = tuning_history["last_tune_result"] + self.best_tune_result = tuning_history["best_tune_result"] logger.warn("Find evaluated tuning config, skip.") continue logger.debug("Dump current tuning configuration:") logger.debug(tune_cfg) - self.last_qmodel = self.adaptor.quantize( - tune_cfg, self.model, self.calib_dataloader, self.q_func) + self.last_qmodel = self.adaptor.quantize(tune_cfg, self.model, self.calib_dataloader, self.q_func) assert self.last_qmodel # Return the last quantized model as a result. if performance only. if self.cfg.tuning.exit_policy.performance_only: @@ -214,16 +222,17 @@ def create_exp(self, acc_target): """Set the config for the experiment.""" params = [] from copy import deepcopy + tuning_space = self.tuning_space initial_op_tuning_cfg = {} for item in tuning_space.root_item.options: - if item.item_type == 'op': + if item.item_type == "op": op_name, op_type = item.name - initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space) - calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options + initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, "fp32", tuning_space) + calib_sampling_size_lst = tuning_space.root_item.get_option_by_name("calib_sampling_size").options # step1. 
collect the ops that support static and dynamic quant_mode_wise_items = OrderedDict() - query_order = ['static', 'dynamic', 'bf16', 'fp16', 'fp32'] + query_order = ["static", "dynamic", "bf16", "fp16", "fp32"] pre_items = set() for quant_mode in query_order: items = tuning_space.query_items_by_quant_mode(quant_mode) @@ -240,22 +249,20 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): for quant_mode, quant_mode_items in quant_mode_wise_items.items(): initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) - op_wise_pool = OpWiseTuningSampler(tuning_space, [], [], - op_item_dtype_dict, initial_op_tuning_cfg) + op_wise_pool = OpWiseTuningSampler(tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg) self.op_configs = op_wise_pool.get_opwise_candidate() for op, configs in self.op_configs.items(): if len(configs) > 1: - params.append(dict(name=op[0], type='int', - bounds=dict(min=0, max=len(configs) - 1))) - params.append(dict(name='calib_sampling_size', type='int', - bounds=dict(min=0, max=len(calib_sampling_size_lst) - 1))) + params.append(dict(name=op[0], type="int", bounds=dict(min=0, max=len(configs) - 1))) + params.append( + dict(name="calib_sampling_size", type="int", bounds=dict(min=0, max=len(calib_sampling_size_lst) - 1)) + ) experiment = self.conn.experiments().create( name=self.experiment_name, parameters=params, metrics=[ - dict(name='accuracy', objective='maximize', strategy='constraint', \ - threshold=acc_target), - dict(name='latency', objective='minimize', strategy='optimize'), + dict(name="accuracy", objective="maximize", strategy="constraint", threshold=acc_target), + dict(name="latency", objective="minimize", strategy="optimize"), ], parallel_bandwidth=1, # Define an Observation Budget for your experiment @@ -263,7 +270,6 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): project=self.project_id, ) - logger.debug("Create experiment at https://app.sigopt.com/experiment/{}". - format(experiment.id)) + logger.debug("Create experiment at https://app.sigopt.com/experiment/{}".format(experiment.id)) return experiment diff --git a/neural_compressor/experimental/contrib/strategy/tpe.py b/neural_compressor/experimental/contrib/strategy/tpe.py index 3f1735757bd..c336d2ab70e 100644 --- a/neural_compressor/experimental/contrib/strategy/tpe.py +++ b/neural_compressor/experimental/contrib/strategy/tpe.py @@ -14,21 +14,22 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
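# A condensed sketch of the suggestion/observation loop the SigOpt strategy
# above drives; the token, project id, parameter, threshold, trial budget and
# measured accuracy/latency values are placeholders, while the API calls
# mirror create_exp() and next_tune_cfg().
import sigopt

conn = sigopt.Connection("SIGOPT_API_TOKEN")  # placeholder token
experiment = conn.experiments().create(
    name="nc-tune-sketch",
    parameters=[dict(name="conv1", type="int", bounds=dict(min=0, max=3))],
    metrics=[
        dict(name="accuracy", objective="maximize", strategy="constraint", threshold=0.75),
        dict(name="latency", objective="minimize", strategy="optimize"),
    ],
    parallel_bandwidth=1,
    project="my-project",  # placeholder project id
)
for _ in range(10):  # placeholder trial budget
    suggestion = conn.experiments(experiment.id).suggestions().create()
    acc, lat = 0.76, 1.2  # would come from evaluating the quantized model
    conn.experiments(experiment.id).observations().create(
        suggestion=suggestion.id,
        values=[dict(name="accuracy", value=acc), dict(name="latency", value=lat)],
    )
    experiment = conn.experiments(experiment.id).fetch()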
- """Fefine the tuning strategy that uses tpe search in tuning space.""" import copy import os -from pathlib import Path +from collections import OrderedDict from functools import partial +from pathlib import Path + import numpy as np -from neural_compressor.utils import logger -from neural_compressor.utils.utility import LazyImport -from neural_compressor.experimental.strategy.strategy import strategy_registry, TuneStrategy -from collections import OrderedDict + +from neural_compressor.experimental.strategy.strategy import TuneStrategy, strategy_registry from neural_compressor.experimental.strategy.utils.tuning_sampler import OpWiseTuningSampler from neural_compressor.experimental.strategy.utils.tuning_structs import OpTuningConfig +from neural_compressor.utils import logger +from neural_compressor.utils.utility import LazyImport -hyperopt = LazyImport('hyperopt') +hyperopt = LazyImport("hyperopt") try: import pandas as pd @@ -80,13 +81,15 @@ def eval_func(model): return accuracy dicts (dict, optional): The dict containing resume information. Defaults to None. - """ - def __init__(self, model, conf, q_dataloader, q_func=None, - eval_dataloader=None, eval_func=None, dicts=None, q_hooks=None): + + def __init__( + self, model, conf, q_dataloader, q_func=None, eval_dataloader=None, eval_func=None, dicts=None, q_hooks=None + ): """Initialize the tpe tuning strategy if the user specified to use it.""" - assert conf.usr_cfg.quantization.approach == 'post_training_static_quant', \ - "TPE strategy is only for post training static quantization!" + assert ( + conf.usr_cfg.quantization.approach == "post_training_static_quant" + ), "TPE strategy is only for post training static quantization!" """Initialize the tpe tuning strategy if the user specified to use it.""" strategy_name = conf.usr_cfg.tuning.strategy.name if strategy_name.lower() == "tpe": @@ -96,8 +99,9 @@ def __init__(self, model, conf, q_dataloader, q_func=None, try: import subprocess import sys + subprocess.check_call([sys.executable, "-m", "pip", "install", "hyperopt"]) - import hyperopt # pylint: disable=import-error + import hyperopt # pylint: disable=import-error except: assert False, "Unable to import hyperopt from the local environment." 
else: @@ -106,36 +110,19 @@ def __init__(self, model, conf, q_dataloader, q_func=None, self.warm_start = False self.cfg_evaluated = False self.hpopt_trials = hyperopt.Trials() - self.max_trials = conf.usr_cfg.tuning.exit_policy.get('max_trials', 200) + self.max_trials = conf.usr_cfg.tuning.exit_policy.get("max_trials", 200) self.loss_function_config = { - 'acc_th': conf.usr_cfg.tuning.accuracy_criterion.relative if \ - conf.usr_cfg.tuning.accuracy_criterion and \ - conf.usr_cfg.tuning.accuracy_criterion.relative else 0.01, - 'acc_weight': conf.usr_cfg.tuning.strategy.get('accuracy_weight', 1.0), - 'lat_weight': conf.usr_cfg.tuning.strategy.get('latency_weight', 1.0) - } - self.tpe_params = { - 'n_initial_point': 10, - 'gamma': 0.3, - 'n_EI_candidates': 100, - 'prior_weight': 1.0 - } - self.best_result = { - 'best_loss': float('inf'), - 'best_acc_loss': float('inf'), - 'best_lat_diff': 0.0 + "acc_th": conf.usr_cfg.tuning.accuracy_criterion.relative + if conf.usr_cfg.tuning.accuracy_criterion and conf.usr_cfg.tuning.accuracy_criterion.relative + else 0.01, + "acc_weight": conf.usr_cfg.tuning.strategy.get("accuracy_weight", 1.0), + "lat_weight": conf.usr_cfg.tuning.strategy.get("latency_weight", 1.0), } + self.tpe_params = {"n_initial_point": 10, "gamma": 0.3, "n_EI_candidates": 100, "prior_weight": 1.0} + self.best_result = {"best_loss": float("inf"), "best_acc_loss": float("inf"), "best_lat_diff": 0.0} self._algo = None - super().__init__( - model, - conf, - q_dataloader, - q_func, - eval_dataloader, - eval_func, - dicts, - q_hooks) + super().__init__(model, conf, q_dataloader, q_func, eval_dataloader, eval_func, dicts, q_hooks) def __getstate__(self): """Magic method for pickle saving. @@ -144,13 +131,13 @@ def __getstate__(self): dict: Saved dict for resuming """ for history in self.tuning_history: - if self._same_yaml(history['cfg'], self.cfg): - history['warm_start'] = True - history['hpopt_trials'] = self.hpopt_trials - history['loss_function_config'] = self.loss_function_config - history['tpe_params'] = self.tpe_params - history['hpopt_search_space'] = self.hpopt_search_space - history['_algo'] = self._algo + if self._same_yaml(history["cfg"], self.cfg): + history["warm_start"] = True + history["hpopt_trials"] = self.hpopt_trials + history["loss_function_config"] = self.loss_function_config + history["tpe_params"] = self.tpe_params + history["hpopt_search_space"] = self.hpopt_search_space + history["_algo"] = self._algo save_dict = super().__getstate__() return save_dict @@ -160,44 +147,45 @@ def _configure_hpopt_search_space_and_params(self, search_space): for param, configs in search_space.items(): self.hpopt_search_space[(param)] = hyperopt.hp.choice((param[0]), configs) # Find minimum number of choices for params with more than one choice - multichoice_params = [len(configs) for param, configs in search_space.items() - if len(configs) > 1] + multichoice_params = [len(configs) for param, configs in search_space.items() if len(configs) > 1] if not multichoice_params: return False min_param_size = min(multichoice_params) if len(multichoice_params) > 0 else 1 - self.tpe_params['n_EI_candidates'] = min_param_size - self.tpe_params['prior_weight'] = 1 / min_param_size - self._algo = partial(hyperopt.tpe.suggest, - n_startup_jobs=self.tpe_params['n_initial_point'], - gamma=self.tpe_params['gamma'], - n_EI_candidates=self.tpe_params['n_EI_candidates'], - prior_weight=self.tpe_params['prior_weight']) + self.tpe_params["n_EI_candidates"] = min_param_size + self.tpe_params["prior_weight"] = 
1 / min_param_size + self._algo = partial( + hyperopt.tpe.suggest, + n_startup_jobs=self.tpe_params["n_initial_point"], + gamma=self.tpe_params["gamma"], + n_EI_candidates=self.tpe_params["n_EI_candidates"], + prior_weight=self.tpe_params["prior_weight"], + ) return True def traverse(self): """Tpe traverse logic.""" logger.info("Start to run tpe strategy.") # prepare log file - trials_file = os.path.join(os.path.dirname(self.history_path), 'tpe_trials.csv') - best_result_file = os.path.join(os.path.dirname(self.history_path), 'tpe_best_result.csv') - logger.debug("trials_file: {} ".format(trials_file) + \ - "best_result_file: {}".format(best_result_file)) + trials_file = os.path.join(os.path.dirname(self.history_path), "tpe_trials.csv") + best_result_file = os.path.join(os.path.dirname(self.history_path), "tpe_best_result.csv") + logger.debug("trials_file: {} ".format(trials_file) + "best_result_file: {}".format(best_result_file)) if Path(trials_file).exists(): os.remove(trials_file) status = True tuning_history = self._find_self_tuning_history() from copy import deepcopy + tuning_space = self.tuning_space initial_op_tuning_cfg = {} for item in tuning_space.root_item.options: - if item.item_type == 'op': + if item.item_type == "op": op_name, op_type = item.name - initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space) - calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options + initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, "fp32", tuning_space) + calib_sampling_size_lst = tuning_space.root_item.get_option_by_name("calib_sampling_size").options # step1. collect the ops that support static and dynamic quant_mode_wise_items = OrderedDict() - query_order = ['static', 'dynamic', 'bf16', 'fp16', 'fp32'] + query_order = ["static", "dynamic", "bf16", "fp16", "fp32"] pre_items = set() for quant_mode in query_order: items = tuning_space.query_items_by_quant_mode(quant_mode) @@ -212,48 +200,43 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): op_item_dtype_dict = OrderedDict() for quant_mode, quant_mode_items in quant_mode_wise_items.items(): initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) - op_wise_pool = OpWiseTuningSampler(tuning_space, [], [], - op_item_dtype_dict, initial_op_tuning_cfg) + op_wise_pool = OpWiseTuningSampler(tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg) self.op_configs = op_wise_pool.get_opwise_candidate() self.opwise_tune_cfgs = {} for key, val in self.op_configs.items(): - self.opwise_tune_cfgs[key[0]] =val - self.opwise_tune_cfgs['calib_sampling_size'] = \ - self.tuning_space.root_item.get_option_by_name('calib_sampling_size').options + self.opwise_tune_cfgs[key[0]] = val + self.opwise_tune_cfgs["calib_sampling_size"] = self.tuning_space.root_item.get_option_by_name( + "calib_sampling_size" + ).options if tuning_history and not self.warm_start: # prepare loss function scaling (best result from basic can be used) best_lat, worse_acc_loss = 0, 0 - for history in tuning_history['history']: - acc_loss, lat_diff = self._calculate_acc_lat_diff( - history['tune_result'][0], - history['tune_result'][1]) + for history in tuning_history["history"]: + acc_loss, lat_diff = self._calculate_acc_lat_diff(history["tune_result"][0], history["tune_result"][1]) if lat_diff > best_lat: best_lat = lat_diff if acc_loss > worse_acc_loss: worse_acc_loss = acc_loss - self._calculate_loss_function_scaling_components( - worse_acc_loss, 
- best_lat, - self.loss_function_config) - first_run_cfg = self.add_loss_to_tuned_history_and_find_best(tuning_history['history']) + self._calculate_loss_function_scaling_components(worse_acc_loss, best_lat, self.loss_function_config) + first_run_cfg = self.add_loss_to_tuned_history_and_find_best(tuning_history["history"]) # Prepare hpopt config with best cfg from history self._configure_hpopt_search_space_and_params(first_run_cfg) # Run first iteration with best result from history trials_count = len(self.hpopt_trials.trials) + 1 - hyperopt.fmin(partial(self.object_evaluation, model=self.model), - space=self.hpopt_search_space, - algo=self._algo, - max_evals=trials_count, - trials=self.hpopt_trials, - show_progressbar=False) + hyperopt.fmin( + partial(self.object_evaluation, model=self.model), + space=self.hpopt_search_space, + algo=self._algo, + max_evals=trials_count, + trials=self.hpopt_trials, + show_progressbar=False, + ) if pd is not None: self._save_trials(trials_file) self._update_best_result(best_result_file) # Prepare full hpopt search space - new_tune_cfgs = self._prepare_final_searchspace( - first_run_cfg, - self.opwise_tune_cfgs) + new_tune_cfgs = self._prepare_final_searchspace(first_run_cfg, self.opwise_tune_cfgs) status = self._configure_hpopt_search_space_and_params(new_tune_cfgs) elif not self.warm_start: self._calculate_loss_function_scaling_components(0.01, 2, self.loss_function_config) @@ -267,27 +250,36 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): self.baseline = self._evaluate(self.model) self._add_tuning_history() - baseline_msg = '[Accuracy: {:.4f}'.format(self.baseline[0]) + \ - ''.join([', {}: {:.4f}'.format(x,y) for x,y in zip( \ - self.objectives.representation, self.baseline[1]) if x != 'Accuracy']) \ - + ']' if self.baseline else 'n/a' + baseline_msg = ( + "[Accuracy: {:.4f}".format(self.baseline[0]) + + "".join( + [ + ", {}: {:.4f}".format(x, y) + for x, y in zip(self.objectives.representation, self.baseline[1]) + if x != "Accuracy" + ] + ) + + "]" + if self.baseline + else "n/a" + ) logger.info("FP32 baseline is: {}".format(baseline_msg)) if not self.objectives.relative: - self.loss_function_config['acc_th'] =\ - (self.baseline[0] - self.objectives.acc_goal) / self.baseline[0] + self.loss_function_config["acc_th"] = (self.baseline[0] - self.objectives.acc_goal) / self.baseline[0] # start trials exit = False while not exit: self.cfg_evaluated = False - logger.debug("Trial iteration start: {} / {}.".format( - trials_count, self.max_trials)) - hyperopt.fmin(partial(self.object_evaluation, model=self.model), - space=self.hpopt_search_space, - algo=self._algo, - max_evals=trials_count, - trials=self.hpopt_trials, - show_progressbar=False) + logger.debug("Trial iteration start: {} / {}.".format(trials_count, self.max_trials)) + hyperopt.fmin( + partial(self.object_evaluation, model=self.model), + space=self.hpopt_search_space, + algo=self._algo, + max_evals=trials_count, + trials=self.hpopt_trials, + show_progressbar=False, + ) trials_count += 1 if pd is not None: self._save_trials(trials_file) @@ -315,62 +307,62 @@ def add_loss_to_tuned_history_and_find_best(self, tuning_history_list): first_run_cfg = None for history in tuning_history_list: result = self._compute_metrics( - history['tune_cfg']['op'], - history['tune_result'][0], - history['tune_result'][1]) - if best_loss is None or result['loss'] < best_loss: - best_loss = result['loss'] - first_run_cfg = history['tune_cfg']['op'].copy() - result['source'] = 'finetune' - 
history['result'] = result + history["tune_cfg"]["op"], history["tune_result"][0], history["tune_result"][1] + ) + if best_loss is None or result["loss"] < best_loss: + best_loss = result["loss"] + first_run_cfg = history["tune_cfg"]["op"].copy() + result["source"] = "finetune" + history["result"] = result logger.debug( - "Resumed iteration loss is {}, acc_loss is {}, lat_diff is {}, " \ - "quantization_ratio is {}.".format(result['loss'], - result['acc_loss'], - result['lat_diff'], - result['quantization_ratio'])) + "Resumed iteration loss is {}, acc_loss is {}, lat_diff is {}, " + "quantization_ratio is {}.".format( + result["loss"], result["acc_loss"], result["lat_diff"], result["quantization_ratio"] + ) + ) for op, cfg in first_run_cfg.items(): - first_run_cfg[op] = [cfg,] + first_run_cfg[op] = [ + cfg, + ] return first_run_cfg def object_evaluation(self, tune_cfg, model): """Check if config was alredy evaluated.""" for k, v in self.op_configs.items(): - tune_cfg.update({k : tune_cfg.pop(k[0])}) + tune_cfg.update({k: tune_cfg.pop(k[0])}) op_cfgs = self._tune_cfg_converter(tune_cfg) self.last_qmodel = self.adaptor.quantize(op_cfgs, self.model, self.calib_dataloader) self.last_tune_cfg = copy.deepcopy(tune_cfg) self.last_tune_result = self._evaluate(self.last_qmodel) - logger.info("The last tune result is {}.".format( - (self.last_tune_result[0], self.last_tune_result[1][0]))) + logger.info("The last tune result is {}.".format((self.last_tune_result[0], self.last_tune_result[1][0]))) saved_tune_cfg = copy.deepcopy(op_cfgs) saved_last_tune_result = copy.deepcopy(self.last_tune_result) # prepare result - result = self._compute_metrics( - op_cfgs['op'], - self.last_tune_result[0], - self.last_tune_result[1][0]) - result['source'] = 'tpe' + result = self._compute_metrics(op_cfgs["op"], self.last_tune_result[0], self.last_tune_result[1][0]) + result["source"] = "tpe" self._add_tuning_history(saved_tune_cfg, saved_last_tune_result, result=result) - logger.info("Current iteration loss is {}, acc_loss is {}, lat_diff is {}, " \ - "quantization_ratio is {}.".format(result['loss'], - result['acc_loss'], - result['lat_diff'], - result['quantization_ratio'])) + logger.info( + "Current iteration loss is {}, acc_loss is {}, lat_diff is {}, " + "quantization_ratio is {}.".format( + result["loss"], result["acc_loss"], result["lat_diff"], result["quantization_ratio"] + ) + ) return result def _compute_metrics(self, tune_cfg, acc, lat): - quantization_ratio = 1 - len([param for param in tune_cfg.values() - if param['activation']['dtype'] =='fp32']) / len(tune_cfg) + quantization_ratio = 1 - len( + [param for param in tune_cfg.values() if param["activation"]["dtype"] == "fp32"] + ) / len(tune_cfg) acc_diff, lat_diff = self._calculate_acc_lat_diff(acc, lat) return { - 'loss': self.calculate_loss(acc_diff, lat_diff, self.loss_function_config), - 'acc' : acc, - 'lat' : lat, - 'acc_loss': acc_diff, - 'lat_diff': lat_diff, - 'quantization_ratio': quantization_ratio, - 'status': hyperopt.STATUS_OK} + "loss": self.calculate_loss(acc_diff, lat_diff, self.loss_function_config), + "acc": acc, + "lat": lat, + "acc_loss": acc_diff, + "lat_diff": lat_diff, + "quantization_ratio": quantization_ratio, + "status": hyperopt.STATUS_OK, + } def _calculate_acc_lat_diff(self, acc, lat): int8_acc = acc @@ -386,13 +378,13 @@ def calculate_loss(self, acc_diff, lat_diff, config): gamma_penalty = 40 # penalty term acc_loss_component = self._calculate_acc_loss_component(acc_diff) lat_loss_component = 
self._calculate_lat_diff_component(lat_diff) - acc_weight = config['acc_weight'] if acc_diff > config['acc_th'] else 0.0 - if acc_weight == 0 and config['lat_weight'] == 0: + acc_weight = config["acc_weight"] if acc_diff > config["acc_th"] else 0.0 + if acc_weight == 0 and config["lat_weight"] == 0: acc_weight = 1.0 - loss = acc_weight * (config['acc_scale'] * (acc_loss_component - config['acc_min'])) \ - + config['lat_weight']\ - * (config['lat_scale'] * (lat_loss_component - config['lat_min'])) - if acc_diff > config['acc_th']: + loss = acc_weight * (config["acc_scale"] * (acc_loss_component - config["acc_min"])) + config["lat_weight"] * ( + config["lat_scale"] * (lat_loss_component - config["lat_min"]) + ) + if acc_diff > config["acc_th"]: loss += 2 * gamma_penalty return loss @@ -406,16 +398,16 @@ def _calculate_loss_function_scaling_components(self, acc_loss, lat_diff, config acc_min = self._calculate_acc_loss_component(0) acc_max = self._calculate_acc_loss_component(acc_loss) if acc_max == acc_min: - acc_max = self._calculate_acc_loss_component(config['acc_th']) - config['acc_min'] = acc_min - config['acc_scale'] = 10 / np.abs(acc_max - acc_min) + acc_max = self._calculate_acc_loss_component(config["acc_th"]) + config["acc_min"] = acc_min + config["acc_scale"] = 10 / np.abs(acc_max - acc_min) lat_min = self._calculate_lat_diff_component(lat_diff) lat_max = self._calculate_lat_diff_component(1) if lat_min == lat_max: lat_min = self._calculate_lat_diff_component(2) - config['lat_min'] = lat_min - config['lat_scale'] = 10 / np.abs(lat_max - lat_min) + config["lat_min"] = lat_min + config["lat_scale"] = 10 / np.abs(lat_max - lat_min) def _save_trials(self, trials_log): """Save the trial result to the log file.""" @@ -425,50 +417,56 @@ def _save_trials(self, trials_log): def _update_best_result(self, best_result_file): if not self.hpopt_trials: - raise Exception( - 'No trials loaded to get best result') + raise Exception("No trials loaded to get best result") trials_results = pd.DataFrame(self.hpopt_trials.results) - if not trials_results[trials_results.acc_loss <= - self.loss_function_config['acc_th']].empty: + if not trials_results[trials_results.acc_loss <= self.loss_function_config["acc_th"]].empty: # If accuracy threshold reached, choose best latency - best_result = trials_results[trials_results.acc_loss <= - self.loss_function_config['acc_th']] \ - .reset_index(drop=True).sort_values(by=['lat_diff', 'acc_loss'], - ascending=[False, True]) \ - .reset_index(drop=True).loc[0] + best_result = ( + trials_results[trials_results.acc_loss <= self.loss_function_config["acc_th"]] + .reset_index(drop=True) + .sort_values(by=["lat_diff", "acc_loss"], ascending=[False, True]) + .reset_index(drop=True) + .loc[0] + ) else: # If accuracy threshold is not reached, choose based on loss function - best_result = \ - trials_results.sort_values('loss', ascending=True).reset_index(drop=True).loc[0] + best_result = trials_results.sort_values("loss", ascending=True).reset_index(drop=True).loc[0] update_best_result = False - if not self.best_result['best_loss']: + if not self.best_result["best_loss"]: update_best_result = True - elif self.best_result['best_acc_loss'] <= self.loss_function_config['acc_th']: - if best_result['acc_loss'] <= self.loss_function_config['acc_th'] \ - and best_result['lat_diff'] > self.best_result['best_lat_diff']: + elif self.best_result["best_acc_loss"] <= self.loss_function_config["acc_th"]: + if ( + best_result["acc_loss"] <= self.loss_function_config["acc_th"] + and 
best_result["lat_diff"] > self.best_result["best_lat_diff"] + ): update_best_result = True else: - if best_result['acc_loss'] <= self.loss_function_config['acc_th'] or \ - best_result['loss'] < self.best_result['best_loss']: + if ( + best_result["acc_loss"] <= self.loss_function_config["acc_th"] + or best_result["loss"] < self.best_result["best_loss"] + ): update_best_result = True if update_best_result: best_result.to_csv(best_result_file, header=False) - self.best_result['best_loss'] = best_result['loss'] - self.best_result['best_acc_loss'] = best_result['acc_loss'] - self.best_result['best_lat_diff'] = best_result['lat_diff'] - self.best_result['quantization_ratio'] = best_result['quantization_ratio'] - - logger.info("Trial iteration end is {} / {}, best loss is {}, acc_loss is {}, " \ - "lat_diff is {}, quantization_ratio is {}.".format( - len(self.hpopt_trials.trials), - self.max_trials, - self.best_result['best_loss'], - self.best_result['best_acc_loss'], - self.best_result['best_lat_diff'], - self.best_result['quantization_ratio'])) + self.best_result["best_loss"] = best_result["loss"] + self.best_result["best_acc_loss"] = best_result["acc_loss"] + self.best_result["best_lat_diff"] = best_result["lat_diff"] + self.best_result["quantization_ratio"] = best_result["quantization_ratio"] + + logger.info( + "Trial iteration end is {} / {}, best loss is {}, acc_loss is {}, " + "lat_diff is {}, quantization_ratio is {}.".format( + len(self.hpopt_trials.trials), + self.max_trials, + self.best_result["best_loss"], + self.best_result["best_acc_loss"], + self.best_result["best_lat_diff"], + self.best_result["quantization_ratio"], + ) + ) def stop(self, timeout, trials_count): """Check if need to stop traversing the tuning space, either accuracy goal is met or timeout is reach. 
@@ -487,21 +485,37 @@ def stop(self, timeout, trials_count): else: del self.last_qmodel - last_tune_msg = '[Accuracy ({}|fp32): {:.4f}|{:.4f}'.format( \ - self.cfg.quantization.dtype, self.last_tune_result[0], self.baseline[0]) + \ - ''.join([', {} ({}|fp32): {:.4f}|{:.4f}'.format(x,self.cfg.quantization.dtype,y,z) \ - for x,y,z in zip(self.objectives.representation, \ - self.last_tune_result[1], self.baseline[1]) if x != 'Accuracy']) + ']' \ - if self.last_tune_result else 'n/a' - - best_tune_msg = '[Accuracy: {:.4f}'.format(self.best_tune_result[0]) + \ - ''.join([', {}: {:.4f}'.format(x,y) for x,y in zip( \ - self.objectives.representation, self.best_tune_result[1]) if x != 'Accuracy']) \ - + ']' if self.best_tune_result else 'n/a' - - logger.info("Tune {} result is: {}, Best tune result is: {}".format(trials_count, - last_tune_msg, - best_tune_msg)) + last_tune_msg = ( + "[Accuracy ({}|fp32): {:.4f}|{:.4f}".format( + self.cfg.quantization.dtype, self.last_tune_result[0], self.baseline[0] + ) + + "".join( + [ + ", {} ({}|fp32): {:.4f}|{:.4f}".format(x, self.cfg.quantization.dtype, y, z) + for x, y, z in zip(self.objectives.representation, self.last_tune_result[1], self.baseline[1]) + if x != "Accuracy" + ] + ) + + "]" + if self.last_tune_result + else "n/a" + ) + + best_tune_msg = ( + "[Accuracy: {:.4f}".format(self.best_tune_result[0]) + + "".join( + [ + ", {}: {:.4f}".format(x, y) + for x, y in zip(self.objectives.representation, self.best_tune_result[1]) + if x != "Accuracy" + ] + ) + + "]" + if self.best_tune_result + else "n/a" + ) + + logger.info("Tune {} result is: {}, Best tune result is: {}".format(trials_count, last_tune_msg, best_tune_msg)) if timeout == 0 and self.best_tune_result: need_stop = True diff --git a/neural_compressor/experimental/data/__init__.py b/neural_compressor/experimental/data/__init__.py index bdc10fbbff9..d81d6f4300b 100644 --- a/neural_compressor/experimental/data/__init__.py +++ b/neural_compressor/experimental/data/__init__.py @@ -34,4 +34,5 @@ "transform_registry", "FILTERS", "Filter", - "filter_registry",] + "filter_registry", +] diff --git a/neural_compressor/experimental/data/dataloaders/base_dataloader.py b/neural_compressor/experimental/data/dataloaders/base_dataloader.py index 5e58add4f15..99b5f8cea43 100644 --- a/neural_compressor/experimental/data/dataloaders/base_dataloader.py +++ b/neural_compressor/experimental/data/dataloaders/base_dataloader.py @@ -26,12 +26,21 @@ class BaseDataLoader: _generate_dataloader is needed to create a dataloader object from the general params like batch_size and sampler. The dynamic batching is just to generate a new dataloader by setting batch_size and last_batch. - """ - - def __init__(self, dataset, batch_size=1, last_batch='rollover', collate_fn=None, - sampler=None, batch_sampler=None, num_workers=0, pin_memory=False, - shuffle=False, distributed=False): + + def __init__( + self, + dataset, + batch_size=1, + last_batch="rollover", + collate_fn=None, + sampler=None, + batch_sampler=None, + num_workers=0, + pin_memory=False, + shuffle=False, + distributed=False, + ): """Initialize BaseDataLoader. 
Args: @@ -58,7 +67,7 @@ def __init__(self, dataset, batch_size=1, last_batch='rollover', collate_fn=None self.shuffle = shuffle self.distributed = distributed self.last_batch = last_batch - self.drop_last = False if last_batch == 'rollover' else True + self.drop_last = False if last_batch == "rollover" else True self.dataloader = self._generate_dataloader( self.dataset, @@ -70,7 +79,8 @@ def __init__(self, dataset, batch_size=1, last_batch='rollover', collate_fn=None num_workers=num_workers, pin_memory=pin_memory, shuffle=shuffle, - distributed=distributed) + distributed=distributed, + ) def batch(self, batch_size, last_batch=None): """Set batch size for dataloader. @@ -94,7 +104,8 @@ def batch(self, batch_size, last_batch=None): self.num_workers, self.pin_memory, self.shuffle, - self.distributed) + self.distributed, + ) @property def batch_size(self): @@ -114,6 +125,17 @@ def __iter__(self): return iter(self.dataloader) @abstractmethod - def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, sampler, - batch_sampler, num_workers, pin_memory, shuffle, distributed): + def _generate_dataloader( + self, + dataset, + batch_size, + last_batch, + collate_fn, + sampler, + batch_sampler, + num_workers, + pin_memory, + shuffle, + distributed, + ): raise NotImplementedError diff --git a/neural_compressor/experimental/data/dataloaders/dataloader.py b/neural_compressor/experimental/data/dataloaders/dataloader.py index 5ae5424b9d7..da1bf078fb5 100644 --- a/neural_compressor/experimental/data/dataloaders/dataloader.py +++ b/neural_compressor/experimental/data/dataloaders/dataloader.py @@ -17,22 +17,22 @@ # ============================================================================== """Built-in dataloaders for multiple framework backends.""" -from .tensorflow_dataloader import TensorflowDataLoader +from .default_dataloader import DefaultDataLoader from .mxnet_dataloader import MXNetDataLoader -from .pytorch_dataloader import PyTorchDataLoader from .onnxrt_dataloader import ONNXRTDataLoader -from .default_dataloader import DefaultDataLoader - -DATALOADERS = {"tensorflow": TensorflowDataLoader, - "tensorflow_itex": TensorflowDataLoader, - "keras": TensorflowDataLoader, - "mxnet": MXNetDataLoader, - "pytorch": PyTorchDataLoader, - "pytorch_ipex": PyTorchDataLoader, - "pytorch_fx": PyTorchDataLoader, - "onnxrt_qlinearops": ONNXRTDataLoader, - "onnxrt_integerops": ONNXRTDataLoader, - "onnxrt_qdq": ONNXRTDataLoader, - "onnxruntime": ONNXRTDataLoader, - } +from .pytorch_dataloader import PyTorchDataLoader +from .tensorflow_dataloader import TensorflowDataLoader +DATALOADERS = { + "tensorflow": TensorflowDataLoader, + "tensorflow_itex": TensorflowDataLoader, + "keras": TensorflowDataLoader, + "mxnet": MXNetDataLoader, + "pytorch": PyTorchDataLoader, + "pytorch_ipex": PyTorchDataLoader, + "pytorch_fx": PyTorchDataLoader, + "onnxrt_qlinearops": ONNXRTDataLoader, + "onnxrt_integerops": ONNXRTDataLoader, + "onnxrt_qdq": ONNXRTDataLoader, + "onnxruntime": ONNXRTDataLoader, +} diff --git a/neural_compressor/experimental/data/dataloaders/default_dataloader.py b/neural_compressor/experimental/data/dataloaders/default_dataloader.py index 85a18e5c81c..cd732e8e575 100644 --- a/neural_compressor/experimental/data/dataloaders/default_dataloader.py +++ b/neural_compressor/experimental/data/dataloaders/default_dataloader.py @@ -18,12 +18,15 @@ """Default dataloader for multiple framework backends.""" import collections -import numpy as np -from math import ceil, floor from abc import abstractmethod -from 
.sampler import IterableSampler, SequentialSampler, BatchSampler -from .fetcher import FETCHERS +from math import ceil, floor + +import numpy as np + from .base_dataloader import BaseDataLoader +from .fetcher import FETCHERS +from .sampler import BatchSampler, IterableSampler, SequentialSampler + def default_collate(batch): """Merge data with outer dimension batch size.""" @@ -41,12 +44,23 @@ def default_collate(batch): else: return batch + class DefaultDataLoader(BaseDataLoader): """DefaultDataLoader for multiple framework backends.""" - - def __init__(self, dataset, batch_size=1, last_batch='rollover', collate_fn=None, - sampler=None, batch_sampler=None, num_workers=0, pin_memory=False, - shuffle=False, distributed=False): + + def __init__( + self, + dataset, + batch_size=1, + last_batch="rollover", + collate_fn=None, + sampler=None, + batch_sampler=None, + num_workers=0, + pin_memory=False, + shuffle=False, + distributed=False, + ): """Initialize DefaultDataLoader. Args: @@ -61,7 +75,7 @@ def __init__(self, dataset, batch_size=1, last_batch='rollover', collate_fn=None num_workers (int, optional): number of subprocesses to use for data loading. Defaults to 0. pin_memory (bool, optional): whether to copy data into pinned memory before returning. Defaults to False. shuffle (bool, optional): whether to shuffle data. Defaults to False. - distributed (bool, optional): whether the dataloader is distributed. Defaults to False. + distributed (bool, optional): whether the dataloader is distributed. Defaults to False. """ self.dataset = dataset self.last_batch = last_batch @@ -73,11 +87,11 @@ def __init__(self, dataset, batch_size=1, last_batch='rollover', collate_fn=None self._batch_size = batch_size self.shuffle = shuffle self.distributed = distributed - self.drop_last = False if last_batch == 'rollover' else True - if self.collate_fn == None: + self.drop_last = False if last_batch == "rollover" else True + if self.collate_fn is None: self.collate_fn = default_collate - def batch(self, batch_size, last_batch='rollover'): + def batch(self, batch_size, last_batch="rollover"): """Set batch_size and last_batch.""" self._batch_size = batch_size self.last_batch = last_batch @@ -99,7 +113,8 @@ def __iter__(self): num_workers=self.num_workers, pin_memory=self.pin_memory, shuffle=self.shuffle, - distributed=self.distributed) + distributed=self.distributed, + ) def __len__(self): """Get dataset length.""" @@ -110,17 +125,29 @@ def __len__(self): for _ in self.dataset: dataset_len += 1 except Exception: - raise ValueError(f"{self.dataset} is invalid, {self.dataset}" \ - " does not support calculating the length of its dataloader") - if self.drop_last == False: + raise ValueError( + f"{self.dataset} is invalid, {self.dataset}" + " does not support calculating the length of its dataloader" + ) + if self.drop_last is False: dataloader_len = ceil(dataset_len / self.batch_size) else: dataloader_len = floor(dataset_len / self.batch_size) return dataloader_len - def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, sampler, - batch_sampler, num_workers, pin_memory, shuffle, distributed): - + def _generate_dataloader( + self, + dataset, + batch_size, + last_batch, + collate_fn, + sampler, + batch_sampler, + num_workers, + pin_memory, + shuffle, + distributed, + ): sampler = self._generate_sampler(dataset, distributed) self.batch_sampler = BatchSampler(sampler, batch_size, self.drop_last) self.fetcher = FETCHERS[self.dataset_type](dataset, collate_fn, self.drop_last, distributed) @@ -134,10 
+161,10 @@ def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, samp def _generate_sampler(self, dataset, distributed): if hasattr(dataset, "__getitem__"): - self.dataset_type = 'index' + self.dataset_type = "index" return SequentialSampler(dataset, distributed) elif hasattr(dataset, "__iter__"): - self.dataset_type = 'iter' + self.dataset_type = "iter" return IterableSampler(dataset) else: raise ValueError("dataset type only support (index, iter)") diff --git a/neural_compressor/experimental/data/dataloaders/fetcher.py b/neural_compressor/experimental/data/dataloaders/fetcher.py index 0f1d3aac70c..a3f47983ad8 100644 --- a/neural_compressor/experimental/data/dataloaders/fetcher.py +++ b/neural_compressor/experimental/data/dataloaders/fetcher.py @@ -19,6 +19,7 @@ from abc import abstractmethod + class Fetcher(object): """Base class for different fetchers.""" @@ -26,7 +27,7 @@ def __init__(self, dataset, collate_fn, drop_last): """Initialize Fetcher. Args: - dataset (object): dataset object from which to get data + dataset (object): dataset object from which to get data collate_fn (callable): merge data with outer dimension batch size drop_last (bool): whether to drop the last batch if it is incomplete """ @@ -40,10 +41,10 @@ def __call__(self, batched_indices): Args: batched_indices (list): fetch data according to batched_indices - """ raise NotImplementedError + class IterableFetcher(Fetcher): """Iterate to get next batch-size samples as a batch.""" @@ -55,38 +56,39 @@ def __init__(self, dataset, collate_fn, drop_last, distributed): collate_fn (callable): merge data with outer dimension batch size drop_last (bool): whether to drop the last batch if it is incomplete distributed (bool): whether the dataloader is distributed - """ super(IterableFetcher, self).__init__(dataset, collate_fn, drop_last) self.dataset_iter = iter(dataset) self.index_whole = 0 - self.process_rank = 0 # The default rank is 0, which represents the main process - self.process_size = 1 # By default, process_size=1, only the main process is running + self.process_rank = 0 # The default rank is 0, which represents the main process + self.process_size = 1 # By default, process_size=1, only the main process is running if distributed: import horovod.tensorflow as hvd + hvd.init() self.process_rank = hvd.rank() self.process_size = hvd.size() if self.process_size < 2: - raise EnvironmentError("The program is now trying to traverse" \ - " the distributed TensorFlow DefaultDataLoader in only one process." \ - " If you do not want to use distributed DataLoader, please set" \ - " 'distributed: False'. Or If you want to use distributed DataLoader," \ - " please set 'distributed: True' and launch multiple processes.") + raise EnvironmentError( + "The program is now trying to traverse" + " the distributed TensorFlow DefaultDataLoader in only one process." + " If you do not want to use distributed DataLoader, please set" + " 'distributed: False'. Or If you want to use distributed DataLoader," + " please set 'distributed: True' and launch multiple processes." + ) def __call__(self, batched_indices): """Fetch data. 
Args: batched_indices (list): fetch data according to batched_indices - """ batch_data = [] batch_size = len(batched_indices) while True: try: iter_data = next(self.dataset_iter) - if (self.index_whole-self.process_rank)%self.process_size == 0: + if (self.index_whole - self.process_rank) % self.process_size == 0: batch_data.append(iter_data) self.index_whole += 1 if len(batch_data) == batch_size: @@ -97,6 +99,7 @@ def __call__(self, batched_indices): raise StopIteration return self.collate_fn(batch_data) + class IndexFetcher(Fetcher): """Take single index or a batch of indices to fetch samples as a batch.""" @@ -116,9 +119,12 @@ def __call__(self, batched_indices): Args: batched_indices (list): fetch data according to batched_indices - """ data = [self.dataset[idx] for idx in batched_indices] return self.collate_fn(data) -FETCHERS = {"index": IndexFetcher, "iter": IterableFetcher, } + +FETCHERS = { + "index": IndexFetcher, + "iter": IterableFetcher, +} diff --git a/neural_compressor/experimental/data/dataloaders/mxnet_dataloader.py b/neural_compressor/experimental/data/dataloaders/mxnet_dataloader.py index d89e18b7899..749770cb523 100644 --- a/neural_compressor/experimental/data/dataloaders/mxnet_dataloader.py +++ b/neural_compressor/experimental/data/dataloaders/mxnet_dataloader.py @@ -17,26 +17,41 @@ # ============================================================================== """MXNet Dataloader implementation.""" +import logging + from neural_compressor.utils.utility import LazyImport + from .base_dataloader import BaseDataLoader -import logging -mx = LazyImport('mxnet') + +mx = LazyImport("mxnet") + class MXNetDataLoader(BaseDataLoader): """Subclass of BaseDataLoader.""" - def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, - sampler, batch_sampler, num_workers, pin_memory, - shuffle, distributed): + + def _generate_dataloader( + self, + dataset, + batch_size, + last_batch, + collate_fn, + sampler, + batch_sampler, + num_workers, + pin_memory, + shuffle, + distributed, + ): """Overwrite _generate_dataloader function.""" if shuffle: - logging.warning('Shuffle is not supported yet in MXNetDataLoader, ' \ - 'ignoring shuffle keyword.') + logging.warning("Shuffle is not supported yet in MXNetDataLoader, " "ignoring shuffle keyword.") return mx.gluon.data.DataLoader( - dataset, - batch_size=batch_size, - batchify_fn=collate_fn, - last_batch=last_batch, - num_workers=num_workers, - pin_memory=pin_memory, - sampler=sampler, - batch_sampler=batch_sampler) + dataset, + batch_size=batch_size, + batchify_fn=collate_fn, + last_batch=last_batch, + num_workers=num_workers, + pin_memory=pin_memory, + sampler=sampler, + batch_sampler=batch_sampler, + ) diff --git a/neural_compressor/experimental/data/dataloaders/onnxrt_dataloader.py b/neural_compressor/experimental/data/dataloaders/onnxrt_dataloader.py index fd567a001f4..bb1abe43cb2 100644 --- a/neural_compressor/experimental/data/dataloaders/onnxrt_dataloader.py +++ b/neural_compressor/experimental/data/dataloaders/onnxrt_dataloader.py @@ -17,66 +17,102 @@ # ============================================================================== """Built-in dataloaders for onnxruntime framework backends.""" +import logging + from neural_compressor.utils.utility import LazyImport + +from ..datasets.bert_dataset import ONNXRTBertDataset from .base_dataloader import BaseDataLoader from .default_dataloader import DefaultDataLoader -from ..datasets.bert_dataset import ONNXRTBertDataset -import logging -torch = LazyImport('torch') + 
+torch = LazyImport("torch") + class ONNXRTBertDataLoader(DefaultDataLoader): """Built-in dataloader for onnx bert model and its varients.""" - def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, - sampler, batch_sampler, num_workers, pin_memory, - shuffle, distributed): + def _generate_dataloader( + self, + dataset, + batch_size, + last_batch, + collate_fn, + sampler, + batch_sampler, + num_workers, + pin_memory, + shuffle, + distributed, + ): import numpy as np from torch.utils.data import DataLoader, SequentialSampler + sampler = SequentialSampler(dataset) - dataloader = DataLoader(dataset, sampler=sampler, \ - batch_size=batch_size) + dataloader = DataLoader(dataset, sampler=sampler, batch_size=batch_size) dynamic_length = dataset.dynamic_length model_type = dataset.model_type max_seq_length = dataset.max_seq_length for batch in dataloader: try: - batch_seq_length = max_seq_length if not dynamic_length \ - else torch.max(batch[-2], 0)[0].item() - batch = tuple(t.detach().cpu().numpy() \ - if not isinstance(t, np.ndarray) else t \ - for t in batch) - if model_type == 'bert': + batch_seq_length = max_seq_length if not dynamic_length else torch.max(batch[-2], 0)[0].item() + batch = tuple(t.detach().cpu().numpy() if not isinstance(t, np.ndarray) else t for t in batch) + if model_type == "bert": data = [ - batch[0][:,:batch_seq_length], - batch[1][:,:batch_seq_length], - batch[2][:,:batch_seq_length] + batch[0][:, :batch_seq_length], + batch[1][:, :batch_seq_length], + batch[2][:, :batch_seq_length], ] else: - data = [ - batch[0][:,:batch_seq_length], - batch[1][:,:batch_seq_length] - ] + data = [batch[0][:, :batch_seq_length], batch[1][:, :batch_seq_length]] label = batch[-1] yield data, label except StopIteration: return + class ONNXRTDataLoader(BaseDataLoader): """Built-in dataloader for onnxruntime framework backends.""" - def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, - sampler, batch_sampler, num_workers, pin_memory, - shuffle, distributed): + def _generate_dataloader( + self, + dataset, + batch_size, + last_batch, + collate_fn, + sampler, + batch_sampler, + num_workers, + pin_memory, + shuffle, + distributed, + ): if shuffle: - logging.warning('Shuffle is not supported yet in ONNXRTDataLoader, ' \ - 'ignoring shuffle keyword.') + logging.warning("Shuffle is not supported yet in ONNXRTDataLoader, " "ignoring shuffle keyword.") if isinstance(dataset, ONNXRTBertDataset): - return ONNXRTBertDataLoader(dataset, batch_size, last_batch, collate_fn, - sampler, batch_sampler, num_workers, pin_memory, - shuffle, distributed) + return ONNXRTBertDataLoader( + dataset, + batch_size, + last_batch, + collate_fn, + sampler, + batch_sampler, + num_workers, + pin_memory, + shuffle, + distributed, + ) else: - return DefaultDataLoader(dataset, batch_size, last_batch, collate_fn, - sampler, batch_sampler, num_workers, pin_memory, - shuffle, distributed) + return DefaultDataLoader( + dataset, + batch_size, + last_batch, + collate_fn, + sampler, + batch_sampler, + num_workers, + pin_memory, + shuffle, + distributed, + ) diff --git a/neural_compressor/experimental/data/dataloaders/pytorch_dataloader.py b/neural_compressor/experimental/data/dataloaders/pytorch_dataloader.py index dcc462ae616..fad8eb31c3a 100644 --- a/neural_compressor/experimental/data/dataloaders/pytorch_dataloader.py +++ b/neural_compressor/experimental/data/dataloaders/pytorch_dataloader.py @@ -14,21 +14,34 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. - """Initialize the Datasets class.""" import numpy as np + from neural_compressor.utils.utility import LazyImport + from .base_dataloader import BaseDataLoader -torch = LazyImport('torch') -hvd = LazyImport('horovod.torch') + +torch = LazyImport("torch") +hvd = LazyImport("horovod.torch") + class PyTorchDataLoader(BaseDataLoader): """PyTorchDataLoader inherits from BaseDataLoader.""" - def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, - sampler, batch_sampler, num_workers, pin_memory, - shuffle, distributed): + def _generate_dataloader( + self, + dataset, + batch_size, + last_batch, + collate_fn, + sampler, + batch_sampler, + num_workers, + pin_memory, + shuffle, + distributed, + ): """Generate PyTorch dataloader. Args: @@ -46,15 +59,15 @@ def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, Returns: _type_: _description_ """ - drop_last = False if last_batch == 'rollover' else True - assert len(dataset) != 0, \ - "Warning: Dataset is empty, Please check dataset path!" + drop_last = False if last_batch == "rollover" else True + assert len(dataset) != 0, "Warning: Dataset is empty, Please check dataset path!" if distributed and sampler is None: # TODO: lazy init here hvd.init() # sampler option is mutually exclusive with shuffle pytorch self.sampler = sampler = torch.utils.data.distributed.DistributedSampler( - dataset, num_replicas=hvd.size(), rank=hvd.rank()) + dataset, num_replicas=hvd.size(), rank=hvd.rank() + ) return torch.utils.data.DataLoader( dataset, @@ -65,5 +78,5 @@ def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, num_workers=num_workers, pin_memory=pin_memory, sampler=sampler, - batch_sampler=batch_sampler) - + batch_sampler=batch_sampler, + ) diff --git a/neural_compressor/experimental/data/dataloaders/sampler.py b/neural_compressor/experimental/data/dataloaders/sampler.py index cbe9220bc7c..17eac37af14 100644 --- a/neural_compressor/experimental/data/dataloaders/sampler.py +++ b/neural_compressor/experimental/data/dataloaders/sampler.py @@ -19,9 +19,10 @@ from abc import abstractmethod + class Sampler(object): """Base class for all Samplers. - + __iter__ is needed no matter whether you use IterableSampler or Squential sampler, if you want implement your own sampler, make clear what the type is your Dataset, if IterableDataset(method __iter__ implemented), try to use IterableSampler, @@ -41,7 +42,7 @@ def __iter__(self): class IterableSampler(Sampler): """Interally samples elements. - + Used for datasets retrieved element by interator. Yield None to act as a placeholder for each iteration. 
""" @@ -63,6 +64,7 @@ def __len__(self): """Return the length of dataset.""" raise NotImplementedError("'__len__' for IterableDataset object has not defined") + class SequentialSampler(Sampler): """Sequentially samples elements, used for datasets retrieved element by index.""" @@ -78,25 +80,29 @@ def __init__(self, dataset, distributed): def __iter__(self): """Yield data in iterative order.""" - self.process_rank = 0 # The default rank is 0, which represents the main process - self.process_size = 1 # By default, process_size=1, only the main process is running + self.process_rank = 0 # The default rank is 0, which represents the main process + self.process_size = 1 # By default, process_size=1, only the main process is running if self.distributed: import horovod.tensorflow as hvd + hvd.init() self.process_rank = hvd.rank() self.process_size = hvd.size() if self.process_size < 2: - raise EnvironmentError("The program is now trying to traverse" \ - " the distributed TensorFlow DefaultDataLoader in only one process." \ - " If you do not want to use distributed DataLoader, please set" \ - " 'distributed: False'. Or If you want to use distributed DataLoader," \ - " please set 'distributed: True' and launch multiple processes.") + raise EnvironmentError( + "The program is now trying to traverse" + " the distributed TensorFlow DefaultDataLoader in only one process." + " If you do not want to use distributed DataLoader, please set" + " 'distributed: False'. Or If you want to use distributed DataLoader," + " please set 'distributed: True' and launch multiple processes." + ) return iter(range(self.process_rank, len(self.whole_dataset), self.process_size)) def __len__(self): """Return the length of dataset.""" return len(self.whole_dataset) + class BatchSampler(Sampler): """Yield a batch of indices and number of batches.""" diff --git a/neural_compressor/experimental/data/dataloaders/tensorflow_dataloader.py b/neural_compressor/experimental/data/dataloaders/tensorflow_dataloader.py index c8f1ab7483f..9e55dbc6714 100644 --- a/neural_compressor/experimental/data/dataloaders/tensorflow_dataloader.py +++ b/neural_compressor/experimental/data/dataloaders/tensorflow_dataloader.py @@ -16,45 +16,48 @@ # limitations under the License. """TensorFlow Dataloader implementation.""" -from neural_compressor.utils.utility import LazyImport -from abc import abstractmethod import collections -import numpy as np +import logging import sys +from abc import abstractmethod from math import ceil, floor -from .sampler import IterableSampler, SequentialSampler, BatchSampler -from .fetcher import FETCHERS -from .default_dataloader import default_collate -from .default_dataloader import DefaultDataLoader + +import numpy as np + +from neural_compressor.utils.utility import LazyImport + from ..datasets.bert_dataset import TensorflowBertDataset, TensorflowModelZooBertDataset from .base_dataloader import BaseDataLoader -import logging +from .default_dataloader import DefaultDataLoader, default_collate +from .fetcher import FETCHERS +from .sampler import BatchSampler, IterableSampler, SequentialSampler + +tf = LazyImport("tensorflow") +neural_compressor = LazyImport("neural_compressor") -tf = LazyImport('tensorflow') -neural_compressor = LazyImport('neural_compressor') class TFDataDataLoader(BaseDataLoader): """Tensorflow dataloader class. - + In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict method to do session run, this dataloader is designed to satisfy the usage of feed dict in tf1.x. 
Although it's a general dataloader and can be used in MXNet and PyTorch. - + Args: dataset: obj. wrapper of needed data. batch_size: int. batch size """ - def __init__(self, dataset, batch_size=1, last_batch='rollover'): + def __init__(self, dataset, batch_size=1, last_batch="rollover"): """Initialize `TFDataDataLoader` class.""" self.dataset = dataset self.last_batch = last_batch self._batch_size = batch_size dataset = dataset.batch(batch_size) - def batch(self, batch_size, last_batch='rollover'): + def batch(self, batch_size, last_batch="rollover"): """Dataset return data per batch.""" - drop_last = False if last_batch == 'rollover' else True + drop_last = False if last_batch == "rollover" else True self._batch_size = batch_size self.dataset = self.dataset.batch(batch_size, drop_last) @@ -63,17 +66,26 @@ def __iter__(self): return self._generate_dataloader( self.dataset, batch_size=self.batch_size, - last_batch=self.last_batch,) + last_batch=self.last_batch, + ) - def _generate_dataloader(self, dataset, batch_size=1, last_batch='rollover', \ - collate_fn=None, sampler=None, batch_sampler=None, \ - num_workers=None, pin_memory=None, shuffle=False, \ - distributed=False): + def _generate_dataloader( + self, + dataset, + batch_size=1, + last_batch="rollover", + collate_fn=None, + sampler=None, + batch_sampler=None, + num_workers=None, + pin_memory=None, + shuffle=False, + distributed=False, + ): """Yield data.""" - drop_last = False if last_batch == 'rollover' else True + drop_last = False if last_batch == "rollover" else True if shuffle: - logging.warning('Shuffle is not supported yet in TFDataLoader, ' \ - 'ignoring shuffle keyword.') + logging.warning("Shuffle is not supported yet in TFDataLoader, " "ignoring shuffle keyword.") def check_dynamic_shape(element_spec): if isinstance(element_spec, collections.abc.Sequence): @@ -81,7 +93,7 @@ def check_dynamic_shape(element_spec): elif isinstance(element_spec, tf.TensorSpec): return True if element_spec.shape.num_elements() is None else False else: - raise ValueError('unrecognized element spec...') + raise ValueError("unrecognized element spec...") def squeeze_output(output): if isinstance(output, collections.abc.Sequence): @@ -89,19 +101,19 @@ def squeeze_output(output): elif isinstance(output, np.ndarray): return np.squeeze(output, axis=0) else: - raise ValueError('not supported output format....') + raise ValueError("not supported output format....") if tf.executing_eagerly(): index = 0 outputs = [] for iter_tensors in dataset: samples = [] - iter_inputs, iter_labels = iter_tensors[0],iter_tensors[1] + iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1] if isinstance(iter_inputs, tf.Tensor): samples.append(iter_inputs.numpy()) else: samples.append((iter_input.numpy() for iter_input in iter_inputs)) - if isinstance(iter_labels,tf.Tensor): + if isinstance(iter_labels, tf.Tensor): samples.append(iter_labels.numpy()) else: samples.append([np.array(l) for l in iter_labels]) @@ -127,6 +139,7 @@ def squeeze_output(output): data_sess = tf.compat.v1.Session(config=data_config) # pylint: disable=no-name-in-module from tensorflow.python.framework.errors_impl import OutOfRangeError + while True: if not try_single_batch: try: @@ -152,27 +165,37 @@ def squeeze_output(output): data_sess.close() return + class TensorflowBertDataLoader(DefaultDataLoader): """Subclass of DefaultDataLoader. - + this dataloader is designed to satisfy the usage of Bert models. 
""" - def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, - sampler, batch_sampler, num_workers, pin_memory, shuffle, - distributed): - + def _generate_dataloader( + self, + dataset, + batch_size, + last_batch, + collate_fn, + sampler, + batch_sampler, + num_workers, + pin_memory, + shuffle, + distributed, + ): if shuffle: - logging.warning('Shuffle is not supported yet in TensorflowBertDataLoader, ' \ - 'ignoring shuffle keyword.') + logging.warning("Shuffle is not supported yet in TensorflowBertDataLoader, " "ignoring shuffle keyword.") + def bert_collate_fn(batch): elem = batch[0] return elem - drop_last = False if last_batch == 'rollover' else True + + drop_last = False if last_batch == "rollover" else True sampler = self._generate_sampler(dataset, distributed) self.batch_sampler = BatchSampler(sampler, batch_size, drop_last) - self.fetcher = FETCHERS[self.dataset_type]\ - (dataset, bert_collate_fn, drop_last, distributed) + self.fetcher = FETCHERS[self.dataset_type](dataset, bert_collate_fn, drop_last, distributed) for batched_indices in self.batch_sampler: try: @@ -181,19 +204,29 @@ def bert_collate_fn(batch): except StopIteration: return + class TensorflowModelZooBertDataLoader(DefaultDataLoader): """Subclass of DefaultDataLoader. - + this dataloader is designed to satisfy the usage of Model Zoo Bert models. """ - def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, - sampler, batch_sampler, num_workers, pin_memory, shuffle, - distributed): - + def _generate_dataloader( + self, + dataset, + batch_size, + last_batch, + collate_fn, + sampler, + batch_sampler, + num_workers, + pin_memory, + shuffle, + distributed, + ): if shuffle: - logging.warning('Shuffle is not supported yet in TensorflowBertDataLoader, ' \ - 'ignoring shuffle keyword.') + logging.warning("Shuffle is not supported yet in TensorflowBertDataLoader, " "ignoring shuffle keyword.") + def bert_collate_fn(batch): input_ids = [] input_mask = [] @@ -204,11 +237,11 @@ def bert_collate_fn(batch): segment_ids.append(elem[0][2][0]) inputs = [input_ids, input_mask, segment_ids] return inputs, batch[0][1] - drop_last = False if last_batch == 'rollover' else True + + drop_last = False if last_batch == "rollover" else True sampler = self._generate_sampler(dataset, distributed) self.batch_sampler = BatchSampler(sampler, batch_size, drop_last) - self.fetcher = FETCHERS[self.dataset_type]\ - (dataset, bert_collate_fn, drop_last, distributed) + self.fetcher = FETCHERS[self.dataset_type](dataset, bert_collate_fn, drop_last, distributed) inputs = [] for batched_indices in self.batch_sampler: @@ -218,65 +251,109 @@ def bert_collate_fn(batch): except StopIteration: return + class TensorflowDataLoader(BaseDataLoader): """DataLoader for framework Tensorflow. - - if it's a tf.data.Dataset we will directly use the dataloader in the other case + + if it's a tf.data.Dataset we will directly use the dataloader in the other case will use DefaultDataLoader instead. 
""" - def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, \ - sampler, batch_sampler, num_workers, pin_memory, shuffle, distributed): - + def _generate_dataloader( + self, + dataset, + batch_size, + last_batch, + collate_fn, + sampler, + batch_sampler, + num_workers, + pin_memory, + shuffle, + distributed, + ): if shuffle: - logging.warning('Shuffle is not supported yet in TensorflowDataLoader, ' \ - 'ignoring shuffle keyword.') + logging.warning("Shuffle is not supported yet in TensorflowDataLoader, " "ignoring shuffle keyword.") if isinstance(dataset, tf.data.Dataset): if int(tf.__version__[0]) > 1: - has_batch = hasattr(dataset, '_batch_size') + has_batch = hasattr(dataset, "_batch_size") else: - has_batch = hasattr(dataset._dataset, '_batch_size') + has_batch = hasattr(dataset._dataset, "_batch_size") if has_batch: - raise TypeError(f"Parameter 'batch_size={batch_size}'" \ - " conflicts with 'tf.data.Dataset'," \ - f" because {dataset} is already a BatchDataset." \ - f" Please pass in 'tf.data.Dataset' without batch attributes.") - process_rank = 0 # The default rank is 0, which represents the main process - process_size = 1 # By default, process_size=1, only the main process is running + raise TypeError( + f"Parameter 'batch_size={batch_size}'" + " conflicts with 'tf.data.Dataset'," + f" because {dataset} is already a BatchDataset." + f" Please pass in 'tf.data.Dataset' without batch attributes." + ) + process_rank = 0 # The default rank is 0, which represents the main process + process_size = 1 # By default, process_size=1, only the main process is running if self.distributed: import horovod.tensorflow as hvd + hvd.init() process_rank = hvd.rank() process_size = hvd.size() if process_size < 2: - raise EnvironmentError("The program is now trying to generate" \ - " the distributed TensorflowDataLoader in only one process." \ - " If you do not want to use distributed DataLoader, please set" \ - " 'distributed: False'. Or If you want to use distributed DataLoader," \ - " please set 'distributed: True' and launch multiple processes.") + raise EnvironmentError( + "The program is now trying to generate" + " the distributed TensorflowDataLoader in only one process." + " If you do not want to use distributed DataLoader, please set" + " 'distributed: False'. Or If you want to use distributed DataLoader," + " please set 'distributed: True' and launch multiple processes." 
+ ) dataset = dataset.shard(process_size, process_rank) tf_dataloader = TFDataDataLoader(dataset, batch_size, last_batch=last_batch) return tf_dataloader elif isinstance(dataset, TensorflowBertDataset): if distributed: - raise NotImplementedError("Distributed TensorflowBertDataLoader" \ - " is not yet supported, please set 'distributed: False'") - tf_bert_dataloader = TensorflowBertDataLoader(dataset, batch_size, \ - last_batch, collate_fn, sampler, batch_sampler, \ - num_workers, pin_memory, shuffle, distributed) + raise NotImplementedError( + "Distributed TensorflowBertDataLoader" " is not yet supported, please set 'distributed: False'" + ) + tf_bert_dataloader = TensorflowBertDataLoader( + dataset, + batch_size, + last_batch, + collate_fn, + sampler, + batch_sampler, + num_workers, + pin_memory, + shuffle, + distributed, + ) return tf_bert_dataloader elif isinstance(dataset, TensorflowModelZooBertDataset): if distributed: - raise NotImplementedError("Distributed TensorflowBertDataLoader" \ - " is not yet supported, please set 'distributed: False'") - tf_bert_dataloader = TensorflowModelZooBertDataLoader(dataset, batch_size, \ - last_batch, collate_fn, sampler, batch_sampler, \ - num_workers, pin_memory, shuffle, distributed) + raise NotImplementedError( + "Distributed TensorflowBertDataLoader" " is not yet supported, please set 'distributed: False'" + ) + tf_bert_dataloader = TensorflowModelZooBertDataLoader( + dataset, + batch_size, + last_batch, + collate_fn, + sampler, + batch_sampler, + num_workers, + pin_memory, + shuffle, + distributed, + ) return tf_bert_dataloader else: - return DefaultDataLoader(dataset, batch_size, last_batch, collate_fn, - sampler, batch_sampler, num_workers, - pin_memory, shuffle, distributed) + return DefaultDataLoader( + dataset, + batch_size, + last_batch, + collate_fn, + sampler, + batch_sampler, + num_workers, + pin_memory, + shuffle, + distributed, + ) def __bool__(self): """Judgement if the dataloader exists.""" @@ -293,29 +370,35 @@ def __len__(self): dataset_len = 0 for _ in self.dataset: dataset_len += 1 - except RuntimeError: return sum([1 for _ in self]) + except RuntimeError: + return sum([1 for _ in self]) except Exception: - raise ValueError(f"{self.dataset} is invalid, {self.dataset}" \ - " does not support calculating the length of its dataloader") - process_rank = 0 # The default rank is 0, which represents the main process - process_size = 1 # By default, process_size=1, only the main process is running + raise ValueError( + f"{self.dataset} is invalid, {self.dataset}" + " does not support calculating the length of its dataloader" + ) + process_rank = 0 # The default rank is 0, which represents the main process + process_size = 1 # By default, process_size=1, only the main process is running if self.distributed: import horovod.tensorflow as hvd + hvd.init() process_rank = hvd.rank() process_size = hvd.size() if process_size < 2: - raise EnvironmentError("The program is now trying to get length of" \ - " the distributed TensorflowDataLoader in only one process." \ - " If you do not want to use distributed DataLoader, please set" \ - " 'distributed: False'. Or If you want to use distributed DataLoader," \ - " please set 'distributed: True' and launch multiple processes.") + raise EnvironmentError( + "The program is now trying to get length of" + " the distributed TensorflowDataLoader in only one process." + " If you do not want to use distributed DataLoader, please set" + " 'distributed: False'. 
Or If you want to use distributed DataLoader," + " please set 'distributed: True' and launch multiple processes." + ) if process_rank < (dataset_len % process_size): self.dis_dataset_len = dataset_len // process_size + 1 else: self.dis_dataset_len = dataset_len // process_size - if self.drop_last == False: + if self.drop_last is False: dataloader_len = ceil(self.dis_dataset_len / self.batch_size) else: dataloader_len = floor(self.dis_dataset_len / self.batch_size) - return sys.maxsize if dataloader_len > sys.maxsize else dataloader_len \ No newline at end of file + return sys.maxsize if dataloader_len > sys.maxsize else dataloader_len diff --git a/neural_compressor/experimental/data/datasets/__init__.py b/neural_compressor/experimental/data/datasets/__init__.py index c2460d737ed..1a6a9457f49 100644 --- a/neural_compressor/experimental/data/datasets/__init__.py +++ b/neural_compressor/experimental/data/datasets/__init__.py @@ -14,7 +14,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Built-in datasets class for multiple framework backends.""" from .dataset import Datasets, Dataset, IterableDataset, dataset_registry @@ -24,7 +23,7 @@ modules = glob.glob(join(dirname(__file__), "*.py")) for f in modules: - if isfile(f) and not f.startswith('__') and not f.endswith('__init__.py'): + if isfile(f) and not f.startswith("__") and not f.endswith("__init__.py"): __import__(basename(f)[:-3], globals(), locals(), level=1) diff --git a/neural_compressor/experimental/data/datasets/bert_dataset.py b/neural_compressor/experimental/data/datasets/bert_dataset.py index 1bf7775e54f..5344c1452c2 100644 --- a/neural_compressor/experimental/data/datasets/bert_dataset.py +++ b/neural_compressor/experimental/data/datasets/bert_dataset.py @@ -14,26 +14,29 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Built-in BERT datasets class for multiple framework backends.""" -import os -import logging -import json import dataclasses +import json +import logging +import os from dataclasses import dataclass from typing import List, Optional, Union + from neural_compressor.utils.utility import LazyImport -from .dataset import dataset_registry, Dataset -torch = LazyImport('torch') -transformers = LazyImport('transformers') + +from .dataset import Dataset, dataset_registry + +torch = LazyImport("torch") +transformers = LazyImport("transformers") logger = logging.getLogger("neural_compressor") -@dataset_registry(dataset_type="bert", framework="pytorch", dataset_format='') + +@dataset_registry(dataset_type="bert", framework="pytorch", dataset_format="") class PytorchBertDataset(Dataset): """PyTorch dataset used for model Bert. - + This Dataset is to construct from the Bert TensorDataset and not a full implementation from yaml config. The original repo link is: https://github.com/huggingface/transformers. When you want use this Dataset, you should add it before you initialize your DataLoader. 
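The registration pattern shown here generalizes to user-defined datasets: decorate a Dataset subclass with dataset_registry and implement __len__ and __getitem__. The sketch below relies only on what is visible in this patch (the decorator keywords and the Dataset interface); the class name, import path, and constructor arguments are illustrative assumptions.

from neural_compressor.experimental.data.datasets.dataset import Dataset, dataset_registry


@dataset_registry(dataset_type="my_text_pairs", framework="pytorch", dataset_format="")
class MyTextPairDataset(Dataset):
    """Toy index-style dataset registered for the PyTorch backend (illustrative only)."""

    def __init__(self, samples, transform=None, filter=None):
        self.samples = samples
        self.transform = transform

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, index):
        sample = self.samples[index]
        if self.transform is not None:
            sample = self.transform(sample)
        return sample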
@@ -66,7 +69,7 @@ class PytorchBertDataset(Dataset): transform=preprocess, filter=filter) """ - def __init__(self, dataset, task, model_type='bert', transform=None, filter=None): + def __init__(self, dataset, task, model_type="bert", transform=None, filter=None): """Initialize the attributes of class.""" self.dataset = dataset assert task in ("classifier", "squad"), "Bert task support only classifier squad" @@ -86,35 +89,36 @@ def __getitem__(self, index): sample = self.dataset[index] if self.transform is not None: sample = self.transform(sample) - if self.task == 'classifier': - inputs = { - 'input_ids': sample[0], - 'attention_mask': sample[1], - 'labels': sample[3]} + if self.task == "classifier": + inputs = {"input_ids": sample[0], "attention_mask": sample[1], "labels": sample[3]} - if self.model_type != 'distilbert': + if self.model_type != "distilbert": # XLM, DistilBERT and RoBERTa don't use segment_ids - if self.model_type in ['bert', 'xlnet']: - inputs['token_type_ids'] = sample[2] - sample = (inputs, inputs['labels']) + if self.model_type in ["bert", "xlnet"]: + inputs["token_type_ids"] = sample[2] + sample = (inputs, inputs["labels"]) - elif self.task == 'squad': + elif self.task == "squad": inputs = { - 'input_ids': sample[0], - 'attention_mask': sample[1], } - if self.model_type != 'distilbert': + "input_ids": sample[0], + "attention_mask": sample[1], + } + if self.model_type != "distilbert": # XLM, DistilBERT and RoBERTa don't use segment_ids - inputs['token_type_ids'] = sample[2] if self.model_type in [ - 'bert', 'xlnet'] else None - if self.model_type in ['xlnet', 'xlm']: - inputs.update({'cls_index': sample[4], 'p_mask': sample[5]}) + inputs["token_type_ids"] = sample[2] if self.model_type in ["bert", "xlnet"] else None + if self.model_type in ["xlnet", "xlm"]: + inputs.update({"cls_index": sample[4], "p_mask": sample[5]}) example_indices = sample[3] sample = (inputs, example_indices) return sample -@dataset_registry(dataset_type="GLUE", framework="onnxrt_qlinearops, \ - onnxrt_integerops", dataset_format='') +@dataset_registry( + dataset_type="GLUE", + framework="onnxrt_qlinearops, \ + onnxrt_integerops", + dataset_format="", +) class ONNXRTBertDataset(Dataset): """ONNXRT dataset used for model Bert. 
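As the __getitem__ branches above show, a "classifier" sample is repackaged from the raw TensorDataset row (input_ids, attention_mask, token_type_ids, labels) into a feature dict, with token_type_ids kept only for the "bert" and "xlnet" model types. A small usage sketch under those assumptions; the tensor shapes and dummy values are illustrative.

import torch
from torch.utils.data import TensorDataset

from neural_compressor.experimental.data.datasets.bert_dataset import PytorchBertDataset

# Dummy GLUE-style data: token ids, attention mask, segment ids, and labels.
input_ids = torch.randint(0, 30000, (8, 128))
attention_mask = torch.ones(8, 128, dtype=torch.long)
token_type_ids = torch.zeros(8, 128, dtype=torch.long)
labels = torch.randint(0, 2, (8,))

tensor_ds = TensorDataset(input_ids, attention_mask, token_type_ids, labels)
bert_ds = PytorchBertDataset(tensor_ds, task="classifier", model_type="bert")

inputs, label = bert_ds[0]
# inputs == {"input_ids": ..., "attention_mask": ..., "token_type_ids": ..., "labels": ...}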
@@ -141,24 +145,39 @@ class ONNXRTBertDataset(Dataset): dataset = ONNXRTBertDataset(data_dir=data_dir, model_name_or_path='bert-base-uncase', transform=preprocess, filter=filter) """ - def __init__(self, data_dir, model_name_or_path, max_seq_length=128,\ - do_lower_case=True, task='mrpc', model_type='bert', dynamic_length=False,\ - evaluate=True, transform=None, filter=None): + + def __init__( + self, + data_dir, + model_name_or_path, + max_seq_length=128, + do_lower_case=True, + task="mrpc", + model_type="bert", + dynamic_length=False, + evaluate=True, + transform=None, + filter=None, + ): """Initialize the attributes of class.""" task = task.lower() model_type = model_type.lower() - assert task in ['mrpc', 'qqp', 'qnli', 'rte', 'sts-b', 'cola', \ - 'mnli', 'wnli'], 'Unsupported task type' - assert model_type in ['distilbert', 'bert', 'mobilebert', 'roberta'], 'Unsupported \ - model type' + assert task in ["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", "mnli", "wnli"], "Unsupported task type" + assert model_type in [ + "distilbert", + "bert", + "mobilebert", + "roberta", + ], "Unsupported \ + model type" self.dynamic_length = dynamic_length self.model_type = model_type self.max_seq_length = max_seq_length - tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path, - do_lower_case=do_lower_case) - self.dataset = load_and_cache_examples(data_dir, model_name_or_path, \ - max_seq_length, task, model_type, tokenizer, evaluate) + tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path, do_lower_case=do_lower_case) + self.dataset = load_and_cache_examples( + data_dir, model_name_or_path, max_seq_length, task, model_type, tokenizer, evaluate + ) def __len__(self): """Length of the dataset.""" @@ -172,8 +191,7 @@ def __getitem__(self, index): return self.dataset[index] -def load_and_cache_examples(data_dir, model_name_or_path, max_seq_length, task, \ - model_type, tokenizer, evaluate): +def load_and_cache_examples(data_dir, model_name_or_path, max_seq_length, task, model_type, tokenizer, evaluate): """Load and cache the examples. Helper Function for ONNXRTBertDataset. 
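Following the docstring example above, a hedged usage sketch of ONNXRTBertDataset. The data directory and model name are placeholders, and the GLUE MRPC files are assumed to be present; construction downloads a tokenizer and tokenizes the whole split via load_and_cache_examples.

from neural_compressor.experimental.data.datasets.bert_dataset import ONNXRTBertDataset

dataset = ONNXRTBertDataset(
    data_dir="./MRPC",                       # assumed GLUE MRPC data location
    model_name_or_path="bert-base-uncased",  # tokenizer checkpoint (illustrative)
    max_seq_length=128,
    task="mrpc",
    model_type="bert",
    evaluate=True,
)
# Each item is one row of the cached TensorDataset built below:
# (input_ids, attention_mask, token_type_ids, seq_length, label)
sample = dataset[0]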
@@ -185,28 +203,32 @@ def load_and_cache_examples(data_dir, model_name_or_path, max_seq_length, task, # Load data features from cache or dataset file if not os.path.exists("./dataset_cached"): os.makedirs("./dataset_cached") - cached_features_file = os.path.join("./dataset_cached", 'cached_{}_{}_{}_{}'.format( - 'dev' if evaluate else 'train', - list(filter(None, model_name_or_path.split('/'))).pop(), - str(max_seq_length), - str(task))) + cached_features_file = os.path.join( + "./dataset_cached", + "cached_{}_{}_{}_{}".format( + "dev" if evaluate else "train", + list(filter(None, model_name_or_path.split("/"))).pop(), + str(max_seq_length), + str(task), + ), + ) if os.path.exists(cached_features_file): logger.info("Load features from cached file {}.".format(cached_features_file)) features = torch.load(cached_features_file) else: logger.info("Create features from dataset file at {}.".format(data_dir)) label_list = processor.get_labels() - if task in ['mnli', 'mnli-mm'] and model_type in ['roberta']: + if task in ["mnli", "mnli-mm"] and model_type in ["roberta"]: # HACK(label indices are swapped in RoBERTa pretrained model) label_list[1], label_list[2] = label_list[2], label_list[1] - examples = processor.get_dev_examples(data_dir) if evaluate else \ - processor.get_train_examples(data_dir) - features = convert_examples_to_features(examples, - tokenizer, - task=task, - label_list=label_list, - max_length=max_seq_length, - output_mode=output_mode, + examples = processor.get_dev_examples(data_dir) if evaluate else processor.get_train_examples(data_dir) + features = convert_examples_to_features( + examples, + tokenizer, + task=task, + label_list=label_list, + max_length=max_seq_length, + output_mode=output_mode, ) logger.info("Save features into cached file {}.".format(cached_features_file)) torch.save(features, cached_features_file) @@ -219,8 +241,7 @@ def load_and_cache_examples(data_dir, model_name_or_path, max_seq_length, task, all_labels = torch.tensor([f.label for f in features], dtype=torch.long) elif output_mode == "regression": all_labels = torch.tensor([f.label for f in features], dtype=torch.float) - dataset = TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids, \ - all_seq_lengths, all_labels) + dataset = TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids, all_seq_lengths, all_labels) return dataset @@ -245,7 +266,7 @@ def convert_examples_to_features( logger.info("Use label list {} for task {}.".format(label_list, task)) label_map = {label: i for i, label in enumerate(label_list)} features = [] - for (ex_index, example) in enumerate(examples): + for ex_index, example in enumerate(examples): inputs = tokenizer.encode_plus( example.text_a, example.text_b, @@ -264,19 +285,14 @@ def convert_examples_to_features( padding_length = max_length - len(input_ids) input_ids = input_ids + ([pad_token] * padding_length) - attention_mask = attention_mask + \ - ([0 if mask_padding_with_zero else 1] * padding_length) + attention_mask = attention_mask + ([0 if mask_padding_with_zero else 1] * padding_length) token_type_ids = token_type_ids + ([pad_token_segment_id] * padding_length) - assert len(input_ids) == max_length, \ - "Error with input_ids length {} vs {}".format( - len(input_ids), max_length) - assert len(attention_mask) == max_length, \ - "Error with attention_mask length {} vs {}".format( + assert len(input_ids) == max_length, "Error with input_ids length {} vs {}".format(len(input_ids), max_length) + assert len(attention_mask) == max_length, "Error with 
attention_mask length {} vs {}".format( len(attention_mask), max_length ) - assert len(token_type_ids) == max_length, \ - "Error with token_type_ids length {} vs {}".format( + assert len(token_type_ids) == max_length, "Error with token_type_ids length {} vs {}".format( len(token_type_ids), max_length ) if output_mode == "classification": @@ -326,7 +342,7 @@ def to_json_string(self): return json.dumps(dataclasses.asdict(self)) + "\n" -@dataset_registry(dataset_type="bert", framework="tensorflow, tensorflow_itex", dataset_format='') +@dataset_registry(dataset_type="bert", framework="tensorflow, tensorflow_itex", dataset_format="") class TensorflowBertDataset(Dataset): """Tensorflow dataset used for model Bert. @@ -341,14 +357,14 @@ class TensorflowBertDataset(Dataset): to specific conditions """ - def __init__(self, root, label_file, task='squad', - model_type='bert', transform=None, filter=None): + def __init__(self, root, label_file, task="squad", model_type="bert", transform=None, filter=None): """Initialize the attributes of class.""" import json + with open(label_file) as lf: label_json = json.load(lf) - assert label_json['version'] == '1.1', 'only support squad 1.1' - self.label = label_json['data'] + assert label_json["version"] == "1.1", "only support squad 1.1" + self.label = label_json["data"] self.root = root self.transform = transform self.filter = filter @@ -365,7 +381,7 @@ def __len__(self): return 1 -class ParseDecodeBert(): +class ParseDecodeBert: """Helper function for TensorflowModelZooBertDataset. Parse the features from sample. @@ -378,25 +394,24 @@ def __call__(self, sample): sample: Data to be parsed. """ import tensorflow as tf + # Dense features in Example proto. feature_map = { - 'input_ids': - tf.compat.v1.VarLenFeature(dtype=tf.int64), - 'input_mask': - tf.compat.v1.VarLenFeature(dtype=tf.int64), - 'segment_ids': - tf.compat.v1.VarLenFeature(dtype=tf.int64), + "input_ids": tf.compat.v1.VarLenFeature(dtype=tf.int64), + "input_mask": tf.compat.v1.VarLenFeature(dtype=tf.int64), + "segment_ids": tf.compat.v1.VarLenFeature(dtype=tf.int64), } features = tf.io.parse_single_example(sample, feature_map) - input_ids = features['input_ids'].values - input_mask = features['input_mask'].values - segment_ids = features['segment_ids'].values + input_ids = features["input_ids"].values + input_mask = features["input_mask"].values + segment_ids = features["segment_ids"].values return (input_ids, input_mask, segment_ids) -@dataset_registry(dataset_type="mzbert", framework="tensorflow, tensorflow_itex", dataset_format='') + +@dataset_registry(dataset_type="mzbert", framework="tensorflow, tensorflow_itex", dataset_format="") class TensorflowModelZooBertDataset(Dataset): """Tensorflow dataset for three-input Bert in tf record format. @@ -410,37 +425,43 @@ class TensorflowModelZooBertDataset(Dataset): filter (Filter objects, default=None): filter out examples according. 
""" - def __init__(self, root, label_file, task='squad', - model_type='bert', transform=None, filter=None, num_cores=28): + def __init__(self, root, label_file, task="squad", model_type="bert", transform=None, filter=None, num_cores=28): """Initialize the attributes of class.""" import json + with open(label_file) as lf: label_json = json.load(lf) - assert label_json['version'] == '1.1', 'only support squad 1.1' - self.label = label_json['data'] + assert label_json["version"] == "1.1", "only support squad 1.1" + self.label = label_json["data"] import tensorflow as tf + record_iterator = tf.compat.v1.python_io.tf_record_iterator(root) example = tf.train.SequenceExample() for element in record_iterator: example.ParseFromString(element) break feature = example.context.feature - if len(feature['input_ids'].int64_list.value) == 0 \ - and len(feature['input_mask'].int64_list.value) == 0: - raise ValueError("Tfrecord format is incorrect, please refer\ + if len(feature["input_ids"].int64_list.value) == 0 and len(feature["input_mask"].int64_list.value) == 0: + raise ValueError( + "Tfrecord format is incorrect, please refer\ 'https://github.com/tensorflow/models/blob/master/research/\ - object_detection/dataset_tools/' to create correct tfrecord") + object_detection/dataset_tools/' to create correct tfrecord" + ) # pylint: disable=no-name-in-module from tensorflow.python.data.experimental import parallel_interleave + tfrecord_paths = [root] ds = tf.data.TFRecordDataset.list_files(tfrecord_paths) ds = ds.apply( - parallel_interleave(tf.data.TFRecordDataset, - cycle_length=num_cores, - block_length=5, - sloppy=True, - buffer_output_elements=10000, - prefetch_input_elements=10000)) + parallel_interleave( + tf.data.TFRecordDataset, + cycle_length=num_cores, + block_length=5, + sloppy=True, + buffer_output_elements=10000, + prefetch_input_elements=10000, + ) + ) if transform is not None: transform.transform_list.insert(0, ParseDecodeBert()) else: @@ -450,6 +471,7 @@ def __init__(self, root, label_file, task='squad', ds = ds.filter(filter) ds = ds.prefetch(buffer_size=1000) from ..dataloaders.tensorflow_dataloader import TFDataDataLoader + ds = TFDataDataLoader(ds) self.root = [] for inputs in ds: diff --git a/neural_compressor/experimental/data/datasets/coco_dataset.py b/neural_compressor/experimental/data/datasets/coco_dataset.py index d583caa16bd..7a4dddb78a2 100644 --- a/neural_compressor/experimental/data/datasets/coco_dataset.py +++ b/neural_compressor/experimental/data/datasets/coco_dataset.py @@ -29,18 +29,20 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== - """Built-in COCO datasets class for multiple framework backends.""" from PIL import Image + from neural_compressor.utils.utility import LazyImport -from .dataset import dataset_registry, IterableDataset, Dataset -tf = LazyImport('tensorflow') -mx = LazyImport('mxnet') -torch = LazyImport('torch') +from .dataset import Dataset, IterableDataset, dataset_registry + +tf = LazyImport("tensorflow") +mx = LazyImport("mxnet") +torch = LazyImport("torch") + -class ParseDecodeCoco(): +class ParseDecodeCoco: """Helper function for TensorflowModelZooBertDataset. Parse the features from sample. @@ -54,30 +56,31 @@ def __call__(self, sample): """ # Dense features in Example proto. 
feature_map = { - 'image/encoded': - tf.compat.v1.FixedLenFeature([], dtype=tf.string, default_value=''), - 'image/object/class/text': - tf.compat.v1.VarLenFeature(dtype=tf.string), - 'image/object/class/label': - tf.compat.v1.VarLenFeature(dtype=tf.int64), - 'image/source_id':tf.compat.v1.FixedLenFeature([], dtype=tf.string, default_value=''), + "image/encoded": tf.compat.v1.FixedLenFeature([], dtype=tf.string, default_value=""), + "image/object/class/text": tf.compat.v1.VarLenFeature(dtype=tf.string), + "image/object/class/label": tf.compat.v1.VarLenFeature(dtype=tf.int64), + "image/source_id": tf.compat.v1.FixedLenFeature([], dtype=tf.string, default_value=""), } sparse_float32 = tf.compat.v1.VarLenFeature(dtype=tf.float32) # Sparse features in Example proto. - feature_map.update({ - k: sparse_float32 - for k in [ - 'image/object/bbox/xmin', 'image/object/bbox/ymin', - 'image/object/bbox/xmax', 'image/object/bbox/ymax' - ] - }) + feature_map.update( + { + k: sparse_float32 + for k in [ + "image/object/bbox/xmin", + "image/object/bbox/ymin", + "image/object/bbox/xmax", + "image/object/bbox/ymax", + ] + } + ) features = tf.io.parse_single_example(sample, feature_map) - xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) - ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0) - xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0) - ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0) + xmin = tf.expand_dims(features["image/object/bbox/xmin"].values, 0) + ymin = tf.expand_dims(features["image/object/bbox/ymin"].values, 0) + xmax = tf.expand_dims(features["image/object/bbox/xmax"].values, 0) + ymax = tf.expand_dims(features["image/object/bbox/ymax"].values, 0) bbox = tf.concat([ymin, xmin, ymax, xmax], 0) # Force the variable number of bounding boxes into the shape @@ -85,17 +88,18 @@ def __call__(self, sample): bbox = tf.expand_dims(bbox, 0) bbox = tf.transpose(bbox, [0, 2, 1]) - encoded_image = features['image/encoded'] + encoded_image = features["image/encoded"] image_tensor = tf.image.decode_image(encoded_image, channels=3) image_tensor.set_shape([None, None, 3]) - str_label = features['image/object/class/text'].values - int_label = features['image/object/class/label'].values - image_id = features['image/source_id'] + str_label = features["image/object/class/text"].values + int_label = features["image/object/class/label"].values + image_id = features["image/source_id"] return image_tensor, (bbox[0], str_label, int_label, image_id) -@dataset_registry(dataset_type="COCORecord", framework="tensorflow, tensorflow_itex", dataset_format='') + +@dataset_registry(dataset_type="COCORecord", framework="tensorflow, tensorflow_itex", dataset_format="") class COCORecordDataset(IterableDataset): """Tensorflow COCO dataset in tf record format. @@ -105,7 +109,7 @@ class COCORecordDataset(IterableDataset): Args: root (str): Root directory of dataset. num_cores (int, default=28):The number of input Datasets to interleave from in parallel. transform (transform object, default=None): transform to process input data. - filter (Filter objects, default=None): filter out examples according + filter (Filter objects, default=None): filter out examples according to specific conditions. 
""" @@ -117,23 +121,31 @@ def __new__(cls, root, num_cores=28, transform=None, filter=filter): example.ParseFromString(element) break feature = example.context.feature - if len(feature['image/object/class/text'].bytes_list.value) == 0 \ - and len(feature['image/object/class/label'].int64_list.value) == 0: - raise ValueError("Tfrecord format is incorrect, please refer\ + if ( + len(feature["image/object/class/text"].bytes_list.value) == 0 + and len(feature["image/object/class/label"].int64_list.value) == 0 + ): + raise ValueError( + "Tfrecord format is incorrect, please refer\ 'https://github.com/tensorflow/models/blob/master/research/\ object_detection/dataset_tools/create_coco_tf_record.py' to\ - create correct tfrecord") + create correct tfrecord" + ) # pylint: disable=no-name-in-module from tensorflow.python.data.experimental import parallel_interleave + tfrecord_paths = [root] ds = tf.data.TFRecordDataset.list_files(tfrecord_paths) ds = ds.apply( - parallel_interleave(tf.data.TFRecordDataset, - cycle_length=num_cores, - block_length=5, - sloppy=True, - buffer_output_elements=10000, - prefetch_input_elements=10000)) + parallel_interleave( + tf.data.TFRecordDataset, + cycle_length=num_cores, + block_length=5, + sloppy=True, + buffer_output_elements=10000, + prefetch_input_elements=10000, + ) + ) if transform is not None: transform.transform_list.insert(0, ParseDecodeCoco()) else: @@ -144,9 +156,14 @@ def __new__(cls, root, num_cores=28, transform=None, filter=filter): ds = ds.prefetch(buffer_size=1000) return ds -@dataset_registry(dataset_type="COCORaw", framework="onnxrt_qlinearops, \ + +@dataset_registry( + dataset_type="COCORaw", + framework="onnxrt_qlinearops, \ onnxrt_integerops, pytorch, mxnet, tensorflow, \ - tensorflow_itex", dataset_format='') + tensorflow_itex", + dataset_format="", +) class COCORaw(Dataset): """Coco raw dataset. @@ -162,17 +179,20 @@ class COCORaw(Dataset): img_dir (str, default='val2017'): image file directory. anno_dir (str, default='annotations/instances_val2017.json'): annotation file directory. transform (transform object, default=None): transform to process input data. - filter (Filter objects, default=None): filter out examples according + filter (Filter objects, default=None): filter out examples according to specific conditions. 
""" - def __init__(self, root, img_dir='val2017', \ - anno_dir='annotations/instances_val2017.json', transform=None, filter=filter): + def __init__( + self, root, img_dir="val2017", anno_dir="annotations/instances_val2017.json", transform=None, filter=filter + ): """Initialize the attributes of class.""" import json import os + import numpy as np from pycocotools.coco import COCO + self.image_list = [] self.transform = transform img_path = os.path.join(root, img_dir) @@ -186,21 +206,25 @@ def __init__(self, root, img_dir='val2017', \ labels = [] ids = [] img_detail = coco.loadImgs(img_id)[0] - ids.append(img_detail['file_name'].encode('utf-8')) - pic_height = img_detail['height'] - pic_width = img_detail['width'] + ids.append(img_detail["file_name"].encode("utf-8")) + pic_height = img_detail["height"] + pic_width = img_detail["width"] - ann_ids = coco.getAnnIds(imgIds=img_id,catIds=cat_ids) + ann_ids = coco.getAnnIds(imgIds=img_id, catIds=cat_ids) anns = coco.loadAnns(ann_ids) for ann in anns: - bbox = ann['bbox'] + bbox = ann["bbox"] if len(bbox) == 0: continue - bbox = [bbox[0]/float(pic_width), bbox[1]/float(pic_height),\ - bbox[2]/float(pic_width), bbox[3]/float(pic_height)] - bboxes.append([bbox[1], bbox[0], bbox[1]+bbox[3], bbox[0]+bbox[2]]) - labels.append(coco.cats[ann['category_id']]['name'].encode('utf8')) - img_file = os.path.join(img_path, img_detail['file_name']) + bbox = [ + bbox[0] / float(pic_width), + bbox[1] / float(pic_height), + bbox[2] / float(pic_width), + bbox[3] / float(pic_height), + ] + bboxes.append([bbox[1], bbox[0], bbox[1] + bbox[3], bbox[0] + bbox[2]]) + labels.append(coco.cats[ann["category_id"]]["name"].encode("utf8")) + img_file = os.path.join(img_path, img_detail["file_name"]) if not os.path.exists(img_file) or len(bboxes) == 0: continue @@ -208,10 +232,18 @@ def __init__(self, root, img_dir='val2017', \ continue with Image.open(img_file) as image: - image = np.array(image.convert('RGB')) + image = np.array(image.convert("RGB")) self.image_list.append( - (image, [np.array(bboxes), np.array(labels), np.array([]),\ - np.array(img_detail['file_name'].encode('utf-8'))])) + ( + image, + [ + np.array(bboxes), + np.array(labels), + np.array([]), + np.array(img_detail["file_name"].encode("utf-8")), + ], + ) + ) def __len__(self): """Length of the dataset.""" @@ -224,12 +256,17 @@ def __getitem__(self, index): """ sample = self.image_list[index] if self.transform is not None: - sample= self.transform(sample) + sample = self.transform(sample) return sample -@dataset_registry(dataset_type="COCONpy", framework="onnxrt_qlinearops, \ + +@dataset_registry( + dataset_type="COCONpy", + framework="onnxrt_qlinearops, \ onnxrt_integerops, pytorch, mxnet, tensorflow, \ - tensorflow_itex", dataset_format='') + tensorflow_itex", + dataset_format="", +) class COCONpy(Dataset): """COCO npy dataset. @@ -244,17 +281,20 @@ class COCONpy(Dataset): npy_dir (str, default='val2017'): npy file directory. anno_dir (str, default='annotations/instances_val2017.json'): annotation file directory. transform (transform object, default=None): transform to process input data. - filter (Filter objects, default=None): filter out examples according + filter (Filter objects, default=None): filter out examples according to specific conditions. 
""" - def __init__(self, root, npy_dir='val2017', \ - anno_dir='annotations/instances_val2017.json', transform=None, filter=None): + def __init__( + self, root, npy_dir="val2017", anno_dir="annotations/instances_val2017.json", transform=None, filter=None + ): """Initialize the attributes of class.""" import json import os + import numpy as np from pycocotools.coco import COCO + self.image_list = [] npy_path = os.path.join(root, npy_dir) anno_path = os.path.join(root, anno_dir) @@ -266,26 +306,25 @@ def __init__(self, root, npy_dir='val2017', \ labels = [] ids = [] img_detail = coco.loadImgs(img_id)[0] - ids.append(img_detail['file_name'].encode('utf-8')) - pic_height = img_detail['height'] - pic_width = img_detail['width'] + ids.append(img_detail["file_name"].encode("utf-8")) + pic_height = img_detail["height"] + pic_width = img_detail["width"] - ann_ids = coco.getAnnIds(imgIds=img_id,catIds=cat_ids) + ann_ids = coco.getAnnIds(imgIds=img_id, catIds=cat_ids) anns = coco.loadAnns(ann_ids) for ann in anns: - bbox = ann['bbox'] - category_id = ann['category_id'] + bbox = ann["bbox"] + category_id = ann["category_id"] if len(bbox) == 0: continue labels.append((np.array(category_id), np.array(bbox))) - npy_file = os.path.join(npy_path, img_detail['file_name']) + npy_file = os.path.join(npy_path, img_detail["file_name"]) npy_file = npy_file + ".npy" if not os.path.exists(npy_file): continue image = np.load(npy_file) - self.image_list.append( - (image, labels)) + self.image_list.append((image, labels)) def __len__(self): """Length of the dataset.""" diff --git a/neural_compressor/experimental/data/datasets/dataset.py b/neural_compressor/experimental/data/datasets/dataset.py index 8ce0bc48f00..07409f662ec 100644 --- a/neural_compressor/experimental/data/datasets/dataset.py +++ b/neural_compressor/experimental/data/datasets/dataset.py @@ -14,24 +14,26 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """This is the base class for each framework.""" -from abc import abstractmethod import os -from neural_compressor.utils.utility import LazyImport, singleton +from abc import abstractmethod + from PIL import Image -torch = LazyImport('torch') -torchvision = LazyImport('torchvision') -tf = LazyImport('tensorflow') -mx = LazyImport('mxnet') -np = LazyImport('numpy') -hashlib = LazyImport('hashlib') -gzip = LazyImport('gzip') -tarfile = LazyImport('tarfile') -zipfile = LazyImport('zipfile') -pickle = LazyImport('pickle') -glob = LazyImport('glob') + +from neural_compressor.utils.utility import LazyImport, singleton + +torch = LazyImport("torch") +torchvision = LazyImport("torchvision") +tf = LazyImport("tensorflow") +mx = LazyImport("mxnet") +np = LazyImport("numpy") +hashlib = LazyImport("hashlib") +gzip = LazyImport("gzip") +tarfile = LazyImport("tarfile") +zipfile = LazyImport("zipfile") +pickle = LazyImport("pickle") +glob = LazyImport("glob") @singleton @@ -51,8 +53,7 @@ class PyTorchDatasets(object): def __init__(self): """Initialize the attributes of class.""" self.datasets = { - 'ImageFolder': PytorchMxnetWrapDataset( - torchvision.datasets.ImageFolder), + "ImageFolder": PytorchMxnetWrapDataset(torchvision.datasets.ImageFolder), } self.datasets.update(PYTORCH_DATASETS) @@ -87,7 +88,7 @@ def __init__(self): self.datasets.update(ONNXRTIT_DATASETS) -class PytorchMxnetWrapDataset(): +class PytorchMxnetWrapDataset: """The base class for PyTorch and MXNet frameworks. 
Args: @@ -100,11 +101,10 @@ def __init__(self, datafunc): def __call__(self, transform=None, filter=None, *args, **kwargs): """Wrap the dataset for PyTorch and MXNet framework.""" - return PytorchMxnetWrapFunction(self.datafunc, transform=transform, \ - filter=filter, *args, **kwargs) + return PytorchMxnetWrapFunction(self.datafunc, transform=transform, filter=filter, *args, **kwargs) -class PytorchMxnetWrapFunction(): +class PytorchMxnetWrapFunction: """The Helper class for PytorchMxnetWrapDataset. Args: @@ -135,26 +135,26 @@ def __getitem__(self, index): return sample -framework_datasets = {"tensorflow": TensorflowDatasets, - "tensorflow_itex": TensorflowDatasets, - "mxnet": MXNetDatasets, - "pytorch": PyTorchDatasets, - "pytorch_ipex": PyTorchDatasets, - "pytorch_fx": PyTorchDatasets, - "onnxrt_qdq": ONNXRTQLDatasets, - "onnxrt_qlinearops": ONNXRTQLDatasets, - "onnxruntime": ONNXRTQLDatasets, - "onnxrt_integerops": ONNXRTITDatasets, - } - +framework_datasets = { + "tensorflow": TensorflowDatasets, + "tensorflow_itex": TensorflowDatasets, + "mxnet": MXNetDatasets, + "pytorch": PyTorchDatasets, + "pytorch_ipex": PyTorchDatasets, + "pytorch_fx": PyTorchDatasets, + "onnxrt_qdq": ONNXRTQLDatasets, + "onnxrt_qlinearops": ONNXRTQLDatasets, + "onnxruntime": ONNXRTQLDatasets, + "onnxrt_integerops": ONNXRTITDatasets, +} """The datasets supported by neural_compressor, it's model specific and can be configured by yaml file. - User could add new datasets by implementing new Dataset subclass under this directory. - The naming convention of new dataset subclass should be something like ImageClassifier, user - could choose this dataset by setting "imageclassifier" string in tuning.strategy field of yaml. +User could add new datasets by implementing new Dataset subclass under this directory. +The naming convention of new dataset subclass should be something like ImageClassifier, user +could choose this dataset by setting "imageclassifier" string in tuning.strategy field of yaml. - Datasets variable is used to store all implemented Dataset subclasses to support - model specific dataset. +Datasets variable is used to store all implemented Dataset subclasses to support +model specific dataset. 
""" @@ -169,10 +169,19 @@ class Datasets(object): def __init__(self, framework): """Initialize the attributes of class.""" - assert framework in ["tensorflow", "tensorflow_itex", "keras", \ - "mxnet", "onnxrt_qdq", "onnxrt_qlinearops", "onnxrt_integerops", \ - "pytorch", "pytorch_ipex", "pytorch_fx", "onnxruntime"], \ - "framework support tensorflow pytorch mxnet onnxrt" + assert framework in [ + "tensorflow", + "tensorflow_itex", + "keras", + "mxnet", + "onnxrt_qdq", + "onnxrt_qlinearops", + "onnxrt_integerops", + "pytorch", + "pytorch_ipex", + "pytorch_fx", + "onnxruntime", + ], "framework support tensorflow pytorch mxnet onnxrt" self.datasets = framework_datasets[framework]().datasets def __getitem__(self, dataset_type): @@ -180,8 +189,7 @@ def __getitem__(self, dataset_type): x[i] is roughly equivalent to type(x).__getitem__(x, index) """ - assert dataset_type in self.datasets.keys(), "dataset type only support {}".\ - format(self.datasets.keys()) + assert dataset_type in self.datasets.keys(), "dataset type only support {}".format(self.datasets.keys()) return self.datasets[dataset_type] @@ -195,20 +203,21 @@ def __getitem__(self, dataset_type): ONNXRTQL_DATASETS = {} ONNXRTIT_DATASETS = {} -registry_datasets = {"tensorflow": TENSORFLOW_DATASETS, - "tensorflow_itex": TENSORFLOWITEX_DATASETS, - "mxnet": MXNET_DATASETS, - "pytorch": PYTORCH_DATASETS, - "pytorch_ipex": PYTORCHIPEX_DATASETS, - "pytorch_fx": PYTORCHFX_DATASETS, - "onnxrt_integerops": ONNXRTIT_DATASETS, - "onnxrt_qdq": ONNXRTQL_DATASETS, - "onnxruntime": ONNXRTQL_DATASETS, - "onnxrt_qlinearops": ONNXRTQL_DATASETS, - } - - -def dataset_registry(dataset_type, framework, dataset_format=''): +registry_datasets = { + "tensorflow": TENSORFLOW_DATASETS, + "tensorflow_itex": TENSORFLOWITEX_DATASETS, + "mxnet": MXNET_DATASETS, + "pytorch": PYTORCH_DATASETS, + "pytorch_ipex": PYTORCHIPEX_DATASETS, + "pytorch_fx": PYTORCHFX_DATASETS, + "onnxrt_integerops": ONNXRTIT_DATASETS, + "onnxrt_qdq": ONNXRTQL_DATASETS, + "onnxruntime": ONNXRTQL_DATASETS, + "onnxrt_qlinearops": ONNXRTQL_DATASETS, +} + + +def dataset_registry(dataset_type, framework, dataset_format=""): """Register dataset subclasses. Args: @@ -220,8 +229,9 @@ def dataset_registry(dataset_type, framework, dataset_format=''): Returns: cls: The class of register. """ + def decorator_dataset(cls): - for single_framework in [fwk.strip() for fwk in framework.split(',')]: + for single_framework in [fwk.strip() for fwk in framework.split(",")]: assert single_framework in [ "tensorflow", "tensorflow_itex", @@ -236,9 +246,10 @@ def decorator_dataset(cls): ], "The framework support tensorflow mxnet pytorch onnxrt" dataset_name = dataset_type + dataset_format if dataset_name in registry_datasets[single_framework].keys(): - raise ValueError('Cannot have two datasets with the same name') + raise ValueError("Cannot have two datasets with the same name") registry_datasets[single_framework][dataset_name] = cls return cls + return decorator_dataset @@ -289,6 +300,7 @@ def download_url(url, root, filename=None, md5=None): # pragma: no cover md5 (str): the md5 string. 
""" import urllib + root = os.path.expanduser(root) if not filename: filename = os.path.basename(url) @@ -297,23 +309,16 @@ def download_url(url, root, filename=None, md5=None): # pragma: no cover os.makedirs(root, exist_ok=True) if check_integrity(fpath, md5): - print('Using downloaded and verified file: ' + fpath) + print("Using downloaded and verified file: " + fpath) else: try: - print('Downloading ' + url + ' to ' + fpath) - urllib.request.urlretrieve( - url, fpath, - reporthook=gen_bar_updater() - ) + print("Downloading " + url + " to " + fpath) + urllib.request.urlretrieve(url, fpath, reporthook=gen_bar_updater()) except (urllib.error.URLError, IOError) as e: - if url[:5] == 'https': - url = url.replace('https:', 'http:') - print('Failed download. Trying https -> http instead.' - ' Downloading ' + url + ' to ' + fpath) - urllib.request.urlretrieve( - url, fpath, - reporthook=gen_bar_updater() - ) + if url[:5] == "https": + url = url.replace("https:", "http:") + print("Failed download. Trying https -> http instead." " Downloading " + url + " to " + fpath) + urllib.request.urlretrieve(url, fpath, reporthook=gen_bar_updater()) else: raise e if not check_integrity(fpath, md5): @@ -323,6 +328,7 @@ def download_url(url, root, filename=None, md5=None): # pragma: no cover def gen_bar_updater(): """Generate progress bar.""" from tqdm import tqdm + pbar = tqdm(total=None) def bar_update(count, block_size, total_size): @@ -331,6 +337,7 @@ def bar_update(count, block_size, total_size): pbar.total = total_size progress_bytes = count * block_size pbar.update(progress_bytes - pbar.n) + return bar_update @@ -343,16 +350,21 @@ def check_integrity(fpath, md5): return md5 == calculate_md5(fpath) -def calculate_md5(fpath, chunk_size=1024*1024): +def calculate_md5(fpath, chunk_size=1024 * 1024): """Generate MD5 checksum for a file.""" md5 = hashlib.md5() - with open(fpath, 'rb') as f: - for chunk in iter(lambda: f.read(chunk_size), b''): + with open(fpath, "rb") as f: + for chunk in iter(lambda: f.read(chunk_size), b""): md5.update(chunk) return md5.hexdigest() -@dataset_registry(dataset_type="CIFAR10", framework="onnxrt_qlinearops, \ - onnxrt_integerops", dataset_format='') + +@dataset_registry( + dataset_type="CIFAR10", + framework="onnxrt_qlinearops, \ + onnxrt_integerops", + dataset_format="", +) class CIFAR10(Dataset): """The CIFAR10 and CIFAR100 database. 
@@ -379,39 +391,33 @@ class CIFAR10(Dataset): url = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz" filename = "cifar-10-python.tar.gz" - tgz_md5 = 'c58f30108f718f92721af3b95e74349a' + tgz_md5 = "c58f30108f718f92721af3b95e74349a" train_list = [ - ['data_batch_1', 'c99cafc152244af753f735de768cd75f'], - ['data_batch_2', 'd4bba439e000b95fd0a9bffe97cbabec'], - ['data_batch_3', '54ebc095f3ab1f0389bbae665268c751'], - ['data_batch_4', '634d18415352ddfa80567beed471001a'], - ['data_batch_5', '482c414d41f54cd18b22e5b47cb7c3cb'], + ["data_batch_1", "c99cafc152244af753f735de768cd75f"], + ["data_batch_2", "d4bba439e000b95fd0a9bffe97cbabec"], + ["data_batch_3", "54ebc095f3ab1f0389bbae665268c751"], + ["data_batch_4", "634d18415352ddfa80567beed471001a"], + ["data_batch_5", "482c414d41f54cd18b22e5b47cb7c3cb"], ] test_list = [ - ['test_batch', '40351d587109b95175f43aff81a1287e'], + ["test_batch", "40351d587109b95175f43aff81a1287e"], ] meta = { - 'filename': 'batches.meta', - 'key': 'label_names', - 'md5': '5ff9c542aee3614f3951f8cda6e48888', + "filename": "batches.meta", + "key": "label_names", + "md5": "5ff9c542aee3614f3951f8cda6e48888", } - def __init__(self, - root, - train=False, - transform=None, - filter=None, - download=True): # pragma: no cover + def __init__(self, root, train=False, transform=None, filter=None, download=True): # pragma: no cover """Initialize the attributes of class.""" self.root = root if download: self.download() if not self._check_integrity(): - raise RuntimeError( - 'Dataset not found or corrupted. You can use download=True to download it') + raise RuntimeError("Dataset not found or corrupted. You can use download=True to download it") if train: downloaded_list = self.train_list else: @@ -421,13 +427,13 @@ def __init__(self, self.targets = [] for file_name, checksum in downloaded_list: file_path = os.path.join(self.root, file_name) - with open(file_path, 'rb') as f: - entry = pickle.load(f, encoding='latin1') - self.data.append(entry['data']) - if 'labels' in entry: - self.targets.extend(entry['labels']) + with open(file_path, "rb") as f: + entry = pickle.load(f, encoding="latin1") + self.data.append(entry["data"]) + if "labels" in entry: + self.targets.extend(entry["labels"]) else: - self.targets.extend(entry['fine_labels']) + self.targets.extend(entry["fine_labels"]) self.data = np.vstack(self.data).reshape(-1, 3, 32, 32) self.data = self.data.transpose((0, 2, 3, 1)) # convert to HWC @@ -436,13 +442,14 @@ def __init__(self, def load_meta(self): # pragma: no cover """Load meta.""" - path = os.path.join(self.root, self.meta['filename']) - if not check_integrity(path, self.meta['md5']): - raise RuntimeError('Dataset metadata file not found or corrupted.' + - ' You can use download=True to download it') - with open(path, 'rb') as infile: - data = pickle.load(infile, encoding='latin1') - self.classes = data[self.meta['key']] + path = os.path.join(self.root, self.meta["filename"]) + if not check_integrity(path, self.meta["md5"]): + raise RuntimeError( + "Dataset metadata file not found or corrupted." 
+ " You can use download=True to download it" + ) + with open(path, "rb") as infile: + data = pickle.load(infile, encoding="latin1") + self.classes = data[self.meta["key"]] self.class_to_idx = {_class: i for i, _class in enumerate(self.classes)} def __getitem__(self, index): # pragma: no cover @@ -462,20 +469,20 @@ def __len__(self): # pragma: no cover def download(self): # pragma: no cover """Download a file.""" if self._check_integrity(): - print('Files already downloaded and verified') + print("Files already downloaded and verified") return download_root = os.path.expanduser(self.root) filename = os.path.basename(self.url) download_url(self.url, download_root, filename, self.tgz_md5) archive = os.path.join(download_root, filename) print("Extracting {} to {}".format(archive, download_root)) - with tarfile.open(archive, 'r:gz') as tar: + with tarfile.open(archive, "r:gz") as tar: tar.extractall(path=download_root) def _check_integrity(self): # pragma: no cover """Check MD5 checksum.""" root = self.root - for fentry in (self.train_list + self.test_list): + for fentry in self.train_list + self.test_list: filename, md5 = fentry[0], fentry[1] fpath = os.path.join(root, filename) if not check_integrity(fpath, md5): @@ -483,7 +490,7 @@ def _check_integrity(self): # pragma: no cover return True -@dataset_registry(dataset_type="CIFAR10", framework="pytorch", dataset_format='') +@dataset_registry(dataset_type="CIFAR10", framework="pytorch", dataset_format="") class PytorchCIFAR10(CIFAR10): """The PyTorch datasets for CIFAR10.""" @@ -499,7 +506,7 @@ def __getitem__(self, index): # pragma: no cover return (image, label) -@dataset_registry(dataset_type="CIFAR10", framework="mxnet", dataset_format='') +@dataset_registry(dataset_type="CIFAR10", framework="mxnet", dataset_format="") class MXNetCIFAR10(CIFAR10): """The MXNet datasets for CIFAR10.""" @@ -515,7 +522,7 @@ def __getitem__(self, index): # pragma: no cover return (image, label) -@dataset_registry(dataset_type="CIFAR10", framework="tensorflow, tensorflow_itex", dataset_format='') +@dataset_registry(dataset_type="CIFAR10", framework="tensorflow, tensorflow_itex", dataset_format="") class TensorflowCIFAR10(CIFAR10): """The Tensorflow datasets for CIFAR10.""" @@ -527,16 +534,20 @@ def __getitem__(self, index): # pragma: no cover image, label = self.data[index], self.targets[index] if self.transform is not None: image, label = self.transform((image, label)) - if type(image).__name__ == 'Tensor': + if type(image).__name__ == "Tensor": with tf.compat.v1.Session() as sess: image = sess.run(image) - elif type(image).__name__ == 'EagerTensor': + elif type(image).__name__ == "EagerTensor": image = image.numpy() return (image, label) -@dataset_registry(dataset_type="CIFAR100", framework="onnxrt_qlinearops, \ - onnxrt_integerops", dataset_format='') +@dataset_registry( + dataset_type="CIFAR100", + framework="onnxrt_qlinearops, \ + onnxrt_integerops", + dataset_format="", +) class CIFAR100(CIFAR10): """CIFAR100 database. 
@@ -559,21 +570,21 @@ class CIFAR100(CIFAR10): url = "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz" filename = "cifar-100-python.tar.gz" - tgz_md5 = 'eb9058c3a382ffc7106e4002c42a8d85' + tgz_md5 = "eb9058c3a382ffc7106e4002c42a8d85" train_list = [ - ['train', '16019d7e3df5f24257cddd939b257f8d'], + ["train", "16019d7e3df5f24257cddd939b257f8d"], ] test_list = [ - ['test', 'f0ef6b0ae62326f3e7ffdfab6717acfc'], + ["test", "f0ef6b0ae62326f3e7ffdfab6717acfc"], ] meta = { - 'filename': 'meta', - 'key': 'fine_label_names', - 'md5': '7973b15100ade9c7d40fb424638fde48', + "filename": "meta", + "key": "fine_label_names", + "md5": "7973b15100ade9c7d40fb424638fde48", } -@dataset_registry(dataset_type="CIFAR100", framework="pytorch", dataset_format='') +@dataset_registry(dataset_type="CIFAR100", framework="pytorch", dataset_format="") class PytorchCIFAR100(CIFAR100): """The PyTorch datasets for CIFAR100.""" @@ -590,7 +601,7 @@ def __getitem__(self, index): # pragma: no cover return (image, label) -@dataset_registry(dataset_type="CIFAR100", framework="mxnet", dataset_format='') +@dataset_registry(dataset_type="CIFAR100", framework="mxnet", dataset_format="") class MXNetCIFAR100(CIFAR100): """The MXNet datasets for CIFAR100.""" @@ -606,11 +617,11 @@ def __getitem__(self, index): # pragma: no cover return (image, label) -@dataset_registry(dataset_type="CIFAR100", framework="tensorflow, tensorflow_itex", dataset_format='') +@dataset_registry(dataset_type="CIFAR100", framework="tensorflow, tensorflow_itex", dataset_format="") class TensorflowCIFAR100(CIFAR100): """The Tensorflow datasets for CIFAR100.""" - def __getitem__(self, index): # pragma: no cover + def __getitem__(self, index): # pragma: no cover """Magic method. x[i] is roughly equivalent to type(x).__getitem__(x, index) @@ -618,15 +629,20 @@ def __getitem__(self, index): # pragma: no cover image, label = self.data[index], self.targets[index] if self.transform is not None: image, label = self.transform((image, label)) - if type(image).__name__ == 'Tensor': + if type(image).__name__ == "Tensor": with tf.compat.v1.Session() as sess: image = sess.run(image) - elif type(image).__name__ == 'EagerTensor': + elif type(image).__name__ == "EagerTensor": image = image.numpy() return (image, label) -@dataset_registry(dataset_type="MNIST", framework="onnxrt_qlinearops, \ - onnxrt_integerops", dataset_format='') + +@dataset_registry( + dataset_type="MNIST", + framework="onnxrt_qlinearops, \ + onnxrt_integerops", + dataset_format="", +) class MNIST(Dataset): """Modified National Institute of Standards and Technology database and FashionMNIST database. @@ -649,11 +665,20 @@ class MNIST(Dataset): downloaded, it is not downloaded again. 
""" - classes = ['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four', - '5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine'] + classes = [ + "0 - zero", + "1 - one", + "2 - two", + "3 - three", + "4 - four", + "5 - five", + "6 - six", + "7 - seven", + "8 - eight", + "9 - nine", + ] resource = [ - ('https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz', - '8a61469f7ea1b51cbae51d4f78837e45') + ("https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz", "8a61469f7ea1b51cbae51d4f78837e45") ] def __init__(self, root, train=False, transform=None, filter=None, download=True): @@ -671,13 +696,12 @@ def read_data(self): for file_name, checksum in self.resource: file_path = os.path.join(self.root, os.path.basename(file_name)) if not os.path.exists(file_path): - raise RuntimeError( - 'Dataset not found. You can use download=True to download it') + raise RuntimeError("Dataset not found. You can use download=True to download it") with np.load(file_path, allow_pickle=True) as f: if self.train: - self.data, self.targets = f['x_train'], f['y_train'] + self.data, self.targets = f["x_train"], f["y_train"] else: - self.data, self.targets = f['x_test'], f['y_test'] + self.data, self.targets = f["x_test"], f["y_test"] def __len__(self): """Length of the dataset.""" @@ -706,11 +730,10 @@ def download(self): if os.path.exists(os.path.join(self.root, filename)): continue else: - download_url(url, root=self.root, - filename=filename, md5=md5) + download_url(url, root=self.root, filename=filename, md5=md5) -@dataset_registry(dataset_type="MNIST", framework="pytorch", dataset_format='') +@dataset_registry(dataset_type="MNIST", framework="pytorch", dataset_format="") class PytorchMNIST(MNIST): """The PyTorch datasets for MNIST.""" @@ -720,14 +743,14 @@ def __getitem__(self, index): x[i] is roughly equivalent to type(x).__getitem__(x, index) """ image, label = self.data[index], int(self.targets[index]) - image = Image.fromarray(image, mode='L') + image = Image.fromarray(image, mode="L") if self.transform is not None: image, label = self.transform((image, label)) image = np.array(image) return (image, label) -@dataset_registry(dataset_type="MNIST", framework="mxnet", dataset_format='') +@dataset_registry(dataset_type="MNIST", framework="mxnet", dataset_format="") class MXNetMNIST(MNIST): """The MXNet datasets for MNIST.""" @@ -744,7 +767,7 @@ def __getitem__(self, index): return (image, label) -@dataset_registry(dataset_type="MNIST", framework="tensorflow, tensorflow_itex", dataset_format='') +@dataset_registry(dataset_type="MNIST", framework="tensorflow, tensorflow_itex", dataset_format="") class TensorflowMNIST(MNIST): """The Tensorflow datasets for MNIST.""" @@ -757,16 +780,20 @@ def __getitem__(self, index): image = np.expand_dims(image, -1) if self.transform is not None: image, label = self.transform((image, label)) - if type(image).__name__ == 'Tensor': + if type(image).__name__ == "Tensor": with tf.compat.v1.Session() as sess: image = sess.run(image) - elif type(image).__name__ == 'EagerTensor': + elif type(image).__name__ == "EagerTensor": image = image.numpy() return (image, label) -@dataset_registry(dataset_type="FashionMNIST", framework="onnxrt_qlinearops, \ - onnxrt_integerops", dataset_format='') +@dataset_registry( + dataset_type="FashionMNIST", + framework="onnxrt_qlinearops, \ + onnxrt_integerops", + dataset_format="", +) class FashionMNIST(MNIST): """FashionMNIST database. 
@@ -788,35 +815,37 @@ class FashionMNIST(MNIST): """ resource = [ - ('https://storage.googleapis.com/tensorflow/tf-keras-datasets/' + file_name, None) + ("https://storage.googleapis.com/tensorflow/tf-keras-datasets/" + file_name, None) for file_name in [ - 'train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz', - 't10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz' - ] + "train-labels-idx1-ubyte.gz", + "train-images-idx3-ubyte.gz", + "t10k-labels-idx1-ubyte.gz", + "t10k-images-idx3-ubyte.gz", + ] ] - classes = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', - 'Shirt', 'Sneaker', 'Bag', 'Ankle boot'] + classes = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat", "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"] def read_data(self): """Read data from a file.""" import struct + if self.train: - label_path = os.path.join(self.root, 'train-labels-idx1-ubyte.gz') - image_path = os.path.join(self.root, 'train-images-idx3-ubyte.gz') + label_path = os.path.join(self.root, "train-labels-idx1-ubyte.gz") + image_path = os.path.join(self.root, "train-images-idx3-ubyte.gz") else: - label_path = os.path.join(self.root, 't10k-labels-idx1-ubyte.gz') - image_path = os.path.join(self.root, 't10k-images-idx3-ubyte.gz') - with gzip.open(label_path, 'rb') as f: + label_path = os.path.join(self.root, "t10k-labels-idx1-ubyte.gz") + image_path = os.path.join(self.root, "t10k-images-idx3-ubyte.gz") + with gzip.open(label_path, "rb") as f: struct.unpack(">II", f.read(8)) self.targets = np.frombuffer(f.read(), dtype=np.uint8).astype(np.int32) - with gzip.open(image_path, 'rb') as f: + with gzip.open(image_path, "rb") as f: struct.unpack(">IIII", f.read(16)) data = np.frombuffer(f.read(), dtype=np.uint8) self.data = data.reshape(len(self.targets), 28, 28) -@dataset_registry(dataset_type="FashionMNIST", framework="pytorch", dataset_format='') +@dataset_registry(dataset_type="FashionMNIST", framework="pytorch", dataset_format="") class PytorchFashionMNIST(FashionMNIST): """The PyTorch datasets for FashionMNIST.""" @@ -826,14 +855,14 @@ def __getitem__(self, index): x[i] is roughly equivalent to type(x).__getitem__(x, index) """ image, label = self.data[index], int(self.targets[index]) - image = Image.fromarray(image, mode='L') + image = Image.fromarray(image, mode="L") if self.transform is not None: image, label = self.transform((image, label)) image = np.array(image) return (image, label) -@dataset_registry(dataset_type="FashionMNIST", framework="mxnet", dataset_format='') +@dataset_registry(dataset_type="FashionMNIST", framework="mxnet", dataset_format="") class MXNetFashionMNIST(FashionMNIST): """The MXNet Dataset for FashionMNIST.""" @@ -850,7 +879,7 @@ def __getitem__(self, index): return (image, label) -@dataset_registry(dataset_type="FashionMNIST", framework="tensorflow, tensorflow_itex", dataset_format='') +@dataset_registry(dataset_type="FashionMNIST", framework="tensorflow, tensorflow_itex", dataset_format="") class TensorflowFashionMNIST(FashionMNIST): """The Tensorflow Dataset for FashionMNIST.""" @@ -863,16 +892,20 @@ def __getitem__(self, index): image = np.expand_dims(image, -1) if self.transform is not None: image, label = self.transform((image, label)) - if type(image).__name__ == 'Tensor': + if type(image).__name__ == "Tensor": with tf.compat.v1.Session() as sess: image = sess.run(image) - elif type(image).__name__ == 'EagerTensor': + elif type(image).__name__ == "EagerTensor": image = image.numpy() return (image, label) -@dataset_registry(dataset_type="ImageFolder", 
framework="onnxrt_qlinearops, \ - onnxrt_integerops", dataset_format='') +@dataset_registry( + dataset_type="ImageFolder", + framework="onnxrt_qlinearops, \ + onnxrt_integerops", + dataset_format="", +) class ImageFolder(Dataset): """The base class for ImageFolder. @@ -902,10 +935,10 @@ def __init__(self, root, transform=None, filter=None): self.transform = transform self.image_list = [] - files = glob.glob(os.path.join(self.root, '*')) + files = glob.glob(os.path.join(self.root, "*")) files.sort() for idx, file in enumerate(files): - imgs = glob.glob(os.path.join(file, '*')) + imgs = glob.glob(os.path.join(file, "*")) imgs.sort() for img in imgs: self.image_list.append((img, idx)) @@ -928,7 +961,7 @@ def __getitem__(self, index): return (image, label) -@dataset_registry(dataset_type="ImageFolder", framework="mxnet", dataset_format='') +@dataset_registry(dataset_type="ImageFolder", framework="mxnet", dataset_format="") class MXNetImageFolder(ImageFolder): """The MXNet Dataset for image folder. @@ -964,7 +997,7 @@ def __getitem__(self, index): return (image, label) -@dataset_registry(dataset_type="ImageFolder", framework="tensorflow, tensorflow_itex", dataset_format='') +@dataset_registry(dataset_type="ImageFolder", framework="tensorflow, tensorflow_itex", dataset_format="") class TensorflowImageFolder(ImageFolder): """The Tensorflow Dataset for image folder. @@ -995,20 +1028,20 @@ def __getitem__(self, index): sample = self.image_list[index] label = sample[1] with Image.open(sample[0]) as image: - if image.mode != 'RGB': - image = image.convert('RGB') + if image.mode != "RGB": + image = image.convert("RGB") image = np.array(image) if self.transform is not None: image, label = self.transform((image, label)) - if type(image).__name__ == 'Tensor': + if type(image).__name__ == "Tensor": with tf.compat.v1.Session() as sess: image = sess.run(image) - elif type(image).__name__ == 'EagerTensor': + elif type(image).__name__ == "EagerTensor": image = image.numpy() return (image, label) -@dataset_registry(dataset_type="TFRecordDataset", framework="tensorflow, tensorflow_itex", dataset_format='') +@dataset_registry(dataset_type="TFRecordDataset", framework="tensorflow, tensorflow_itex", dataset_format="") class TensorflowTFRecordDataset(IterableDataset): """The Tensorflow TFRecord Dataset. @@ -1025,17 +1058,17 @@ def __new__(cls, root, transform=None, filter=None): # pylint: disable=no-name-in-module from tensorflow.python.data.experimental import parallel_interleave from tensorflow.python.platform import gfile + file_names = gfile.Glob(root) ds = tf.data.Dataset.from_tensor_slices(file_names) - ds = ds.apply(parallel_interleave( - tf.data.TFRecordDataset, cycle_length=len(file_names))) + ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=len(file_names))) if transform is not None: ds = ds.map(transform, num_parallel_calls=None) ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned return ds -@dataset_registry(dataset_type="ImageRecord", framework="tensorflow, tensorflow_itex", dataset_format='') +@dataset_registry(dataset_type="ImageRecord", framework="tensorflow, tensorflow_itex", dataset_format="") class TensorflowImageRecord(IterableDataset): """Tensorflow imageNet database in tf record format. 
@@ -1053,20 +1086,23 @@ class TensorflowImageRecord(IterableDataset): """ """Configuration for Imagenet dataset.""" + def __new__(cls, root, transform=None, filter=None): """Build a new object of TensorflowImageRecord class.""" from tensorflow.python.platform import gfile # pylint: disable=no-name-in-module - glob_pattern = os.path.join(root, '*-*-of-*') + + glob_pattern = os.path.join(root, "*-*-of-*") file_names = gfile.Glob(glob_pattern) if not file_names: - raise ValueError('Found no files in --root matching: {}'.format(glob_pattern)) + raise ValueError("Found no files in --root matching: {}".format(glob_pattern)) # pylint: disable=no-name-in-module from tensorflow.python.data.experimental import parallel_interleave + from neural_compressor.experimental.data.transforms.imagenet_transform import ParseDecodeImagenet + ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False) - ds = ds.apply(parallel_interleave( - tf.data.TFRecordDataset, cycle_length=len(file_names))) + ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=len(file_names))) if transform is not None: transform.transform_list.insert(0, ParseDecodeImagenet()) @@ -1077,7 +1113,7 @@ def __new__(cls, root, transform=None, filter=None): return ds -@dataset_registry(dataset_type="VOCRecord", framework="tensorflow, tensorflow_itex", dataset_format='') +@dataset_registry(dataset_type="VOCRecord", framework="tensorflow, tensorflow_itex", dataset_format="") class TensorflowVOCRecord(IterableDataset): """The Tensorflow PASCAL VOC 2012 database in tf record format. @@ -1097,16 +1133,17 @@ class TensorflowVOCRecord(IterableDataset): def __new__(cls, root, transform=None, filter=None): """Build a new object of TensorflowVOCRecord class.""" from tensorflow.python.platform import gfile # pylint: disable=no-name-in-module - glob_pattern = os.path.join(root, '%s-*' % 'val') + + glob_pattern = os.path.join(root, "%s-*" % "val") file_names = gfile.Glob(glob_pattern) if not file_names: - raise ValueError('Found no files in --root matching: {}'.format(glob_pattern)) + raise ValueError("Found no files in --root matching: {}".format(glob_pattern)) # pylint: disable=no-name-in-module from tensorflow.python.data.experimental import parallel_interleave + ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False) - ds = ds.apply(parallel_interleave( - tf.data.TFRecordDataset, cycle_length=len(file_names))) + ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=len(file_names))) if transform is not None: ds = ds.map(transform, num_parallel_calls=None) diff --git a/neural_compressor/experimental/data/datasets/dummy_dataset.py b/neural_compressor/experimental/data/datasets/dummy_dataset.py index 5a9b7113957..8428dd56453 100644 --- a/neural_compressor/experimental/data/datasets/dummy_dataset.py +++ b/neural_compressor/experimental/data/datasets/dummy_dataset.py @@ -17,21 +17,28 @@ # ============================================================================== """Dummy dataset for dummy data generation on multiple framework backends.""" -from .dataset import dataset_registry, Dataset +import logging + import numpy as np + from neural_compressor.utils.utility import LazyImport -import logging -mx = LazyImport('mxnet') -torch = LazyImport('torch') +from .dataset import Dataset, dataset_registry + +mx = LazyImport("mxnet") +torch = LazyImport("torch") logger = logging.getLogger("neural_compressor") -@dataset_registry(dataset_type="dummy", framework="tensorflow, tensorflow_itex, \ + +@dataset_registry( + 
dataset_type="dummy", + framework="tensorflow, tensorflow_itex, \ onnxrt_qlinearops, onnxrt_integerops, \ pytorch, pytorch_ipex, pytorch_fx, \ mxnet", - dataset_format='') + dataset_format="", +) class DummyDataset(Dataset): """Dataset used for dummy data generation. @@ -40,8 +47,7 @@ class DummyDataset(Dataset): (TODO) construct dummy data from real dataset or iteration of data. """ - def __init__(self, shape, low=-128., high=127., dtype='float32', label=True, \ - transform=None, filter=None): + def __init__(self, shape, low=-128.0, high=127.0, dtype="float32", label=True, transform=None, filter=None): """Initialize `DummyDataset` class. Args: @@ -59,37 +65,47 @@ def __init__(self, shape, low=-128., high=127., dtype='float32', label=True, \ If transform is not None, it will ignore it. filter (Filter objects, default=None): Filter out examples according to specific conditions. """ - dtype_map = {'float32':np.float32, 'float16':np.float16, 'uint8':np.uint8, \ - 'int8': np.int8, 'int32':np.int32, 'int64':np.int64, 'bool':bool,\ - 'string': str} + dtype_map = { + "float32": np.float32, + "float16": np.float16, + "uint8": np.uint8, + "int8": np.int8, + "int32": np.int32, + "int64": np.int64, + "bool": bool, + "string": str, + } np.random.seed(9527) self.transform = transform self.label = label - if len(shape)==0: + if len(shape) == 0: logger.info("No data in the dummy dataset.") elif isinstance(shape, list): # list tensor should same first demension n n = shape[0][0] - assert all(isinstance(elem, tuple) and elem[0] == n for elem in shape), \ - 'each tensor shape should be tuple and same fisrt demension' + assert all( + isinstance(elem, tuple) and elem[0] == n for elem in shape + ), "each tensor shape should be tuple and same fisrt demension" if isinstance(low, list): - assert len(low) == len(shape) and all(isinstance(elem, float) for elem in low), \ - 'low list should have same length with shape with element data type float' + assert len(low) == len(shape) and all( + isinstance(elem, float) for elem in low + ), "low list should have same length with shape with element data type float" else: low = (low * np.ones(len(shape))).astype(float) if isinstance(high, list): - assert len(high) == len(shape) and all(isinstance(elem, float) for elem in high), \ - 'high list should have same length with shape with element data type float' + assert len(high) == len(shape) and all( + isinstance(elem, float) for elem in high + ), "high list should have same length with shape with element data type float" else: high = (high * np.ones(len(shape))).astype(float) if isinstance(dtype, list): - assert len(dtype) == len(shape) and \ - all(elem in dtype_map.keys() for elem in dtype), \ - 'high list should have same length with shape with element data type float' + assert len(dtype) == len(shape) and all( + elem in dtype_map.keys() for elem in dtype + ), "high list should have same length with shape with element data type float" else: dtype = [dtype for i in range(0, len(shape))] @@ -98,22 +114,24 @@ def __init__(self, shape, low=-128., high=127., dtype='float32', label=True, \ if isinstance(low, float): low = [low] else: - assert isinstance(low, list) and len(low) == 1 and isinstance(low[0], float), \ - 'low should be float or list of float with length 1' + assert ( + isinstance(low, list) and len(low) == 1 and isinstance(low[0], float) + ), "low should be float or list of float with length 1" if isinstance(high, float): high = [high] else: - assert isinstance(high, list) and len(high) == 1 and isinstance(high[0], 
float), \ - 'high should be float or list of float with length 1' + assert ( + isinstance(high, list) and len(high) == 1 and isinstance(high[0], float) + ), "high should be float or list of float with length 1" if isinstance(dtype, str): - assert dtype in dtype_map.keys(), 'dtype only support {}'.format(dtype_map.keys()) + assert dtype in dtype_map.keys(), "dtype only support {}".format(dtype_map.keys()) dtype = [dtype] else: - assert isinstance(dtype, list) and \ - len(dtype) == 1 and dtype[0] in dtype_map.keys(), \ - 'dtype should be str or list of str in supported dtypes' + assert ( + isinstance(dtype, list) and len(dtype) == 1 and dtype[0] in dtype_map.keys() + ), "dtype should be str or list of str in supported dtypes" self.dataset = [] for idx in range(0, len(shape)): @@ -126,7 +144,6 @@ def __init__(self, shape, low=-128., high=127., dtype='float32', label=True, \ else: self.dataset = [elem for elem in zip(*self.dataset)] - def __len__(self): """Return the length of dataset.""" return len(self.dataset) diff --git a/neural_compressor/experimental/data/datasets/dummy_dataset_v2.py b/neural_compressor/experimental/data/datasets/dummy_dataset_v2.py index fadf729fcbe..9323c123ccb 100644 --- a/neural_compressor/experimental/data/datasets/dummy_dataset_v2.py +++ b/neural_compressor/experimental/data/datasets/dummy_dataset_v2.py @@ -18,18 +18,25 @@ """Dummy dataset for dummy_v2/sparse_dummy_v2 data generation on multiple framework backends.""" import sys -from .dataset import dataset_registry, IterableDataset +from functools import reduce + import numpy as np + from neural_compressor.utils.utility import LazyImport -from functools import reduce -mx = LazyImport('mxnet') -torch = LazyImport('torch') +from .dataset import IterableDataset, dataset_registry + +mx = LazyImport("mxnet") +torch = LazyImport("torch") -@dataset_registry(dataset_type="dummy_v2", framework="tensorflow, tensorflow_itex, \ + +@dataset_registry( + dataset_type="dummy_v2", + framework="tensorflow, tensorflow_itex, \ onnxrt_qlinearops, onnxrt_integerops, \ pytorch, pytorch_ipex, pytorch_fx, mxnet", - dataset_format='') + dataset_format="", +) class DummyDataset(IterableDataset): """Dataset used for dummy_v2 data generation. @@ -37,8 +44,9 @@ class DummyDataset(IterableDataset): The value range is calculated from: low * stand_normal(0, 1) + high. """ - def __init__(self, input_shape, label_shape=None, low=-128., high=127., \ - dtype='float32', transform=None, filter=None): + def __init__( + self, input_shape, label_shape=None, low=-128.0, high=127.0, dtype="float32", transform=None, filter=None + ): """Initialize `DummyDataset` class. Args: @@ -61,8 +69,15 @@ def __init__(self, input_shape, label_shape=None, low=-128., high=127., \ If transform is not None, it will ignore it. filter (Filter objects, default=None): Filter out examples according to specific conditions. 
""" - self.dtype_map = {'float32':np.float32, 'float16':np.float16, 'uint8':np.uint8, \ - 'int8':np.int8, 'int32':np.int32, 'int64':np.int64, 'bool':bool} + self.dtype_map = { + "float32": np.float32, + "float16": np.float16, + "uint8": np.uint8, + "int8": np.int8, + "int32": np.int32, + "int64": np.int64, + "bool": bool, + } np.random.seed(9527) self.transform = transform @@ -83,23 +98,23 @@ def __init__(self, input_shape, label_shape=None, low=-128., high=127., \ self.total_dim = self.input_dim + self.label_dim if isinstance(high, list): - assert len(high) == self.total_dim and \ - all(isinstance(elem, float) for elem in high),\ - 'high value list length should same with label dim + input_dim' + assert len(high) == self.total_dim and all( + isinstance(elem, float) for elem in high + ), "high value list length should same with label dim + input_dim" else: self.high = (high * np.ones(self.total_dim)).astype(np.float32) if isinstance(low, list): - assert len(low) == self.total_dim and \ - all(isinstance(elem, float) for elem in low), \ - 'low value list length should same with label dim + input_dim' + assert len(low) == self.total_dim and all( + isinstance(elem, float) for elem in low + ), "low value list length should same with label dim + input_dim" else: self.low = (low * np.ones(self.total_dim)).astype(np.float32) if isinstance(dtype, list): - assert len(dtype) == self.total_dim and \ - all(elem in self.dtype_map.keys() for elem in dtype), \ - 'dtype list length should same with label dim + input_dim' + assert len(dtype) == self.total_dim and all( + elem in self.dtype_map.keys() for elem in dtype + ), "dtype list length should same with label dim + input_dim" else: self.dtype = [self.dtype for i in range(0, self.total_dim)] @@ -114,39 +129,42 @@ def __iter__(self): while True: input_data = [] for idx in range(0, self.input_dim): - tensor = np.random.uniform(\ - low=self.low[idx], high=self.high[idx], size=self.input_shape[idx]) + tensor = np.random.uniform(low=self.low[idx], high=self.high[idx], size=self.input_shape[idx]) tensor = tensor.astype(self.dtype_map[self.dtype[idx]]) input_data.append(tensor) label = [] for idx in range(0, self.label_dim): - shift_idx = self.input_dim + idx - tensor = np.random.uniform(low=self.low[shift_idx], - high=self.high[shift_idx], - size=self.label_shape[idx]) + shift_idx = self.input_dim + idx + tensor = np.random.uniform( + low=self.low[shift_idx], high=self.high[shift_idx], size=self.label_shape[idx] + ) tensor = tensor.astype(self.dtype_map[self.dtype[shift_idx]]) label.append(tensor) if len(input_data) == 1: - input_data = input_data[0] + input_data = input_data[0] if len(label) == 1: - label = label[0] + label = label[0] if len(label) > 0: yield input_data, label else: yield input_data - + def __len__(self): """Return the length of dataset.""" return sys.maxsize -@dataset_registry(dataset_type="sparse_dummy_v2", framework="tensorflow, tensorflow_itex, \ + +@dataset_registry( + dataset_type="sparse_dummy_v2", + framework="tensorflow, tensorflow_itex, \ onnxrt_qlinearops, onnxrt_integerops, \ pytorch, pytorch_ipex, pytorch_fx, mxnet", - dataset_format='') + dataset_format="", +) class SparseDummyDataset(IterableDataset): """Dataset used for sparse_dummy_v2 data generation. @@ -154,8 +172,17 @@ class SparseDummyDataset(IterableDataset): The value range is calculated from: low * stand_normal(0, 1) + high. 
""" - def __init__(self, dense_shape, label_shape=None, sparse_ratio=0.5, low=-128., high=127., \ - dtype='float32', transform=None, filter=None): + def __init__( + self, + dense_shape, + label_shape=None, + sparse_ratio=0.5, + low=-128.0, + high=127.0, + dtype="float32", + transform=None, + filter=None, + ): """Initialize `SparseDummyDataset` class. Args: @@ -178,8 +205,15 @@ def __init__(self, dense_shape, label_shape=None, sparse_ratio=0.5, low=-128., h If transform is not None, it will ignore it. filter (Filter objects, default=None): Filter out examples according to specific conditions. """ - self.dtype_map = {'float32':np.float32, 'float16':np.float16, 'uint8':np.uint8, \ - 'int8':np.int8, 'int32':np.int32, 'int64':np.int64, 'bool':bool} + self.dtype_map = { + "float32": np.float32, + "float16": np.float16, + "uint8": np.uint8, + "int8": np.int8, + "int32": np.int32, + "int64": np.int64, + "bool": bool, + } np.random.seed(9527) self.transform = transform @@ -200,39 +234,40 @@ def __init__(self, dense_shape, label_shape=None, sparse_ratio=0.5, low=-128., h self.label_shape = [label_shape] if len(self.label_shape) == 1 and len(self.label_shape) != len(self.dense_shape): self.label_shape = len(self.dense_shape) * self.label_shape - assert len(self.label_shape) == len(self.dense_shape), \ - 'length of dense_shape should be euqal to length of label_shape' + assert len(self.label_shape) == len( + self.dense_shape + ), "length of dense_shape should be euqal to length of label_shape" self.label_dim = len(self.label_shape) self.input_dim = 1 if isinstance(dense_shape, tuple) else len(dense_shape) self.total_dim = self.input_dim + self.label_dim if isinstance(sparse_ratio, list): - assert len(sparse_ratio) == self.input_dim and \ - all(isinstance(elem, float) for elem in sparse_ratio),\ - 'sparse_ratio list length should same with input_dim' + assert len(sparse_ratio) == self.input_dim and all( + isinstance(elem, float) for elem in sparse_ratio + ), "sparse_ratio list length should same with input_dim" else: self.sparse_ratio = (sparse_ratio * np.ones(self.input_dim)).astype(np.float32) - assert all([0 <= i <= 1 for i in self.sparse_ratio]), 'sparse_ratio should be in [0,1]' + assert all([0 <= i <= 1 for i in self.sparse_ratio]), "sparse_ratio should be in [0,1]" if isinstance(high, list): - assert len(high) == self.total_dim and \ - all(isinstance(elem, float) for elem in high),\ - 'high value list length should same with label dim + input_dim' + assert len(high) == self.total_dim and all( + isinstance(elem, float) for elem in high + ), "high value list length should same with label dim + input_dim" else: self.high = (high * np.ones(self.total_dim)).astype(np.float32) if isinstance(low, list): - assert len(low) == self.total_dim and \ - all(isinstance(elem, float) for elem in low), \ - 'low value list length should same with label dim + input_dim' + assert len(low) == self.total_dim and all( + isinstance(elem, float) for elem in low + ), "low value list length should same with label dim + input_dim" else: self.low = (low * np.ones(self.total_dim)).astype(np.float32) if isinstance(dtype, list): - assert len(dtype) == self.total_dim and \ - all(elem in self.dtype_map.keys() for elem in dtype), \ - 'dtype list length should same with label dim + input_dim' + assert len(dtype) == self.total_dim and all( + elem in self.dtype_map.keys() for elem in dtype + ), "dtype list length should same with label dim + input_dim" else: self.dtype = [self.dtype for i in range(0, self.total_dim)] @@ -242,34 
+277,32 @@ def __iter__(self): input_data = [] for idx, shape in enumerate(self.dense_shape): dim = len(shape) - total = reduce(lambda x, y: x*y, shape) + total = reduce(lambda x, y: x * y, shape) sparse_num = round(total * (1 - self.sparse_ratio[idx])) - val = np.random.uniform(\ - low=self.low[idx], high=self.high[idx], size=sparse_num) + val = np.random.uniform(low=self.low[idx], high=self.high[idx], size=sparse_num) val = val.astype(self.dtype_map[self.dtype[idx]]) nums = np.arange(sparse_num) indices = [] - dim_shape = [reduce(lambda x, y: x*y, shape[i:])/shape[i] \ - for i in range(len(shape))] + dim_shape = [reduce(lambda x, y: x * y, shape[i:]) / shape[i] for i in range(len(shape))] for num in nums: indice = [] for item in dim_shape: - indice.append(num//item) + indice.append(num // item) num = num - indice[-1] * item if num - indice[-1] * item > 0 else num indices.append(indice) if self.label_dim > 0: - shift_idx = self.input_dim + idx - tensor = np.random.uniform(low=self.low[shift_idx], - high=self.high[shift_idx], - size=self.label_shape[idx]) + shift_idx = self.input_dim + idx + tensor = np.random.uniform( + low=self.low[shift_idx], high=self.high[shift_idx], size=self.label_shape[idx] + ) tensor = tensor.astype(self.dtype_map[self.dtype[shift_idx]]) input_data.append([(np.array(indices), val), tensor]) else: input_data.append((np.array(indices), val)) yield input_data - + def __len__(self): """Return the length of dataset.""" return sys.maxsize diff --git a/neural_compressor/experimental/data/datasets/imagenet_dataset.py b/neural_compressor/experimental/data/datasets/imagenet_dataset.py index 8d5c52ee528..3a47e4e9506 100644 --- a/neural_compressor/experimental/data/datasets/imagenet_dataset.py +++ b/neural_compressor/experimental/data/datasets/imagenet_dataset.py @@ -33,26 +33,35 @@ import os import re + import numpy as np from PIL import Image -from neural_compressor.utils.utility import LazyImport + from neural_compressor.utils import logger -from .dataset import dataset_registry, IterableDataset, Dataset -tf = LazyImport('tensorflow') -mx = LazyImport('mxnet') -torch = LazyImport('torch') +from neural_compressor.utils.utility import LazyImport + +from .dataset import Dataset, IterableDataset, dataset_registry -@dataset_registry(dataset_type="ImagenetRaw", framework="onnxrt_qlinearops, \ - onnxrt_integerops", dataset_format='') +tf = LazyImport("tensorflow") +mx = LazyImport("mxnet") +torch = LazyImport("torch") + + +@dataset_registry( + dataset_type="ImagenetRaw", + framework="onnxrt_qlinearops, \ + onnxrt_integerops", + dataset_format="", +) class ImagenetRaw(Dataset): """Configuration for ImageNet raw dataset. - Please arrange data in this way: - data_path/img1.jpg - data_path/img2.jpg - ... - data_path/imgx.jpg - dataset will read name and label of each image from image_list file, + Please arrange data in this way: + data_path/img1.jpg + data_path/img2.jpg + ... + data_path/imgx.jpg + dataset will read name and label of each image from image_list file, if user set image_list to None, it will read from data_path/val_map.txt automatically. 
""" @@ -74,7 +83,7 @@ def __init__(self, data_path, image_list, transform=None, filter=None): # by default look for val.txt image_list = os.path.join(data_path, "val.txt") - with open(image_list, 'r') as f: + with open(image_list, "r") as f: for s in f: image_name, label = re.split(r"\s+", s.strip()) src = os.path.join(data_path, image_name) @@ -94,7 +103,7 @@ def __getitem__(self, index): """Return the item of dataset according to the given index.""" image_path, label = self.image_list[index], self.label_list[index] with Image.open(image_path) as image: - image = np.array(image.convert('RGB')) + image = np.array(image.convert("RGB")) if self.transform is not None: image, label = self.transform((image, label)) return (image, label) @@ -103,7 +112,8 @@ def __len__(self): """Return the length of dataset.""" return len(self.image_list) -@dataset_registry(dataset_type="ImagenetRaw", framework="pytorch", dataset_format='') + +@dataset_registry(dataset_type="ImagenetRaw", framework="pytorch", dataset_format="") class PytorchImagenetRaw(ImagenetRaw): """Dataset for ImageNet data generation on pytorch backend.""" @@ -111,13 +121,14 @@ def __getitem__(self, index): """Return the item of dataset according to the given index.""" image_path, label = self.image_list[index], self.label_list[index] with Image.open(image_path) as image: - image = image.convert('RGB') + image = image.convert("RGB") if self.transform is not None: image, label = self.transform((image, label)) image = np.array(image) return (image, label) -@dataset_registry(dataset_type="ImagenetRaw", framework="mxnet", dataset_format='') + +@dataset_registry(dataset_type="ImagenetRaw", framework="mxnet", dataset_format="") class MXNetImagenetRaw(ImagenetRaw): """Dataset for ImageNet data generation on mxnet backend.""" @@ -129,8 +140,13 @@ def __getitem__(self, index): image, label = self.transform((image, label)) return (image, label) -@dataset_registry(dataset_type="ImagenetRaw", framework="tensorflow, \ - tensorflow_itex", dataset_format='') + +@dataset_registry( + dataset_type="ImagenetRaw", + framework="tensorflow, \ + tensorflow_itex", + dataset_format="", +) class TensorflowImagenetRaw(ImagenetRaw): """Dataset for ImageNet data generation on tensorflow/inteltensorflow/tensorflow_itex backend.""" @@ -138,39 +154,39 @@ def __getitem__(self, index): """Return the item of dataset according to the given index.""" image_path, label = self.image_list[index], self.label_list[index] with Image.open(image_path) as image: - image = np.array(image.convert('RGB')) + image = np.array(image.convert("RGB")) if self.transform is not None: image, label = self.transform((image, label)) - if type(image).__name__ == 'Tensor': + if type(image).__name__ == "Tensor": with tf.compat.v1.Session() as sess: image = sess.run(image) - elif type(image).__name__ == 'EagerTensor': + elif type(image).__name__ == "EagerTensor": image = image.numpy() return (image, label) -@dataset_registry(dataset_type="Imagenet", framework="tensorflow", dataset_format='') + +@dataset_registry(dataset_type="Imagenet", framework="tensorflow", dataset_format="") class TensorflowImagenetDataset(IterableDataset): """Configuration for Imagenet dataset.""" - def __new__(cls, root, subset='validation', num_cores=28, transform=None, filter=None): + def __new__(cls, root, subset="validation", num_cores=28, transform=None, filter=None): """New a imagenet dataset for tensorflow.""" - assert subset in ('validation', 'train'), \ - 'only support subset (validation, train)' - logger.warning("This api 
is going to be deprecated, " - "please use ImageRecord instead.") + assert subset in ("validation", "train"), "only support subset (validation, train)" + logger.warning("This api is going to be deprecated, " "please use ImageRecord instead.") from tensorflow.python.platform import gfile - glob_pattern = os.path.join(root, '%s-*-of-*' % subset) + + glob_pattern = os.path.join(root, "%s-*-of-*" % subset) file_names = gfile.Glob(glob_pattern) if not file_names: - raise ValueError('Found no files in --root matching: {}'.format(glob_pattern)) + raise ValueError("Found no files in --root matching: {}".format(glob_pattern)) from tensorflow.python.data.experimental import parallel_interleave + from neural_compressor.data.transforms.imagenet_transform import ParseDecodeImagenet + ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False) - ds = ds.apply( - parallel_interleave( - tf.data.TFRecordDataset, cycle_length=num_cores)) + ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=num_cores)) if transform is not None: transform.transform_list.insert(0, ParseDecodeImagenet()) @@ -181,26 +197,32 @@ def __new__(cls, root, subset='validation', num_cores=28, transform=None, filter ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned return ds -@dataset_registry(dataset_type="Imagenet", framework="onnxrt_qlinearops, \ - onnxrt_integerops", dataset_format='') + +@dataset_registry( + dataset_type="Imagenet", + framework="onnxrt_qlinearops, \ + onnxrt_integerops", + dataset_format="", +) class ONNXRTImagenetDataset(Dataset): """Configuration for Imagenet dataset.""" - def __init__(self, root, subset='val', num_cores=28, transform=None, filter=None): + def __init__(self, root, subset="val", num_cores=28, transform=None, filter=None): """Initialize `ONNXRTImagenetDataset` class.""" self.val_dir = os.path.join(root, subset) - assert os.path.exists(self.val_dir), "find no val dir in {}".format(root) + \ - "please make sure there are train/val subfolders" + assert os.path.exists(self.val_dir), ( + "find no val dir in {}".format(root) + "please make sure there are train/val subfolders" + ) import glob - logger.warning("This api is going to be deprecated, " - "please use ImageRecord instead.") + + logger.warning("This api is going to be deprecated, " "please use ImageRecord instead.") self.transform = transform self.image_list = [] - files = glob.glob(os.path.join(self.val_dir, '*')) + files = glob.glob(os.path.join(self.val_dir, "*")) files.sort() for idx, file in enumerate(files): - imgs = glob.glob(os.path.join(file, '*')) + imgs = glob.glob(os.path.join(file, "*")) for img in imgs: self.image_list.append((img, idx)) @@ -211,9 +233,9 @@ def __len__(self): def __getitem__(self, index): """Return the item of dataset according to the given index.""" from PIL import Image + sample = self.image_list[index] image = Image.open(sample[0]) if self.transform is not None: image, label = self.transform((image, sample[1])) return (image, label) - diff --git a/neural_compressor/experimental/data/datasets/style_transfer_dataset.py b/neural_compressor/experimental/data/datasets/style_transfer_dataset.py index 0bbd5092178..7c665190a01 100644 --- a/neural_compressor/experimental/data/datasets/style_transfer_dataset.py +++ b/neural_compressor/experimental/data/datasets/style_transfer_dataset.py @@ -17,14 +17,20 @@ # ============================================================================== """Dataset used for style transfer task on multiple framework backends.""" 
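As a rough usage sketch for the ImagenetRaw dataset above (the paths, file names, and labels here are invented for illustration), the mapping file simply pairs an image name with an integer label on each line:

import os

from neural_compressor.experimental.data.datasets.imagenet_dataset import ImagenetRaw

data_path = "/tmp/imagenet_raw"  # hypothetical folder holding img1.jpg, img2.jpg, ...
with open(os.path.join(data_path, "val_map.txt"), "w") as f:
    f.write("img1.jpg 0\nimg2.jpg 282\n")  # "<image_name> <label>" per line

# Passing image_list explicitly avoids relying on the default lookup file name.
ds = ImagenetRaw(data_path, image_list=os.path.join(data_path, "val_map.txt"))
image, label = ds[0]  # RGB numpy array and its label (the jpg files must exist on disk)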
+import glob import os + import numpy as np -import glob -from .dataset import dataset_registry, Dataset +from .dataset import Dataset, dataset_registry -@dataset_registry(dataset_type="style_transfer", framework="tensorflow, \ - tensorflow_itex", dataset_format='') + +@dataset_registry( + dataset_type="style_transfer", + framework="tensorflow, \ + tensorflow_itex", + dataset_format="", +) class StyleTransferDataset(Dataset): """Dataset used for style transfer task on tensorflow/inteltensorflow/tensorflow_itex backend. @@ -32,8 +38,16 @@ class StyleTransferDataset(Dataset): content image folder and style image folder. """ - def __init__(self, content_folder, style_folder, crop_ratio=0.1, - resize_shape=(256, 256), image_format='jpg', transform=None, filter=None): + def __init__( + self, + content_folder, + style_folder, + crop_ratio=0.1, + resize_shape=(256, 256), + image_format="jpg", + transform=None, + filter=None, + ): """Initialize `StyleTransferDataset` class. Args: @@ -50,8 +64,8 @@ def __init__(self, content_folder, style_folder, crop_ratio=0.1, self.style_folder = style_folder self.resize_shape = resize_shape self.crop_ratio = crop_ratio - self.content_images = glob.glob(os.path.join(content_folder, '*' + image_format)) - self.style_images = glob.glob(os.path.join(style_folder, '*' + image_format)) + self.content_images = glob.glob(os.path.join(content_folder, "*" + image_format)) + self.style_images = glob.glob(os.path.join(style_folder, "*" + image_format)) self.image_list = [] for content in self.content_images: for style in self.style_images: @@ -64,21 +78,18 @@ def __len__(self): def __getitem__(self, index): """Return the item of dataset according to the given index.""" from PIL import Image + content_image, style_image = self.image_list[index] content_image = Image.open(content_image) style_image = Image.open(style_image) width, height = style_image.size crop_ratio = self.crop_ratio - crop_box = ( - crop_ratio * height, - crop_ratio * width, - (1 - crop_ratio) * height, - (1 - crop_ratio) * width) + crop_box = (crop_ratio * height, crop_ratio * width, (1 - crop_ratio) * height, (1 - crop_ratio) * width) content_image = np.asarray(content_image.resize(self.resize_shape)) style_image = np.asarray(style_image.resize(self.resize_shape)) if content_image.max() > 1.0: - content_image = content_image / 255. + content_image = content_image / 255.0 if style_image.max() > 1.0: - style_image = style_image / 255. + style_image = style_image / 255.0 return (content_image, style_image), 0 diff --git a/neural_compressor/experimental/data/filters/__init__.py b/neural_compressor/experimental/data/filters/__init__.py index 6ec13cf416f..760ac6e8927 100644 --- a/neural_compressor/experimental/data/filters/__init__.py +++ b/neural_compressor/experimental/data/filters/__init__.py @@ -14,7 +14,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- """Built-in filter.""" from .filter import FILTERS, Filter, filter_registry @@ -24,7 +23,7 @@ modules = glob.glob(join(dirname(__file__), "*.py")) for f in modules: - if isfile(f) and not f.startswith('__') and not f.endswith('__init__.py'): + if isfile(f) and not f.startswith("__") and not f.endswith("__init__.py"): __import__(basename(f)[:-3], globals(), locals(), level=1) diff --git a/neural_compressor/experimental/data/filters/coco_filter.py b/neural_compressor/experimental/data/filters/coco_filter.py index edf757a4111..6172108918c 100644 --- a/neural_compressor/experimental/data/filters/coco_filter.py +++ b/neural_compressor/experimental/data/filters/coco_filter.py @@ -14,12 +14,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Built-in COCO filter.""" from neural_compressor.utils.utility import LazyImport + from .filter import Filter, filter_registry -tf = LazyImport('tensorflow') + +tf = LazyImport("tensorflow") @filter_registry(filter_type="LabelBalanceCOCORecord", framework="tensorflow, tensorflow_itex") @@ -40,8 +41,11 @@ def __call__(self, image, label): return tf.math.equal(len(label[0]), self.size) -@filter_registry(filter_type="LabelBalanceCOCORaw", framework="tensorflow, \ - tensorflow_itex, pytorch, mxnet, onnxrt_qlinearops, onnxrt_integerops") +@filter_registry( + filter_type="LabelBalanceCOCORaw", + framework="tensorflow, \ + tensorflow_itex, pytorch, mxnet, onnxrt_qlinearops, onnxrt_integerops", +) class LabelBalanceCOCORawFilter(Filter): """The label balance filter for COCO raw data.""" @@ -57,4 +61,3 @@ def __call__(self, image, label): label: label of a sample. """ return len(label) == self.size - diff --git a/neural_compressor/experimental/data/filters/filter.py b/neural_compressor/experimental/data/filters/filter.py index 851bf382264..f86b24259ac 100644 --- a/neural_compressor/experimental/data/filters/filter.py +++ b/neural_compressor/experimental/data/filters/filter.py @@ -14,10 +14,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- """The base filter class for all frameworks.""" from abc import abstractmethod + from neural_compressor.utils.utility import singleton @@ -78,28 +78,31 @@ def __init__(self): PYTORCH_FILTERS = {} MXNET_FILTERS = {} -framework_filters = {"tensorflow": TensorflowFilters, - "tensorflow_itex": TensorflowFilters, - "pytorch": PyTorchFilters, - "pytorch_ipex": PyTorchFilters, - "pytorch_fx": PyTorchFilters, - "mxnet": MXNetFilters, - "onnxrt_qlinearops": ONNXRTQLFilters, - "onnxrt_qdq": ONNXRTQLFilters, - "onnxruntime": ONNXRTQLFilters, - "onnxrt_integerops": ONNXRTITFilters, - } - -registry_filters = {"tensorflow": TENSORFLOW_FILTERS, - "tensorflow_itex": TENSORFLOW_ITEX_FILTERS, - "pytorch": PYTORCH_FILTERS, - "pytorch_ipex": PYTORCH_FILTERS, - "pytorch_fx": PYTORCH_FILTERS, - "mxnet": MXNET_FILTERS, - "onnxrt_integerops": ONNXRT_IT_FILTERS, - "onnxrt_qdq": ONNXRT_QL_FILTERS, - "onnxruntime": ONNXRT_QL_FILTERS, - "onnxrt_qlinearops": ONNXRT_QL_FILTERS} +framework_filters = { + "tensorflow": TensorflowFilters, + "tensorflow_itex": TensorflowFilters, + "pytorch": PyTorchFilters, + "pytorch_ipex": PyTorchFilters, + "pytorch_fx": PyTorchFilters, + "mxnet": MXNetFilters, + "onnxrt_qlinearops": ONNXRTQLFilters, + "onnxrt_qdq": ONNXRTQLFilters, + "onnxruntime": ONNXRTQLFilters, + "onnxrt_integerops": ONNXRTITFilters, +} + +registry_filters = { + "tensorflow": TENSORFLOW_FILTERS, + "tensorflow_itex": TENSORFLOW_ITEX_FILTERS, + "pytorch": PYTORCH_FILTERS, + "pytorch_ipex": PYTORCH_FILTERS, + "pytorch_fx": PYTORCH_FILTERS, + "mxnet": MXNET_FILTERS, + "onnxrt_integerops": ONNXRT_IT_FILTERS, + "onnxrt_qdq": ONNXRT_QL_FILTERS, + "onnxruntime": ONNXRT_QL_FILTERS, + "onnxrt_qlinearops": ONNXRT_QL_FILTERS, +} class FILTERS(object): @@ -114,10 +117,19 @@ class FILTERS(object): def __init__(self, framework): """Initialize the attribute of class.""" - assert framework in ["tensorflow", "tensorflow_itex", "keras", - "mxnet", "onnxrt_qdq", "pytorch", "pytorch_ipex", "pytorch_fx", - "onnxrt_integerops", "onnxrt_qlinearops", "onnxruntime"], \ - "framework support tensorflow pytorch mxnet onnxrt" + assert framework in [ + "tensorflow", + "tensorflow_itex", + "keras", + "mxnet", + "onnxrt_qdq", + "pytorch", + "pytorch_ipex", + "pytorch_fx", + "onnxrt_integerops", + "onnxrt_qlinearops", + "onnxruntime", + ], "framework support tensorflow pytorch mxnet onnxrt" self.filters = framework_filters[framework]().filters self.framework = framework @@ -126,8 +138,7 @@ def __getitem__(self, filter_type): x[i] is roughly equivalent to type(x).__getitem__(x, index) """ - assert filter_type in self.filters.keys(), "filter support {}".\ - format(self.filters.keys()) + assert filter_type in self.filters.keys(), "filter support {}".format(self.filters.keys()) return self.filters[filter_type] @@ -142,11 +153,12 @@ def filter_registry(filter_type, framework): Returns: cls: The class of register. 
""" + def decorator_transform(cls): """Decorate a class.""" - for single_framework in [fwk.strip() for fwk in framework.split(',')]: + for single_framework in [fwk.strip() for fwk in framework.split(",")]: assert single_framework in [ - "tensorflow", + "tensorflow", "tensorflow_itex", "pytorch", "pytorch_ipex", @@ -155,12 +167,13 @@ def decorator_transform(cls): "onnxrt_integerops", "onnxrt_qdq", "onnxrt_qlinearops", - "onnxruntime" + "onnxruntime", ], "The framework support tensorflow mxnet pytorch onnxrt" if filter_type in registry_filters[single_framework].keys(): - raise ValueError('Cannot have two transforms with the same name') + raise ValueError("Cannot have two transforms with the same name") registry_filters[single_framework][filter_type] = cls return cls + return decorator_transform @@ -168,7 +181,6 @@ class Filter(object): """The base class for transform. __call__ method is needed when write user specific transform. - """ @abstractmethod diff --git a/neural_compressor/experimental/data/transforms/__init__.py b/neural_compressor/experimental/data/transforms/__init__.py index 7f57107ab6f..3154633584f 100644 --- a/neural_compressor/experimental/data/transforms/__init__.py +++ b/neural_compressor/experimental/data/transforms/__init__.py @@ -24,7 +24,7 @@ modules = glob.glob(join(dirname(__file__), "*.py")) for f in modules: - if isfile(f) and not f.startswith('__') and not f.endswith('__init__.py'): + if isfile(f) and not f.startswith("__") and not f.endswith("__init__.py"): __import__(basename(f)[:-3], globals(), locals(), level=1) diff --git a/neural_compressor/experimental/data/transforms/imagenet_transform.py b/neural_compressor/experimental/data/transforms/imagenet_transform.py index 5afe6b24c06..c5a1ca25eb4 100644 --- a/neural_compressor/experimental/data/transforms/imagenet_transform.py +++ b/neural_compressor/experimental/data/transforms/imagenet_transform.py @@ -32,14 +32,17 @@ """Neural Compressor built-in imagenet transforms.""" import numpy as np -from neural_compressor.utils.utility import LazyImport + from neural_compressor.utils import logger -from .transform import transform_registry, BaseTransform -tf = LazyImport('tensorflow') -cv2 = LazyImport('cv2') +from neural_compressor.utils.utility import LazyImport + +from .transform import BaseTransform, transform_registry + +tf = LazyImport("tensorflow") +cv2 = LazyImport("cv2") -@transform_registry(transform_type="QuantizedInput", \ - process="preprocess", framework="tensorflow, tensorflow_itex") + +@transform_registry(transform_type="QuantizedInput", process="preprocess", framework="tensorflow, tensorflow_itex") class QuantizedInput(BaseTransform): """Convert the dtype of input to quantize it. 
@@ -53,9 +56,8 @@ class QuantizedInput(BaseTransform): def __init__(self, dtype, scale=None): """Initialize `QuantizedInput` class.""" - self.dtype_map = {'uint8': tf.uint8, 'int8': tf.int8} - assert dtype in self.dtype_map.keys(), \ - 'only support cast dtype {}'.format(self.dtype_map.keys()) + self.dtype_map = {"uint8": tf.uint8, "int8": tf.int8} + assert dtype in self.dtype_map.keys(), "only support cast dtype {}".format(self.dtype_map.keys()) self.dtype = dtype self.scale = scale @@ -63,18 +65,22 @@ def __call__(self, sample): """Convert the dtype of input.""" # scale is not know when tuning, in this case this transform # do nothing, it's only used when scale is set - if self.scale == None: + if self.scale is None: return sample image, label = sample image = image * self.scale - if self.dtype == 'uint8': + if self.dtype == "uint8": image = image + 128 image = tf.dtypes.cast(image, dtype=self.dtype_map[self.dtype]) return image, label -@transform_registry(transform_type="LabelShift", \ - process="postprocess", framework="pytorch, tensorflow, tensorflow_itex,\ - onnxrt_qlinearops, onnxrt_integerops") + +@transform_registry( + transform_type="LabelShift", + process="postprocess", + framework="pytorch, tensorflow, tensorflow_itex,\ + onnxrt_qlinearops, onnxrt_integerops", +) class LabelShift(BaseTransform): """Convert label to label - label_shift. @@ -106,7 +112,8 @@ def __call__(self, sample): labels = np.array(labels) - self.label_shift return images, labels -class ParseDecodeImagenet(): + +class ParseDecodeImagenet: """Parse features in Example proto. Returns: @@ -117,41 +124,49 @@ def __call__(self, sample): """Parse features in example.""" # Dense features in Example proto. feature_map = { - 'image/encoded': tf.io.FixedLenFeature([], dtype=tf.string, default_value=''), - 'image/class/label': tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1)} + "image/encoded": tf.io.FixedLenFeature([], dtype=tf.string, default_value=""), + "image/class/label": tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1), + } sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) # Sparse features in Example proto. feature_map.update( - {k: sparse_float32 for k in ['image/object/bbox/xmin', - 'image/object/bbox/ymin', - 'image/object/bbox/xmax', - 'image/object/bbox/ymax']}) + { + k: sparse_float32 + for k in [ + "image/object/bbox/xmin", + "image/object/bbox/ymin", + "image/object/bbox/xmax", + "image/object/bbox/ymax", + ] + } + ) features = tf.io.parse_single_example(serialized=sample, features=feature_map) - label = tf.cast(features['image/class/label'], dtype=tf.int32) - image = features['image/encoded'] - image = tf.image.decode_jpeg( - image, channels=3, fancy_upscaling=False, dct_method='INTEGER_FAST') + label = tf.cast(features["image/class/label"], dtype=tf.int32) + image = features["image/encoded"] + image = tf.image.decode_jpeg(image, channels=3, fancy_upscaling=False, dct_method="INTEGER_FAST") return (image, label) -@transform_registry(transform_type="ParseDecodeImagenet", \ - process="preprocess", framework="tensorflow") + +@transform_registry(transform_type="ParseDecodeImagenet", process="preprocess", framework="tensorflow") class ParseDecodeImagenetTransform(BaseTransform): """Imagenet decoding will be performed automatically from Neural Compressor v1.4. 
Returns: sample """ - + def __call__(self, sample): """Convert `ParseDecodeImagenetTransform` feature.""" - logger.warning("This transform is going to be deprecated, " \ - "imagenet decoding will be performed automatically from Neural Compressor v1.4.") + logger.warning( + "This transform is going to be deprecated, " + "imagenet decoding will be performed automatically from Neural Compressor v1.4." + ) return sample -@transform_registry(transform_type="ResizeCropImagenet", \ - process="preprocess", framework="tensorflow") + +@transform_registry(transform_type="ResizeCropImagenet", process="preprocess", framework="tensorflow") class TensorflowResizeCropImagenetTransform(BaseTransform): """Combination of a series of transforms which is applicable to images in Imagenet. @@ -168,10 +183,19 @@ class TensorflowResizeCropImagenetTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, height, width, random_crop=False, resize_side=256, \ - resize_method='bilinear', random_flip_left_right=False, \ - mean_value=[0.0,0.0,0.0], scale=1.0, \ - data_format='channels_last', subpixels='RGB'): + def __init__( + self, + height, + width, + random_crop=False, + resize_side=256, + resize_method="bilinear", + random_flip_left_right=False, + mean_value=[0.0, 0.0, 0.0], + scale=1.0, + data_format="channels_last", + subpixels="RGB", + ): """Initialize `TensorflowResizeCropImagenetTransform` class.""" self.height = height self.width = width @@ -189,37 +213,44 @@ def __call__(self, sample): """Convert `TensorflowResizeCropImagenetTransform` feature.""" image, label = sample shape = tf.shape(input=image) - - height = tf.cast(shape[0], dtype=tf.float32) \ - if self.data_format=="channels_last" else tf.cast(shape[1], dtype=tf.float32) - width = tf.cast(shape[1], dtype=tf.float32) \ - if self.data_format=="channels_last" else tf.cast(shape[2], dtype=tf.float32) - scale = tf.cond(pred=tf.greater(height, width), \ - true_fn=lambda: self.resize_side / width, - false_fn=lambda: self.resize_side / height,) + + height = ( + tf.cast(shape[0], dtype=tf.float32) + if self.data_format == "channels_last" + else tf.cast(shape[1], dtype=tf.float32) + ) + width = ( + tf.cast(shape[1], dtype=tf.float32) + if self.data_format == "channels_last" + else tf.cast(shape[2], dtype=tf.float32) + ) + scale = tf.cond( + pred=tf.greater(height, width), + true_fn=lambda: self.resize_side / width, + false_fn=lambda: self.resize_side / height, + ) scale = tf.cast(scale, dtype=tf.float32) - new_height = tf.cast(tf.math.rint(height*scale), dtype=tf.int32) - new_width = tf.cast(tf.math.rint(width*scale), dtype=tf.int32) + new_height = tf.cast(tf.math.rint(height * scale), dtype=tf.int32) + new_width = tf.cast(tf.math.rint(width * scale), dtype=tf.int32) - if self.subpixels=='BGR' and self.data_format=='channels_first': + if self.subpixels == "BGR" and self.data_format == "channels_first": # 'RGB'->'BGR' - image = tf.cond(tf.equal(tf.rank(image), 3), - lambda: tf.experimental.numpy.moveaxis(image[::-1, ...], 0, -1), - lambda: tf.experimental.numpy.moveaxis(image[:, ::-1, ...], 1, -1)) - elif self.subpixels=='BGR': + image = tf.cond( + tf.equal(tf.rank(image), 3), + lambda: tf.experimental.numpy.moveaxis(image[::-1, ...], 0, -1), + lambda: tf.experimental.numpy.moveaxis(image[:, ::-1, ...], 1, -1), + ) + elif self.subpixels == "BGR": # 'RGB'->'BGR' image = image[..., ::-1] image = tf.expand_dims(image, 0) - image = tf.image.resize(image, [new_height, new_width], - method=self.resize_method) - image = tf.squeeze(image) + 
image = tf.image.resize(image, [new_height, new_width], method=self.resize_method) + image = tf.squeeze(image) shape = tf.shape(input=image) if self.random_crop: - y0 = tf.random.uniform(shape=[], minval=0, maxval=(shape[0] - self.height +1), - dtype=tf.dtypes.int32) - x0 = tf.random.uniform(shape=[], minval=0, maxval=(shape[1] - self.width +1), - dtype=tf.dtypes.int32) + y0 = tf.random.uniform(shape=[], minval=0, maxval=(shape[0] - self.height + 1), dtype=tf.dtypes.int32) + x0 = tf.random.uniform(shape=[], minval=0, maxval=(shape[1] - self.width + 1), dtype=tf.dtypes.int32) else: y0 = (shape[0] - self.height) // 2 x0 = (shape[1] - self.width) // 2 @@ -232,14 +263,14 @@ def __call__(self, sample): image = (image - means) * self.scale return (image, label) -@transform_registry(transform_type="BilinearImagenet", \ - process="preprocess", framework="tensorflow") + +@transform_registry(transform_type="BilinearImagenet", process="preprocess", framework="tensorflow") class BilinearImagenetTransform(BaseTransform): """Combination of a series of transforms which is applicable to images in Imagenet. Args: height: Height of the result - width:Width of the result + width:Width of the result central_fraction(float, default=0.875):fraction of size to crop mean_value(list, default=[0.0,0.0,0.0]):means for each channel scale(float, default=1.0):std value @@ -248,8 +279,7 @@ class BilinearImagenetTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, height, width, central_fraction=0.875, - mean_value=[0.0,0.0,0.0], scale=1.0): + def __init__(self, height, width, central_fraction=0.875, mean_value=[0.0, 0.0, 0.0], scale=1.0): """Initialize `BilinearImagenetTransform` class.""" self.height = height self.width = width @@ -270,8 +300,7 @@ def __call__(self, sample): if self.height and self.width: # Resize the image to the specified height and width. image = tf.expand_dims(image, 0) - image = tf.image.resize(image, [self.height, self.width], \ - method=tf.image.ResizeMethod.BILINEAR) + image = tf.image.resize(image, [self.height, self.width], method=tf.image.ResizeMethod.BILINEAR) image = tf.squeeze(image, [0]) image = tf.subtract(image, 0.5) @@ -280,14 +309,16 @@ def __call__(self, sample): image = (image - means) * self.scale return (image, label) -@transform_registry(transform_type="BilinearImagenet", process="preprocess", \ - framework="onnxrt_qlinearops, onnxrt_integerops") + +@transform_registry( + transform_type="BilinearImagenet", process="preprocess", framework="onnxrt_qlinearops, onnxrt_integerops" +) class OnnxBilinearImagenetTransform(BaseTransform): """Combination of a series of transforms which is applicable to images in Imagenet. 
Args: height: Height of the result - width:Width of the result + width:Width of the result central_fraction(float, default=0.875):fraction of size to crop mean_value(list, default=[0.0,0.0,0.0]):means for each channel scale(float, default=1.0):std value @@ -296,8 +327,7 @@ class OnnxBilinearImagenetTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, height, width, central_fraction=0.875, - mean_value=[0.0,0.0,0.0], scale=1.0): + def __init__(self, height, width, central_fraction=0.875, mean_value=[0.0, 0.0, 0.0], scale=1.0): """Initialize `OnnxBilinearImagenetTransform` class.""" self.height = height self.width = width @@ -309,7 +339,7 @@ def __call__(self, sample): """Convert `OnnxBilinearImagenetTransform` feature.""" image, label = sample if isinstance(image, np.ndarray): - image = image.astype('float32') / 255. + image = image.astype("float32") / 255.0 img_shape = image.shape depth = img_shape[2] img_hd = float(img_shape[0]) @@ -320,11 +350,11 @@ def __call__(self, sample): bbox_h_size = img_shape[0] - bbox_h_start * 2 bbox_w_size = img_shape[1] - bbox_w_start * 2 - image = image[bbox_h_start:bbox_h_start+bbox_h_size, bbox_w_start:bbox_w_start+bbox_w_size] + image = image[bbox_h_start : bbox_h_start + bbox_h_size, bbox_w_start : bbox_w_start + bbox_w_size] if self.height and self.width: image = cv2.resize(image, (self.width, self.height), interpolation=cv2.INTER_LINEAR) - + image = np.subtract(image, 0.5) image = np.multiply(image, 2.0) means = np.broadcast_to(self.mean_value, image.shape) @@ -332,14 +362,16 @@ def __call__(self, sample): image = image.astype(np.float32) return (image, label) -@transform_registry(transform_type="ResizeCropImagenet", process="preprocess", \ - framework="onnxrt_qlinearops, onnxrt_integerops") + +@transform_registry( + transform_type="ResizeCropImagenet", process="preprocess", framework="onnxrt_qlinearops, onnxrt_integerops" +) class ONNXResizeCropImagenetTransform(BaseTransform): """Combination of a series of transforms which is applicable to images in Imagenet. Args: height: Height of the result - width:Width of the result + width:Width of the result central_fraction(float, default=0.875):fraction of size to crop mean_value(list, default=[0.0,0.0,0.0]):means for each channel scale(float, default=1.0):std value @@ -348,9 +380,18 @@ class ONNXResizeCropImagenetTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, height, width, random_crop=False, resize_side=256, \ - mean_value=[0.0,0.0,0.0], std_value=[0.229, 0.224, 0.225], \ - resize_method='bilinear', data_format='channels_last', subpixels='RGB'): + def __init__( + self, + height, + width, + random_crop=False, + resize_side=256, + mean_value=[0.0, 0.0, 0.0], + std_value=[0.229, 0.224, 0.225], + resize_method="bilinear", + data_format="channels_last", + subpixels="RGB", + ): """Initialize `ONNXResizeCropImagenetTransform` class.""" self.height = height self.width = width @@ -369,14 +410,14 @@ def __call__(self, sample): image, label = sample height, width = image.shape[0], image.shape[1] scale = self.resize_side / width if height > width else self.resize_side / height - new_height = int(height*scale) - new_width = int(width*scale) + new_height = int(height * scale) + new_width = int(width * scale) image = cv2.resize(image, (new_height, new_width)) - image = image / 255. 
+ image = image / 255.0 shape = image.shape if self.random_crop: - y0 = np.random.randint(low=0, high=(shape[0] - self.height +1)) - x0 = np.random.randint(low=0, high=(shape[1] - self.width +1)) + y0 = np.random.randint(low=0, high=(shape[0] - self.height + 1)) + x0 = np.random.randint(low=0, high=(shape[1] - self.width + 1)) else: y0 = (shape[0] - self.height) // 2 x0 = (shape[1] - self.width) // 2 @@ -384,12 +425,14 @@ def __call__(self, sample): image = np.array([image]) image = np.repeat(image, 3, axis=0) image = image.transpose(1, 2, 0) - image = image[y0:y0+self.height, x0:x0+self.width, :] - image = ((image - self.mean_value)/self.std_value).astype(np.float32) + image = image[y0 : y0 + self.height, x0 : x0 + self.width, :] + image = ((image - self.mean_value) / self.std_value).astype(np.float32) return (image.transpose(2, 0, 1), label) -@transform_registry(transform_type="ResizeWithAspectRatio", process="preprocess", \ - framework="onnxrt_qlinearops, onnxrt_integerops") + +@transform_registry( + transform_type="ResizeWithAspectRatio", process="preprocess", framework="onnxrt_qlinearops, onnxrt_integerops" +) class ResizeWithAspectRatio(BaseTransform): """Resize the image with aspect ratio. @@ -409,8 +452,8 @@ def __call__(self, sample): (img, label) = sample assert len(img.shape) == 3 height, width, _ = img.shape - new_height = int(100. * self.height / self.scale) - new_width = int(100. * self.width / self.scale) + new_height = int(100.0 * self.height / self.scale) + new_width = int(100.0 * self.width / self.scale) if height > width: w = new_width h = int(new_height * height / width) diff --git a/neural_compressor/experimental/data/transforms/tokenization.py b/neural_compressor/experimental/data/transforms/tokenization.py index b16160800bc..53814ad5bdc 100644 --- a/neural_compressor/experimental/data/transforms/tokenization.py +++ b/neural_compressor/experimental/data/transforms/tokenization.py @@ -32,17 +32,20 @@ # ============================================================================== """Tokenization helper classes.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from neural_compressor.utils.utility import LazyImport +from __future__ import absolute_import, division, print_function + import collections import re import unicodedata + import six -tf = LazyImport('tensorflow') -def convert_to_unicode(text): # pragma: no cover +from neural_compressor.utils.utility import LazyImport + +tf = LazyImport("tensorflow") + + +def convert_to_unicode(text): # pragma: no cover """Convert `text` to Unicode (if it's not already), assuming utf-8 input.""" if six.PY3: if isinstance(text, str): @@ -54,13 +57,14 @@ def convert_to_unicode(text): # pragma: no cover elif six.PY2: if isinstance(text, str): return text.decode("utf-8", "ignore") - elif isinstance(text, unicode): # pylint: disable=undefined-variable # noqa: F821 + elif isinstance(text, unicode): # pylint: disable=undefined-variable # noqa: F821 return text else: raise ValueError("Unsupported string type: %s" % (type(text))) else: raise ValueError("Not running on Python2 or Python 3?") + def load_vocab(vocab_file): """Load a vocabulary file into a dictionary.""" vocab = collections.OrderedDict() @@ -75,6 +79,7 @@ def load_vocab(vocab_file): index += 1 return vocab + def convert_by_vocab(vocab, items): """Convert a sequence of [tokens|ids] using the vocab.""" output = [] @@ -82,6 +87,7 @@ def convert_by_vocab(vocab, items): output.append(vocab[item]) return output + 
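A small sketch of the vocabulary helpers above, assuming a vocab.txt on disk with one token per line (its contents below are invented): load_vocab maps each token to its line index, and convert_by_vocab pushes a sequence through that mapping:

from neural_compressor.experimental.data.transforms.tokenization import convert_by_vocab, load_vocab

# vocab.txt is assumed to contain, one per line: [PAD], [UNK], hello, world
vocab = load_vocab("vocab.txt")  # OrderedDict: {"[PAD]": 0, "[UNK]": 1, "hello": 2, "world": 3}
ids = convert_by_vocab(vocab, ["hello", "world"])  # -> [2, 3]
inv_vocab = {v: k for k, v in vocab.items()}
tokens = convert_by_vocab(inv_vocab, ids)  # -> ["hello", "world"]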
def whitespace_tokenize(text): """Run basic whitespace cleaning and splitting on a piece of text.""" text = text.strip() @@ -195,7 +201,7 @@ def _tokenize_chinese_chars(self, text): output = [] for char in text: cp = ord(char) - if self._is_chinese_char(cp): # pragma: no cover + if self._is_chinese_char(cp): # pragma: no cover output.append(" ") output.append(char) output.append(" ") @@ -213,14 +219,16 @@ def _is_chinese_char(self, cp): # as is Japanese Hiragana and Katakana. Those alphabets are used to write # space-separated words, so they are not treated specially and handled # like the all of the other languages. - if ((cp >= 0x4E00 and cp <= 0x9FFF) or # - (cp >= 0x3400 and cp <= 0x4DBF) or # - (cp >= 0x20000 and cp <= 0x2A6DF) or # - (cp >= 0x2A700 and cp <= 0x2B73F) or # - (cp >= 0x2B740 and cp <= 0x2B81F) or # - (cp >= 0x2B820 and cp <= 0x2CEAF) or - (cp >= 0xF900 and cp <= 0xFAFF) or # - (cp >= 0x2F800 and cp <= 0x2FA1F)): # + if ( + (cp >= 0x4E00 and cp <= 0x9FFF) + or (cp >= 0x3400 and cp <= 0x4DBF) # + or (cp >= 0x20000 and cp <= 0x2A6DF) # + or (cp >= 0x2A700 and cp <= 0x2B73F) # + or (cp >= 0x2B740 and cp <= 0x2B81F) # + or (cp >= 0x2B820 and cp <= 0x2CEAF) # + or (cp >= 0xF900 and cp <= 0xFAFF) + or (cp >= 0x2F800 and cp <= 0x2FA1F) # + ): # return True return False @@ -230,7 +238,7 @@ def _clean_text(self, text): output = [] for char in text: cp = ord(char) - if cp == 0 or cp == 0xfffd or _is_control(char): + if cp == 0 or cp == 0xFFFD or _is_control(char): continue if _is_whitespace(char): output.append(" ") @@ -274,7 +282,7 @@ def tokenize(self, text): output_tokens = [] for token in whitespace_tokenize(text): chars = list(token) - if len(chars) > self.max_input_chars_per_word: # pragma: no cover + if len(chars) > self.max_input_chars_per_word: # pragma: no cover output_tokens.append(self.unk_token) continue @@ -304,6 +312,7 @@ def tokenize(self, text): output_tokens.extend(sub_tokens) return output_tokens + def _is_whitespace(char): """Check whether `chars` is a whitespace character.""" # \t, \n, and \r are technically contorl characters but we treat them @@ -311,11 +320,12 @@ def _is_whitespace(char): if char == " " or char == "\t" or char == "\n" or char == "\r": return True cat = unicodedata.category(char) - if cat == "Zs": # pragma: no cover + if cat == "Zs": # pragma: no cover return True return False -def _is_control(char): # pragma: no cover + +def _is_control(char): # pragma: no cover """Check whether `chars` is a control character.""" # These are technically control characters but we count them as whitespace # characters. @@ -326,15 +336,15 @@ def _is_control(char): # pragma: no cover return True return False -def _is_punctuation(char): # pragma: no cover + +def _is_punctuation(char): # pragma: no cover """Check whether `chars` is a punctuation character.""" cp = ord(char) # We treat all non-letter/number ASCII as punctuation. # Characters such as "^", "$", and "`" are not in the Unicode # Punctuation class but we treat them as punctuation anyways, for # consistency. 
- if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or - (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)): + if (cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126): return True cat = unicodedata.category(char) if cat.startswith("P"): diff --git a/neural_compressor/experimental/data/transforms/transform.py b/neural_compressor/experimental/data/transforms/transform.py index 474ea008aa3..2c795c4e4b8 100644 --- a/neural_compressor/experimental/data/transforms/transform.py +++ b/neural_compressor/experimental/data/transforms/transform.py @@ -17,17 +17,20 @@ # ============================================================================== """Neural Compressor built-in Transforms on multiple framework backends.""" -import numpy as np import collections from abc import abstractmethod -from neural_compressor.utils.utility import LazyImport, singleton + +import numpy as np + from neural_compressor.utils import logger +from neural_compressor.utils.utility import LazyImport, singleton + +torchvision = LazyImport("torchvision") +torch = LazyImport("torch") +tf = LazyImport("tensorflow") +mx = LazyImport("mxnet") +cv2 = LazyImport("cv2") -torchvision = LazyImport('torchvision') -torch = LazyImport('torch') -tf = LazyImport('tensorflow') -mx = LazyImport('mxnet') -cv2 = LazyImport('cv2') class Transforms(object): """INC supports built-in preprocessing, postprocessing and general methods on different framework backends. @@ -44,12 +47,14 @@ def __init__(self, process, concat_general=True): concat_general (Boolean): users can use general transform in both preprocess or postprocess if set True """ - transform_map = {"preprocess": self._get_preprocess, - "postprocess": self._get_postprocess, - "general": self._get_general, } + transform_map = { + "preprocess": self._get_preprocess, + "postprocess": self._get_postprocess, + "general": self._get_general, + } self.transforms = transform_map[process]() if concat_general: - self.transforms.update(transform_map['general']()) + self.transforms.update(transform_map["general"]()) @abstractmethod def _get_preprocess(self): @@ -115,14 +120,10 @@ def _get_preprocess(self): preprocess: a dict including all the registered preprocess methods """ preprocess = { - 'ToTensor': PytorchMxnetWrapFunction( - mx.gluon.data.vision.transforms.ToTensor), - 'CenterCrop': PytorchMxnetWrapFunction( - mx.gluon.data.vision.transforms.CenterCrop), - 'RandomHorizontalFlip': PytorchMxnetWrapFunction( - mx.gluon.data.vision.transforms.RandomFlipLeftRight), - 'RandomVerticalFlip': PytorchMxnetWrapFunction( - mx.gluon.data.vision.transforms.RandomFlipTopBottom), + "ToTensor": PytorchMxnetWrapFunction(mx.gluon.data.vision.transforms.ToTensor), + "CenterCrop": PytorchMxnetWrapFunction(mx.gluon.data.vision.transforms.CenterCrop), + "RandomHorizontalFlip": PytorchMxnetWrapFunction(mx.gluon.data.vision.transforms.RandomFlipLeftRight), + "RandomVerticalFlip": PytorchMxnetWrapFunction(mx.gluon.data.vision.transforms.RandomFlipTopBottom), } preprocess.update(MXNET_TRANSFORMS["preprocess"]) return preprocess @@ -144,9 +145,8 @@ def _get_general(self): general: a dict including all the registered general methods """ general = { - 'Compose': mx.gluon.data.vision.transforms.Compose, - 'Cast': PytorchMxnetWrapFunction( - mx.gluon.data.vision.transforms.Cast), + "Compose": mx.gluon.data.vision.transforms.Compose, + "Cast": PytorchMxnetWrapFunction(mx.gluon.data.vision.transforms.Cast), } general.update(MXNET_TRANSFORMS["general"]) return general 
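As an illustrative sketch of how these per-framework tables are consumed (assuming an MXNet install; the crop size and dummy image are arbitrary), a wrapped transform is resolved by name and then instantiated with its backend's keyword arguments:

import mxnet as mx

from neural_compressor.experimental.data.transforms.transform import MXNetTransforms

preprocess = MXNetTransforms("preprocess").transforms  # dict: name -> wrapper
center_crop = preprocess["CenterCrop"](size=224)       # wraps mx.gluon CenterCrop

image = mx.nd.zeros((256, 256, 3), dtype="uint8")      # dummy HWC image
cropped, label = center_crop((image, 0))               # only the image is transformed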
@@ -162,22 +162,14 @@ def _get_preprocess(self): preprocess: a dict including all the registered preprocess methods """ preprocess = { - "ToTensor": PytorchMxnetWrapFunction( - torchvision.transforms.ToTensor), - "ToPILImage": PytorchMxnetWrapFunction( - torchvision.transforms.ToPILImage), - "CenterCrop": PytorchMxnetWrapFunction( - torchvision.transforms.CenterCrop), - "RandomCrop": PytorchMxnetWrapFunction( - torchvision.transforms.RandomCrop), - "RandomHorizontalFlip": PytorchMxnetWrapFunction( - torchvision.transforms.RandomHorizontalFlip), - "RandomVerticalFlip": PytorchMxnetWrapFunction( - torchvision.transforms.RandomVerticalFlip), - "Pad": PytorchMxnetWrapFunction( - torchvision.transforms.Pad), - "ColorJitter": PytorchMxnetWrapFunction( - torchvision.transforms.ColorJitter), + "ToTensor": PytorchMxnetWrapFunction(torchvision.transforms.ToTensor), + "ToPILImage": PytorchMxnetWrapFunction(torchvision.transforms.ToPILImage), + "CenterCrop": PytorchMxnetWrapFunction(torchvision.transforms.CenterCrop), + "RandomCrop": PytorchMxnetWrapFunction(torchvision.transforms.RandomCrop), + "RandomHorizontalFlip": PytorchMxnetWrapFunction(torchvision.transforms.RandomHorizontalFlip), + "RandomVerticalFlip": PytorchMxnetWrapFunction(torchvision.transforms.RandomVerticalFlip), + "Pad": PytorchMxnetWrapFunction(torchvision.transforms.Pad), + "ColorJitter": PytorchMxnetWrapFunction(torchvision.transforms.ColorJitter), } preprocess.update(PYTORCH_TRANSFORMS["preprocess"]) return preprocess @@ -204,6 +196,7 @@ def _get_general(self): general.update(PYTORCH_TRANSFORMS["general"]) return general + class ONNXRTQLTransforms(Transforms): """Onnxrt_qlinearops Transforms subclass.""" @@ -237,6 +230,7 @@ def _get_general(self): general.update(ONNXRT_QL_TRANSFORMS["general"]) return general + class ONNXRTITTransforms(Transforms): """Onnxrt_integerops Transforms subclass.""" @@ -271,16 +265,18 @@ def _get_general(self): return general -framework_transforms = {"tensorflow": TensorflowTransforms, - "tensorflow_itex": TensorflowTransforms, - "mxnet": MXNetTransforms, - "pytorch": PyTorchTransforms, - "pytorch_ipex": PyTorchTransforms, - "pytorch_fx": PyTorchTransforms, - "onnxrt_qlinearops": ONNXRTQLTransforms, - "onnxrt_integerops": ONNXRTITTransforms, - "onnxruntime": ONNXRTQLTransforms, - "onnxrt_qdq": ONNXRTQLTransforms} +framework_transforms = { + "tensorflow": TensorflowTransforms, + "tensorflow_itex": TensorflowTransforms, + "mxnet": MXNetTransforms, + "pytorch": PyTorchTransforms, + "pytorch_ipex": PyTorchTransforms, + "pytorch_fx": PyTorchTransforms, + "onnxrt_qlinearops": ONNXRTQLTransforms, + "onnxrt_integerops": ONNXRTITTransforms, + "onnxruntime": ONNXRTQLTransforms, + "onnxrt_qdq": ONNXRTQLTransforms, +} # transform registry will register transforms into these dicts TENSORFLOW_TRANSFORMS = {"preprocess": {}, "postprocess": {}, "general": {}} @@ -290,17 +286,19 @@ def _get_general(self): ONNXRT_QL_TRANSFORMS = {"preprocess": {}, "postprocess": {}, "general": {}} ONNXRT_IT_TRANSFORMS = {"preprocess": {}, "postprocess": {}, "general": {}} -registry_transforms = {"tensorflow": TENSORFLOW_TRANSFORMS, - "tensorflow_itex": TENSORFLOW_ITEX_TRANSFORMS, - "mxnet": MXNET_TRANSFORMS, - "pytorch": PYTORCH_TRANSFORMS, - "pytorch_ipex": PYTORCH_TRANSFORMS, - "pytorch_fx": PYTORCH_TRANSFORMS, - "onnxrt_qlinearops": ONNXRT_QL_TRANSFORMS, - "onnxrt_qdq": ONNXRT_QL_TRANSFORMS, - "onnxruntime": ONNXRT_QL_TRANSFORMS, - "onnxrt_integerops": ONNXRT_IT_TRANSFORMS, - } +registry_transforms = { + "tensorflow": 
TENSORFLOW_TRANSFORMS, + "tensorflow_itex": TENSORFLOW_ITEX_TRANSFORMS, + "mxnet": MXNET_TRANSFORMS, + "pytorch": PYTORCH_TRANSFORMS, + "pytorch_ipex": PYTORCH_TRANSFORMS, + "pytorch_fx": PYTORCH_TRANSFORMS, + "onnxrt_qlinearops": ONNXRT_QL_TRANSFORMS, + "onnxrt_qdq": ONNXRT_QL_TRANSFORMS, + "onnxruntime": ONNXRT_QL_TRANSFORMS, + "onnxrt_integerops": ONNXRT_IT_TRANSFORMS, +} + class TRANSFORMS(object): """Transforms collection class. @@ -316,12 +314,20 @@ def __init__(self, framework, process): framework (str): different framework type like tensorflow, pytorch and so on process (str): process type, the value can be preprocess, postprocess or general """ - assert framework in ("tensorflow", "tensorflow_itex", "keras", "onnxruntime", \ - "pytorch", "pytorch_ipex", "pytorch_fx", "onnxrt_qdq", \ - "onnxrt_qlinearops", "onnxrt_integerops", "mxnet"), \ - "framework support tensorflow pytorch mxnet onnxrt" - assert process in ("preprocess", "postprocess", - "general"), "process support preprocess postprocess, general" + assert framework in ( + "tensorflow", + "tensorflow_itex", + "keras", + "onnxruntime", + "pytorch", + "pytorch_ipex", + "pytorch_fx", + "onnxrt_qdq", + "onnxrt_qlinearops", + "onnxrt_integerops", + "mxnet", + ), "framework support tensorflow pytorch mxnet onnxrt" + assert process in ("preprocess", "postprocess", "general"), "process support preprocess postprocess, general" self.transforms = framework_transforms[framework](process).transforms self.framework = framework self.process = process @@ -335,8 +341,7 @@ def __getitem__(self, transform_type): Returns: Transforms: the registered Transforms """ - assert transform_type in self.transforms.keys(), "transform support {}".\ - format(self.transforms.keys()) + assert transform_type in self.transforms.keys(), "transform support {}".format(self.transforms.keys()) return self.transforms[transform_type] def register(self, name, transform_cls): @@ -346,8 +351,9 @@ def register(self, name, transform_cls): name (str): process name transform_cls (class): process function wrapper class """ - assert name not in registry_transforms[self.framework][self.process].keys(), \ - 'register transform name already exists.' + assert ( + name not in registry_transforms[self.framework][self.process].keys() + ), "register transform name already exists." registry_transforms[self.framework][self.process].update({name: transform_cls}) @@ -363,8 +369,9 @@ def transform_registry(transform_type, process, framework): Returns: cls: The class of register. 
""" + def decorator_transform(cls): - for single_framework in [fwk.strip() for fwk in framework.split(',')]: + for single_framework in [fwk.strip() for fwk in framework.split(",")]: assert single_framework in [ "tensorflow", "tensorflow_itex", @@ -378,9 +385,10 @@ def decorator_transform(cls): "onnxruntime", ], "The framework support tensorflow mxnet pytorch onnxrt" if transform_type in registry_transforms[single_framework][process].keys(): - raise ValueError('Cannot have two transforms with the same name') + raise ValueError("Cannot have two transforms with the same name") registry_transforms[single_framework][process][transform_type] = cls return cls + return decorator_transform @@ -412,6 +420,7 @@ def __call__(self, **kwargs): """ return TensorflowTransform(self.transform_func, **kwargs) + class TensorflowTransform(BaseTransform): """Tensorflow transform class, the subclass of BaseTransform.""" @@ -434,6 +443,7 @@ def __call__(self, sample): image = self.transform_func(image, **self.kwargs) return (image, label) + class PytorchMxnetWrapFunction(object): """Pytorch and MXNet wrapper function class.""" @@ -453,6 +463,7 @@ def __call__(self, **args): """ return PytorchMxnetTransform(self.transform_func(**args)) + class PytorchMxnetTransform(BaseTransform): """Pytorch and Mxnet transform class, the subclass of BaseTransform.""" @@ -474,40 +485,47 @@ def __call__(self, sample): image = self.transform_func(image) return (image, label) + interpolation_map = { - 'nearest': cv2.INTER_NEAREST, - 'bilinear': cv2.INTER_LINEAR, - 'bicubic': cv2.INTER_CUBIC, + "nearest": cv2.INTER_NEAREST, + "bilinear": cv2.INTER_LINEAR, + "bicubic": cv2.INTER_CUBIC, } interpolation_pytorch_map = { - 'nearest': 0, - 'bilinear': 2, - 'bicubic': 3, + "nearest": 0, + "bilinear": 2, + "bicubic": 3, } interpolation_mxnet_map = { - 'nearest': 0, - 'bilinear': 1, - 'bicubic': 2, + "nearest": 0, + "bilinear": 1, + "bicubic": 2, } + def get_torchvision_map(interpolation): """Get torchvision interpolation map.""" try: from torchvision.transforms.functional import InterpolationMode + interpolation_torchvision_map = { 0: InterpolationMode.NEAREST, 2: InterpolationMode.BILINEAR, 3: InterpolationMode.BICUBIC, } return interpolation_torchvision_map[interpolation] - except: # pragma: no cover + except: # pragma: no cover return interpolation -@transform_registry(transform_type="Compose", process="general", \ - framework="onnxrt_qlinearops, onnxrt_integerops, tensorflow, \ - tensorflow_itex") + +@transform_registry( + transform_type="Compose", + process="general", + framework="onnxrt_qlinearops, onnxrt_integerops, tensorflow, \ + tensorflow_itex", +) class ComposeTransform(BaseTransform): """Composes several transforms together. @@ -528,8 +546,8 @@ def __call__(self, sample): sample = transform(sample) return sample -@transform_registry(transform_type="CropToBoundingBox", process="preprocess", \ - framework="pytorch") + +@transform_registry(transform_type="CropToBoundingBox", process="preprocess", framework="pytorch") class CropToBoundingBox(BaseTransform): """Crops an image to a specified bounding box. 
@@ -554,15 +572,12 @@ def __call__(self, sample): """Call torchvision.transforms.functional.crop.""" image, label = sample image = torchvision.transforms.functional.crop( - image, - self.offset_height, - self.offset_width, - self.target_height, - self.target_width) + image, self.offset_height, self.offset_width, self.target_height, self.target_width + ) return (image, label) -@transform_registry(transform_type="CropToBoundingBox", process="preprocess", \ - framework="mxnet") + +@transform_registry(transform_type="CropToBoundingBox", process="preprocess", framework="mxnet") class MXNetCropToBoundingBox(CropToBoundingBox): """Crops an image to a specified bounding box. @@ -579,16 +594,13 @@ class MXNetCropToBoundingBox(CropToBoundingBox): def __call__(self, sample): """Call mx.image.fixed_crop.""" image, label = sample - image = mx.image.fixed_crop( - image, - self.offset_height, - self.offset_width, - self.target_height, - self.target_width) + image = mx.image.fixed_crop(image, self.offset_height, self.offset_width, self.target_height, self.target_width) return (image, label) -@transform_registry(transform_type="CropToBoundingBox", process="preprocess", \ - framework="onnxrt_qlinearops, onnxrt_integerops") + +@transform_registry( + transform_type="CropToBoundingBox", process="preprocess", framework="onnxrt_qlinearops, onnxrt_integerops" +) class ONNXRTCropToBoundingBox(CropToBoundingBox): """Crops an image to a specified bounding box. @@ -605,12 +617,15 @@ class ONNXRTCropToBoundingBox(CropToBoundingBox): def __call__(self, sample): """Crop the image in sample.""" image, label = sample - image = image[self.offset_height : self.offset_height+self.target_height, - self.offset_width : self.offset_width+self.target_width, :] + image = image[ + self.offset_height : self.offset_height + self.target_height, + self.offset_width : self.offset_width + self.target_width, + :, + ] return (image, label) -@transform_registry(transform_type="CropToBoundingBox", process="preprocess", \ - framework="tensorflow, tensorflow_itex") + +@transform_registry(transform_type="CropToBoundingBox", process="preprocess", framework="tensorflow, tensorflow_itex") class TensorflowCropToBoundingBox(CropToBoundingBox): """Crops an image to a specified bounding box. @@ -628,15 +643,23 @@ def __call__(self, sample): """Crop the image in sample.""" image, label = sample if isinstance(image, tf.Tensor): - image = tf.image.crop_to_bounding_box(image, self.offset_height, - self.offset_width, self.target_height, self.target_width) + image = tf.image.crop_to_bounding_box( + image, self.offset_height, self.offset_width, self.target_height, self.target_width + ) else: - image = image[self.offset_height : self.offset_height+self.target_height, - self.offset_width : self.offset_width+self.target_width, :] + image = image[ + self.offset_height : self.offset_height + self.target_height, + self.offset_width : self.offset_width + self.target_width, + :, + ] return (image, label) -@transform_registry(transform_type="ResizeWithRatio", process="preprocess", \ - framework="onnxrt_qlinearops, onnxrt_integerops, pytorch, mxnet") + +@transform_registry( + transform_type="ResizeWithRatio", + process="preprocess", + framework="onnxrt_qlinearops, onnxrt_integerops, pytorch, mxnet", +) class ResizeWithRatio(BaseTransform): """Resize image with aspect ratio and pad it to max shape(optional). 
@@ -680,20 +703,26 @@ def __call__(self, sample): if self.padding: h, w = image.shape[:2] - pad_param = [[(self.max_dim-h)//2, self.max_dim-h-(self.max_dim-h)//2], - [(self.max_dim-w)//2, self.max_dim-w-(self.max_dim-w)//2], - [0, 0]] + pad_param = [ + [(self.max_dim - h) // 2, self.max_dim - h - (self.max_dim - h) // 2], + [(self.max_dim - w) // 2, self.max_dim - w - (self.max_dim - w) // 2], + [0, 0], + ] if not isinstance(bbox, np.ndarray): bbox = np.array(bbox) resized_box = bbox * [height, width, height, width] * scale - moved_box = (resized_box + [(self.max_dim-h)//2, (self.max_dim-w)//2, \ - (self.max_dim-h)//2, (self.max_dim-w)//2]) + moved_box = resized_box + [ + (self.max_dim - h) // 2, + (self.max_dim - w) // 2, + (self.max_dim - h) // 2, + (self.max_dim - w) // 2, + ] bbox = moved_box / [self.max_dim, self.max_dim, self.max_dim, self.max_dim] - image = np.pad(image, pad_param, mode='constant', constant_values=self.constant_value) + image = np.pad(image, pad_param, mode="constant", constant_values=self.constant_value) return image, (bbox, str_label, int_label, image_id) -@transform_registry(transform_type="ResizeWithRatio", process="preprocess", \ - framework="tensorflow, tensorflow_itex") + +@transform_registry(transform_type="ResizeWithRatio", process="preprocess", framework="tensorflow, tensorflow_itex") class TensorflowResizeWithRatio(BaseTransform): """Resize image with aspect ratio and pad it to max shape(optional). @@ -728,27 +757,33 @@ def __call__(self, sample): width = tf.cast(shape[1], dtype=tf.float32) scale = 1 if self.min_dim: - scale = tf.maximum(1., tf.cast(self.min_dim / tf.math.minimum(height, width),\ - dtype=tf.float32)) + scale = tf.maximum(1.0, tf.cast(self.min_dim / tf.math.minimum(height, width), dtype=tf.float32)) if self.max_dim: image_max = tf.cast(tf.maximum(height, width), dtype=tf.float32) - scale = tf.cond(pred=tf.greater(tf.math.round(image_max * scale), self.max_dim), \ - true_fn=lambda: self.max_dim / image_max, - false_fn=lambda: scale) - image = tf.image.resize(image, (tf.math.round(height * scale), \ - tf.math.round(width * scale))) + scale = tf.cond( + pred=tf.greater(tf.math.round(image_max * scale), self.max_dim), + true_fn=lambda: self.max_dim / image_max, + false_fn=lambda: scale, + ) + image = tf.image.resize(image, (tf.math.round(height * scale), tf.math.round(width * scale))) bbox, str_label, int_label, image_id = label if self.padding: shape = tf.shape(input=image) h = tf.cast(shape[0], dtype=tf.float32) w = tf.cast(shape[1], dtype=tf.float32) - pad_param = [[(self.max_dim-h)//2, self.max_dim-h-(self.max_dim-h)//2], - [(self.max_dim-w)//2, self.max_dim-w-(self.max_dim-w)//2], - [0, 0]] + pad_param = [ + [(self.max_dim - h) // 2, self.max_dim - h - (self.max_dim - h) // 2], + [(self.max_dim - w) // 2, self.max_dim - w - (self.max_dim - w) // 2], + [0, 0], + ] resized_box = bbox * [height, width, height, width] * scale - moved_box = (resized_box + [(self.max_dim-h)//2, (self.max_dim-w)//2, \ - (self.max_dim-h)//2, (self.max_dim-w)//2]) + moved_box = resized_box + [ + (self.max_dim - h) // 2, + (self.max_dim - w) // 2, + (self.max_dim - h) // 2, + (self.max_dim - w) // 2, + ] bbox = moved_box / [self.max_dim, self.max_dim, self.max_dim, self.max_dim] image = tf.pad(image, pad_param, constant_values=self.constant_value) else: @@ -756,8 +791,8 @@ def __call__(self, sample): image, (bbox, str_label, int_label, image_id) = transform(sample) return image, (bbox, str_label, int_label, image_id) -@transform_registry(transform_type="Transpose", 
process="preprocess", \ - framework="onnxrt_qlinearops, onnxrt_integerops") + +@transform_registry(transform_type="Transpose", process="preprocess", framework="onnxrt_qlinearops, onnxrt_integerops") class Transpose(BaseTransform): """Transpose image according to perm. @@ -779,8 +814,8 @@ def __call__(self, sample): image = np.transpose(image, axes=self.perm) return (image, label) -@transform_registry(transform_type="Transpose", process="preprocess", \ - framework="tensorflow, tensorflow_itex") + +@transform_registry(transform_type="Transpose", process="preprocess", framework="tensorflow, tensorflow_itex") class TensorflowTranspose(Transpose): """Transpose image according to perm. @@ -801,6 +836,7 @@ def __call__(self, sample): image = np.transpose(image, axes=self.perm) return (image, label) + @transform_registry(transform_type="Transpose", process="preprocess", framework="mxnet") class MXNetTranspose(Transpose): """Transpose image according to perm. @@ -819,6 +855,7 @@ def __call__(self, sample): image = mx.ndarray.transpose(image, self.perm) return (image, label) + @transform_registry(transform_type="Transpose", process="preprocess", framework="pytorch") class PyTorchTranspose(Transpose): """Transpose image according to perm. @@ -837,8 +874,10 @@ def __call__(self, sample): image = image.permute(self.perm) return (image, label) -@transform_registry(transform_type="RandomVerticalFlip", process="preprocess", \ - framework="onnxrt_qlinearops, onnxrt_integerops") + +@transform_registry( + transform_type="RandomVerticalFlip", process="preprocess", framework="onnxrt_qlinearops, onnxrt_integerops" +) class RandomVerticalFlip(BaseTransform): """Vertically flip the given image randomly. @@ -853,8 +892,8 @@ def __call__(self, sample): image = np.flipud(image) return (image, label) -@transform_registry(transform_type="RandomVerticalFlip", process="preprocess", \ - framework="tensorflow, tensorflow_itex") + +@transform_registry(transform_type="RandomVerticalFlip", process="preprocess", framework="tensorflow, tensorflow_itex") class TensorflowRandomVerticalFlip(BaseTransform): """Vertically flip the given image randomly. @@ -872,8 +911,10 @@ def __call__(self, sample): image = np.flipud(image) return (image, label) -@transform_registry(transform_type="RandomHorizontalFlip", process="preprocess", \ - framework="onnxrt_qlinearops, onnxrt_integerops") + +@transform_registry( + transform_type="RandomHorizontalFlip", process="preprocess", framework="onnxrt_qlinearops, onnxrt_integerops" +) class RandomHorizontalFlip(BaseTransform): """Horizontally flip the given image randomly. @@ -888,8 +929,10 @@ def __call__(self, sample): image = np.fliplr(image) return (image, label) -@transform_registry(transform_type="RandomHorizontalFlip", process="preprocess", \ - framework="tensorflow, tensorflow_itex") + +@transform_registry( + transform_type="RandomHorizontalFlip", process="preprocess", framework="tensorflow, tensorflow_itex" +) class TensorflowRandomHorizontalFlip(BaseTransform): """Horizontally flip the given image randomly. 
@@ -907,9 +950,13 @@ def __call__(self, sample): image = np.fliplr(image) return (image, label) -@transform_registry(transform_type="ToArray", process="preprocess", \ - framework="onnxrt_qlinearops, onnxrt_integerops, tensorflow, \ - tensorflow_itex, pytorch, mxnet") + +@transform_registry( + transform_type="ToArray", + process="preprocess", + framework="onnxrt_qlinearops, onnxrt_integerops, tensorflow, \ + tensorflow_itex, pytorch, mxnet", +) class ToArray(BaseTransform): """Convert PIL Image or NDArray to numpy array. @@ -920,23 +967,37 @@ class ToArray(BaseTransform): def __call__(self, sample): """Convert image in sample to numpy array.""" from PIL import Image + image, label = sample if isinstance(image, Image.Image): image = np.array(image) - elif isinstance(image, mx.ndarray.NDArray): # pylint: disable=no-member + elif isinstance(image, mx.ndarray.NDArray): # pylint: disable=no-member image = image.asnumpy() else: raise ValueError("Unknown image type!") return (image, label) -np_dtype_map = {'int8': np.int8, 'uint8': np.uint8, 'complex64': np.complex64, - 'uint16': np.uint16, 'int32': np.int32, 'uint32': np.uint32, - 'int64': np.int64, 'uint64': np.uint64, 'float32': np.float32, - 'float16': np.float16, 'float64': np.float64, 'bool': bool, - 'string': str, 'complex128': np.complex128, 'int16': np.int16} -@transform_registry(transform_type="Cast", process="general", \ - framework="tensorflow, tensorflow_itex") +np_dtype_map = { + "int8": np.int8, + "uint8": np.uint8, + "complex64": np.complex64, + "uint16": np.uint16, + "int32": np.int32, + "uint32": np.uint32, + "int64": np.int64, + "uint64": np.uint64, + "float32": np.float32, + "float16": np.float16, + "float64": np.float64, + "bool": bool, + "string": str, + "complex128": np.complex128, + "int16": np.int16, +} + + +@transform_registry(transform_type="Cast", process="general", framework="tensorflow, tensorflow_itex") class CastTFTransform(BaseTransform): """Convert image to given dtype. @@ -947,15 +1008,27 @@ class CastTFTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, dtype='float32'): + def __init__(self, dtype="float32"): """Initialize `CastTFTransform` class.""" - self.tf_dtype_map = {'int16': tf.int16, 'uint8': tf.uint8, 'uint16': tf.uint16, - 'uint32':tf.uint32, 'uint64': tf.uint64, 'complex64': tf.complex64, - 'int32': tf.int32, 'int64':tf.int64, 'float32': tf.float32, - 'float16': tf.float16, 'float64':tf.float64, 'bool': tf.bool, - 'string': tf.string, 'int8': tf.int8, 'complex128': tf.complex128} + self.tf_dtype_map = { + "int16": tf.int16, + "uint8": tf.uint8, + "uint16": tf.uint16, + "uint32": tf.uint32, + "uint64": tf.uint64, + "complex64": tf.complex64, + "int32": tf.int32, + "int64": tf.int64, + "float32": tf.float32, + "float16": tf.float16, + "float64": tf.float64, + "bool": tf.bool, + "string": tf.string, + "int8": tf.int8, + "complex128": tf.complex128, + } - assert dtype in self.tf_dtype_map.keys(), 'Unknown dtype' + assert dtype in self.tf_dtype_map.keys(), "Unknown dtype" self.dtype = dtype def __call__(self, sample): @@ -967,8 +1040,8 @@ def __call__(self, sample): image = image.astype(np_dtype_map[self.dtype]) return (image, label) -@transform_registry(transform_type="Cast", process="general", - framework="onnxrt_qlinearops, onnxrt_integerops") + +@transform_registry(transform_type="Cast", process="general", framework="onnxrt_qlinearops, onnxrt_integerops") class CastONNXTransform(BaseTransform): """Convert image to given dtype. 
@@ -979,9 +1052,9 @@ class CastONNXTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, dtype='float32'): + def __init__(self, dtype="float32"): """Initialize `CastONNXTransform` class.""" - assert dtype in np_dtype_map.keys(), 'Unknown dtype' + assert dtype in np_dtype_map.keys(), "Unknown dtype" self.dtype = dtype def __call__(self, sample): @@ -990,6 +1063,7 @@ def __call__(self, sample): image = image.astype(np_dtype_map[self.dtype]) return (image, label) + @transform_registry(transform_type="Cast", process="general", framework="pytorch") class CastPyTorchTransform(BaseTransform): """Convert image to given dtype. @@ -1001,13 +1075,23 @@ class CastPyTorchTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, dtype='float32'): + def __init__(self, dtype="float32"): """Initialize `CastPyTorchTransform` class.""" - dtype_map = {'int8': torch.int8, 'uint8': torch.uint8, 'complex128': torch.complex128, - 'int32':torch.int32, 'int64':torch.int64, 'complex64': torch.complex64, - 'bfloat16':torch.bfloat16, 'float64':torch.float64, 'bool': torch.bool, - 'float16':torch.float16, 'int16':torch.int16, 'float32': torch.float32} - assert dtype in dtype_map.keys(), 'Unknown dtype' + dtype_map = { + "int8": torch.int8, + "uint8": torch.uint8, + "complex128": torch.complex128, + "int32": torch.int32, + "int64": torch.int64, + "complex64": torch.complex64, + "bfloat16": torch.bfloat16, + "float64": torch.float64, + "bool": torch.bool, + "float16": torch.float16, + "int16": torch.int16, + "float32": torch.float32, + } + assert dtype in dtype_map.keys(), "Unknown dtype" self.dtype = dtype_map[dtype] def __call__(self, sample): @@ -1016,8 +1100,8 @@ def __call__(self, sample): image = image.type(self.dtype) return (image, label) -@transform_registry(transform_type="CenterCrop", process="preprocess", \ - framework="tensorflow, tensorflow_itex") + +@transform_registry(transform_type="CenterCrop", process="preprocess", framework="tensorflow, tensorflow_itex") class CenterCropTFTransform(BaseTransform): """Crops the given image at the center to the given size. @@ -1058,8 +1142,8 @@ def __call__(self, sample): image, label = transform(sample) return (image, label) -@transform_registry(transform_type="PaddedCenterCrop", process="preprocess", \ - framework="tensorflow, tensorflow_itex") + +@transform_registry(transform_type="PaddedCenterCrop", process="preprocess", framework="tensorflow, tensorflow_itex") class PaddedCenterCropTransform(BaseTransform): """Crops the given image at the center to the given size with padding. @@ -1089,16 +1173,15 @@ def __call__(self, sample): image, label = sample h, w = image.shape[0], image.shape[1] - padded_center_crop_size = \ - int((self.image_size / (self.image_size + self.crop_padding)) * min(h, w)) + padded_center_crop_size = int((self.image_size / (self.image_size + self.crop_padding)) * min(h, w)) y0 = (h - padded_center_crop_size + 1) // 2 x0 = (w - padded_center_crop_size + 1) // 2 - image = image[y0:y0 + padded_center_crop_size, x0:x0 + padded_center_crop_size, :] + image = image[y0 : y0 + padded_center_crop_size, x0 : x0 + padded_center_crop_size, :] return (image, label) -@transform_registry(transform_type="Resize", process="preprocess", \ - framework="tensorflow, tensorflow_itex") + +@transform_registry(transform_type="Resize", process="preprocess", framework="tensorflow, tensorflow_itex") class ResizeTFTransform(BaseTransform): """Resize the input image to the given size. 
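Usage sketch (illustrative only): composing ToArray and CastONNXTransform, both shown above, into a small ONNX Runtime preprocessing pipeline. ComposeTransform is assumed to take a transform_list argument, matching how it chains transforms in its __call__ above.

import numpy as np
from PIL import Image

from neural_compressor.experimental.data.transforms.transform import (
    CastONNXTransform,
    ComposeTransform,
    ToArray,
)

pipeline = ComposeTransform(transform_list=[ToArray(), CastONNXTransform(dtype="float32")])
pil_image = Image.fromarray(np.zeros((8, 8, 3), dtype=np.uint8))
image, label = pipeline((pil_image, 0))   # PIL image -> float32 numpy array, label unchanged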
@@ -1111,7 +1194,7 @@ class ResizeTFTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, size, interpolation='bilinear'): + def __init__(self, size, interpolation="bilinear"): """Initialize `ResizeTFTransform` class.""" if isinstance(size, int): self.size = size, size @@ -1122,8 +1205,8 @@ def __init__(self, size, interpolation='bilinear'): self.size = size[0], size[1] self.interpolation = interpolation - if self.interpolation not in ['bilinear', 'nearest', 'bicubic']: - raise ValueError('Unsupported interpolation type!') + if self.interpolation not in ["bilinear", "nearest", "bicubic"]: + raise ValueError("Unsupported interpolation type!") def __call__(self, sample): """Resize the input image in sample to the given size.""" @@ -1131,12 +1214,11 @@ def __call__(self, sample): if isinstance(image, tf.Tensor): image = tf.image.resize(image, self.size, method=self.interpolation) else: - image = cv2.resize(image, self.size, - interpolation=interpolation_map[self.interpolation]) + image = cv2.resize(image, self.size, interpolation=interpolation_map[self.interpolation]) return (image, label) -@transform_registry(transform_type="Resize", process="preprocess", \ - framework="pytorch") + +@transform_registry(transform_type="Resize", process="preprocess", framework="pytorch") class ResizePytorchTransform(BaseTransform): """Resize the input image to the given size. @@ -1149,7 +1231,7 @@ class ResizePytorchTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, size, interpolation='bilinear'): + def __init__(self, size, interpolation="bilinear"): """Initialize `ResizePytorchTransform` class.""" self.size = size if interpolation in interpolation_pytorch_map.keys(): @@ -1160,12 +1242,11 @@ def __init__(self, size, interpolation='bilinear'): def __call__(self, sample): """Resize the input image in sample to the given size.""" image, label = sample - transformer = torchvision.transforms.Resize(size=self.size, - interpolation=self.interpolation) + transformer = torchvision.transforms.Resize(size=self.size, interpolation=self.interpolation) return (transformer(image), label) -@transform_registry(transform_type="RandomCrop", process="preprocess", \ - framework="tensorflow, tensorflow_itex") + +@transform_registry(transform_type="RandomCrop", process="preprocess", framework="tensorflow, tensorflow_itex") class RandomCropTFTransform(BaseTransform): """Crop the image at a random location to the given size. 
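Usage sketch (illustrative only): ResizeTFTransform, shown above, accepts either a tf.Tensor or a numpy array; with a numpy input it falls back to cv2.resize using the interpolation_map defined earlier in this file.

import numpy as np

from neural_compressor.experimental.data.transforms.transform import ResizeTFTransform

resize = ResizeTFTransform(size=224, interpolation="bilinear")   # an int size becomes (224, 224)
image, label = resize((np.zeros((480, 640, 3), dtype=np.float32), 0))
print(image.shape)   # (224, 224, 3)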
@@ -1196,7 +1277,7 @@ def __call__(self, sample): height, width = image.shape[1:3] if self.size[0] > height or self.size[1] > width: - raise ValueError('Crop size must be smaller than image size') + raise ValueError("Crop size must be smaller than image size") if self.size[0] == height and self.size[1] == width: return (image, label) @@ -1208,15 +1289,14 @@ def __call__(self, sample): offset_height = tf.cast(offset_height, dtype=tf.int32) offset_width = tf.cast(offset_width, dtype=tf.int32) - image = tf.image.crop_to_bounding_box(image, offset_height, - offset_width, self.size[0], self.size[1]) + image = tf.image.crop_to_bounding_box(image, offset_height, offset_width, self.size[0], self.size[1]) else: transform = RandomCropTransform(self.size) image, label = transform(sample) return (image, label) -@transform_registry(transform_type="RandomResizedCrop", process="preprocess", \ - framework="pytorch") + +@transform_registry(transform_type="RandomResizedCrop", process="preprocess", framework="pytorch") class RandomResizedCropPytorchTransform(BaseTransform): """Crop the given image to random size and aspect ratio. @@ -1234,8 +1314,7 @@ class RandomResizedCropPytorchTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), - interpolation='bilinear'): + def __init__(self, size, scale=(0.08, 1.0), ratio=(3.0 / 4.0, 4.0 / 3.0), interpolation="bilinear"): """Initialize `RandomResizedCropPytorchTransform` class.""" self.size = size self.scale = scale @@ -1252,12 +1331,13 @@ def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), def __call__(self, sample): """Crop the image in sample to the random size.""" image, label = sample - transformer = torchvision.transforms.RandomResizedCrop(size=self.size, - scale=self.scale, ratio=self.ratio, interpolation=self.interpolation) + transformer = torchvision.transforms.RandomResizedCrop( + size=self.size, scale=self.scale, ratio=self.ratio, interpolation=self.interpolation + ) return (transformer(image), label) -@transform_registry(transform_type="RandomResizedCrop", process="preprocess", \ - framework="mxnet") + +@transform_registry(transform_type="RandomResizedCrop", process="preprocess", framework="mxnet") class RandomResizedCropMXNetTransform(BaseTransform): """Crop the given image to random size and aspect ratio. @@ -1275,8 +1355,7 @@ class RandomResizedCropMXNetTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), - interpolation='bilinear'): + def __init__(self, size, scale=(0.08, 1.0), ratio=(3.0 / 4.0, 4.0 / 3.0), interpolation="bilinear"): """Initialize `RandomResizedCropMXNetTransform` class.""" if isinstance(size, int): self.size = size, size @@ -1299,13 +1378,13 @@ def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. 
/ 3.), def __call__(self, sample): """Crop the image in sample to the random size.""" image, label = sample - transformer = mx.gluon.data.vision.transforms.RandomResizedCrop(size=self.size, - scale=self.scale, ratio=self.ratio, interpolation=self.interpolation) + transformer = mx.gluon.data.vision.transforms.RandomResizedCrop( + size=self.size, scale=self.scale, ratio=self.ratio, interpolation=self.interpolation + ) return (transformer(image), label) -@transform_registry(transform_type="RandomResizedCrop", process="preprocess", \ - framework="tensorflow, tensorflow_itex") +@transform_registry(transform_type="RandomResizedCrop", process="preprocess", framework="tensorflow, tensorflow_itex") class RandomResizedCropTFTransform(BaseTransform): """Crop the given image to random size and aspect ratio. @@ -1323,8 +1402,7 @@ class RandomResizedCropTFTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, size, scale=(0.08, 1.0), ratio=( - 3. / 4., 4. / 3.), interpolation='bilinear'): + def __init__(self, size, scale=(0.08, 1.0), ratio=(3.0 / 4.0, 4.0 / 3.0), interpolation="bilinear"): """Initialize `RandomResizedCropTFTransform` class.""" if isinstance(size, int): self.size = size, size @@ -1337,8 +1415,8 @@ def __init__(self, size, scale=(0.08, 1.0), ratio=( self.scale = scale self.ratio = ratio self.interpolation = interpolation - if self.interpolation not in ['bilinear', 'nearest']: - raise ValueError('Unsupported interpolation type!') + if self.interpolation not in ["bilinear", "nearest"]: + raise ValueError("Unsupported interpolation type!") if scale[0] > scale[1] or ratio[0] > ratio[1]: raise ValueError("Scale and ratio should be of kind (min, max)") @@ -1354,29 +1432,32 @@ def get_params(self, image, scale, ratio): log_ratio = (np.log(ratio[0]), np.log(ratio[1])) new_ratio = np.exp(np.random.uniform(log_ratio[0], log_ratio[1])) - new_w = tf.math.round( - tf.math.sqrt(tf.math.multiply(target_area, new_ratio))) - new_h = tf.math.round( - tf.math.sqrt(tf.math.divide(target_area, new_ratio))) + new_w = tf.math.round(tf.math.sqrt(tf.math.multiply(target_area, new_ratio))) + new_h = tf.math.round(tf.math.sqrt(tf.math.divide(target_area, new_ratio))) x0, y0 = tf.case( - [(tf.math.logical_and( - tf.math.greater(width, new_w), tf.math.greater(height, new_h)), - lambda: (tf.random.uniform( - shape=[], maxval=tf.math.subtract(width, new_w)), - tf.random.uniform( - shape=[], maxval=tf.math.subtract(height, new_h))) - )], - default=lambda: (-1.0, -1.0)) + [ + ( + tf.math.logical_and(tf.math.greater(width, new_w), tf.math.greater(height, new_h)), + lambda: ( + tf.random.uniform(shape=[], maxval=tf.math.subtract(width, new_w)), + tf.random.uniform(shape=[], maxval=tf.math.subtract(height, new_h)), + ), + ) + ], + default=lambda: (-1.0, -1.0), + ) if x0 != -1.0 and y0 != -1.0: return y0, x0, new_h, new_w in_ratio = width / height - new_w, new_h = tf.case([(tf.math.greater(min(ratio), in_ratio), - lambda: (width, tf.math.round(width / min(ratio)))), - (tf.math.greater(in_ratio, max(ratio)), - lambda: (height, tf.math.round(height * max(ratio))))], - default=lambda: (width, height)) + new_w, new_h = tf.case( + [ + (tf.math.greater(min(ratio), in_ratio), lambda: (width, tf.math.round(width / min(ratio)))), + (tf.math.greater(in_ratio, max(ratio)), lambda: (height, tf.math.round(height * max(ratio)))), + ], + default=lambda: (width, height), + ) y0 = (height - new_h) / 2 x0 = (width - new_w) / 2 @@ -1395,20 +1476,18 @@ def __call__(self, sample): height = tf.cast(height, 
dtype=tf.float32) width = tf.cast(width, dtype=tf.float32) box_indices = tf.range(0, image.shape[0], dtype=tf.int32) - boxes = [y0/height, x0/width, (y0+h)/height, (x0+w)/width] + boxes = [y0 / height, x0 / width, (y0 + h) / height, (x0 + w) / width] boxes = tf.broadcast_to(boxes, [image.shape[0], 4]) - image = tf.image.crop_and_resize(image, boxes, box_indices, - self.size, self.interpolation) + image = tf.image.crop_and_resize(image, boxes, box_indices, self.size, self.interpolation) if squeeze: image = tf.squeeze(image, axis=0) else: - transform = RandomResizedCropTransform(self.size, self.scale, - self.ratio, self.interpolation) + transform = RandomResizedCropTransform(self.size, self.scale, self.ratio, self.interpolation) image, label = transform(sample) return (image, label) -@transform_registry(transform_type="Normalize", process="preprocess", \ - framework="tensorflow, tensorflow_itex") + +@transform_registry(transform_type="Normalize", process="preprocess", framework="tensorflow, tensorflow_itex") class NormalizeTFTransform(BaseTransform): """Normalize a image with mean and standard deviation. @@ -1449,11 +1528,11 @@ def __call__(self, sample): image, label = transform(sample) if self.rescale: image /= self.rescale[0] - image -= self.rescale[1] + image -= self.rescale[1] return (image, label) -@transform_registry(transform_type='KerasRescale', process="preprocess", \ - framework='tensorflow, tensorflow_itex') + +@transform_registry(transform_type="KerasRescale", process="preprocess", framework="tensorflow, tensorflow_itex") class RescaleKerasPretrainTransform(BaseTransform): """Scale the values of image to [0,1]. @@ -1468,15 +1547,15 @@ def __init__(self, rescale=None): def __call__(self, sample): """Scale the values of the image in sample.""" image, label = sample - if image.dtype == np.dtype('uint8'): - self.rescale = np.array(self.rescale).astype('uint8') + if image.dtype == np.dtype("uint8"): + self.rescale = np.array(self.rescale).astype("uint8") if len(self.rescale) == 2: image = image / self.rescale[0] - image = image - self.rescale[1] + image = image - self.rescale[1] return (image, label) -@transform_registry(transform_type='Rescale', process="preprocess", \ - framework='tensorflow, tensorflow_itex') + +@transform_registry(transform_type="Rescale", process="preprocess", framework="tensorflow, tensorflow_itex") class RescaleTFTransform(BaseTransform): """Scale the values of image to [0,1]. @@ -1488,13 +1567,13 @@ def __call__(self, sample): """Scale the values of the image in sample.""" image, label = sample if isinstance(image, tf.Tensor): - image = tf.cast(image, tf.float32) / 255. + image = tf.cast(image, tf.float32) / 255.0 else: - image = image.astype('float32') / 255. + image = image.astype("float32") / 255.0 return (image, label) -@transform_registry(transform_type='Rescale', process="preprocess", \ - framework='onnxrt_qlinearops, onnxrt_integerops') + +@transform_registry(transform_type="Rescale", process="preprocess", framework="onnxrt_qlinearops, onnxrt_integerops") class RescaleTransform(BaseTransform): """Scale the values of image to [0,1]. @@ -1506,12 +1585,16 @@ def __call__(self, sample): """Scale the values of the image in sample.""" image, label = sample if isinstance(image, np.ndarray): - image = image.astype('float32') / 255. 
+ image = image.astype("float32") / 255.0 return (image, label) -@transform_registry(transform_type='AlignImageChannel', process="preprocess", \ - framework='tensorflow, tensorflow_itex, \ - onnxrt_qlinearops, onnxrt_integerops, mxnet') + +@transform_registry( + transform_type="AlignImageChannel", + process="preprocess", + framework="tensorflow, tensorflow_itex, \ + onnxrt_qlinearops, onnxrt_integerops, mxnet", +) class AlignImageChannelTransform(BaseTransform): """Align image channel, now just support [H,W]->[H,W,dim], [H,W,4]->[H,W,3] and [H,W,3]->[H,W]. @@ -1525,14 +1608,14 @@ def __init__(self, dim=3): """Initialize `AlignImageChannelTransform` class.""" logger.warning("This transform is going to be deprecated") if dim < 1 or dim > 4: - raise ValueError('Unsupport image dim!') + raise ValueError("Unsupport image dim!") self.dim = dim def __call__(self, sample): """Align channel of the image in sample.""" image, label = sample if len(image.shape) == 2: - image = np.dstack([image]*self.dim) + image = np.dstack([image] * self.dim) if isinstance(image, np.ndarray) and image.shape[-1] != self.dim: if image.shape[-1] == 4 and self.dim == 3: image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB) @@ -1540,11 +1623,11 @@ def __call__(self, sample): image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) image = np.expand_dims(image, axis=-1) else: - raise ValueError('Unsupport conversion!') + raise ValueError("Unsupport conversion!") return (image, label) -@transform_registry(transform_type='AlignImageChannel', process="preprocess", \ - framework='pytorch') + +@transform_registry(transform_type="AlignImageChannel", process="preprocess", framework="pytorch") class PyTorchAlignImageChannel(BaseTransform): """Align image channel, now just support [H,W,4]->[H,W,3] and [H,W,3]->[H,W]. @@ -1558,24 +1641,25 @@ def __init__(self, dim=3): """Initialize `PyTorchAlignImageChannel` class.""" logger.warning("This transform is going to be deprecated") if dim != 1 and dim != 3: - raise ValueError('Unsupport image dim!') + raise ValueError("Unsupport image dim!") self.dim = dim def __call__(self, sample): """Align channel of the image in sample.""" from PIL import Image + image, label = sample - assert isinstance(image, Image.Image), 'Input image must be PIL Image' + assert isinstance(image, Image.Image), "Input image must be PIL Image" if self.dim == 3: - image = image.convert('RGB') + image = image.convert("RGB") elif self.dim == 1: - image = image.convert('L') + image = image.convert("L") else: - raise ValueError('Unsupport conversion!') + raise ValueError("Unsupport conversion!") return (image, label) -@transform_registry(transform_type="ToNDArray", process="preprocess", \ - framework="mxnet") + +@transform_registry(transform_type="ToNDArray", process="preprocess", framework="mxnet") class ToNDArrayTransform(BaseTransform): """Convert np.array to NDArray. @@ -1589,6 +1673,7 @@ def __call__(self, sample): image = mx.nd.array(image) return image, label + @transform_registry(transform_type="Resize", process="preprocess", framework="mxnet") class ResizeMXNetTransform(BaseTransform): """Resize the input image to the given size. 
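Usage sketch (illustrative only): the ONNX Runtime Rescale and AlignImageChannel transforms shown above, applied to a single-channel uint8 image. AlignImageChannelTransform logs a deprecation warning when constructed, as its __init__ above indicates.

import numpy as np

from neural_compressor.experimental.data.transforms.transform import (
    AlignImageChannelTransform,
    RescaleTransform,
)

sample = (np.full((32, 32), 255, dtype=np.uint8), 0)
sample = AlignImageChannelTransform(dim=3)(sample)   # (32, 32) -> (32, 32, 3)
image, label = RescaleTransform()(sample)            # uint8 [0, 255] -> float32 [0, 1]
print(image.shape, image.dtype, image.max())         # (32, 32, 3) float32 1.0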
@@ -1602,7 +1687,7 @@ class ResizeMXNetTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, size, interpolation='bilinear'): + def __init__(self, size, interpolation="bilinear"): """Initialize `ResizeMXNetTransform` class.""" if isinstance(size, int): self.size = size, size @@ -1620,13 +1705,11 @@ def __init__(self, size, interpolation='bilinear'): def __call__(self, sample): """Resize the input image in sample to the given size.""" image, label = sample - transformer = mx.gluon.data.vision.transforms.Resize(size=self.size, - interpolation=self.interpolation) + transformer = mx.gluon.data.vision.transforms.Resize(size=self.size, interpolation=self.interpolation) return (transformer(image), label) -@transform_registry(transform_type="Resize", process="preprocess", \ - framework="onnxrt_qlinearops, onnxrt_integerops") +@transform_registry(transform_type="Resize", process="preprocess", framework="onnxrt_qlinearops, onnxrt_integerops") class ResizeTransform(BaseTransform): """Resize the input image to the given size. @@ -1639,7 +1722,7 @@ class ResizeTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, size, interpolation='bilinear'): + def __init__(self, size, interpolation="bilinear"): """Initialize `ResizeTransform` class.""" if isinstance(size, int): self.size = size, size @@ -1662,8 +1745,8 @@ def __call__(self, sample): image = np.expand_dims(image, -1) return (image, label) -@transform_registry(transform_type="CropResize", process="preprocess", \ - framework="tensorflow, tensorflow_itex") + +@transform_registry(transform_type="CropResize", process="preprocess", framework="tensorflow, tensorflow_itex") class CropResizeTFTransform(BaseTransform): """Crop the input image with given location and resize it. @@ -1680,10 +1763,10 @@ class CropResizeTFTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, x, y, width, height, size, interpolation='bilinear'): + def __init__(self, x, y, width, height, size, interpolation="bilinear"): """Initialize `CropResizeTFTransform` class.""" - if interpolation not in ['bilinear', 'nearest', 'bicubic']: - raise ValueError('Unsupported interpolation type!') + if interpolation not in ["bilinear", "nearest", "bicubic"]: + raise ValueError("Unsupported interpolation type!") self.interpolation = interpolation self.x = x self.y = y @@ -1701,15 +1784,14 @@ def __call__(self, sample): """Resize the input image in sample with given location.""" image, label = sample if isinstance(image, tf.Tensor): - image = tf.image.crop_to_bounding_box( - image, self.y, self.x, self.height, self.width) + image = tf.image.crop_to_bounding_box(image, self.y, self.x, self.height, self.width) image = tf.image.resize(image, self.size, method=self.interpolation) else: - transform = CropResizeTransform(self.x, self.y, self.width, - self.height, self.size, self.interpolation) + transform = CropResizeTransform(self.x, self.y, self.width, self.height, self.size, self.interpolation) image, label = transform(sample) return (image, label) + @transform_registry(transform_type="CropResize", process="preprocess", framework="pytorch") class PyTorchCropResizeTransform(BaseTransform): """Crop the input image with given location and resize it. 
@@ -1727,7 +1809,7 @@ class PyTorchCropResizeTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, x, y, width, height, size, interpolation='bilinear'): + def __init__(self, x, y, width, height, size, interpolation="bilinear"): """Initialize `PyTorchCropResizeTransform` class.""" if interpolation in interpolation_pytorch_map.keys(): self.interpolation = get_torchvision_map(interpolation_pytorch_map[interpolation]) @@ -1743,10 +1825,10 @@ def __call__(self, sample): """Resize the input image in sample with given location.""" image, label = sample image = image.crop((self.x, self.y, self.x + self.width, self.y + self.height)) - transformer = torchvision.transforms.Resize(size=self.size, - interpolation=self.interpolation) + transformer = torchvision.transforms.Resize(size=self.size, interpolation=self.interpolation) return (transformer(image), label) + @transform_registry(transform_type="CropResize", process="preprocess", framework="mxnet") class MXNetCropResizeTransform(BaseTransform): """Crop the input image with given location and resize it. @@ -1764,7 +1846,7 @@ class MXNetCropResizeTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, x, y, width, height, size, interpolation='bilinear'): + def __init__(self, x, y, width, height, size, interpolation="bilinear"): """Initialize `MXNetCropResizeTransform` class.""" if interpolation in interpolation_mxnet_map.keys(): self.interpolation = interpolation_mxnet_map[interpolation] @@ -1779,12 +1861,13 @@ def __init__(self, x, y, width, height, size, interpolation='bilinear'): def __call__(self, sample): """Resize the input image in sample with given location.""" image, label = sample - transformer = mx.gluon.data.vision.transforms.CropResize(self.x, self.y, self.width, - self.height, self.size, self.interpolation) + transformer = mx.gluon.data.vision.transforms.CropResize( + self.x, self.y, self.width, self.height, self.size, self.interpolation + ) return (transformer(image), label) -@transform_registry(transform_type="CropResize", process="preprocess", \ - framework="onnxrt_qlinearops, onnxrt_integerops") + +@transform_registry(transform_type="CropResize", process="preprocess", framework="onnxrt_qlinearops, onnxrt_integerops") class CropResizeTransform(BaseTransform): """Crop the input image with given location and resize it. 
@@ -1801,7 +1884,7 @@ class CropResizeTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, x, y, width, height, size, interpolation='bilinear'): + def __init__(self, x, y, width, height, size, interpolation="bilinear"): """Initialize `CropResizeTransform` class.""" if interpolation in interpolation_map.keys(): self.interpolation = interpolation_map[interpolation] @@ -1822,12 +1905,12 @@ def __init__(self, x, y, width, height, size, interpolation='bilinear'): def __call__(self, sample): """Crop the input image in sample with given location.""" image, label = sample - image = image[self.y:self.y+self.height, self.x:self.x+self.width, :] + image = image[self.y : self.y + self.height, self.x : self.x + self.width, :] image = cv2.resize(image, self.size, interpolation=self.interpolation) return (image, label) -@transform_registry(transform_type="CenterCrop", process="preprocess", \ - framework="onnxrt_qlinearops, onnxrt_integerops") + +@transform_registry(transform_type="CenterCrop", process="preprocess", framework="onnxrt_qlinearops, onnxrt_integerops") class CenterCropTransform(BaseTransform): """Crops the given image at the center to the given size. @@ -1854,17 +1937,18 @@ def __call__(self, sample): h, w = image.shape[0], image.shape[1] if h + 1 < self.height or w + 1 < self.width: raise ValueError( - "Required crop size {} is larger then input image size {}".format( - (self.height, self.width), (h, w))) + "Required crop size {} is larger then input image size {}".format((self.height, self.width), (h, w)) + ) if self.height == h and self.width == w: return (image, label) y0 = (h - self.height) // 2 x0 = (w - self.width) // 2 - image = image[y0:y0 + self.height, x0:x0 + self.width, :] + image = image[y0 : y0 + self.height, x0 : x0 + self.width, :] return (image, label) + @transform_registry(transform_type="Normalize", process="preprocess", framework="mxnet") class MXNetNormalizeTransform(BaseTransform): """Normalize a image with mean and standard deviation. @@ -1893,9 +1977,9 @@ def __call__(self, sample): """Normalize the image in sample.""" image, label = sample axes = [len(image.shape) - 1] - axes.extend(list(np.arange(len(image.shape)-1))) + axes.extend(list(np.arange(len(image.shape) - 1))) image = mx.ndarray.transpose(image, axes) - assert len(self.mean) == image.shape[0], 'Mean channel must match image channel' + assert len(self.mean) == image.shape[0], "Mean channel must match image channel" transformer = mx.gluon.data.vision.transforms.Normalize(self.mean, self.std) image = transformer(image) axes = list(np.arange(1, len(image.shape))) @@ -1903,6 +1987,7 @@ def __call__(self, sample): image = mx.ndarray.transpose(image, axes) return (image, label) + @transform_registry(transform_type="Normalize", process="preprocess", framework="pytorch") class PyTorchNormalizeTransform(MXNetNormalizeTransform): """Normalize a image with mean and standard deviation. @@ -1926,8 +2011,8 @@ def __call__(self, sample): image = transformer(image) return (image, label) -@transform_registry(transform_type="Normalize", process="preprocess", \ - framework="onnxrt_qlinearops, onnxrt_integerops") + +@transform_registry(transform_type="Normalize", process="preprocess", framework="onnxrt_qlinearops, onnxrt_integerops") class NormalizeTransform(BaseTransform): """Normalize a image with mean and standard deviation. 
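Usage sketch (illustrative only): CropResizeTransform, whose constructor is shown above, crops an HWC numpy array at (x, y) with the given width/height and then resizes the crop with cv2. The size argument is assumed to accept an int for a square output, matching the other resize transforms in this module.

import numpy as np

from neural_compressor.experimental.data.transforms.transform import CropResizeTransform

crop_resize = CropResizeTransform(x=10, y=20, width=100, height=80, size=64)
image, label = crop_resize((np.zeros((240, 320, 3), dtype=np.float32), 0))
print(image.shape)   # (64, 64, 3)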
@@ -1954,12 +2039,14 @@ def __init__(self, mean=[0.0], std=[1.0]): def __call__(self, sample): """Normalize the image in sample.""" image, label = sample - assert len(self.mean) == image.shape[-1], 'Mean channel must match image channel' + assert len(self.mean) == image.shape[-1], "Mean channel must match image channel" image = (image - self.mean) / self.std return (image, label) -@transform_registry(transform_type="RandomCrop", process="preprocess", \ - framework="mxnet, onnxrt_qlinearops, onnxrt_integerops") + +@transform_registry( + transform_type="RandomCrop", process="preprocess", framework="mxnet, onnxrt_qlinearops, onnxrt_integerops" +) class RandomCropTransform(BaseTransform): """Crop the image at a random location to the given size. @@ -1986,8 +2073,8 @@ def __call__(self, sample): h, w = image.shape[0], image.shape[1] if h + 1 < self.height or w + 1 < self.width: raise ValueError( - "Required crop size {} is larger then input image size {}".format( - (self.height, self.width), (h, w))) + "Required crop size {} is larger then input image size {}".format((self.height, self.width), (h, w)) + ) if self.height == h and self.width == w: return (image, label) @@ -1995,13 +2082,15 @@ def __call__(self, sample): rand_h = np.random.randint(0, h - self.height + 1) rand_w = np.random.randint(0, w - self.width + 1) if len(image.shape) == 2: - image = image[rand_h:rand_h + self.height, rand_w:rand_w + self.width] + image = image[rand_h : rand_h + self.height, rand_w : rand_w + self.width] else: - image = image[rand_h:rand_h + self.height, rand_w:rand_w + self.width, :] + image = image[rand_h : rand_h + self.height, rand_w : rand_w + self.width, :] return (image, label) -@transform_registry(transform_type="RandomResizedCrop", process="preprocess", \ - framework="onnxrt_qlinearops, onnxrt_integerops") + +@transform_registry( + transform_type="RandomResizedCrop", process="preprocess", framework="onnxrt_qlinearops, onnxrt_integerops" +) class RandomResizedCropTransform(BaseTransform): """Crop the given image to random size and aspect ratio. @@ -2019,8 +2108,7 @@ class RandomResizedCropTransform(BaseTransform): tuple of processed image and label """ - def __init__(self, size, scale=(0.08, 1.0), ratio=( - 3. / 4., 4. / 3.), interpolation='bilinear'): + def __init__(self, size, scale=(0.08, 1.0), ratio=(3.0 / 4.0, 4.0 / 3.0), interpolation="bilinear"): """Initialize `RandomResizedCropTransform` class.""" if isinstance(size, int): self.size = size, size @@ -2077,13 +2165,15 @@ def __call__(self, sample): """Crop the image in sample to random size.""" image, label = sample y0, x0, h, w = self.get_params(image, self.scale, self.ratio) - crop_img = image[y0:y0 + h, x0:x0 + w, :] + crop_img = image[y0 : y0 + h, x0 : x0 + w, :] image = cv2.resize(crop_img, self.size, interpolation=self.interpolation) return (image, label) + def _compute_softmax(scores): """Compute softmax probability over raw logits.""" import math + if not scores: return [] @@ -2104,6 +2194,7 @@ def _compute_softmax(scores): probs.append(score / total_sum) return probs + def _get_best_indexes(logits, n_best_size): """Get the n-best logits from a list.""" index_and_score = sorted(enumerate(logits), key=lambda x: x[1], reverse=True) @@ -2115,14 +2206,17 @@ def _get_best_indexes(logits, n_best_size): best_indexes.append(index_and_score[i][0]) return best_indexes + def get_final_text(pred_text, orig_text, do_lower_case): """Project the tokenized prediction back to the original text.""" import six + from . 
import tokenization + def _strip_spaces(text): ns_chars = [] ns_to_s_map = collections.OrderedDict() - for (i, c) in enumerate(text): + for i, c in enumerate(text): if c == " ": continue ns_to_s_map[len(ns_chars)] = i @@ -2144,7 +2238,7 @@ def _strip_spaces(text): return orig_text tok_s_to_ns_map = {} - for (i, tok_index) in six.iteritems(tok_ns_to_s_map): + for i, tok_index in six.iteritems(tok_ns_to_s_map): tok_s_to_ns_map[tok_index] = i orig_start_position = None @@ -2154,7 +2248,7 @@ def _strip_spaces(text): orig_start_position = orig_ns_to_s_map[ns_start_position] if orig_start_position is None: - return orig_text + return orig_text orig_end_position = None if end_position in tok_s_to_ns_map: @@ -2165,23 +2259,26 @@ def _strip_spaces(text): if orig_end_position is None: return orig_text - output_text = orig_text[orig_start_position:(orig_end_position + 1)] + output_text = orig_text[orig_start_position : (orig_end_position + 1)] return output_text + class SquadExample(object): """A single training/test example for simple sequence classification. For examples without an answer, the start and end position are -1. """ - def __init__(self, + def __init__( + self, qas_id, question_text, doc_tokens, orig_answer_text=None, start_position=None, end_position=None, - is_impossible=False): + is_impossible=False, + ): """Initialize `SquadExample` class.""" self.qas_id = qas_id self.question_text = question_text @@ -2191,22 +2288,25 @@ def __init__(self, self.end_position = end_position self.is_impossible = is_impossible + class InputFeatures(object): """A single set of features of data.""" - def __init__(self, - unique_id, - example_index, - doc_span_index, - tokens, - token_to_orig_map, - token_is_max_context, - input_ids, - input_mask, - segment_ids, - start_position=None, - end_position=None, - is_impossible=None): + def __init__( + self, + unique_id, + example_index, + doc_span_index, + tokens, + token_to_orig_map, + token_is_max_context, + input_ids, + input_mask, + segment_ids, + start_position=None, + end_position=None, + is_impossible=None, + ): """Initialize `InputFeatures` class.""" self.unique_id = unique_id self.example_index = example_index @@ -2221,9 +2321,11 @@ def __init__(self, self.end_position = end_position self.is_impossible = is_impossible + def read_squad_examples(input_file): """Read a SQuAD json file into a list of SquadExample.""" import json + with tf.io.gfile.GFile(input_file, "r") as reader: input_data = json.load(reader)["data"] @@ -2264,15 +2366,17 @@ def is_whitespace(c): orig_answer_text=orig_answer_text, start_position=start_position, end_position=end_position, - is_impossible=is_impossible) + is_impossible=is_impossible, + ) examples.append(example) return examples + def _check_is_max_context(doc_spans, cur_span_index, position): """Check if this is the 'max context' doc span for the token.""" best_score = None best_span_index = None - for (span_index, doc_span) in enumerate(doc_spans): + for span_index, doc_span in enumerate(doc_spans): end = doc_span.start + doc_span.length - 1 if position < doc_span.start: continue @@ -2287,11 +2391,11 @@ def _check_is_max_context(doc_spans, cur_span_index, position): return cur_span_index == best_span_index -def convert_examples_to_features(examples, tokenizer, max_seq_length, - doc_stride, max_query_length, output_fn): + +def convert_examples_to_features(examples, tokenizer, max_seq_length, doc_stride, max_query_length, output_fn): """Load a data file into a list of `InputBatch`s.""" unique_id = 1000000000 - for 
(example_index, example) in enumerate(examples): + for example_index, example in enumerate(examples): query_tokens = tokenizer.tokenize(example.question_text) if len(query_tokens) > max_query_length: query_tokens = query_tokens[0:max_query_length] @@ -2299,7 +2403,7 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length, tok_to_orig_index = [] orig_to_tok_index = [] all_doc_tokens = [] - for (i, token) in enumerate(example.doc_tokens): + for i, token in enumerate(example.doc_tokens): orig_to_tok_index.append(len(all_doc_tokens)) sub_tokens = tokenizer.tokenize(token) for sub_token in sub_tokens: @@ -2315,8 +2419,7 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length, # We can have documents that are longer than the maximum sequence length. # To deal with this we do a sliding window approach, where we take chunks # of the up to our max length with a stride of `doc_stride`. - _DocSpan = collections.namedtuple( # pylint: disable=invalid-name - "DocSpan", ["start", "length"]) + _DocSpan = collections.namedtuple("DocSpan", ["start", "length"]) # pylint: disable=invalid-name doc_spans = [] start_offset = 0 while start_offset < len(all_doc_tokens): @@ -2327,7 +2430,7 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length, if start_offset + length == len(all_doc_tokens): break start_offset += min(length, doc_stride) - for (doc_span_index, doc_span) in enumerate(doc_spans): + for doc_span_index, doc_span in enumerate(doc_spans): tokens = [] token_to_orig_map = {} token_is_max_context = {} @@ -2344,8 +2447,7 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length, split_token_index = doc_span.start + i token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index] - is_max_context = _check_is_max_context(doc_spans, doc_span_index, - split_token_index) + is_max_context = _check_is_max_context(doc_spans, doc_span_index, split_token_index) token_is_max_context[len(tokens)] = is_max_context tokens.append(all_doc_tokens[split_token_index]) segment_ids.append(1) @@ -2383,13 +2485,14 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length, segment_ids=segment_ids, start_position=start_position, end_position=end_position, - is_impossible=example.is_impossible) + is_impossible=example.is_impossible, + ) # Run callback output_fn(feature) unique_id += 1 -@transform_registry(transform_type="Collect", \ - process="postprocess", framework="tensorflow") + +@transform_registry(transform_type="Collect", process="postprocess", framework="tensorflow") class CollectTransform(BaseTransform): """Postprocess the predictions, collect data.""" @@ -2408,7 +2511,7 @@ def __call__(self, sample): result_list = [np.expand_dims(result, 0) for result in all_results] for result in result_list: if len(self.unique_id) < self.length: - result = result.transpose(2,0,1) + result = result.transpose(2, 0, 1) self.unique_id.append(self.idx) self.start_logits.append(result[0]) self.end_logits.append(result[1]) @@ -2417,8 +2520,8 @@ def __call__(self, sample): self.all_sample = ([self.unique_id, self.start_logits, self.end_logits], label) return self.all_sample -@transform_registry(transform_type="SquadV1", process="postprocess", \ - framework="tensorflow, tensorflow_itex") + +@transform_registry(transform_type="SquadV1", process="postprocess", framework="tensorflow, tensorflow_itex") class TFSquadV1PostTransform(BaseTransform): """Postprocess the predictions of bert on SQuAD. 
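Worked example (illustrative only): the SQuAD post-processing below leans on the _compute_softmax and _get_best_indexes helpers defined earlier in this file; they are module-private and imported here purely to show their behaviour.

from neural_compressor.experimental.data.transforms.transform import (
    _compute_softmax,
    _get_best_indexes,
)

logits = [1.0, 3.0, 2.0]
print(_get_best_indexes(logits, n_best_size=2))   # [1, 2]: indexes of the two largest logits
probs = _compute_softmax(logits)
print(round(sum(probs), 6))                       # 1.0: softmax probabilities sum to one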
@@ -2447,15 +2550,25 @@ class TFSquadV1PostTransform(BaseTransform): tuple of processed prediction and label """ - def __init__(self, label_file, vocab_file, n_best_size=20, max_seq_length=384, \ - max_query_length=64, max_answer_length=30, do_lower_case=True, doc_stride=128): + def __init__( + self, + label_file, + vocab_file, + n_best_size=20, + max_seq_length=384, + max_query_length=64, + max_answer_length=30, + do_lower_case=True, + doc_stride=128, + ): """Initialize `TFSquadV1PostTransform` class.""" from . import tokenization + self.eval_examples = read_squad_examples(label_file) - tokenizer = tokenization.FullTokenizer( - vocab_file=vocab_file, do_lower_case=do_lower_case) + tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=do_lower_case) self.eval_features = [] + def append_feature(feature): self.eval_features.append(feature) @@ -2465,13 +2578,13 @@ def append_feature(feature): max_seq_length=max_seq_length, doc_stride=doc_stride, max_query_length=max_query_length, - output_fn=append_feature) + output_fn=append_feature, + ) self.n_best_size = n_best_size self.max_answer_length = max_answer_length self.do_lower_case = do_lower_case - self.RawResult = collections.namedtuple("RawResult", - ["unique_id", "start_logits", "end_logits"]) + self.RawResult = collections.namedtuple("RawResult", ["unique_id", "start_logits", "end_logits"]) def process_result(self, results): """Get the processed results.""" @@ -2482,7 +2595,9 @@ def process_result(self, results): self.RawResult( unique_id=int(unique_id), start_logits=[float(x) for x in start_logits.flat], - end_logits=[float(x) for x in end_logits.flat])) + end_logits=[float(x) for x in end_logits.flat], + ) + ) return processed_results @@ -2501,11 +2616,11 @@ def get_postprocess_result(self, sample): unique_id_to_result[result.unique_id] = result _PrelimPrediction = collections.namedtuple( # pylint: disable=invalid-name - "PrelimPrediction", - ["feature_index", "start_index", "end_index", "start_logit", "end_logit"]) + "PrelimPrediction", ["feature_index", "start_index", "end_index", "start_logit", "end_logit"] + ) all_predictions = collections.OrderedDict() - for (example_index, example) in enumerate(self.eval_examples): + for example_index, example in enumerate(self.eval_examples): features = example_index_to_features[example_index] prelim_predictions = [] @@ -2514,9 +2629,9 @@ def get_postprocess_result(self, sample): min_null_feature_index = 0 # the paragraph slice with min mull score null_start_logit = 0 # the start logit at the slice with min null score null_end_logit = 0 # the end logit at the slice with min null score - for (feature_index, feature) in enumerate(features): + for feature_index, feature in enumerate(features): # skip the case that is not predicted - if not feature.unique_id in unique_id_to_result: + if feature.unique_id not in unique_id_to_result: all_predictions[example.qas_id] = "*#skip this example#*" continue result = unique_id_to_result[feature.unique_id] @@ -2549,14 +2664,16 @@ def get_postprocess_result(self, sample): start_index=start_index, end_index=end_index, start_logit=result.start_logits[start_index], - end_logit=result.end_logits[end_index])) + end_logit=result.end_logits[end_index], + ) + ) prelim_predictions = sorted( - prelim_predictions, - key=lambda x: (x.start_logit + x.end_logit), - reverse=True) + prelim_predictions, key=lambda x: (x.start_logit + x.end_logit), reverse=True + ) _NbestPrediction = collections.namedtuple( # pylint: disable=invalid-name - "NbestPrediction", 
["text", "start_logit", "end_logit"]) + "NbestPrediction", ["text", "start_logit", "end_logit"] + ) seen_predictions = {} nbest = [] @@ -2565,10 +2682,10 @@ def get_postprocess_result(self, sample): break feature = features[pred.feature_index] if pred.start_index > 0: # this is a non-null prediction - tok_tokens = feature.tokens[pred.start_index:(pred.end_index + 1)] + tok_tokens = feature.tokens[pred.start_index : (pred.end_index + 1)] orig_doc_start = feature.token_to_orig_map[pred.start_index] orig_doc_end = feature.token_to_orig_map[pred.end_index] - orig_tokens = example.doc_tokens[orig_doc_start:(orig_doc_end + 1)] + orig_tokens = example.doc_tokens[orig_doc_start : (orig_doc_end + 1)] tok_text = " ".join(tok_tokens) # De-tokenize WordPieces that have been split off. @@ -2590,16 +2707,13 @@ def get_postprocess_result(self, sample): seen_predictions[final_text] = True nbest.append( - _NbestPrediction( - text=final_text, - start_logit=pred.start_logit, - end_logit=pred.end_logit)) + _NbestPrediction(text=final_text, start_logit=pred.start_logit, end_logit=pred.end_logit) + ) # In very rare edge cases we could have no valid predictions. So we # just create a nonce prediction in this case to avoid failure. if not nbest: - nbest.append( - _NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0)) + nbest.append(_NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0)) assert len(nbest) >= 1 @@ -2613,7 +2727,7 @@ def get_postprocess_result(self, sample): probs = _compute_softmax(total_scores) nbest_json = [] - for (i, entry) in enumerate(nbest): + for i, entry in enumerate(nbest): output = collections.OrderedDict() output["text"] = entry.text output["probability"] = probs[i] @@ -2630,8 +2744,7 @@ def __call__(self, sample): return self.get_postprocess_result(sample) -@transform_registry(transform_type="ModelZooCollect", \ - process="postprocess", framework="tensorflow, tensorflow_itex") +@transform_registry(transform_type="ModelZooCollect", process="postprocess", framework="tensorflow, tensorflow_itex") class TFModelZooCollectTransform(CollectTransform): """Postprocess the predictions of model zoo, collect data.""" @@ -2649,20 +2762,41 @@ def __call__(self, sample): self.all_sample = ([self.unique_id, self.start_logits, self.end_logits], label) return self.all_sample -@transform_registry(transform_type="SquadV1ModelZoo", \ - process="postprocess", framework="tensorflow, \ - tensorflow_itex") + +@transform_registry( + transform_type="SquadV1ModelZoo", + process="postprocess", + framework="tensorflow, \ + tensorflow_itex", +) class TFSquadV1ModelZooPostTransform(TFSquadV1PostTransform): """Postprocess the predictions of bert on SQuADV1.1. 
See class TFSquadV1PostTransform for more details """ - def __init__(self, label_file, vocab_file, n_best_size=20, max_seq_length=384, \ - max_query_length=64, max_answer_length=30, do_lower_case=True, doc_stride=128): + def __init__( + self, + label_file, + vocab_file, + n_best_size=20, + max_seq_length=384, + max_query_length=64, + max_answer_length=30, + do_lower_case=True, + doc_stride=128, + ): """Initialize `TFSquadV1ModelZooPostTransform` class.""" - super().__init__(label_file, vocab_file, n_best_size, max_seq_length, \ - max_query_length, max_answer_length, do_lower_case, doc_stride) + super().__init__( + label_file, + vocab_file, + n_best_size, + max_seq_length, + max_query_length, + max_answer_length, + do_lower_case, + doc_stride, + ) self.length = len(self.eval_features) self.collect_data = TFModelZooCollectTransform(length=self.length) @@ -2671,8 +2805,8 @@ def __call__(self, sample): sample = self.collect_data(sample) return self.get_postprocess_result(sample) -@transform_registry(transform_type="ParseDecodeVoc", process="preprocess", \ - framework="tensorflow, tensorflow_itex") + +@transform_registry(transform_type="ParseDecodeVoc", process="preprocess", framework="tensorflow, tensorflow_itex") class ParseDecodeVocTransform(BaseTransform): """Parse features in Example proto. diff --git a/neural_compressor/experimental/distillation.py b/neural_compressor/experimental/distillation.py index eb267905a98..faa74334251 100644 --- a/neural_compressor/experimental/distillation.py +++ b/neural_compressor/experimental/distillation.py @@ -18,30 +18,33 @@ # limitations under the License. import copy -from .component import Component -from ..utils import logger -from ..utils.create_obj_from_config import create_dataloader, create_eval_func, create_train_func -from ..model import BaseModel -from .common import Model -from ..adaptor import FRAMEWORKS + from neural_compressor.experimental.common import Criterions, Optimizers + +from ..adaptor import FRAMEWORKS from ..conf.config import DistillationConf from ..conf.pythonic_config import Config, DotDict +from ..model import BaseModel +from ..utils import logger +from ..utils.create_obj_from_config import create_dataloader, create_eval_func, create_train_func +from .common import Model +from .component import Component + class Distillation(Component): """Distillation class derived from Component class. - - Distillation class abstracted the pipeline of knowledge distillation, + + Distillation class abstracted the pipeline of knowledge distillation, transfer the knowledge of the teacher model to the student model. - + Args: conf_fname_or_obj (string or obj): The path to the YAML configuration file or Distillation_Conf containing accuracy goal, distillation objective and related dataloaders etc. - + Attributes: _epoch_ran: A integer indicating how much epochs ran. - eval_frequency: The frequency for doing evaluation of the student model + eval_frequency: The frequency for doing evaluation of the student model in terms of epoch. best_score: The best metric of the student model in the training. best_model: The best student model found in the training. @@ -71,20 +74,17 @@ def __init__(self, conf_fname_or_obj=None): def _on_train_begin(self, dataloader=None): """Operations called on the begining of the training. - Called before training, evaluate the teacher model and the student model. + Called before training, evaluate the teacher model and the student model. """ - assert self._model, 'student_model must be set.' 
+ assert self._model, "student_model must be set." if self._eval_func is not None: if self.teacher_model: score = self._eval_func( - self.teacher_model if getattr(self._eval_func, 'builtin', None) - else self.teacher_model.model + self.teacher_model if getattr(self._eval_func, "builtin", None) else self.teacher_model.model ) logger.info("teacher model score is {}.".format(str(score))) - score = self._eval_func( - self._model if getattr(self._eval_func, 'builtin', None) else self._model.model - ) + score = self._eval_func(self._model if getattr(self._eval_func, "builtin", None) else self._model.model) logger.info("initial model score is {}.".format(str(score))) if self.eval_frequency > 0: self.best_score = score @@ -95,7 +95,7 @@ def _on_train_begin(self, dataloader=None): def _on_step_begin(self, batch_id): """Operations called on the beginning of batches.""" - if self.criterion is not None and hasattr(self.criterion, 'clear_features'): + if self.criterion is not None and hasattr(self.criterion, "clear_features"): self.criterion.clear_features() def _on_after_compute_loss(self, input, student_output, student_loss, teacher_output=None): @@ -112,13 +112,10 @@ def _on_after_compute_loss(self, input, student_output, student_loss, teacher_ou """ if self.criterion is None: self.create_criterion() - assert self.criterion, \ - 'criterion must be set in yaml config file.' + assert self.criterion, "criterion must be set in yaml config file." if teacher_output is None: - assert self.teacher_model, 'teacher_model must be set.' - teacher_output = self.criterion.teacher_model_forward( - input, teacher_model=self.teacher_model._model - ) + assert self.teacher_model, "teacher_model must be set." + teacher_output = self.criterion.teacher_model_forward(input, teacher_model=self.teacher_model._model) return self.criterion.loss_cal_sloss(student_output, teacher_output, student_loss) def on_post_forward(self, input, teacher_output=None): # pragma: no cover @@ -126,24 +123,24 @@ def on_post_forward(self, input, teacher_output=None): # pragma: no cover Deprecated. """ - assert False, "This method is deprecated. please use `on_after_compute_loss` instead." \ - "on_after_compute_loss(input, student_output, student_loss, teacher_output=None)" + assert False, ( + "This method is deprecated. please use `on_after_compute_loss` instead." + "on_after_compute_loss(input, student_output, student_loss, teacher_output=None)" + ) def _on_epoch_end(self): """Operations called on the end of every epochs. - Called on the end of every epochs, evaluate the student model + Called on the end of every epochs, evaluate the student model and record the best one regularly. 
""" self._epoch_ran += 1 - if self._eval_func is not None and self.eval_frequency > 0 and \ - self._epoch_ran % self.eval_frequency == 0: - score = self._eval_func( - self._model if getattr(self._eval_func, 'builtin', None) else self._model.model - ) + if self._eval_func is not None and self.eval_frequency > 0 and self._epoch_ran % self.eval_frequency == 0: + score = self._eval_func(self._model if getattr(self._eval_func, "builtin", None) else self._model.model) logger.info("model score of epoch {} is {}.".format(self._epoch_ran, str(score))) - if (isinstance(score, list) and all([s > b_s for s, b_s in - zip(score, self.best_score)])) or score > self.best_score: + if ( + isinstance(score, list) and all([s > b_s for s, b_s in zip(score, self.best_score)]) + ) or score > self.best_score: self.best_score = score if self.framework == "pytorch": self.best_model = copy.deepcopy(self._model) @@ -155,30 +152,34 @@ def init_train_cfg(self): if self._train_cfg is None: # train section of distillation section in yaml file should be configured. self._train_cfg = self.cfg.distillation.train - assert self._train_cfg, "train field of distillation section in yaml file must " \ - "be configured for distillation if train_func is NOT set." + assert self._train_cfg, ( + "train field of distillation section in yaml file must " + "be configured for distillation if train_func is NOT set." + ) def create_criterion(self): """Create the criterion for training.""" self.init_train_cfg() if self.criterion is None: - assert 'criterion' in self._train_cfg.keys(), \ - "criterion part in train field of distillation section in yaml file " \ + assert "criterion" in self._train_cfg.keys(), ( + "criterion part in train field of distillation section in yaml file " "must be configured for distillation if criterion is NOT set." 
- + ) + if isinstance(self._train_cfg.criterion, DotDict): criterion_cfg = self._train_cfg.criterion else: criterion_cfg = self._train_cfg.criterion.config - assert len(criterion_cfg) == 1, "There must be exactly one loss in " \ - "criterion part, instead got {} loss.".format(len(criterion_cfg)) + assert ( + len(criterion_cfg) == 1 + ), "There must be exactly one loss in " "criterion part, instead got {} loss.".format(len(criterion_cfg)) loss = [i for i in criterion_cfg.keys()][0] loss_cfg = criterion_cfg[loss] criterion_builder = Criterions(self.framework)[loss](loss_cfg) criterion_tuple = criterion_builder() if self.teacher_model and self.student_model: - if self.framework == 'tensorflow': # new, for tf + if self.framework == "tensorflow": # new, for tf teacher_model = self.teacher_model._model student_model = self.student_model._model else: # for pytorch and other frameworks @@ -188,8 +189,7 @@ def create_criterion(self): criterion_tuple[1]["teacher_model"] = teacher_model self.criterion = criterion_tuple[0](**criterion_tuple[1]) else: - logger.warning("Use user defined criterion, " - "ignoring the criterion setting in yaml file.") + logger.warning("Use user defined criterion, " "ignoring the criterion setting in yaml file.") self._train_cfg.criterion = self.criterion @@ -197,25 +197,27 @@ def create_optimizer(self): """Create the optimizer for training.""" self.init_train_cfg() if self.optimizer is None: - assert 'optimizer' in self._train_cfg.keys(), \ - "optimizer part in train field of distillation section in yaml file " \ + assert "optimizer" in self._train_cfg.keys(), ( + "optimizer part in train field of distillation section in yaml file " "must be configured for distillation if optimizer is NOT set." + ) optimizer_cfg = self._train_cfg.optimizer - assert len(optimizer_cfg) == 1, "There must be exactly one optimizer in " \ - "optimizer part, instead got {} optimizer.".format(len(optimizer_cfg)) + assert ( + len(optimizer_cfg) == 1 + ), "There must be exactly one optimizer in " "optimizer part, instead got {} optimizer.".format( + len(optimizer_cfg) + ) optimizer_name = list(optimizer_cfg.keys())[0] optimizer_cfg_ = optimizer_cfg[optimizer_name] optimizer_builder = Optimizers(self.framework)[optimizer_name](optimizer_cfg_) optimizer_tuple = optimizer_builder() - if self.framework == 'tensorflow': + if self.framework == "tensorflow": self.optimizer = optimizer_tuple[0](**optimizer_tuple[1]) - elif self.framework == 'pytorch': + elif self.framework == "pytorch": # pylint: disable=no-member - self.optimizer = optimizer_tuple[0](self.model.model.parameters(), - **optimizer_tuple[1]) + self.optimizer = optimizer_tuple[0](self.model.model.parameters(), **optimizer_tuple[1]) else: - logger.warning("Use user defined optimizer, " - "ignoring the optimizer setting in yaml file.") + logger.warning("Use user defined optimizer, " "ignoring the optimizer setting in yaml file.") self._train_cfg.optimizer = self.optimizer @@ -226,38 +228,47 @@ def prepare(self): def pre_process(self): """Preprocessing before the disillation pipeline. - - Initialize necessary parts for distillation pipeline. 
- """ - framework_specific_info = {'device': self.cfg.device, - 'random_seed': self.cfg.tuning.random_seed, - 'workspace_path': self.cfg.tuning.workspace.path, - 'q_dataloader': None, - 'format': 'default', - 'backend': 'default'} - if self.framework == 'tensorflow': - framework_specific_info.update( - {"inputs": self.cfg.model.inputs, "outputs": self.cfg.model.outputs}) + Initialize necessary parts for distillation pipeline. + """ + framework_specific_info = { + "device": self.cfg.device, + "random_seed": self.cfg.tuning.random_seed, + "workspace_path": self.cfg.tuning.workspace.path, + "q_dataloader": None, + "format": "default", + "backend": "default", + } + + if self.framework == "tensorflow": + framework_specific_info.update({"inputs": self.cfg.model.inputs, "outputs": self.cfg.model.outputs}) self.adaptor = FRAMEWORKS[self.framework](framework_specific_info) self.generate_hooks() - assert isinstance(self._model, BaseModel), 'need set neural_compressor Model for distillation....' + assert isinstance(self._model, BaseModel), "need set neural_compressor Model for distillation...." - if self._train_dataloader is None and self._train_func is None and \ - self.cfg.distillation.train.dataloader is not None: + if ( + self._train_dataloader is None + and self._train_func is None + and self.cfg.distillation.train.dataloader is not None + ): train_dataloader_cfg = self.cfg.distillation.train.dataloader self._train_dataloader = create_dataloader(self.framework, train_dataloader_cfg) - if self.cfg.evaluation and self.cfg.evaluation.accuracy and \ - self.cfg.evaluation.accuracy.dataloader and \ - self._eval_dataloader is None and self._eval_func is None: + if ( + self.cfg.evaluation + and self.cfg.evaluation.accuracy + and self.cfg.evaluation.accuracy.dataloader + and self._eval_dataloader is None + and self._eval_func is None + ): eval_dataloader_cfg = self.cfg.evaluation.accuracy.dataloader - assert eval_dataloader_cfg is not None, \ - 'dataloader field of evaluation ' \ - 'in yaml file should be configured as eval_dataloader property is NOT set!' + assert eval_dataloader_cfg is not None, ( + "dataloader field of evaluation " + "in yaml file should be configured as eval_dataloader property is NOT set!" + ) self._eval_dataloader = create_dataloader(self.framework, eval_dataloader_cfg) @@ -266,44 +277,42 @@ def pre_process(self): self.create_criterion() self.create_optimizer() if self._train_dataloader is not None: - self._train_func = create_train_func(self.framework, \ - self.train_dataloader, \ - self.adaptor, \ - self._train_cfg, \ - hooks=self.hooks) + self._train_func = create_train_func( + self.framework, self.train_dataloader, self.adaptor, self._train_cfg, hooks=self.hooks + ) if self.cfg.evaluation and self.eval_dataloader and self._eval_func is None: # eval section in yaml file should be configured. eval_cfg = self.cfg.evaluation - assert eval_cfg, "eval field of distillation section in yaml file must " \ - "be configured for distillation if eval_func is NOT set." - self._eval_func = create_eval_func(self.framework, - self.eval_dataloader, - self.adaptor, - eval_cfg.accuracy.metric, - eval_cfg.accuracy.postprocess, - fp32_baseline=False) + assert eval_cfg, ( + "eval field of distillation section in yaml file must " + "be configured for distillation if eval_func is NOT set." 
+ ) + self._eval_func = create_eval_func( + self.framework, + self.eval_dataloader, + self.adaptor, + eval_cfg.accuracy.metric, + eval_cfg.accuracy.postprocess, + fp32_baseline=False, + ) def execute(self): """Do distillation pipeline. - First train the student model with the teacher model, after training, + First train the student model with the teacher model, after training, evaluating the best student model if any. Returns: Best distilled model found. """ - self._train_func( - self._model if getattr(self._train_func, 'builtin', None) else self._model.model - ) - if self.criterion is not None and hasattr(self.criterion, 'remove_all_hooks'): + self._train_func(self._model if getattr(self._train_func, "builtin", None) else self._model.model) + if self.criterion is not None and hasattr(self.criterion, "remove_all_hooks"): self.criterion.remove_all_hooks() logger.info("Model distillation is done.") if self._eval_func is not None: logger.info("Start to evaluate the distilled model.") self._model = self.best_model if self.best_model else self._model - score = self._eval_func( - self._model if getattr(self._eval_func, 'builtin', None) else self._model.model - ) + score = self._eval_func(self._model if getattr(self._eval_func, "builtin", None) else self._model.model) logger.info("distilled model score is {}.".format(str(score))) return self._model @@ -313,10 +322,10 @@ def generate_hooks(self): Register necessary hooks for distillation pipeline. """ - self.register_hook('on_train_begin', self._on_train_begin) - self.register_hook('on_step_begin', self._on_step_begin) - self.register_hook('on_after_compute_loss', self._on_after_compute_loss) - self.register_hook('on_epoch_end', self._on_epoch_end) + self.register_hook("on_train_begin", self._on_train_begin) + self.register_hook("on_step_begin", self._on_step_begin) + self.register_hook("on_after_compute_loss", self._on_after_compute_loss) + self.register_hook("on_epoch_end", self._on_epoch_end) def __call__(self): """Do distillation workflow. @@ -339,7 +348,7 @@ def __call__(self): After that, User specifies fp32 "model", training dataset "train_dataloader" and evaluation dataset "eval_dataloader". - For this usage, student_model, teacher_model, train_dataloader and eval_dataloader + For this usage, student_model, teacher_model, train_dataloader and eval_dataloader parameters are mandatory. c) Partial yaml configuration: User specifies dataloaders used in training phase @@ -350,12 +359,11 @@ def __call__(self): The "eval_func" tells the tuner whether the distilled model meets the accuracy criteria. If not, the Tuner starts a new training and tuning flow. - For this usage, student_model, teacher_model, train_dataloader and eval_func + For this usage, student_model, teacher_model, train_dataloader and eval_func parameters are mandatory. Returns: distilled model: best distilled model found, otherwise return None - """ return super(Distillation, self).__call__() @@ -373,8 +381,8 @@ def criterion(self): @criterion.setter def criterion(self, user_criterion): """Setter of criterion used in the distillation process. - - Set the user defined criterion. When using built-in train_func, user can + + Set the user defined criterion. When using built-in train_func, user can specify the customized criterion through this setter. Args: @@ -394,8 +402,8 @@ def optimizer(self): @optimizer.setter def optimizer(self, user_optimizer): """Setter of optimizer used in the distillation process. - - Set the user defined optimizer. 
When using built-in train_func, user can + + Set the user defined optimizer. When using built-in train_func, user can specify the customized optimizer through this setter. Args: @@ -428,7 +436,6 @@ def teacher_model(self, user_model): Another corner case is slim model of tensorflow, be careful of the name of model configured in yaml file, make sure the name is in supported slim model list. - """ if not isinstance(user_model, BaseModel): logger.warning("Force convert framework model to neural_compressor model.") @@ -461,7 +468,6 @@ def student_model(self, user_model): Another corner case is slim model of tensorflow, be careful of the name of model configured in yaml file, make sure the name is in supported slim model list. - """ if not isinstance(user_model, BaseModel): logger.warning("Force convert framework model to neural_compressor model.") @@ -500,4 +506,4 @@ def train_distributed(self, distributed): def __repr__(self): """Class representation.""" - return 'Distillation' + return "Distillation" diff --git a/neural_compressor/experimental/export/__init__.py b/neural_compressor/experimental/export/__init__.py index 53da29c9a4c..56c3604ffe8 100644 --- a/neural_compressor/experimental/export/__init__.py +++ b/neural_compressor/experimental/export/__init__.py @@ -14,8 +14,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - - """Intel Neural Compressor Export.""" from .torch2onnx import torch_to_fp32_onnx, torch_to_int8_onnx diff --git a/neural_compressor/experimental/export/qlinear2qdq.py b/neural_compressor/experimental/export/qlinear2qdq.py index 10c0b74d7ef..b77778c901e 100644 --- a/neural_compressor/experimental/export/qlinear2qdq.py +++ b/neural_compressor/experimental/export/qlinear2qdq.py @@ -14,14 +14,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Helper functions to export onnx model from QLinearops to QDQ.""" -from neural_compressor.utils import logger from neural_compressor.adaptor.ox_utils.util import find_by_name +from neural_compressor.utils import logger from neural_compressor.utils.utility import LazyImport -numpy_helper = LazyImport('onnx.numpy_helper') +numpy_helper = LazyImport("onnx.numpy_helper") + def check_model(model): """Check optype for input model. 
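A hedged usage sketch (not taken from this patch) of the experimental Distillation component reformatted above; the YAML path and the student/teacher objects are placeholders and assume a fully configured distillation YAML.

from neural_compressor.experimental import Distillation

distiller = Distillation("distillation.yaml")  # conf_fname_or_obj: YAML path or a Distillation_Conf
distiller.student_model = student_net          # placeholder framework model, wrapped internally
distiller.teacher_model = teacher_net          # placeholder framework model
best_student = distiller()                     # runs the registered hooks, returns the best distilled model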
@@ -32,16 +32,15 @@ def check_model(model): has_integerop = False has_qlinearop = False for node in model.graph.node: - if node.op_type.endswith('Integer'): + if node.op_type.endswith("Integer"): has_integerop = True - elif node.op_type.startswith('QLinear'): + elif node.op_type.startswith("QLinear"): has_qlinearop = True - elif node.op_type in ['QAttention', 'QGemm', 'QEmbedLayerNormalization']: + elif node.op_type in ["QAttention", "QGemm", "QEmbedLayerNormalization"]: has_qlinearop = True - elif node.op_type in ['Gather']: + elif node.op_type in ["Gather"]: input_data = find_by_name(node.input[0], model.graph.initializer) - if input_data is not None and \ - numpy_helper.to_array(input_data).dtype in ['int8', 'uint8']: + if input_data is not None and numpy_helper.to_array(input_data).dtype in ["int8", "uint8"]: has_qlinearop = True if has_integerop: logger.info("This model has Integer ops, these ops will be skipped.") @@ -51,6 +50,7 @@ def check_model(model): logger.info("This model has no QLinear ops, save the original model.") return False + def onnx_qlinear_to_qdq( model, input_name_to_nodes, @@ -59,9 +59,10 @@ def onnx_qlinear_to_qdq( Args: model (ModelProto): int8 onnx model. - input_name_to_nodes (dict): the mapping of tensor name and its destination nodes. + input_name_to_nodes (dict): the mapping of tensor name and its destination nodes. """ from neural_compressor.adaptor.ox_utils.operators import QOPERATORS + add_nodes = [] remove_nodes = [] inits = [] @@ -73,10 +74,7 @@ def onnx_qlinear_to_qdq( children = [] for out in node.output: children.extend(input_name_to_nodes[node.output[0]]) - converter = QOPERATORS[node.op_type]( - node, - children, - model.graph.initializer) + converter = QOPERATORS[node.op_type](node, children, model.graph.initializer) done, add_node, init = converter.convert() if done: add_nodes.extend(add_node) diff --git a/neural_compressor/experimental/export/tf2onnx.py b/neural_compressor/experimental/export/tf2onnx.py index e9215fa7d24..8e0bf77b8e5 100644 --- a/neural_compressor/experimental/export/tf2onnx.py +++ b/neural_compressor/experimental/export/tf2onnx.py @@ -16,11 +16,13 @@ # limitations under the License. """Helper functions to export model from TensorFlow to ONNX.""" +import re + from neural_compressor.utils import logger from neural_compressor.utils.utility import LazyImport -import re -t2o = LazyImport('tf2onnx') +t2o = LazyImport("tf2onnx") + def _split_nodename_and_shape(name): """Split input name with shape into name and shape.""" @@ -33,23 +35,17 @@ def _split_nodename_and_shape(name): name_pattern = r"(?:([\w\d/\-\._:]+)(\[[\-\d,]+\])?),?" splits = re.split(name_pattern, name) for i in range(1, len(splits), 3): - inputs.append(splits[i]+':0') + inputs.append(splits[i] + ":0") if splits[i + 1] is not None: shape = [int(n) for n in splits[i + 1][1:-1].split(",")] shape = [n if n >= 0 else None for n in shape] - shapes[splits[i]+':0'] = shape + shapes[splits[i] + ":0"] = shape if not shapes: shapes = None return inputs, shapes -def tf_to_fp32_onnx( - graph_def, - save_path, - opset_version=14, - input_names=None, - output_names=None, - inputs_as_nchw=None -): + +def tf_to_fp32_onnx(graph_def, save_path, opset_version=14, input_names=None, output_names=None, inputs_as_nchw=None): """Export FP32 Tensorflow model into FP32 ONNX model using tf2onnx tool. 
Args: @@ -64,24 +60,25 @@ def tf_to_fp32_onnx( if isinstance(input_names, str): input_names, shape_override = _split_nodename_and_shape(input_names) else: - input_names[:] = [o+":0" for o in input_names] - output_names[:] = [o+":0" for o in output_names] - t2o.convert.from_graph_def(graph_def=graph_def, input_names=input_names, - output_names=output_names, inputs_as_nchw=inputs_as_nchw, - shape_override=shape_override, opset=opset_version, output_path=save_path) + input_names[:] = [o + ":0" for o in input_names] + output_names[:] = [o + ":0" for o in output_names] + t2o.convert.from_graph_def( + graph_def=graph_def, + input_names=input_names, + output_names=output_names, + inputs_as_nchw=inputs_as_nchw, + shape_override=shape_override, + opset=opset_version, + output_path=save_path, + ) info = "The FP32 ONNX Model exported to path: {0}".format(save_path) - logger.info("*"*len(info)) + logger.info("*" * len(info)) logger.info(info) - logger.info("*"*len(info)) + logger.info("*" * len(info)) def tf_to_int8_onnx( - int8_model, - save_path, - opset_version: int = 14, - input_names=None, - output_names=None, - inputs_as_nchw=None + int8_model, save_path, opset_version: int = 14, input_names=None, output_names=None, inputs_as_nchw=None ): """Export INT8 Tensorflow model into INT8 ONNX model. @@ -97,21 +94,25 @@ def tf_to_int8_onnx( if isinstance(input_names, str): input_names, shape_override = _split_nodename_and_shape(input_names) else: - input_names[:] = [o+":0" for o in input_names] - output_names[:] = [o+":0" for o in output_names] + input_names[:] = [o + ":0" for o in input_names] + output_names[:] = [o + ":0" for o in output_names] onnx_convert_graph = "./converted_graph.onnx" from neural_compressor.adaptor.tf_utils.tf2onnx_converter import TensorflowQDQToOnnxQDQConverter - TensorflowQDQToOnnxQDQConverter(int8_model, input_names, \ - output_names, shape_override, inputs_as_nchw, opset_version).convert(onnx_convert_graph) + + TensorflowQDQToOnnxQDQConverter( + int8_model, input_names, output_names, shape_override, inputs_as_nchw, opset_version + ).convert(onnx_convert_graph) import onnxruntime as ort + sess_options = ort.SessionOptions() sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL sess_options.optimized_model_filepath = save_path import onnx + model = onnx.load(onnx_convert_graph) ort.InferenceSession(model.SerializeToString(), sess_options) info = "The INT8 ONNX Model is exported to path: {0}".format(save_path) - logger.info("*"*len(info)) + logger.info("*" * len(info)) logger.info(info) - logger.info("*"*len(info)) + logger.info("*" * len(info)) diff --git a/neural_compressor/experimental/export/torch2onnx.py b/neural_compressor/experimental/export/torch2onnx.py index ffa4aff7963..0e76c49070b 100644 --- a/neural_compressor/experimental/export/torch2onnx.py +++ b/neural_compressor/experimental/export/torch2onnx.py @@ -14,21 +14,23 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
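A hedged sketch (not from this patch) of driving the tf_to_fp32_onnx helper shown above with a frozen TensorFlow GraphDef; the file name and tensor names are placeholders.

import tensorflow as tf

from neural_compressor.experimental.export.tf2onnx import tf_to_fp32_onnx

graph_def = tf.compat.v1.GraphDef()
with tf.io.gfile.GFile("frozen_model.pb", "rb") as f:  # hypothetical frozen graph
    graph_def.ParseFromString(f.read())

tf_to_fp32_onnx(
    graph_def,
    "fp32-model.onnx",
    opset_version=14,
    input_names=["input"],     # the helper appends the ":0" suffix itself
    output_names=["predict"],
)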
- """Helper functions to export model from PyTorch/TensorFlow to ONNX.""" import os import sys -import numpy as np from collections import UserDict + +import numpy as np + from neural_compressor.adaptor.torch_utils.util import input2tuple from neural_compressor.utils import logger from neural_compressor.utils.utility import LazyImport -torch = LazyImport('torch') -onnx = LazyImport('onnx') -ort = LazyImport('onnxruntime') -ortq = LazyImport('onnxruntime.quantization') +torch = LazyImport("torch") +onnx = LazyImport("onnx") +ort = LazyImport("onnxruntime") +ortq = LazyImport("onnxruntime.quantization") + def _prepare_inputs(pt_model, input_names, example_inputs): """Prepare input_names and example_inputs.""" @@ -39,8 +41,9 @@ def _prepare_inputs(pt_model, input_names, example_inputs): # match input_names with inspected input_order, especailly for bert in hugginface. elif input_names and len(input_names) > 1: import inspect + input_order = inspect.signature(pt_model.forward).parameters.keys() - flag = [name in input_order for name in input_names] # whether should be checked + flag = [name in input_order for name in input_names] # whether should be checked if all(flag): new_input_names = [] new_example_inputs = [] @@ -54,6 +57,7 @@ def _prepare_inputs(pt_model, input_names, example_inputs): example_inputs = input2tuple(example_inputs) return input_names, example_inputs + def get_node_mapping( fp32_model, fp32_onnx_path, @@ -67,27 +71,30 @@ def get_node_mapping( Returns: module_node_mapping: op mapping from PyTorch to ONNX. """ + def check_data(op_type, data, module_dict): for name, value in module_dict.items(): if value.shape == data.shape: if (value == data).all(): module_dict.pop(name) return name - elif op_type == 'Conv': + elif op_type == "Conv": # Convolution weight data have fluction and BN fusion will insert scale. # We use the weight scale of the first output channel to check. 
weight_scale = value[0] / data[0] - if np.allclose(weight_scale - np.mean(weight_scale), 0, atol=1.e-5): + if np.allclose(weight_scale - np.mean(weight_scale), 0, atol=1.0e-5): module_dict.pop(name) return name return None module_dict = {} for name, module in fp32_model.named_modules(): - if 'Conv' in str(module.__class__.__name__) or \ - 'Embedding' in str(module.__class__.__name__) or \ - 'Linear' in str(module.__class__.__name__): - if hasattr(module, 'weight'): + if ( + "Conv" in str(module.__class__.__name__) + or "Embedding" in str(module.__class__.__name__) + or "Linear" in str(module.__class__.__name__) + ): + if hasattr(module, "weight"): value = module.weight.detach().cpu().numpy() module_dict[name] = value @@ -95,13 +102,14 @@ def check_data(op_type, data, module_dict): fp32_onnx_model = onnx.load(fp32_onnx_path) initializer_data = {tensor.name: tensor for tensor in fp32_onnx_model.graph.initializer} from onnx import numpy_helper + for node in fp32_onnx_model.graph.node: if node.op_type in op_types_to_quantize: - if node.op_type == 'MatMul' and node.input[1] in initializer_data: + if node.op_type == "MatMul" and node.input[1] in initializer_data: data = numpy_helper.to_array(initializer_data[node.input[1]]).T - elif node.op_type == 'Gather' and node.input[0] in initializer_data: + elif node.op_type == "Gather" and node.input[0] in initializer_data: data = numpy_helper.to_array(initializer_data[node.input[0]]) - elif node.op_type in ['Conv', 'Gemm']: + elif node.op_type in ["Conv", "Gemm"]: data = numpy_helper.to_array(initializer_data[node.input[1]]) else: continue @@ -110,10 +118,8 @@ def check_data(op_type, data, module_dict): module_node_mapping[pt_name] = node.name return module_node_mapping -def get_quantizable_onnx_ops( - int8_model, - module_node_mapping -): + +def get_quantizable_onnx_ops(int8_model, module_node_mapping): """Get quantizable onnx ops. Args: @@ -125,16 +131,19 @@ def get_quantizable_onnx_ops( """ quantize_nodes = [] for name, module in int8_model.named_modules(): - if 'Conv' in str(module.__class__.__name__) or \ - 'Embedding' in str(module.__class__.__name__) or \ - 'Linear' in str(module.__class__.__name__): - if hasattr(module, 'weight') and callable(module.weight): + if ( + "Conv" in str(module.__class__.__name__) + or "Embedding" in str(module.__class__.__name__) + or "Linear" in str(module.__class__.__name__) + ): + if hasattr(module, "weight") and callable(module.weight): if module.weight().dtype in [torch.qint8, torch.quint8]: - if name.split('.module')[0] in module_node_mapping: - node = module_node_mapping[name.split('.module')[0]] + if name.split(".module")[0] in module_node_mapping: + node = module_node_mapping[name.split(".module")[0]] quantize_nodes.append(node) return quantize_nodes + def dynamic_quant_export( pt_fp32_model, pt_int8_model, @@ -156,18 +165,18 @@ def dynamic_quant_export( example_inputs (dict|list|tuple|torch.Tensor): used to trace torch model. q_config (dict): containing quantization configuration. opset_version (int, optional): opset version. Defaults to 14. - dynamic_axes (dict, optional): dynamic axes. Defaults to + dynamic_axes (dict, optional): dynamic axes. Defaults to {"input": {0: "batch_size"}, "output": {0: "batch_size"}}. input_names (dict, optional): input names. Defaults to None. output_names (dict, optional): output names. Defaults to None. 
- weight_type (str, optional): data types of weight of ONNX model + weight_type (str, optional): data types of weight of ONNX model (only needed for exporting dynamic quantized model). Defaults to 'S8'. """ global op_types_to_quantize - op_types_to_quantize=['MatMul', 'Gemm', 'Gather'] + op_types_to_quantize = ["MatMul", "Gemm", "Gather"] # pylint: disable=E1101 - fp32_onnx_path = save_path + '.tmp' if save_path else 'int8-model.onnx.tmp' + fp32_onnx_path = save_path + ".tmp" if save_path else "int8-model.onnx.tmp" torch_to_fp32_onnx( pt_fp32_model, fp32_onnx_path, @@ -182,19 +191,18 @@ def dynamic_quant_export( module_node_mapping = get_node_mapping(pt_fp32_model, fp32_onnx_path) quantize_nodes = get_quantizable_onnx_ops(pt_int8_model, module_node_mapping) - REDUCE_RANGE = q_config['reduce_range'] + REDUCE_RANGE = q_config["reduce_range"] if REDUCE_RANGE: logger.info("Reduce range is {}".format(str(REDUCE_RANGE))) logger.info("Quantization format is not avalible when executing dynamic quantization.") - if weight_type.upper() == 'S8': + if weight_type.upper() == "S8": weight_type = ortq.QuantType.QInt8 - elif weight_type.upper() == 'U8': + elif weight_type.upper() == "U8": weight_type = ortq.QuantType.QUInt8 else: - assert False, "Right now, we don't support weight type: {}, " \ - "please use S8/U8.".format(weight_type) + assert False, "Right now, we don't support weight type: {}, " "please use S8/U8.".format(weight_type) ortq.quantize_dynamic( fp32_onnx_path, @@ -204,11 +212,12 @@ def dynamic_quant_export( weight_type=weight_type, nodes_to_quantize=quantize_nodes, nodes_to_exclude=[], - extra_options={} + extra_options={}, ) os.remove(fp32_onnx_path) + def static_quant_export( pt_int8_model, save_path, @@ -228,7 +237,7 @@ def static_quant_export( example_inputs (dict|list|tuple|torch.Tensor): used to trace torch model. q_config (dict): containing quantization configuration. opset_version (int, optional): opset version. Defaults to 14. - dynamic_axes (dict, optional): dynamic axes. Defaults to + dynamic_axes (dict, optional): dynamic axes. Defaults to {"input": {0: "batch_size"}, "output": {0: "batch_size"}}. input_names (dict, optional): input names. Defaults to None. output_names (dict, optional): output names. Defaults to None. @@ -245,7 +254,9 @@ def wrapper(*args, **kwargs): return tuple(v for v in output.values() if v is not None) else: return output + return wrapper + pt_int8_model.forward = model_wrapper(pt_int8_model.forward) with torch.no_grad(): @@ -253,38 +264,45 @@ def wrapper(*args, **kwargs): torch.onnx.export( pt_int8_model, input2tuple(example_inputs), - save_path, + save_path, opset_version=opset_version, input_names=input_names, output_names=output_names, dynamic_axes=dynamic_axes, - ) + ) except TypeError: - config_name = "QuantizationAwareTrainingConfig" \ - if q_config['approach'] == "quant_aware_training" else "PostTrainingQuantConfig" - logger.error("Export failed, possibly because unsupported quantized ops. Check " - "neural-compressor/docs/source/export.md#supported-quantized-ops " - "for supported ops.") - logger.error("Please fallback unsupported quantized ops by setting 'op_type_dict' or " - "'op_name_dict' in '{}' config. ".format(config_name)) + config_name = ( + "QuantizationAwareTrainingConfig" + if q_config["approach"] == "quant_aware_training" + else "PostTrainingQuantConfig" + ) + logger.error( + "Export failed, possibly because unsupported quantized ops. Check " + "neural-compressor/docs/source/export.md#supported-quantized-ops " + "for supported ops." 
+ ) + logger.error( + "Please fallback unsupported quantized ops by setting 'op_type_dict' or " + "'op_name_dict' in '{}' config. ".format(config_name) + ) exit(0) except Exception as e: logger.error(e) exit(0) - + if quant_format != "QDQ": sess_options = ort.SessionOptions() - sess_options.graph_optimization_level=ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED - sess_options.optimized_model_filepath=save_path + sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED + sess_options.optimized_model_filepath = save_path ort.InferenceSession(save_path, sess_options) - + + def torch_to_fp32_onnx( pt_fp32_model, save_path, example_inputs, opset_version=14, - dynamic_axes={"input": {0: "batch_size"}, - "output": {0: "batch_size"}}, + dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}}, input_names=None, output_names=None, do_constant_folding=True, @@ -297,7 +315,7 @@ def torch_to_fp32_onnx( save_path (str): save path of ONNX model. example_inputs (dict|list|tuple|torch.Tensor): used to trace torch model. opset_version (int, optional): opset version. Defaults to 14. - dynamic_axes (dict, optional): dynamic axes. Defaults to + dynamic_axes (dict, optional): dynamic axes. Defaults to {"input": {0: "batch_size"}, "output": {0: "batch_size"}}. input_names (dict, optional): input names. Defaults to None. output_names (dict, optional): output names. Defaults to None. @@ -305,28 +323,31 @@ def torch_to_fp32_onnx( verbose (bool, optional): dump verbose or not. Defaults to True. """ from neural_compressor.utils.pytorch import is_int8_model - assert is_int8_model(pt_fp32_model) == False, "The fp32 model is replaced during quantization. " + \ - "please customize a eval_func when quantizing, if not, such as `lambda x: 1`." - + + assert is_int8_model(pt_fp32_model) is False, ( + "The fp32 model is replaced during quantization. " + + "please customize a eval_func when quantizing, if not, such as `lambda x: 1`." + ) + input_names, example_inputs = _prepare_inputs(pt_fp32_model, input_names, example_inputs) with torch.no_grad(): torch.onnx.export( pt_fp32_model, example_inputs, - save_path, + save_path, opset_version=opset_version, input_names=input_names, output_names=output_names, dynamic_axes=dynamic_axes, do_constant_folding=do_constant_folding, - ) - + ) + if verbose: info = "The FP32 ONNX Model exported to path: {0}".format(save_path) - logger.info("*"*len(info)) + logger.info("*" * len(info)) logger.info(info) - logger.info("*"*len(info)) + logger.info("*" * len(info)) def torch_to_int8_onnx( @@ -336,12 +357,11 @@ def torch_to_int8_onnx( example_inputs, q_config, opset_version=14, - dynamic_axes={"input": {0: "batch_size"}, - "output": {0: "batch_size"}}, + dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}}, input_names=None, output_names=None, - quant_format: str = 'QDQ', - weight_type: str = 'S8', + quant_format: str = "QDQ", + weight_type: str = "S8", verbose=True, ): """Export INT8 PyTorch model into INT8 ONNX model. @@ -353,40 +373,62 @@ def torch_to_int8_onnx( example_inputs (dict|list|tuple|torch.Tensor): used to trace torch model. q_config (dict): containing quantization configuration. opset_version (int, optional): opset version. Defaults to 14. - dynamic_axes (dict, optional): dynamic axes. Defaults to + dynamic_axes (dict, optional): dynamic axes. Defaults to {"input": {0: "batch_size"}, "output": {0: "batch_size"}}. input_names (dict, optional): input names. Defaults to None. output_names (dict, optional): output names. 
Defaults to None. quant_format (str, optional): _quantization format of ONNX model. Defaults to 'QDQ'. - weight_type (str, optional): data types of weight of ONNX model + weight_type (str, optional): data types of weight of ONNX model (only needed for exporting dynamic quantized model). Defaults to 'S8'. verbose (bool, optional): dump verbose or not. Defaults to True. """ from neural_compressor.utils.pytorch import is_int8_model - assert is_int8_model(pt_int8_model), "The exported model is not INT8 model, "\ - "please reset 'dtype' to 'FP32' or check your model." - - assert not q_config is None, "'q_config' is needed when export an INT8 model." + + assert is_int8_model(pt_int8_model), ( + "The exported model is not INT8 model, " "please reset 'dtype' to 'FP32' or check your model." + ) + + assert q_config is not None, "'q_config' is needed when export an INT8 model." quant_format = quant_format.upper() - if quant_format == 'QDQ' and opset_version < 13: # pragma: no cover + if quant_format == "QDQ" and opset_version < 13: # pragma: no cover opset_version = 13 - logger.warning("QDQ format requires opset_version >= 13, " + - "we reset opset_version={} here".format(opset_version)) + logger.warning( + "QDQ format requires opset_version >= 13, " + "we reset opset_version={} here".format(opset_version) + ) - if q_config['approach'] == 'post_training_dynamic_quant': + if q_config["approach"] == "post_training_dynamic_quant": # dynamic quantization export follow these steps: - # "1. export FP32 PyTorch model to FP32 ONNX model. " + # "1. export FP32 PyTorch model to FP32 ONNX model. " # "2. use FP32 ONNX model as the input model for post training dynamic quantization." # TODO: will be removed once torch supports dynamic quantization export - dynamic_quant_export(pt_fp32_model, pt_int8_model, save_path, example_inputs, q_config, - opset_version, dynamic_axes, input_names, output_names, weight_type) + dynamic_quant_export( + pt_fp32_model, + pt_int8_model, + save_path, + example_inputs, + q_config, + opset_version, + dynamic_axes, + input_names, + output_names, + weight_type, + ) else: - static_quant_export(pt_int8_model, save_path, example_inputs, q_config, opset_version, - dynamic_axes, input_names, output_names, quant_format) - + static_quant_export( + pt_int8_model, + save_path, + example_inputs, + q_config, + opset_version, + dynamic_axes, + input_names, + output_names, + quant_format, + ) + if verbose: info = "The INT8 ONNX Model exported to path: {0}".format(save_path) - logger.info("*"*len(info)) + logger.info("*" * len(info)) logger.info(info) - logger.info("*"*len(info)) + logger.info("*" * len(info)) diff --git a/neural_compressor/experimental/graph_optimization.py b/neural_compressor/experimental/graph_optimization.py index a47a221c02d..ab885b12608 100644 --- a/neural_compressor/experimental/graph_optimization.py +++ b/neural_compressor/experimental/graph_optimization.py @@ -19,23 +19,26 @@ import os import pickle import random -import tempfile import sys +import tempfile + import numpy as np import yaml + from ..conf.config import Graph_Optimization_Conf -from ..conf.dotdict import deep_get, deep_set, DotDict -from .strategy import EXP_STRATEGIES +from ..conf.dotdict import DotDict, deep_get, deep_set +from ..model import BaseModel +from ..model.model import get_model_fwk_name from ..utils import logger from ..utils.create_obj_from_config import create_dataloader from ..utils.utility import CpuInfo, time_limit from .common import Model as NCModel -from ..model import BaseModel -from 
..model.model import get_model_fwk_name +from .strategy import EXP_STRATEGIES -class Graph_Optimization(): + +class Graph_Optimization: """Graph_Optimization class. - + automatically searches for optimal quantization recipes for low precision model inference, achieving best tuning objectives like inference performance within accuracy loss constraints. @@ -59,7 +62,7 @@ def __init__(self, conf_fname_or_obj=None): self._model = None self._eval_dataloader = None self._eval_func = None - self._precisions = 'fp32' + self._precisions = "fp32" self._input = [] self._output = [] self.conf = None @@ -69,10 +72,10 @@ def __init__(self, conf_fname_or_obj=None): else: self.conf = Graph_Optimization_Conf(conf_fname_or_obj) cfg = self.conf.usr_cfg - if cfg.model.framework != 'NA': + if cfg.model.framework != "NA": self.framework = cfg.model.framework.lower() - cfg.tuning.strategy.name = 'automixedprecision' + cfg.tuning.strategy.name = "automixedprecision" seed = cfg.tuning.random_seed random.seed(seed) np.random.seed(seed) @@ -107,22 +110,24 @@ def __call__(self): The "eval_func" tells the tuner whether the converted model meets the accuracy criteria. If not, the Tuner starts a new calibration and tuning flow. For this usage, model, calib_dataloader and eval_func parameters are mandatory. - + Returns: converted model: best converted model found, otherwise return None """ - assert isinstance(self._model, BaseModel), 'need set your Model for quantization....' + assert isinstance(self._model, BaseModel), "need set your Model for quantization...." cfg = self.conf.usr_cfg - if self.framework == 'tensorflow': + if self.framework == "tensorflow": self._model.name = cfg.model.name self._model.output_tensor_names = cfg.model.outputs self._model.input_tensor_names = cfg.model.inputs self._model.workspace_path = cfg.tuning.workspace.path - if 'bf16' in self._precisions or \ - (cfg.mixed_precision and 'bf16' in cfg.mixed_precision.precisions) or \ - (cfg.graph_optimization and 'bf16' in cfg.graph_optimization.precisions): + if ( + "bf16" in self._precisions + or (cfg.mixed_precision and "bf16" in cfg.mixed_precision.precisions) + or (cfg.graph_optimization and "bf16" in cfg.graph_optimization.precisions) + ): cfg.use_bf16 = True else: logger.warning("Only TensorFlow graph optimization is supported at current stage.") @@ -131,7 +136,7 @@ def __call__(self): # when eval_func is set, will be directly used and eval_dataloader can be None if self._eval_func is None: if self._eval_dataloader is None: - eval_dataloader_cfg = deep_get(cfg, 'evaluation.accuracy.dataloader') + eval_dataloader_cfg = deep_get(cfg, "evaluation.accuracy.dataloader") if eval_dataloader_cfg is None: self._eval_func = None else: @@ -144,22 +149,21 @@ def __call__(self): _resume = None # check if interrupted tuning procedure exists. if yes, it will resume the # whole auto tune process. 
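A hedged usage sketch (not from this patch) of the Graph_Optimization flow above; the precision value and model path are placeholders, and the top-level import path is assumed.

from neural_compressor.experimental import Graph_Optimization

graph_opt = Graph_Optimization()       # or pass a YAML path / Graph_Optimization_Conf
graph_opt.precisions = "bf16"          # the setter also accepts a list or a comma-separated string
graph_opt.model = "frozen_model.pb"    # wrapped into a neural_compressor TensorFlow model
optimized_model = graph_opt()          # best converted model, or None if the goal is not met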
- self.resume_file = os.path.abspath(os.path.expanduser(cfg.tuning.workspace.resume)) \ - if cfg.tuning.workspace and cfg.tuning.workspace.resume else None + self.resume_file = ( + os.path.abspath(os.path.expanduser(cfg.tuning.workspace.resume)) + if cfg.tuning.workspace and cfg.tuning.workspace.resume + else None + ) if self.resume_file: - assert os.path.exists(self.resume_file), \ - "The specified resume file {} doesn't exist!".format(self.resume_file) - with open(self.resume_file, 'rb') as f: + assert os.path.exists(self.resume_file), "The specified resume file {} doesn't exist!".format( + self.resume_file + ) + with open(self.resume_file, "rb") as f: _resume = pickle.load(f).__dict__ self.strategy = EXP_STRATEGIES[strategy]( - self._model, - self.conf, - None, - None, - self._eval_dataloader, - self._eval_func, - _resume) + self._model, self.conf, None, None, self._eval_dataloader, self._eval_func, _resume + ) try: with time_limit(self.conf.usr_cfg.tuning.exit_policy.timeout): @@ -168,19 +172,20 @@ def __call__(self): pass except Exception as e: logger.info("Unexpected exception {} happened during turing.".format(repr(e))) - finally: + finally: if self.strategy.best_qmodel: logger.info( "Specified timeout or max trials is reached! " - "Found a converted model which meet accuracy goal. Exit.") + "Found a converted model which meet accuracy goal. Exit." + ) self.strategy.deploy_config() else: logger.info( "Specified timeout or max trials is reached! " - "Not found any converted model which meet accuracy goal. Exit.") + "Not found any converted model which meet accuracy goal. Exit." + ) - logger.info("Graph optimization is done. Please invoke model.save() to save " \ - "optimized model to disk.") + logger.info("Graph optimization is done. Please invoke model.save() to save " "optimized model to disk.") return self.strategy.best_qmodel @@ -189,29 +194,32 @@ def __call__(self): def dataset(self, dataset_type, *args, **kwargs): """Get dataset.""" from .data import Datasets + return Datasets(self.framework)[dataset_type](*args, **kwargs) def set_config_by_model(self, model_obj): """Set model config.""" - if model_obj.framework() != 'tensorflow': + if model_obj.framework() != "tensorflow": logger.warning("Only TensorFlow graph optimization is supported at current stage.") sys.exit(0) self.conf.usr_cfg.model.framework = model_obj.framework() - if self._precisions == ['bf16'] and not CpuInfo().bf16: - if os.getenv('FORCE_BF16') == '1': - logger.warning("Graph optimization will generate bf16 graph although " \ - "the hardware doesn't support bf16 instruction.") + if self._precisions == ["bf16"] and not CpuInfo().bf16: + if os.getenv("FORCE_BF16") == "1": + logger.warning( + "Graph optimization will generate bf16 graph although " + "the hardware doesn't support bf16 instruction." 
+ ) else: - logger.warning("Graph optimization exits due to the hardware " \ - "doesn't support bf16 instruction.") + logger.warning("Graph optimization exits due to the hardware " "doesn't support bf16 instruction.") sys.exit(0) - self.conf.usr_cfg.graph_optimization.precisions = self._precisions if \ - isinstance(self._precisions, list) else [self._precisions] + self.conf.usr_cfg.graph_optimization.precisions = ( + self._precisions if isinstance(self._precisions, list) else [self._precisions] + ) self.conf.usr_cfg.model.inputs = self._input - if isinstance(self._output, str) and ',' in self._output: - self.conf.usr_cfg.model.outputs = [s.strip() for s in self._output.split(',')] + if isinstance(self._output, str) and "," in self._output: + self.conf.usr_cfg.model.outputs = [s.strip() for s in self._output.split(",")] else: self.conf.usr_cfg.model.outputs = self._output @@ -225,8 +233,7 @@ def precisions(self, customized_precisions): if isinstance(customized_precisions, list): self._precisions = sorted([i.strip() for i in customized_precisions]) elif isinstance(customized_precisions, str): - self._precisions = sorted([i.strip() for i in customized_precisions.split(',')]) - + self._precisions = sorted([i.strip() for i in customized_precisions.split(",")]) @property def input(self): @@ -255,11 +262,11 @@ def eval_dataloader(self): def eval_dataloader(self, dataloader): """Set Data loader for evaluation. - It is iterable and the batched data should consists of a tuple like (input, label), - when eval_dataloader is set, user should configure postprocess(optional) and metric - in yaml file or set postprocess and metric cls. Notice evaluation dataloader will be + It is iterable and the batched data should consists of a tuple like (input, label), + when eval_dataloader is set, user should configure postprocess(optional) and metric + in yaml file or set postprocess and metric cls. Notice evaluation dataloader will be used to generate data for model inference, make sure the input data can be feed to model. - + Args: dataloader(generator): user are supported to set a user defined dataloader which meet the requirements that can yield tuple of @@ -276,8 +283,8 @@ def eval_dataloader(self, dataloader): creating iterable dataloader from neural_compressor.common.DataLoader """ from .common import _generate_common_dataloader - self._eval_dataloader = _generate_common_dataloader( - dataloader, self.framework) + + self._eval_dataloader = _generate_common_dataloader(dataloader, self.framework) @property def model(self): @@ -287,7 +294,7 @@ def model(self): @model.setter def model(self, user_model): """Set the user model and dispatch to framework specific internal model object. - + Args: user_model: user are supported to set model from original framework model format (eg, tensorflow frozen_pb or path to a saved model), but not recommended. @@ -300,7 +307,7 @@ def model(self, user_model): """ if not isinstance(user_model, BaseModel): logger.warning("Force convert framework model to neural_compressor model.") - if self.conf.usr_cfg.model.framework == 'NA': + if self.conf.usr_cfg.model.framework == "NA": self.framework = get_model_fwk_name(user_model) if self.framework == "pytorch": if self.conf.usr_cfg.model.backend == "default": @@ -313,20 +320,21 @@ def model(self, user_model): else: self._model = NCModel(user_model, framework=self.framework) else: - assert self.conf.usr_cfg.model.framework != 'NA', \ - "Please pass an original framework model but not neural compressor model!" 
+ assert (
+ self.conf.usr_cfg.model.framework != "NA"
+ ), "Please pass an original framework model but not neural compressor model!"
 self._model = user_model
 @property
 def metric(self):
 """Get metric."""
- assert False, 'Should not try to get the value of `metric` attribute.'
+ assert False, "Should not try to get the value of `metric` attribute."
 return None
 @metric.setter
 def metric(self, user_metric):
 """Set metric class.
-
+
 neural_compressor will initialize this class when evaluation neural_compressor have many built-in metrics, but user can set specific metric through this api. The metric class should take the outputs of the model or
@@ -334,7 +342,7 @@ def metric(self, user_metric):
 (predictions, labels) as inputs for update, and user_metric.metric_cls should be sub_class of neural_compressor.metric.BaseMetric or user defined metric object
-
+
 Args:
 user_metric(neural_compressor.common.Metric): user_metric should be object initialized from neural_compressor.common.Metric, in this method the
@@ -342,21 +350,23 @@ def metric(self, user_metric):
 specific frameworks and initialized.
 """
 if deep_get(self.conf.usr_cfg, "evaluation.accuracy.metric"):
- logger.warning("Override the value of `metric` field defined in yaml file" \
- " as user defines the value of `metric` attribute by code.")
-
- from .common import Metric as NCMetric
+ logger.warning(
+ "Override the value of `metric` field defined in yaml file"
+ " as user defines the value of `metric` attribute by code."
+ )
+ from ..metric import METRICS
+ from .common import Metric as NCMetric
+
 if isinstance(user_metric, NCMetric):
 name = user_metric.name
 metric_cls = user_metric.metric_cls
 metric_cfg = {name: {**user_metric.kwargs}}
 else:
- for i in ['reset', 'update', 'result']:
- assert hasattr(user_metric, i), 'Please realise {} function' \
- 'in user defined metric'.format(i)
+ for i in ["reset", "update", "result"]:
+ assert hasattr(user_metric, i), "Please realize the {} function " "in the user defined metric".format(i)
 metric_cls = type(user_metric).__name__
- name = 'user_' + metric_cls
+ name = "user_" + metric_cls
 metric_cfg = {name: id(user_metric)}
 deep_set(self.conf.usr_cfg, "evaluation.accuracy.metric", metric_cfg)
@@ -368,7 +378,7 @@ def metric(self, user_metric):
 @property
 def postprocess(self, user_postprocess):
 """Get postprocess."""
- assert False, 'Should not try to get the value of `postprocess` attribute.'
+ assert False, "Should not try to get the value of `postprocess` attribute."
 return None
 @postprocess.setter
@@ -379,34 +389,39 @@ def postprocess(self, user_postprocess):
 The postprocess class should take the outputs of the model as inputs, and output (predictions, labels) as inputs for metric update. user_postprocess.postprocess_cls should be sub_class of neural_compressor.data.BaseTransform.
-
+
 Args:
- user_postprocess(neural_compressor.common.Postprocess): user_postprocess should be object
- initialized from neural_compressor.common.Postprocess, in this method the
+ user_postprocess(neural_compressor.common.Postprocess): user_postprocess should be object
+ initialized from neural_compressor.common.Postprocess, in this method the
 user_postprocess.postprocess_cls will be registered to specific frameworks and initialized.
 """
 from neural_compressor.data import Postprocess as NCPostprocess
- assert isinstance(user_postprocess, NCPostprocess), \
- 'please initialize a neural_compressor.common.Postprocess and set....'
- postprocess_cfg = {user_postprocess.name : {**user_postprocess.kwargs}} + + assert isinstance( + user_postprocess, NCPostprocess + ), "please initialize a neural_compressor.common.Postprocess and set...." + postprocess_cfg = {user_postprocess.name: {**user_postprocess.kwargs}} if deep_get(self.conf.usr_cfg, "evaluation.accuracy.postprocess"): - logger.warning("Override the value of `postprocess` field defined in yaml file" \ - " as user defines the value of `postprocess` attribute by code.") + logger.warning( + "Override the value of `postprocess` field defined in yaml file" + " as user defines the value of `postprocess` attribute by code." + ) deep_set(self.conf.usr_cfg, "evaluation.accuracy.postprocess.transform", postprocess_cfg) from neural_compressor.data import TRANSFORMS - postprocesses = TRANSFORMS(self.framework, 'postprocess') + + postprocesses = TRANSFORMS(self.framework, "postprocess") postprocesses.register(user_postprocess.name, user_postprocess.postprocess_cls) @property def eval_func(self): """Get evaluation function.""" - assert False, 'Should not try to get the value of `eval_func` attribute.' + assert False, "Should not try to get the value of `eval_func` attribute." return None @eval_func.setter def eval_func(self, user_eval_func): """Set evaluation function provided by user. - + Args: user_eval_func: This function takes model as parameter, and evaluation dataset and metrics should be @@ -424,6 +439,7 @@ def eval_func(model): def __repr__(self): """Return name.""" - return 'GraphOptimization' + return "GraphOptimization" + GraphOptimization = Graph_Optimization diff --git a/neural_compressor/experimental/metric/__init__.py b/neural_compressor/experimental/metric/__init__.py index f471d6654de..363fc66764f 100644 --- a/neural_compressor/experimental/metric/__init__.py +++ b/neural_compressor/experimental/metric/__init__.py @@ -14,8 +14,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - - """Intel Neural Compressor Metric.""" from .metric import METRICS, BaseMetric, metric_registry @@ -25,7 +23,7 @@ modules = glob.glob(join(dirname(__file__), "*.py")) for f in modules: - if isfile(f) and not f.startswith('__') and not f.endswith('__init__.py'): + if isfile(f) and not f.startswith("__") and not f.endswith("__init__.py"): __import__(basename(f)[:-3], globals(), locals(), level=1) diff --git a/neural_compressor/experimental/metric/bleu.py b/neural_compressor/experimental/metric/bleu.py index 9a5e09df572..c0b41be60b3 100644 --- a/neural_compressor/experimental/metric/bleu.py +++ b/neural_compressor/experimental/metric/bleu.py @@ -17,11 +17,12 @@ """Script for BLEU metric.""" import re -import six import sys import unicodedata from typing import List, Sequence +import six + from .bleu_util import compute_bleu from .metric import metric_registry @@ -31,8 +32,8 @@ class UnicodeRegex(object): Attributes: nondigit_punct_re: The compiled regular expressions to recognize - punctuation preceded with a digit. - punct_nondigit_re: The compiled regular expressions to recognize + punctuation preceded with a digit. + punct_nondigit_re: The compiled regular expressions to recognize punctuation followed by a digit. symbol_re: The compiled regular expressions to recognize symbols. """ @@ -54,8 +55,9 @@ def property_chars(self, prefix: str) -> str: punctuation: The join result of all Unicode strings starting with a specific prefix. 
""" - punctuation = "".join(six.unichr(x) for x in range(sys.maxunicode) \ - if unicodedata.category(six.unichr(x)).startswith(prefix)) + punctuation = "".join( + six.unichr(x) for x in range(sys.maxunicode) if unicodedata.category(six.unichr(x)).startswith(prefix) + ) return punctuation @@ -81,14 +83,14 @@ def bleu_tokenize(string: str) -> List[str]: return tokens -@metric_registry('BLEU', 'tensorflow, tensorflow_itex') +@metric_registry("BLEU", "tensorflow, tensorflow_itex") class BLEU(object): """Computes the BLEU (Bilingual Evaluation Understudy) score. - BLEU is an algorithm for evaluating the quality of text which has - been machine-translated from one natural language to another. - This implementent approximate the BLEU score since we do not - glue word pieces or decode the ids and tokenize the output. + BLEU is an algorithm for evaluating the quality of text which has + been machine-translated from one natural language to another. + This implementent approximate the BLEU score since we do not + glue word pieces or decode the ids and tokenize the output. By default, we use ngram order of 4 and use brevity penalty. Also, this does not have beam search. @@ -119,9 +121,11 @@ def update(self, prediction: Sequence[str], label: Sequence[str]) -> None: and label are different. """ if len(label) != len(prediction): - raise ValueError("Reference and prediction files have different number " - "of lines. If training only a few steps (100-200), the " - "translation may be empty.") + raise ValueError( + "Reference and prediction files have different number " + "of lines. If training only a few steps (100-200), the " + "translation may be empty." + ) label = [x.lower() for x in label] prediction = [x.lower() for x in prediction] label = [bleu_tokenize(x) for x in label] diff --git a/neural_compressor/experimental/metric/bleu_util.py b/neural_compressor/experimental/metric/bleu_util.py index 875321f4dd3..678c3ffbf3e 100644 --- a/neural_compressor/experimental/metric/bleu_util.py +++ b/neural_compressor/experimental/metric/bleu_util.py @@ -35,50 +35,50 @@ https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/bleu_hook.py """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import collections import math +from typing import List, Sequence, Union import numpy as np +from six.moves import xrange # pylint: disable=redefined-builtin from neural_compressor.utils.utility import LazyImport -from six.moves import xrange # pylint: disable=redefined-builtin -from typing import List, Sequence, Union -tf = LazyImport('tensorflow') +tf = LazyImport("tensorflow") + -def _get_ngrams_with_counter(segment: Sequence[str], - max_order: List[int]) -> collections.Counter: +def _get_ngrams_with_counter(segment: Sequence[str], max_order: List[int]) -> collections.Counter: """Extract all n-grams up to a given maximum order from an input segment. Args: segment: The text segment from which n-grams will be extracted. - max_order: The maximum length in tokens of the n-grams returned + max_order: The maximum length in tokens of the n-grams returned by this methods. Returns: - ngram_counts: The Counter containing all n-grams up to max_order + ngram_counts: The Counter containing all n-grams up to max_order in segment with a count of how many times each n-gram occurred. 
""" ngram_counts = collections.Counter() for order in xrange(1, max_order + 1): for i in xrange(0, len(segment) - order + 1): - ngram = tuple(segment[i:i + order]) + ngram = tuple(segment[i : i + order]) ngram_counts[ngram] += 1 return ngram_counts -def compute_bleu(reference_corpus: Union[Sequence[str], Sequence[Sequence[str]]], - translation_corpus: Sequence[str], - max_order: int = 4, - use_bp: bool = True) -> float: +def compute_bleu( + reference_corpus: Union[Sequence[str], Sequence[Sequence[str]]], + translation_corpus: Sequence[str], + max_order: int = 4, + use_bp: bool = True, +) -> float: """Compute the BLEU score of translated segments against its references. Args: - reference_corpus: List of references for each translation. + reference_corpus: List of references for each translation. Each reference should be tokenized into a list of tokens. translation_corpus: List of translations to score. Each translation should be tokenized into a list of tokens. @@ -97,21 +97,20 @@ def compute_bleu(reference_corpus: Union[Sequence[str], Sequence[Sequence[str]]] possible_matches_by_order = [0] * max_order precisions = [] - for (references, translations) in zip(reference_corpus, translation_corpus): + for references, translations in zip(reference_corpus, translation_corpus): reference_length += len(references) translation_length += len(translations) ref_ngram_counts = _get_ngrams_with_counter(references, max_order) translation_ngram_counts = _get_ngrams_with_counter(translations, max_order) - overlap = dict((ngram, - min(count, translation_ngram_counts[ngram])) - for ngram, count in ref_ngram_counts.items()) + overlap = dict( + (ngram, min(count, translation_ngram_counts[ngram])) for ngram, count in ref_ngram_counts.items() + ) for ngram in overlap: matches_by_order[len(ngram) - 1] += overlap[ngram] for ngram in translation_ngram_counts: - possible_matches_by_order[len(ngram) - 1] += translation_ngram_counts[ - ngram] + possible_matches_by_order[len(ngram) - 1] += translation_ngram_counts[ngram] precisions = [0] * max_order smooth = 1.0 @@ -120,8 +119,7 @@ def compute_bleu(reference_corpus: Union[Sequence[str], Sequence[Sequence[str]]] if possible_matches_by_order[i] > 0: precisions[i] = float(matches_by_order[i]) / possible_matches_by_order[i] if matches_by_order[i] > 0: - precisions[i] = float(matches_by_order[i]) / possible_matches_by_order[ - i] + precisions[i] = float(matches_by_order[i]) / possible_matches_by_order[i] else: smooth *= 2 precisions[i] = 1.0 / (smooth * possible_matches_by_order[i]) @@ -134,6 +132,6 @@ def compute_bleu(reference_corpus: Union[Sequence[str], Sequence[Sequence[str]]] if use_bp: ratio = translation_length / reference_length - bp = math.exp(1 - 1. / ratio) if ratio < 1.0 else 1.0 + bp = math.exp(1 - 1.0 / ratio) if ratio < 1.0 else 1.0 bleu_score = np.float32(geo_mean * bp) return bleu_score diff --git a/neural_compressor/experimental/metric/coco_label_map.py b/neural_compressor/experimental/metric/coco_label_map.py index 82327cb6ce1..724842b7d40 100644 --- a/neural_compressor/experimental/metric/coco_label_map.py +++ b/neural_compressor/experimental/metric/coco_label_map.py @@ -16,88 +16,87 @@ # limitations under the License. 
# # - """The dict mapping category IDs to its names of labels.""" category_map = { - 1: 'person', - 2: 'bicycle', - 3: 'car', - 4: 'motorcycle', - 5: 'airplane', - 6: 'bus', - 7: 'train', - 8: 'truck', - 9: 'boat', - 10: 'traffic light', - 11: 'fire hydrant', - 13: 'stop sign', - 14: 'parking meter', - 15: 'bench', - 16: 'bird', - 17: 'cat', - 18: 'dog', - 19: 'horse', - 20: 'sheep', - 21: 'cow', - 22: 'elephant', - 23: 'bear', - 24: 'zebra', - 25: 'giraffe', - 27: 'backpack', - 28: 'umbrella', - 31: 'handbag', - 32: 'tie', - 33: 'suitcase', - 34: 'frisbee', - 35: 'skis', - 36: 'snowboard', - 37: 'sports ball', - 38: 'kite', - 39: 'baseball bat', - 40: 'baseball glove', - 41: 'skateboard', - 42: 'surfboard', - 43: 'tennis racket', - 44: 'bottle', - 46: 'wine glass', - 47: 'cup', - 48: 'fork', - 49: 'knife', - 50: 'spoon', - 51: 'bowl', - 52: 'banana', - 53: 'apple', - 54: 'sandwich', - 55: 'orange', - 56: 'broccoli', - 57: 'carrot', - 58: 'hot dog', - 59: 'pizza', - 60: 'donut', - 61: 'cake', - 62: 'chair', - 63: 'couch', - 64: 'potted plant', - 65: 'bed', - 67: 'dining table', - 70: 'toilet', - 72: 'tv', - 73: 'laptop', - 74: 'mouse', - 75: 'remote', - 76: 'keyboard', - 77: 'cell phone', - 78: 'microwave', - 79: 'oven', - 80: 'toaster', - 81: 'sink', - 82: 'refrigerator', - 84: 'book', - 85: 'clock', - 86: 'vase', - 87: 'scissors', - 88: 'teddy bear', - 89: 'hair drier', - 90: 'toothbrush' + 1: "person", + 2: "bicycle", + 3: "car", + 4: "motorcycle", + 5: "airplane", + 6: "bus", + 7: "train", + 8: "truck", + 9: "boat", + 10: "traffic light", + 11: "fire hydrant", + 13: "stop sign", + 14: "parking meter", + 15: "bench", + 16: "bird", + 17: "cat", + 18: "dog", + 19: "horse", + 20: "sheep", + 21: "cow", + 22: "elephant", + 23: "bear", + 24: "zebra", + 25: "giraffe", + 27: "backpack", + 28: "umbrella", + 31: "handbag", + 32: "tie", + 33: "suitcase", + 34: "frisbee", + 35: "skis", + 36: "snowboard", + 37: "sports ball", + 38: "kite", + 39: "baseball bat", + 40: "baseball glove", + 41: "skateboard", + 42: "surfboard", + 43: "tennis racket", + 44: "bottle", + 46: "wine glass", + 47: "cup", + 48: "fork", + 49: "knife", + 50: "spoon", + 51: "bowl", + 52: "banana", + 53: "apple", + 54: "sandwich", + 55: "orange", + 56: "broccoli", + 57: "carrot", + 58: "hot dog", + 59: "pizza", + 60: "donut", + 61: "cake", + 62: "chair", + 63: "couch", + 64: "potted plant", + 65: "bed", + 67: "dining table", + 70: "toilet", + 72: "tv", + 73: "laptop", + 74: "mouse", + 75: "remote", + 76: "keyboard", + 77: "cell phone", + 78: "microwave", + 79: "oven", + 80: "toaster", + 81: "sink", + 82: "refrigerator", + 84: "book", + 85: "clock", + 86: "vase", + 87: "scissors", + 88: "teddy bear", + 89: "hair drier", + 90: "toothbrush", } diff --git a/neural_compressor/experimental/metric/coco_tools.py b/neural_compressor/experimental/metric/coco_tools.py index 265185baf10..2f9369798df 100644 --- a/neural_compressor/experimental/metric/coco_tools.py +++ b/neural_compressor/experimental/metric/coco_tools.py @@ -14,8 +14,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - - """Wrappers for third party pycocotools to be used within object_detection. 
Note that nothing in this file is tensorflow related and thus cannot @@ -41,32 +39,29 @@ evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, agnostic_mode=False) metrics = evaluator.ComputeMetrics() - """ import copy import time +from collections import OrderedDict +from typing import Any, Dict, List, Set, Union import numpy as np +from pycocotools import coco, cocoeval, mask -from collections import OrderedDict from neural_compressor.utils import logger -from pycocotools import coco -from pycocotools import cocoeval -from pycocotools import mask -from typing import Any, Dict, List, Set, Union class COCOWrapper(coco.COCO): """Wrapper for the pycocotools COCO class. - + Attributes: dataset: a dictionary holding bounding box annotations in the COCO format. detection_type: type of detections being wrapped. Can be one of ['bbox', 'segmentation'] """ - def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): + def __init__(self, dataset: Dict[str, Any], detection_type: str = "bbox"): """Construct a COCOWrapper. See http://mscoco.org/dataset/#format for a description of the format. @@ -82,11 +77,12 @@ def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): Raises: ValueError: if detection_type is unsupported. """ - supported_detection_types = ['bbox', 'segmentation'] + supported_detection_types = ["bbox", "segmentation"] if detection_type not in supported_detection_types: - raise ValueError('Unsupported detection type: {}. ' - 'Supported values are: {}'.format( - detection_type, supported_detection_types)) + raise ValueError( + "Unsupported detection type: {}. " + "Supported values are: {}".format(detection_type, supported_detection_types) + ) self._detection_type = detection_type coco.COCO.__init__(self) self.dataset = dataset @@ -109,38 +105,36 @@ def LoadAnnotations(self, annotations: list) -> coco.COCO: a coco.COCO datastructure holding object detection annotations results Raises: - ValueError: if (1) annotations is not a list or annotations do not + ValueError: if (1) annotations is not a list or annotations do not correspond to the images contained in self. 
""" results = coco.COCO() - results.dataset['images'] = [img for img in self.dataset['images']] + results.dataset["images"] = [img for img in self.dataset["images"]] logger.info("Load and prepare annotation results.") tic = time.time() if not isinstance(annotations, list): - raise ValueError('annotations is not a list of objects') - annotation_img_ids = [ann['image_id'] for ann in annotations] - if (set(annotation_img_ids) != (set(annotation_img_ids) - & set(self.getImgIds()))): - raise ValueError('Results do not correspond to current coco set') - results.dataset['categories'] = copy.deepcopy( - self.dataset['categories']) - if self._detection_type == 'bbox': + raise ValueError("annotations is not a list of objects") + annotation_img_ids = [ann["image_id"] for ann in annotations] + if set(annotation_img_ids) != (set(annotation_img_ids) & set(self.getImgIds())): + raise ValueError("Results do not correspond to current coco set") + results.dataset["categories"] = copy.deepcopy(self.dataset["categories"]) + if self._detection_type == "bbox": for idx, ann in enumerate(annotations): - bb = ann['bbox'] - ann['area'] = bb[2] * bb[3] - ann['id'] = idx + 1 - ann['iscrowd'] = 0 - elif self._detection_type == 'segmentation': + bb = ann["bbox"] + ann["area"] = bb[2] * bb[3] + ann["id"] = idx + 1 + ann["iscrowd"] = 0 + elif self._detection_type == "segmentation": for idx, ann in enumerate(annotations): - ann['area'] = mask.area(ann['segmentation']) - ann['bbox'] = mask.toBbox(ann['segmentation']) - ann['id'] = idx + 1 - ann['iscrowd'] = 0 - logger.info('DONE (t=%0.2fs)', (time.time() - tic)) + ann["area"] = mask.area(ann["segmentation"]) + ann["bbox"] = mask.toBbox(ann["segmentation"]) + ann["id"] = idx + 1 + ann["iscrowd"] = 0 + logger.info("DONE (t=%0.2fs)", (time.time() - tic)) - results.dataset['annotations'] = annotations + results.dataset["annotations"] = annotations results.createIndex() return results @@ -159,13 +153,15 @@ class COCOEvalWrapper(cocoeval.COCOeval): metrics = evaluator.ComputeMetrics() """ - def __init__(self, - groundtruth: coco.COCO = None, - detections: coco.COCO = None, - agnostic_mode = False, - iou_type: str = 'bbox', - iou_thrs: Union[str, float] = None, - map_points=None): + def __init__( + self, + groundtruth: coco.COCO = None, + detections: coco.COCO = None, + agnostic_mode=False, + iou_type: str = "bbox", + iou_thrs: Union[str, float] = None, + map_points=None, + ): """Construct a COCOEvalWrapper. Note that for the area-based metrics to be meaningful, detection and @@ -186,27 +182,20 @@ class labels, treating all detections as proposals. map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for 11-point interpolated AP, 0 for area under PR curve. 
""" - cocoeval.COCOeval.__init__(self, - groundtruth, - detections, - iouType=iou_type) + cocoeval.COCOeval.__init__(self, groundtruth, detections, iouType=iou_type) if agnostic_mode: self.params.useCats = 0 - if iou_thrs == '0.5:0.05:0.95': - self.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, \ - endpoint=True) + if iou_thrs == "0.5:0.05:0.95": + self.params.iouThrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True) elif isinstance(iou_thrs, float): - self.params.iouThrs = [iou_thrs] + self.params.iouThrs = [iou_thrs] if map_points == 101: - self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, \ - endpoint=True) + self.params.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True) if map_points == 11: - self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .1)) + 1, \ - endpoint=True) + self.params.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.1)) + 1, endpoint=True) if map_points == 0: - self.params.recThrs = [-1] - + self.params.recThrs = [-1] def GetCategory(self, category_id: int) -> dict: """Fetch dictionary holding category information given category id. @@ -229,65 +218,67 @@ def GetCategoryIdList(self) -> List[int]: def accumulate(self, p: cocoeval.Params = None): """Accumulate evaluation results per image and store it to self.eval. - + Args: p: input params for evaluation """ - print('Accumulating evaluation results...') + print("Accumulating evaluation results...") tic = time.time() if not self.evalImgs: - print('Please run evaluate() first') + print("Please run evaluate() first") # allows input customized parameters if p is None: p = self.params p.catIds = p.catIds if p.useCats == 1 else [-1] - T = len(p.iouThrs) - R = len(p.recThrs) - K = len(p.catIds) if p.useCats else 1 - A = len(p.areaRng) - M = len(p.maxDets) - precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories - recall = -np.ones((T,K,A,M)) - scores = -np.ones((T,R,K,A,M)) + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -np.ones((T, R, K, A, M)) # -1 for the precision of absent categories + recall = -np.ones((T, K, A, M)) + scores = -np.ones((T, R, K, A, M)) # create dictionary for future indexing _pe = self._paramsEval - print('-pe', _pe) + print("-pe", _pe) catIds = _pe.catIds if _pe.useCats else [-1] setK = set(catIds) setA = set(map(tuple, _pe.areaRng)) setM = set(_pe.maxDets) setI = set(_pe.imgIds) # get inds to evaluate - k_list = [n for n, k in enumerate(p.catIds) if k in setK] + k_list = [n for n, k in enumerate(p.catIds) if k in setK] m_list = [m for n, m in enumerate(p.maxDets) if m in setM] a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] - i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] I0 = len(_pe.imgIds) A0 = len(_pe.areaRng) # retrieve E at each category, area range, and max number of detections for k, k0 in enumerate(k_list): - Nk = k0*A0*I0 + Nk = k0 * A0 * I0 for a, a0 in enumerate(a_list): - Na = a0*I0 + Na = a0 * I0 for m, maxDet in enumerate(m_list): E = [self.evalImgs[Nk + Na + i] for i in i_list] - E = [e for e in E if not e is None] - if len(E) == 0: continue - dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) + E = [e for e in E if e is not None] + if len(E) == 0: + continue + dtScores = 
np.concatenate([e["dtScores"][0:maxDet] for e in E]) # different sorting method generates slightly different results. # mergesort is used to be consistent as Matlab implementation. - inds = np.argsort(-dtScores, kind='mergesort') + inds = np.argsort(-dtScores, kind="mergesort") dtScoresSorted = dtScores[inds] - dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] - dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] - gtIg = np.concatenate([e['gtIgnore'] for e in E]) - npig = np.count_nonzero(gtIg==0 ) - if npig == 0: continue - tps = np.logical_and( dtm, np.logical_not(dtIg) ) - fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) + dtm = np.concatenate([e["dtMatches"][:, 0:maxDet] for e in E], axis=1)[:, inds] + dtIg = np.concatenate([e["dtIgnore"][:, 0:maxDet] for e in E], axis=1)[:, inds] + gtIg = np.concatenate([e["gtIgnore"] for e in E]) + npig = np.count_nonzero(gtIg == 0) + if npig == 0: + continue + tps = np.logical_and(dtm, np.logical_not(dtIg)) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg)) tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) @@ -296,73 +287,72 @@ def accumulate(self, p: cocoeval.Params = None): fp = np.array(fp) nd = len(tp) rc = tp / npig - pr = tp / (fp+tp+np.spacing(1)) + pr = tp / (fp + tp + np.spacing(1)) # calculate precision if R == 1: - rc = np.concatenate(([0.], rc, [1.])) - pr = np.concatenate(([0.], pr, [0.])) - - # compute the precision envelope - for i in range(pr.size - 1, 0, -1): - pr[i - 1] = np.maximum(pr[i - 1], pr[i]) - - # to calculate area under PR curve, look for points - # where X axis (recall) changes value - change_point = np.where(rc[1:] != rc[:-1])[0] - # and sum (\Delta recall) * recall - res = np.sum((rc[change_point + 1] - rc[change_point]) \ - * pr[change_point + 1]) - precision[t,:,k,a,m] = np.array([res]) + rc = np.concatenate(([0.0], rc, [1.0])) + pr = np.concatenate(([0.0], pr, [0.0])) + + # compute the precision envelope + for i in range(pr.size - 1, 0, -1): + pr[i - 1] = np.maximum(pr[i - 1], pr[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + change_point = np.where(rc[1:] != rc[:-1])[0] + # and sum (\Delta recall) * recall + res = np.sum((rc[change_point + 1] - rc[change_point]) * pr[change_point + 1]) + precision[t, :, k, a, m] = np.array([res]) else: - q = np.zeros((R,)) + q = np.zeros((R,)) - # numpy is slow without cython optimization for accessing elements - # use python array gets significant speed improvement - pr = pr.tolist(); q = q.tolist() + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist() + q = q.tolist() - for i in range(nd-1, 0, -1): - if pr[i] > pr[i-1]: - pr[i-1] = pr[i] + for i in range(nd - 1, 0, -1): + if pr[i] > pr[i - 1]: + pr[i - 1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side="left") + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + except: + pass + precision[t, :, k, a, m] = np.array(q) - inds = np.searchsorted(rc, p.recThrs, side='left') - try: - for ri, pi in enumerate(inds): - q[ri] = pr[pi] - except: - pass - precision[t,:,k,a,m] = np.array(q) - # calculate recall if nd: - recall[t,k,a,m] = rc[-1] + recall[t, k, a, m] = rc[-1] else: - recall[t,k,a,m] = 0 + recall[t, k, a, m] = 0 # calculate score ss = np.zeros((R,)) - inds = np.searchsorted(rc, p.recThrs, side='left') + inds = 
np.searchsorted(rc, p.recThrs, side="left") try: - for ri, pi in enumerate(inds): - ss[ri] = dtScoresSorted[pi] + for ri, pi in enumerate(inds): + ss[ri] = dtScoresSorted[pi] except: - pass - scores[t,:,k,a,m] = np.array(ss) + pass + scores[t, :, k, a, m] = np.array(ss) # exit(0) self.eval = { - 'params': p, - 'counts': [T, R, K, A, M], - 'precision': precision, - 'recall': recall, - 'scores': scores, + "params": p, + "counts": [T, R, K, A, M], + "precision": precision, + "recall": recall, + "scores": scores, } toc = time.time() - print('DONE (t={:0.2f}s).'.format( toc-tic)) - + print("DONE (t={:0.2f}s).".format(toc - tic)) - def ComputeMetrics(self, - include_metrics_per_category: bool = False, - all_metrics_per_category: bool = False): # pragma: no cover + def ComputeMetrics( + self, include_metrics_per_category: bool = False, all_metrics_per_category: bool = False + ): # pragma: no cover """Compute detection metrics. Args: @@ -393,7 +383,7 @@ def ComputeMetrics(self, 'Recall/AR@100 (medium)': average recall for medium objects with 100 detections; 'Recall/AR@100 (large)': average recall for large objects with 100 - detections; + detections; and (2) per_category_ap is a dictionary holding category specific results with keys of the form: 'Precision mAP ByCategory/category' (without the supercategory part if no supercategories exist). @@ -409,57 +399,63 @@ def ComputeMetrics(self, self.accumulate() self.summarize() - summary_metrics = OrderedDict([ - ('Precision/mAP', self.stats[0]), - ('Precision/mAP@.50IOU', self.stats[1]), - ('Precision/mAP@.75IOU', self.stats[2]), - ('Precision/mAP (small)', self.stats[3]), - ('Precision/mAP (medium)', self.stats[4]), - ('Precision/mAP (large)', self.stats[5]), - ('Recall/AR@1', self.stats[6]), ('Recall/AR@10', self.stats[7]), - ('Recall/AR@100', self.stats[8]), - ('Recall/AR@100 (small)', self.stats[9]), - ('Recall/AR@100 (medium)', self.stats[10]), - ('Recall/AR@100 (large)', self.stats[11]) - ]) + summary_metrics = OrderedDict( + [ + ("Precision/mAP", self.stats[0]), + ("Precision/mAP@.50IOU", self.stats[1]), + ("Precision/mAP@.75IOU", self.stats[2]), + ("Precision/mAP (small)", self.stats[3]), + ("Precision/mAP (medium)", self.stats[4]), + ("Precision/mAP (large)", self.stats[5]), + ("Recall/AR@1", self.stats[6]), + ("Recall/AR@10", self.stats[7]), + ("Recall/AR@100", self.stats[8]), + ("Recall/AR@100 (small)", self.stats[9]), + ("Recall/AR@100 (medium)", self.stats[10]), + ("Recall/AR@100 (large)", self.stats[11]), + ] + ) if not include_metrics_per_category: return summary_metrics, {} - if not hasattr(self, 'category_stats'): - raise ValueError('Category stats do not exist') + if not hasattr(self, "category_stats"): + raise ValueError("Category stats do not exist") per_category_ap = OrderedDict([]) if self.GetAgnosticMode(): return summary_metrics, per_category_ap for category_index, category_id in enumerate(self.GetCategoryIdList()): - category = self.GetCategory(category_id)['name'] + category = self.GetCategory(category_id)["name"] # Kept for backward compatilbility # pylint: disable=no-member - per_category_ap['PerformanceByCategory/mAP/{}'.format( - category)] = self.category_stats[0][category_index] + per_category_ap["PerformanceByCategory/mAP/{}".format(category)] = self.category_stats[0][category_index] if all_metrics_per_category: - per_category_ap['Precision mAP ByCategory/{}'.format( - category)] = self.category_stats[0][category_index] - per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( - category)] = 
self.category_stats[1][category_index] - per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( - category)] = self.category_stats[2][category_index] - per_category_ap['Precision mAP (small) ByCategory/{}'.format( - category)] = self.category_stats[3][category_index] - per_category_ap['Precision mAP (medium) ByCategory/{}'.format( - category)] = self.category_stats[4][category_index] - per_category_ap['Precision mAP (large) ByCategory/{}'.format( - category)] = self.category_stats[5][category_index] - per_category_ap['Recall AR@1 ByCategory/{}'.format( - category)] = self.category_stats[6][category_index] - per_category_ap['Recall AR@10 ByCategory/{}'.format( - category)] = self.category_stats[7][category_index] - per_category_ap['Recall AR@100 ByCategory/{}'.format( - category)] = self.category_stats[8][category_index] - per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( - category)] = self.category_stats[9][category_index] - per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( - category)] = self.category_stats[10][category_index] - per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( - category)] = self.category_stats[11][category_index] + per_category_ap["Precision mAP ByCategory/{}".format(category)] = self.category_stats[0][category_index] + per_category_ap["Precision mAP@.50IOU ByCategory/{}".format(category)] = self.category_stats[1][ + category_index + ] + per_category_ap["Precision mAP@.75IOU ByCategory/{}".format(category)] = self.category_stats[2][ + category_index + ] + per_category_ap["Precision mAP (small) ByCategory/{}".format(category)] = self.category_stats[3][ + category_index + ] + per_category_ap["Precision mAP (medium) ByCategory/{}".format(category)] = self.category_stats[4][ + category_index + ] + per_category_ap["Precision mAP (large) ByCategory/{}".format(category)] = self.category_stats[5][ + category_index + ] + per_category_ap["Recall AR@1 ByCategory/{}".format(category)] = self.category_stats[6][category_index] + per_category_ap["Recall AR@10 ByCategory/{}".format(category)] = self.category_stats[7][category_index] + per_category_ap["Recall AR@100 ByCategory/{}".format(category)] = self.category_stats[8][category_index] + per_category_ap["Recall AR@100 (small) ByCategory/{}".format(category)] = self.category_stats[9][ + category_index + ] + per_category_ap["Recall AR@100 (medium) ByCategory/{}".format(category)] = self.category_stats[10][ + category_index + ] + per_category_ap["Recall AR@100 (large) ByCategory/{}".format(category)] = self.category_stats[11][ + category_index + ] return summary_metrics, per_category_ap @@ -477,12 +473,7 @@ def _ConvertBoxToCOCOFormat(box): Returns: A list of floats, in COCO format, representing [xmin, ymin, width, height] """ - return [ - float(box[1]), - float(box[0]), - float(box[3] - box[1]), - float(box[2] - box[0]) - ] + return [float(box[1]), float(box[0]), float(box[3] - box[1]), float(box[2] - box[0])] def _RleCompress(masks): @@ -498,13 +489,15 @@ def _RleCompress(masks): return mask.encode(np.asfortranarray(masks)) -def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], - next_annotation_id: int, - category_id_set: Set[str], - groundtruth_boxes: np.array, - groundtruth_classes: np.array, - groundtruth_masks: Union[np.array, None] = None, - groundtruth_is_crowd: Union[np.array, None] = None) -> list: +def ExportSingleImageGroundtruthToCoco( + image_id: Union[int, str], + next_annotation_id: int, + category_id_set: Set[str], + groundtruth_boxes: np.array, + 
groundtruth_classes: np.array, + groundtruth_masks: Union[np.array, None] = None, + groundtruth_is_crowd: Union[np.array, None] = None, +) -> list: """Export groundtruth of a single image to COCO format. This function converts groundtruth detection annotations represented as numpy @@ -540,54 +533,51 @@ def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], have the correct shapes or (3) if image_ids are not integers """ if len(groundtruth_classes.shape) != 1: - raise ValueError('groundtruth_classes is ' 'expected to be of rank 1.') + raise ValueError("groundtruth_classes is " "expected to be of rank 1.") if len(groundtruth_boxes.shape) != 2: - raise ValueError('groundtruth_boxes is expected to be of ' 'rank 2.') + raise ValueError("groundtruth_boxes is expected to be of " "rank 2.") if groundtruth_boxes.shape[1] != 4: - raise ValueError('groundtruth_boxes should have ' 'shape[1] == 4.') + raise ValueError("groundtruth_boxes should have " "shape[1] == 4.") num_boxes = groundtruth_classes.shape[0] if num_boxes != groundtruth_boxes.shape[0]: raise ValueError( - 'Corresponding entries in groundtruth_classes, ' - 'and groundtruth_boxes should have ' - 'compatible shapes (i.e., agree on the 0th dimension).' - 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % - (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], - image_id)) + "Corresponding entries in groundtruth_classes, " + "and groundtruth_boxes should have " + "compatible shapes (i.e., agree on the 0th dimension)." + "Classes shape: %d. Boxes shape: %d. Image ID: %s" + % (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], image_id) + ) has_is_crowd = groundtruth_is_crowd is not None if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: - raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') + raise ValueError("groundtruth_is_crowd is expected to be of rank 1.") groundtruth_list = [] for i in range(num_boxes): if groundtruth_classes[i] in category_id_set: iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 export_dict = { - 'id': - next_annotation_id + i, - 'image_id': - image_id, - 'category_id': - int(groundtruth_classes[i]), - 'bbox': - list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), - 'area': - float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * - (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), - 'iscrowd': - iscrowd + "id": next_annotation_id + i, + "image_id": image_id, + "category_id": int(groundtruth_classes[i]), + "bbox": list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), + "area": float( + (groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) + * (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1]) + ), + "iscrowd": iscrowd, } if groundtruth_masks is not None: - export_dict['segmentation'] = _RleCompress( - groundtruth_masks[i]) + export_dict["segmentation"] = _RleCompress(groundtruth_masks[i]) groundtruth_list.append(export_dict) return groundtruth_list -def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], - category_id_set: Set[int], - detection_boxes: np.array, - detection_scores: np.array, - detection_classes: np.array) -> list: +def ExportSingleImageDetectionBoxesToCoco( + image_id: Union[int, str], + category_id_set: Set[int], + detection_boxes: np.array, + detection_scores: np.array, + detection_classes: np.array, +) -> list: """Export detections of a single image to COCO format. 
This function converts detections represented as numpy arrays to dictionaries @@ -617,46 +607,41 @@ def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], lists do not have the correct shapes or (3) if image_ids are not integers. """ if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: - raise ValueError( - 'All entries in detection_classes and detection_scores' - 'expected to be of rank 1.') + raise ValueError("All entries in detection_classes and detection_scores" "expected to be of rank 1.") if len(detection_boxes.shape) != 2: - raise ValueError('All entries in detection_boxes expected to be of ' - 'rank 2.') + raise ValueError("All entries in detection_boxes expected to be of " "rank 2.") if detection_boxes.shape[1] != 4: - raise ValueError('All entries in detection_boxes should have ' - 'shape[1] == 4.') + raise ValueError("All entries in detection_boxes should have " "shape[1] == 4.") num_boxes = detection_classes.shape[0] if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: raise ValueError( - 'Corresponding entries in detection_classes, ' - 'detection_scores and detection_boxes should have ' - 'compatible shapes (i.e., agree on the 0th dimension). ' - 'Classes shape: %d. Boxes shape: %d. ' - 'Scores shape: %d' % - (detection_classes.shape[0], detection_boxes.shape[0], - detection_scores.shape[0])) + "Corresponding entries in detection_classes, " + "detection_scores and detection_boxes should have " + "compatible shapes (i.e., agree on the 0th dimension). " + "Classes shape: %d. Boxes shape: %d. " + "Scores shape: %d" % (detection_classes.shape[0], detection_boxes.shape[0], detection_scores.shape[0]) + ) detections_list = [] for i in range(num_boxes): if detection_classes[i] in category_id_set: - detections_list.append({ - 'image_id': - image_id, - 'category_id': - int(detection_classes[i]), - 'bbox': - list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), - 'score': - float(detection_scores[i]) - }) + detections_list.append( + { + "image_id": image_id, + "category_id": int(detection_classes[i]), + "bbox": list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), + "score": float(detection_scores[i]), + } + ) return detections_list -def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], - category_id_set: Set[int], - detection_masks: np.array, - detection_scores: np.array, - detection_classes: np.array) -> list: +def ExportSingleImageDetectionMasksToCoco( + image_id: Union[str, int], + category_id_set: Set[int], + detection_masks: np.array, + detection_scores: np.array, + detection_classes: np.array, +) -> list: """Export detection masks of a single image to COCO format. This function converts detections represented as numpy arrays to dictionaries @@ -685,29 +670,25 @@ def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], lists do not have the correct shapes or (3) if image_ids are not integers. """ if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: - raise ValueError( - 'All entries in detection_classes and detection_scores' - 'expected to be of rank 1.') + raise ValueError("All entries in detection_classes and detection_scores" "expected to be of rank 1.") num_boxes = detection_classes.shape[0] if not num_boxes == len(detection_masks) == detection_scores.shape[0]: - raise ValueError('Corresponding entries in detection_classes, ' - 'detection_scores and detection_masks should have ' - 'compatible lengths and shapes ' - 'Classes length: %d. Masks length: %d. 
' - 'Scores length: %d' % - (detection_classes.shape[0], len(detection_masks), - detection_scores.shape[0])) + raise ValueError( + "Corresponding entries in detection_classes, " + "detection_scores and detection_masks should have " + "compatible lengths and shapes " + "Classes length: %d. Masks length: %d. " + "Scores length: %d" % (detection_classes.shape[0], len(detection_masks), detection_scores.shape[0]) + ) detections_list = [] for i in range(num_boxes): if detection_classes[i] in category_id_set: - detections_list.append({ - 'image_id': - image_id, - 'category_id': - int(detection_classes[i]), - 'segmentation': - _RleCompress(detection_masks[i]), - 'score': - float(detection_scores[i]) - }) + detections_list.append( + { + "image_id": image_id, + "category_id": int(detection_classes[i]), + "segmentation": _RleCompress(detection_masks[i]), + "score": float(detection_scores[i]), + } + ) return detections_list diff --git a/neural_compressor/experimental/metric/evaluate_squad.py b/neural_compressor/experimental/metric/evaluate_squad.py index 20fedd74538..1065c63fde1 100644 --- a/neural_compressor/experimental/metric/evaluate_squad.py +++ b/neural_compressor/experimental/metric/evaluate_squad.py @@ -14,25 +14,26 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Official evaluation script for v1.1 of the SQuAD dataset. From https://github.com/allenai/bi-att-flow/blob/master/squad/evaluate-v1.1.py """ from __future__ import print_function + import sys from collections import Counter + from .f1 import normalize_answer def f1_score(prediction, ground_truth): """Calculate the F1 score of the prediction and the ground_truth. - + Args: prediction: The predicted result. ground_truth: The ground truth. - + Returns: The F1 score of prediction. Float point number. """ @@ -53,12 +54,12 @@ def metric_max_over_ground_truths(metric_fn, prediction, ground_truths): For each answer in ground_truths, evaluate the metric of prediction with this answer, and return the max metric. - + Args: metric_fn: The function to calculate the metric. prediction: The prediction result. ground_truths: A list of correct answers. - + Returns: The max metric. Float point number. """ @@ -73,48 +74,44 @@ def exact_match_score(prediction, ground_truth): """Compute the exact match score between prediction and ground truth. Args: - prediction: The result of predictions to be evaluated. + prediction: The result of predictions to be evaluated. ground_truth: The ground truth. Returns: The exact match score. """ - return (normalize_answer(prediction) == normalize_answer(ground_truth)) + return normalize_answer(prediction) == normalize_answer(ground_truth) def evaluate(dataset, predictions): """Evaluate the average F1 score and the exact match score for Question-Answering results. Args: - dataset: The dataset to evaluate the prediction. A list instance of articles. - An article contains a list of paragraphs, a paragraph contains a list of - question-and-answers (qas), and a question-and-answer cantains an id, a question, + dataset: The dataset to evaluate the prediction. A list instance of articles. + An article contains a list of paragraphs, a paragraph contains a list of + question-and-answers (qas), and a question-and-answer cantains an id, a question, and a list of correct answers. For example: predictions: The result of predictions to be evaluated. 
A dict mapping the id of - a question to the predicted answer of the question. - + a question to the predicted answer of the question. + Returns: The F1 score and the exact match score. - """ f1 = exact_match = total = 0 for article in dataset: - for paragraph in article['paragraphs']: - for qa in paragraph['qas']: + for paragraph in article["paragraphs"]: + for qa in paragraph["qas"]: total += 1 - if qa['id'] not in predictions: - message = 'Unanswered question ' + qa['id'] + \ - ' will receive score 0.' + if qa["id"] not in predictions: + message = "Unanswered question " + qa["id"] + " will receive score 0." print(message, file=sys.stderr) continue - ground_truths = list(map(lambda x: x['text'], qa['answers'])) - prediction = predictions[qa['id']] - exact_match += metric_max_over_ground_truths( - exact_match_score, prediction, ground_truths) - f1 += metric_max_over_ground_truths( - f1_score, prediction, ground_truths) + ground_truths = list(map(lambda x: x["text"], qa["answers"])) + prediction = predictions[qa["id"]] + exact_match += metric_max_over_ground_truths(exact_match_score, prediction, ground_truths) + f1 += metric_max_over_ground_truths(f1_score, prediction, ground_truths) exact_match = 100.0 * exact_match / total f1 = 100.0 * f1 / total - return {'exact_match': exact_match, 'f1': f1} \ No newline at end of file + return {"exact_match": exact_match, "f1": f1} diff --git a/neural_compressor/experimental/metric/f1.py b/neural_compressor/experimental/metric/f1.py index d6b0811ae3c..32c4bbfbe8d 100644 --- a/neural_compressor/experimental/metric/f1.py +++ b/neural_compressor/experimental/metric/f1.py @@ -14,22 +14,23 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Official evaluation script for v1.1 of the SQuAD dataset. From https://github.com/allenai/bi-att-flow/blob/master/squad/evaluate-v1.1.py """ -from collections import Counter, abc -import string import re +import string +from collections import Counter, abc from typing import Any, Callable, Dict, List, TypeVar + from neural_compressor.utils import logger + def normalize_answer(text: str) -> str: """Normalize the answer text. - Lower text, remove punctuation, articles and extra whitespace, + Lower text, remove punctuation, articles and extra whitespace, and replace other whitespace (newline, tab, etc.) to space. Args: @@ -38,16 +39,16 @@ def normalize_answer(text: str) -> str: Returns: The normalized text. """ - + def _remove_articles(text): - return re.sub(r'\b(a|an|the)\b', ' ', text) + return re.sub(r"\b(a|an|the)\b", " ", text) def _white_space_fix(text): - return ' '.join(text.split()) + return " ".join(text.split()) def _remove_punc(text): exclude = set(string.punctuation) - return ''.join(ch for ch in text if ch not in exclude) + return "".join(ch for ch in text if ch not in exclude) def _lower(text): return text.lower() @@ -65,8 +66,9 @@ def f1_score(prediction: abc.Sequence, ground_truth: abc.Sequence): Returns: The F1 score of prediction. Float point number. 
""" - assert isinstance(prediction, abc.Sequence) and isinstance(ground_truth, abc.Sequence),\ - 'prediction and ground_truth should be Sequence' + assert isinstance(prediction, abc.Sequence) and isinstance( + ground_truth, abc.Sequence + ), "prediction and ground_truth should be Sequence" common = Counter(prediction) & Counter(ground_truth) num_same = sum(common.values()) if num_same == 0: @@ -76,9 +78,13 @@ def f1_score(prediction: abc.Sequence, ground_truth: abc.Sequence): f1 = (2 * precision * recall) / (precision + recall) return f1 -T = TypeVar('T') -def metric_max_over_ground_truths(metric_fn: Callable[[T, T], float], - prediction: str, ground_truths: List[str]) -> float: + +T = TypeVar("T") + + +def metric_max_over_ground_truths( + metric_fn: Callable[[T, T], float], prediction: str, ground_truths: List[str] +) -> float: """Calculate the max metric for each ground truth. For each answer in ground_truths, evaluate the metric of prediction with @@ -100,45 +106,44 @@ def metric_max_over_ground_truths(metric_fn: Callable[[T, T], float], scores_for_ground_truths.append(score) return max(scores_for_ground_truths) + def evaluate(predictions: Dict[str, str], dataset: List[Dict[str, Any]]) -> float: """Evaluate the average F1 score of Question-Answering results. - The F1 score is the harmonic mean of the precision and recall. It can be computed - with the equation: F1 = 2 * (precision * recall) / (precision + recall). - For all question-and-answers in dataset, it evaluates the f1-score + The F1 score is the harmonic mean of the precision and recall. It can be computed + with the equation: F1 = 2 * (precision * recall) / (precision + recall). + For all question-and-answers in dataset, it evaluates the f1-score Args: predictions: The result of predictions to be evaluated. A dict mapping the id of - a question to the predicted answer of the question. - dataset: The dataset to evaluate the prediction. A list instance of articles. - An article contains a list of paragraphs, a paragraph contains a list of - question-and-answers (qas), and a question-and-answer cantains an id, a question, + a question to the predicted answer of the question. + dataset: The dataset to evaluate the prediction. A list instance of articles. + An article contains a list of paragraphs, a paragraph contains a list of + question-and-answers (qas), and a question-and-answer cantains an id, a question, and a list of correct answers. For example: - + [{'paragraphs': [{'qas':[{'answers': [{'answer_start': 177, 'text': 'Denver Broncos'}, ...], 'question': 'Which NFL team represented the AFC at Super Bowl 50?', 'id': '56be4db0acb8001400a502ec'}]}]}] - + Returns: - The F1 score of this prediction. Float point number in forms of a percentage. + The F1 score of this prediction. Float point number in forms of a percentage. """ f1 = total = 0 for article in dataset: - for paragraph in article['paragraphs']: - for qa in paragraph['qas']: + for paragraph in article["paragraphs"]: + for qa in paragraph["qas"]: total += 1 - if qa['id'] not in predictions: - message = 'Unanswered question ' + qa['id'] + \ - ' will receive score 0.' + if qa["id"] not in predictions: + message = "Unanswered question " + qa["id"] + " will receive score 0." 
logger.warning(message) continue - ground_truths = list(map(lambda x: x['text'], qa['answers'])) - prediction = predictions[qa['id']] + ground_truths = list(map(lambda x: x["text"], qa["answers"])) + prediction = predictions[qa["id"]] - f1 += metric_max_over_ground_truths( - f1_score, prediction, ground_truths) + f1 += metric_max_over_ground_truths(f1_score, prediction, ground_truths) f1 = 100.0 * f1 / total return f1 diff --git a/neural_compressor/experimental/metric/metric.py b/neural_compressor/experimental/metric/metric.py index 6efce83038d..1a0ab53764f 100644 --- a/neural_compressor/experimental/metric/metric.py +++ b/neural_compressor/experimental/metric/metric.py @@ -14,22 +14,23 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Neural Compressor metrics.""" - -import numpy as np from abc import abstractmethod from ctypes import Union -from neural_compressor.utils.utility import LazyImport, singleton -from neural_compressor.utils import logger + +import numpy as np from sklearn.metrics import accuracy_score -torch = LazyImport('torch') -tf = LazyImport('tensorflow') -mx = LazyImport('mxnet') -transformers = LazyImport('transformers') +from neural_compressor.utils import logger +from neural_compressor.utils.utility import LazyImport, singleton + +torch = LazyImport("torch") +tf = LazyImport("tensorflow") +mx = LazyImport("mxnet") +transformers = LazyImport("transformers") + @singleton class TensorflowMetrics(object): @@ -38,7 +39,7 @@ class TensorflowMetrics(object): Attributes: metrics: A dict to maintain all metrics for Tensorflow model. """ - + def __init__(self) -> None: """Initialize the metrics collection.""" self.metrics = {} @@ -52,7 +53,7 @@ class PyTorchMetrics(object): Attributes: metrics: A dict to maintain all metrics for PyTorch model. """ - + def __init__(self) -> None: """Initialize the metrics collection.""" self.metrics = {} @@ -66,11 +67,12 @@ class MXNetMetrics(object): Attributes: metrics: A dict to maintain all metrics for MXNet model. """ - + def __init__(self) -> None: """Initialize the metrics collection.""" from ...adaptor.mxnet_utils.util import check_mx_version - if check_mx_version('2.0.0'): # pragma: no cover + + if check_mx_version("2.0.0"): # pragma: no cover import mxnet.gluon.metric as mx_metrics else: import mxnet.metric as mx_metrics @@ -90,7 +92,7 @@ class ONNXRTQLMetrics(object): Attributes: metrics: A dict to maintain all metrics for ONNXRT QLinear model. """ - + def __init__(self) -> None: """Initialize the metrics collection.""" self.metrics = {} @@ -104,24 +106,26 @@ class ONNXRTITMetrics(object): Attributes: metrics: A dict to maintain all metrics for ONNXRT Integer model. 
""" - + def __init__(self) -> None: """Initialize the metrics collection.""" self.metrics = {} self.metrics.update(ONNXRT_IT_METRICS) -framework_metrics = {"tensorflow": TensorflowMetrics, - "tensorflow_itex": TensorflowMetrics, - "keras": TensorflowMetrics, - "mxnet": MXNetMetrics, - "pytorch": PyTorchMetrics, - "pytorch_ipex": PyTorchMetrics, - "pytorch_fx": PyTorchMetrics, - "onnxrt_qlinearops": ONNXRTQLMetrics, - "onnxrt_integerops": ONNXRTITMetrics, - "onnxrt_qdq": ONNXRTQLMetrics, - "onnxruntime": ONNXRTQLMetrics} +framework_metrics = { + "tensorflow": TensorflowMetrics, + "tensorflow_itex": TensorflowMetrics, + "keras": TensorflowMetrics, + "mxnet": MXNetMetrics, + "pytorch": PyTorchMetrics, + "pytorch_ipex": PyTorchMetrics, + "pytorch_fx": PyTorchMetrics, + "onnxrt_qlinearops": ONNXRTQLMetrics, + "onnxrt_integerops": ONNXRTITMetrics, + "onnxrt_qdq": ONNXRTQLMetrics, + "onnxruntime": ONNXRTQLMetrics, +} # user/model specific metrics will be registered here TENSORFLOW_METRICS = {} @@ -131,18 +135,19 @@ def __init__(self) -> None: ONNXRT_QL_METRICS = {} ONNXRT_IT_METRICS = {} -registry_metrics = {"tensorflow": TENSORFLOW_METRICS, - "tensorflow_itex": TENSORFLOW_ITEX_METRICS, - "keras": TENSORFLOW_METRICS, - "mxnet": MXNET_METRICS, - "pytorch": PYTORCH_METRICS, - "pytorch_ipex": PYTORCH_METRICS, - "pytorch_fx": PYTORCH_METRICS, - "onnxrt_qlinearops": ONNXRT_QL_METRICS, - "onnxrt_qdq": ONNXRT_QL_METRICS, - "onnxrt_integerops": ONNXRT_IT_METRICS, - "onnxruntime": ONNXRT_QL_METRICS, - } +registry_metrics = { + "tensorflow": TENSORFLOW_METRICS, + "tensorflow_itex": TENSORFLOW_ITEX_METRICS, + "keras": TENSORFLOW_METRICS, + "mxnet": MXNET_METRICS, + "pytorch": PYTORCH_METRICS, + "pytorch_ipex": PYTORCH_METRICS, + "pytorch_fx": PYTORCH_METRICS, + "onnxrt_qlinearops": ONNXRT_QL_METRICS, + "onnxrt_qdq": ONNXRT_QL_METRICS, + "onnxrt_integerops": ONNXRT_IT_METRICS, + "onnxruntime": ONNXRT_QL_METRICS, +} class METRICS(object): @@ -151,18 +156,26 @@ class METRICS(object): Attributes: metrics: The collection of registered metrics for the specified framework. """ - + def __init__(self, framework: str): """Initialize the metrics collection based on the framework name. Args: framework: The framwork name. """ - assert framework in ("tensorflow", "tensorflow_itex", "keras", - "pytorch", "pytorch_ipex", "pytorch_fx", "onnxrt_qdq", - "onnxrt_qlinearops", "onnxrt_integerops", "mxnet", - "onnxruntime"), \ - "framework support tensorflow pytorch mxnet onnxrt" + assert framework in ( + "tensorflow", + "tensorflow_itex", + "keras", + "pytorch", + "pytorch_ipex", + "pytorch_fx", + "onnxrt_qdq", + "onnxrt_qlinearops", + "onnxrt_integerops", + "mxnet", + "onnxruntime", + ), "framework support tensorflow pytorch mxnet onnxrt" self.metrics = framework_metrics[framework]().metrics def __getitem__(self, metric_type: str): @@ -174,8 +187,7 @@ def __getitem__(self, metric_type: str): Returns: The metric with the specified type. """ - assert metric_type in self.metrics.keys(), "only support metrics in {}".\ - format(self.metrics.keys()) + assert metric_type in self.metrics.keys(), "only support metrics in {}".format(self.metrics.keys()) return self.metrics[metric_type] @@ -186,25 +198,26 @@ def register(self, name, metric_cls) -> None: name: The name of metric. metric_cls: The metric class. """ - assert name not in self.metrics.keys(), 'registered metric name already exists.' + assert name not in self.metrics.keys(), "registered metric name already exists." 
self.metrics.update({name: metric_cls}) + def metric_registry(metric_type: str, framework: str): """Decorate for registering all Metric subclasses. - + The cross-framework metric is supported by specifying the framework param as one of tensorflow, pytorch, mxnet, onnxrt. - + Args: metric_type: The metric type. framework: The framework name. - + Returns: decorator_metric: The function to register metric class. """ - + def decorator_metric(cls): - for single_framework in [fwk.strip() for fwk in framework.split(',')]: + for single_framework in [fwk.strip() for fwk in framework.split(",")]: assert single_framework in [ "tensorflow", "tensorflow_itex", @@ -216,19 +229,20 @@ def decorator_metric(cls): "pytorch", "pytorch_ipex", "pytorch_fx", - ], "The framework support tensorflow mxnet pytorch onnxrt" + ], "The framework support tensorflow mxnet pytorch onnxrt" - if metric_type in registry_metrics[single_framework].keys(): # pragma: no cover - raise ValueError('Cannot have two metrics with the same name') + if metric_type in registry_metrics[single_framework].keys(): # pragma: no cover + raise ValueError("Cannot have two metrics with the same name") registry_metrics[single_framework][metric_type] = cls return cls + return decorator_metric class BaseMetric(object): """The base class of Metric.""" - - def __init__(self, metric, single_output = False, hvd = None): + + def __init__(self, metric, single_output=False, hvd=None): """Initialize the basic metric. Args: @@ -311,16 +325,16 @@ def hvd(self, hvd): class WrapPyTorchMetric(BaseMetric): """The wrapper of Metric class for PyTorch.""" - + def update(self, preds, labels=None, sample_weight=None): """Convert the prediction to torch. - + Args: preds: The prediction result. labels: The reference. Defaults to None. sample_weight: The sampling weight. Defaults to None. """ - if self._single_output: # pragma: no cover + if self._single_output: # pragma: no cover output = torch.as_tensor(preds) else: output = (torch.as_tensor(preds), torch.as_tensor(labels)) @@ -337,10 +351,10 @@ def result(self): class WrapMXNetMetric(BaseMetric): """The wrapper of Metric class for MXNet.""" - + def update(self, preds, labels=None, sample_weight=None): """Convert the prediction to MXNet array. - + Args: preds: The prediction result. labels: The reference. Defaults to None. @@ -356,19 +370,20 @@ def reset(self): def result(self): """Evaluate the difference between predictions and labels. - + Returns: acc: The evaluated result. """ acc_name, acc = self._metric.get() return acc + class WrapONNXRTMetric(BaseMetric): """The wrapper of Metric class for ONNXRT.""" - + def update(self, preds, labels=None, sample_weight=None): """Convert the prediction to NumPy array. - + Args: preds: The prediction result. labels: The reference. Defaults to None. @@ -384,13 +399,14 @@ def reset(self): def result(self): """Evaluate the difference between predictions and labels. - + Returns: acc: The evaluated result. 
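A hedged sketch of registering a user-defined metric through the metric_registry decorator shown above. The class name MeanError and its internals are hypothetical; only the decorator signature, the framework names, and the update/reset/result protocol come from this file:

import numpy as np

from neural_compressor.experimental.metric.metric import metric_registry


@metric_registry("MeanError", "pytorch, onnxrt_qlinearops")
class MeanError:
    """Toy metric: signed mean difference between predictions and labels."""

    def __init__(self):
        self.diffs = []

    def update(self, preds, labels, sample_weight=None):
        # accumulate element-wise differences across batches
        self.diffs.extend(np.asarray(preds, dtype=float) - np.asarray(labels, dtype=float))

    def reset(self):
        self.diffs = []

    def result(self):
        return float(np.mean(self.diffs)) if self.diffs else 0.0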
""" acc_name, acc = self._metric_cls.result() return acc + def _topk_shape_validate(preds, labels): # preds shape can be Nxclass_num or class_num(N=1 by default) # it's more suitable for 'Accuracy' with preds shape Nx1(or 1) output from argmax @@ -433,16 +449,17 @@ def _topk_shape_validate(preds, labels): class_num = preds.shape[1] label_N = labels.shape[0] - assert label_N == N, 'labels batch size should same with preds' + assert label_N == N, "labels batch size should same with preds" labels = labels.reshape([N, -1]) # one-hot labels will have 2 dimension not equal 1 if labels.shape[1] != 1: labels = labels.argsort()[..., -1:] return preds, labels + def _shape_validate(preds, labels): - assert type(preds) in [int, list, np.ndarray], 'preds must be in int or list, ndarray' - assert type(labels) in [int, list, np.ndarray], 'labels must be in int or list, ndarray' + assert type(preds) in [int, list, np.ndarray], "preds must be in int or list, ndarray" + assert type(labels) in [int, list, np.ndarray], "labels must be in int or list, ndarray" if isinstance(preds, int): preds = [np.array([preds])] elif isinstance(preds[0], int): @@ -455,25 +472,26 @@ def _shape_validate(preds, labels): labels = [np.array(labels)] else: labels = [np.array(label) for label in labels] - for (pred, label) in zip(preds, labels): - assert pred.shape == label.shape, \ - 'Shape mismatch, label shape {} vs pred shape {}'.format(label.shape, pred.shape) + for pred, label in zip(preds, labels): + assert pred.shape == label.shape, "Shape mismatch, label shape {} vs pred shape {}".format( + label.shape, pred.shape + ) return preds, labels -@metric_registry('F1', 'tensorflow, tensorflow_itex, pytorch, mxnet, onnxrt_qlinearops, onnxrt_integerops') +@metric_registry("F1", "tensorflow, tensorflow_itex, pytorch, mxnet, onnxrt_qlinearops, onnxrt_integerops") class F1(BaseMetric): """F1 score of a binary classification problem. - - The F1 score is the harmonic mean of the precision and recall. - It can be computed with the equation: + + The F1 score is the harmonic mean of the precision and recall. + It can be computed with the equation: F1 = 2 * (precision * recall) / (precision + recall) """ - + def __init__(self): """Initialize the F1 score list.""" self._score_list = [] - + def update(self, preds, labels): """Add the predictions and labels. @@ -482,7 +500,8 @@ def update(self, preds, labels): labels: The labels corresponding to the predictions. """ from .f1 import f1_score - if getattr(self, '_hvd', None) is not None: + + if getattr(self, "_hvd", None) is not None: gathered_preds_list = self._hvd.allgather_object(preds) gathered_labels_list = self._hvd.allgather_object(labels) temp_preds_list, temp_labels_list = [], [] @@ -502,6 +521,7 @@ def result(self): """Compute the F1 score.""" return np.array(self._score_list).mean() + def _accuracy_shape_check(preds, labels): """Check and conver the shape of predictions and labels. @@ -519,13 +539,15 @@ def _accuracy_shape_check(preds, labels): if isinstance(labels, int): labels = [labels] labels = np.array(labels) - if len(labels.shape) != len(preds.shape) and len(labels.shape)+1 != len(preds.shape): + if len(labels.shape) != len(preds.shape) and len(labels.shape) + 1 != len(preds.shape): raise ValueError( - 'labels must have shape of (batch_size, ..) and preds must have' - 'shape of (batch_size, num_classes, ...) or (batch_size, ..),' - 'but given {} and {}.'.format(labels.shape, preds.shape)) + "labels must have shape of (batch_size, ..) 
and preds must have" + "shape of (batch_size, num_classes, ...) or (batch_size, ..)," + "but given {} and {}.".format(labels.shape, preds.shape) + ) return preds, labels + def _accuracy_type_check(preds, labels): """Determine the type of prediction. @@ -536,24 +558,24 @@ def _accuracy_type_check(preds, labels): Returns: update_type: The type of predictions. """ - if len(preds.shape) == len(labels.shape)+1: + if len(preds.shape) == len(labels.shape) + 1: num_classes = preds.shape[1] if num_classes == 1: - update_type = 'binary' + update_type = "binary" else: - update_type = 'multiclass' + update_type = "multiclass" elif len(preds.shape) == len(labels.shape): - if len(preds.shape) == 1 or preds.shape[1] ==1: - update_type = 'binary' + if len(preds.shape) == 1 or preds.shape[1] == 1: + update_type = "binary" else: - update_type = 'multilabel' + update_type = "multilabel" return update_type -@metric_registry('Accuracy', 'tensorflow, tensorflow_itex, pytorch, onnxrt_qlinearops, onnxrt_integerops') +@metric_registry("Accuracy", "tensorflow, tensorflow_itex, pytorch, onnxrt_qlinearops, onnxrt_integerops") class Accuracy(BaseMetric): """The Accuracy for the classification tasks. - + The accuracy score is the proportion of the total number of predictions that were correct classified. @@ -562,7 +584,7 @@ class Accuracy(BaseMetric): label_list: List of labels to score. sample: The total number of samples. """ - + def __init__(self): """Initialize predictions, labels and sample.""" self.pred_list = [] @@ -579,25 +601,25 @@ def update(self, preds, labels, sample_weight=None): """ preds, labels = _accuracy_shape_check(preds, labels) update_type = _accuracy_type_check(preds, labels) - if update_type == 'binary': + if update_type == "binary": self.pred_list.extend(preds) self.label_list.extend(labels) self.sample += labels.shape[0] - elif update_type == 'multiclass': - self.pred_list.extend(np.argmax(preds, axis=1).astype('int32')) + elif update_type == "multiclass": + self.pred_list.extend(np.argmax(preds, axis=1).astype("int32")) self.label_list.extend(labels) self.sample += labels.shape[0] - elif update_type == 'multilabel': - #(N, C, ...) -> (N*..., C) + elif update_type == "multilabel": + # (N, C, ...) -> (N*..., C) num_label = preds.shape[1] last_dim = len(preds.shape) - if last_dim-1 != 1: + if last_dim - 1 != 1: trans_list = [0] trans_list.extend(list(range(2, len(preds.shape)))) trans_list.extend([1]) preds = preds.transpose(trans_list).reshape(-1, num_label) labels = labels.transpose(trans_list).reshape(-1, num_label) - self.sample += preds.shape[0]*preds.shape[1] + self.sample += preds.shape[0] * preds.shape[1] self.pred_list.append(preds) self.label_list.append(labels) @@ -609,25 +631,27 @@ def reset(self): def result(self): """Compute the accuracy.""" - correct_num = np.sum( - np.array(self.pred_list) == np.array(self.label_list)) - if getattr(self, '_hvd', None) is not None: # pragma: no cover + correct_num = np.sum(np.array(self.pred_list) == np.array(self.label_list)) + if getattr(self, "_hvd", None) is not None: # pragma: no cover allghter_correct_num = sum(self._hvd.allgather_object(correct_num)) allgather_sample = sum(self._hvd.allgather_object(self.sample)) return allghter_correct_num / allgather_sample return correct_num / self.sample -class PyTorchLoss(): +class PyTorchLoss: """A dummy PyTorch Metric. - + A dummy metric that computes the average of predictions and prints it directly. """ - + def __init__(self): - """Initialize the number of examples, sum of prediction. 
and device.""" + """Initialize the number of examples, sum of prediction. + + and device. + """ self._num_examples = 0 - self._device = torch.device('cpu') + self._device = torch.device("cpu") self._sum = torch.tensor(0.0, device=self._device) def reset(self): @@ -656,22 +680,24 @@ def compute(self): The dummy loss. """ if self._num_examples == 0: - raise ValueError("Loss must have at least one example \ - before it can be computed.") + raise ValueError( + "Loss must have at least one example \ + before it can be computed." + ) return self._sum.item() / self._num_examples - - -@metric_registry('Loss', 'tensorflow, tensorflow_itex, pytorch, onnxrt_qlinearops, onnxrt_integerops') + + +@metric_registry("Loss", "tensorflow, tensorflow_itex, pytorch, onnxrt_qlinearops, onnxrt_integerops") class Loss(BaseMetric): """A dummy Metric. - + A dummy metric that computes the average of predictions and prints it directly. - + Attributes: sample: The number of samples. sum: The sum of prediction. """ - + def __init__(self): """Initialize the number of samples, sum of prediction.""" self.sample = 0 @@ -696,36 +722,36 @@ def reset(self): def result(self): """Compute the average of predictions. - + Returns: The dummy loss. """ - if getattr(self, '_hvd', None) is not None: # pragma: no cover + if getattr(self, "_hvd", None) is not None: # pragma: no cover allgather_sum = sum(self._hvd.allgather_object(self.sum)) allgather_sample = sum(self._hvd.allgather_object(self.sample)) return allgather_sum / allgather_sample return self.sum / self.sample -@metric_registry('MAE', 'tensorflow, tensorflow_itex, pytorch, onnxrt_qlinearops, onnxrt_integerops') +@metric_registry("MAE", "tensorflow, tensorflow_itex, pytorch, onnxrt_qlinearops, onnxrt_integerops") class MAE(BaseMetric): """Computes Mean Absolute Error (MAE) loss. - - Mean Absolute Error (MAE) is the mean of the magnitude of + + Mean Absolute Error (MAE) is the mean of the magnitude of difference between the predicted and actual numeric values. - + Attributes: pred_list: List of prediction to score. label_list: List of references corresponding to the prediction result. - compare_label (bool): Whether to compare label. False if there are no + compare_label (bool): Whether to compare label. False if there are no labels and will use FP32 preds as labels. """ - + def __init__(self, compare_label=True): """Initialize the list of prediction and labels. Args: - compare_label: Whether to compare label. False if there are no + compare_label: Whether to compare label. False if there are no labels and will use FP32 preds as labels. """ self.label_list = [] @@ -755,25 +781,24 @@ def result(self): Returns: The MAE score. 
""" - aes = [abs(a-b) for (a,b) in zip(self.label_list, self.pred_list)] + aes = [abs(a - b) for (a, b) in zip(self.label_list, self.pred_list)] aes_sum = sum([np.sum(ae) for ae in aes]) aes_size = sum([ae.size for ae in aes]) assert aes_size, "predictions shouldn't be none" - if getattr(self, '_hvd', None) is not None: # pragma: no cover + if getattr(self, "_hvd", None) is not None: # pragma: no cover aes_sum = sum(self._hvd.allgather_object(aes_sum)) - aes_size = sum(self._hvd.allgather_object(aes_size)) + aes_size = sum(self._hvd.allgather_object(aes_size)) return aes_sum / aes_size -@metric_registry('RMSE', 'tensorflow, tensorflow_itex, pytorch, mxnet, onnxrt_qlinearops, onnxrt_integerops') +@metric_registry("RMSE", "tensorflow, tensorflow_itex, pytorch, mxnet, onnxrt_qlinearops, onnxrt_integerops") class RMSE(BaseMetric): """Computes Root Mean Squared Error (RMSE) loss. - + Attributes: mse: The instance of MSE Metric. - """ - + def __init__(self, compare_label=True): """Initialize the mse. @@ -803,32 +828,31 @@ def result(self): Returns: The RMSE score. """ - if getattr(self, '_hvd', None) is not None: # pragma: no cover + if getattr(self, "_hvd", None) is not None: # pragma: no cover self.mse._hvd = self._hvd return np.sqrt(self.mse.result()) - -@metric_registry('MSE', 'tensorflow, tensorflow_itex, pytorch, onnxrt_qlinearops, onnxrt_integerops') +@metric_registry("MSE", "tensorflow, tensorflow_itex, pytorch, onnxrt_qlinearops, onnxrt_integerops") class MSE(BaseMetric): """Computes Mean Squared Error (MSE) loss. - + Mean Squared Error(MSE) represents the average of the squares of errors. For example, the average squared difference between the estimated values and the actual values. - + Attributes: pred_list: List of prediction to score. label_list: List of references corresponding to the prediction result. compare_label (bool): Whether to compare label. False if there are no labels and will use FP32 preds as labels. """ - + def __init__(self, compare_label=True): """Initialize the list of prediction and labels. Args: - compare_label: Whether to compare label. False if there are no + compare_label: Whether to compare label. False if there are no labels and will use FP32 preds as labels. """ self.label_list = [] @@ -858,17 +882,17 @@ def result(self): Returns: The MSE score. """ - squares = [(a-b)**2.0 for (a,b) in zip(self.label_list, self.pred_list)] + squares = [(a - b) ** 2.0 for (a, b) in zip(self.label_list, self.pred_list)] squares_sum = sum([np.sum(square) for square in squares]) squares_size = sum([square.size for square in squares]) assert squares_size, "predictions should't be None" - if getattr(self, '_hvd', None) is not None: # pragma: no cover + if getattr(self, "_hvd", None) is not None: # pragma: no cover squares_sum = sum(self._hvd.allgather_object(squares_sum)) - squares_size = sum(self._hvd.allgather_object(squares_size)) + squares_size = sum(self._hvd.allgather_object(squares_size)) return squares_sum / squares_size -@metric_registry('topk', 'tensorflow, tensorflow_itex') +@metric_registry("topk", "tensorflow, tensorflow_itex") class TensorflowTopK(BaseMetric): """Compute Top-k Accuracy classification score for Tensorflow model. 
@@ -903,13 +927,14 @@ def update(self, preds, labels, sample_weight=None): labels = labels.reshape([len(labels)]) with tf.Graph().as_default() as acc_graph: - topk = tf.nn.in_top_k(predictions=tf.constant(preds, dtype=tf.float32), - targets=tf.constant(labels, dtype=tf.int32), k=self.k) - fp32_topk = tf.cast(topk, tf.float32) - correct_tensor = tf.reduce_sum(input_tensor=fp32_topk) + topk = tf.nn.in_top_k( + predictions=tf.constant(preds, dtype=tf.float32), targets=tf.constant(labels, dtype=tf.int32), k=self.k + ) + fp32_topk = tf.cast(topk, tf.float32) + correct_tensor = tf.reduce_sum(input_tensor=fp32_topk) - with tf.compat.v1.Session() as acc_sess: - correct = acc_sess.run(correct_tensor) + with tf.compat.v1.Session() as acc_sess: + correct = acc_sess.run(correct_tensor) self.num_sample += len(labels) self.num_correct += correct @@ -928,26 +953,26 @@ def result(self): if self.num_sample == 0: logger.warning("Sample num during evaluation is 0.") return 0 - elif getattr(self, '_hvd', None) is not None: # pragma: no cover + elif getattr(self, "_hvd", None) is not None: # pragma: no cover allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct)) allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample)) - return allgather_num_correct / allgather_num_sample + return allgather_num_correct / allgather_num_sample return self.num_correct / self.num_sample -@metric_registry('topk', 'pytorch, mxnet, onnxrt_qlinearops, onnxrt_integerops') +@metric_registry("topk", "pytorch, mxnet, onnxrt_qlinearops, onnxrt_integerops") class GeneralTopK(BaseMetric): """Compute Top-k Accuracy classification score. - + This metric computes the number of times where the correct label is among the top k labels predicted. - + Attributes: k (int): The number of most likely outcomes considered to find the correct label. num_correct: The number of predictions that were correct classified. num_sample: The total number of predictions. """ - + def __init__(self, k=1): """Initialize the k, number of samples and correct predictions. @@ -967,7 +992,7 @@ def update(self, preds, labels, sample_weight=None): sample_weight: The sample weight. 
""" preds, labels = _topk_shape_validate(preds, labels) - preds = preds.argsort()[..., -self.k:] + preds = preds.argsort()[..., -self.k :] if self.k == 1: correct = accuracy_score(preds, labels, normalize=False) self.num_correct += correct @@ -976,7 +1001,7 @@ def update(self, preds, labels, sample_weight=None): for p, l in zip(preds, labels): # get top-k labels with np.argpartition # p = np.argpartition(p, -self.k)[-self.k:] - l = l.astype('int32') + l = l.astype("int32") if l in p: self.num_correct += 1 @@ -996,23 +1021,25 @@ def result(self): if self.num_sample == 0: logger.warning("Sample num during evaluation is 0.") return 0 - elif getattr(self, '_hvd', None) is not None: # pragma: no cover + elif getattr(self, "_hvd", None) is not None: # pragma: no cover allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct)) allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample)) return allgather_num_correct / allgather_num_sample return self.num_correct / self.num_sample - -@metric_registry('COCOmAPv2', 'tensorflow, tensorflow_itex, onnxrt_qlinearops, onnxrt_integerops') + +@metric_registry("COCOmAPv2", "tensorflow, tensorflow_itex, onnxrt_qlinearops, onnxrt_integerops") class COCOmAPv2(BaseMetric): """Compute mean average precision of the detection task.""" - def __init__(self, - anno_path=None, - iou_thrs='0.5:0.05:0.95', - map_points=101, - map_key='DetectionBoxes_Precision/mAP', - output_index_mapping={'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}): + def __init__( + self, + anno_path=None, + iou_thrs="0.5:0.05:0.95", + map_points=101, + map_key="DetectionBoxes_Precision/mAP", + output_index_mapping={"num_detections": -1, "boxes": 0, "scores": 1, "classes": 2}, + ): """Initialize the metric. Args: @@ -1020,22 +1047,25 @@ def __init__(self, iou_thrs: Minimal value for intersection over union that allows to make decision that prediction bounding box is true positive. You can specify one float value between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. - map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for 11-point interpolated AP, 0 for area under PR curve. - map_key: The key that mapping to pycocotools COCOeval. + map_key: The key that mapping to pycocotools COCOeval. Defaults to 'DetectionBoxes_Precision/mAP'. - output_index_mapping: The output index mapping. + output_index_mapping: The output index mapping. Defaults to {'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}. """ self.output_index_mapping = output_index_mapping from .coco_label_map import category_map + if anno_path: import os + import yaml - assert os.path.exists(anno_path), 'Annotation path does not exists!' - with open(anno_path, 'r') as f: + + assert os.path.exists(anno_path), "Annotation path does not exists!" 
+ with open(anno_path, "r") as f: label_map = yaml.safe_load(f.read()) - self.category_map_reverse = {k: v for k,v in label_map.items()} + self.category_map_reverse = {k: v for k, v in label_map.items()} else: # label: index self.category_map_reverse = {v: k for k, v in category_map.items()} @@ -1044,8 +1074,7 @@ def __init__(self, self.detection_list = [] self.annotation_id = 1 self.category_map = category_map - self.category_id_set = set( - [cat for cat in self.category_map]) #index + self.category_id_set = set([cat for cat in self.category_map]) # index self.iou_thrs = iou_thrs self.map_points = map_points self.map_key = map_key @@ -1058,69 +1087,65 @@ def update(self, predicts, labels, sample_weight=None): labels: The labels corresponding to the predictions. sample_weight: The sample weight. Defaults to None. """ - from .coco_tools import ExportSingleImageGroundtruthToCoco,\ - ExportSingleImageDetectionBoxesToCoco + from .coco_tools import ExportSingleImageDetectionBoxesToCoco, ExportSingleImageGroundtruthToCoco + detections = [] - if 'num_detections' in self.output_index_mapping and \ - self.output_index_mapping['num_detections'] > -1: + if "num_detections" in self.output_index_mapping and self.output_index_mapping["num_detections"] > -1: for item in zip(*predicts): detection = {} - num = int(item[self.output_index_mapping['num_detections']]) - detection['boxes'] = np.asarray( - item[self.output_index_mapping['boxes']])[0:num] - detection['scores'] = np.asarray( - item[self.output_index_mapping['scores']])[0:num] - detection['classes'] = np.asarray( - item[self.output_index_mapping['classes']])[0:num] + num = int(item[self.output_index_mapping["num_detections"]]) + detection["boxes"] = np.asarray(item[self.output_index_mapping["boxes"]])[0:num] + detection["scores"] = np.asarray(item[self.output_index_mapping["scores"]])[0:num] + detection["classes"] = np.asarray(item[self.output_index_mapping["classes"]])[0:num] detections.append(detection) else: for item in zip(*predicts): detection = {} - detection['boxes'] = np.asarray(item[self.output_index_mapping['boxes']]) - detection['scores'] = np.asarray(item[self.output_index_mapping['scores']]) - detection['classes'] = np.asarray(item[self.output_index_mapping['classes']]) + detection["boxes"] = np.asarray(item[self.output_index_mapping["boxes"]]) + detection["scores"] = np.asarray(item[self.output_index_mapping["scores"]]) + detection["classes"] = np.asarray(item[self.output_index_mapping["classes"]]) detections.append(detection) - bboxes, str_labels,int_labels, image_ids = labels + bboxes, str_labels, int_labels, image_ids = labels labels = [] if len(int_labels[0]) == 0: for str_label in str_labels: - str_label = [ - x if type(x) == 'str' else x.decode('utf-8') - for x in str_label - ] + str_label = [x if type(x) == "str" else x.decode("utf-8") for x in str_label] labels.append([self.category_map_reverse[x] for x in str_label]) elif len(str_labels[0]) == 0: for int_label in int_labels: labels.append([x for x in int_label]) for idx, image_id in enumerate(image_ids): - image_id = image_id if type( - image_id) == 'str' else image_id.decode('utf-8') + image_id = image_id if type(image_id) == "str" else image_id.decode("utf-8") if image_id in self.image_ids: continue self.image_ids.append(image_id) ground_truth = {} - ground_truth['boxes'] = np.asarray(bboxes[idx]) - ground_truth['classes'] = np.asarray(labels[idx]) + ground_truth["boxes"] = np.asarray(bboxes[idx]) + ground_truth["classes"] = np.asarray(labels[idx]) 
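A construction sketch for the COCOmAPv2 metric being reformatted here (illustrative only; real evaluation additionally needs pycocotools plus detection and ground-truth tensors, which are only described in comments below):

from neural_compressor.experimental.metric.metric import COCOmAPv2

map_metric = COCOmAPv2(
    anno_path=None,                          # fall back to the built-in COCO label map
    iou_thrs="0.5:0.05:0.95",                # standard COCO IoU sweep
    map_points=101,                          # 101-point interpolated AP
    map_key="DetectionBoxes_Precision/mAP",  # the value returned by result()
)
# update(predicts, labels) expects:
#   predicts -> per-image (num_detections, boxes, scores, classes) outputs, located
#               through output_index_mapping
#   labels   -> a 4-tuple (bboxes, str_labels, int_labels, image_ids)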
self.ground_truth_list.extend( ExportSingleImageGroundtruthToCoco( image_id=image_id, next_annotation_id=self.annotation_id, category_id_set=self.category_id_set, - groundtruth_boxes=ground_truth['boxes'], - groundtruth_classes=ground_truth['classes'])) - self.annotation_id += ground_truth['boxes'].shape[0] + groundtruth_boxes=ground_truth["boxes"], + groundtruth_classes=ground_truth["classes"], + ) + ) + self.annotation_id += ground_truth["boxes"].shape[0] self.detection_list.extend( ExportSingleImageDetectionBoxesToCoco( image_id=image_id, category_id_set=self.category_id_set, - detection_boxes=detections[idx]['boxes'], - detection_scores=detections[idx]['scores'], - detection_classes=detections[idx]['classes'])) + detection_boxes=detections[idx]["boxes"], + detection_scores=detections[idx]["scores"], + detection_classes=detections[idx]["classes"], + ) + ) def reset(self): """Reset the prediction and labels.""" @@ -1135,69 +1160,63 @@ def result(self): Returns: The mean average precision score. """ - from .coco_tools import COCOWrapper, COCOEvalWrapper + from .coco_tools import COCOEvalWrapper, COCOWrapper + if len(self.ground_truth_list) == 0: logger.warning("Sample num during evaluation is 0.") return 0 else: groundtruth_dict = { - 'annotations': - self.ground_truth_list, - 'images': [{ - 'id': image_id - } for image_id in self.image_ids], - 'categories': [{ - 'id': k, - 'name': v - } for k, v in self.category_map.items()] + "annotations": self.ground_truth_list, + "images": [{"id": image_id} for image_id in self.image_ids], + "categories": [{"id": k, "name": v} for k, v in self.category_map.items()], } coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict) - coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations( - self.detection_list) - box_evaluator = COCOEvalWrapper(coco_wrapped_groundtruth, - coco_wrapped_detections, - agnostic_mode=False, - iou_thrs = self.iou_thrs, - map_points = self.map_points) + coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations(self.detection_list) + box_evaluator = COCOEvalWrapper( + coco_wrapped_groundtruth, + coco_wrapped_detections, + agnostic_mode=False, + iou_thrs=self.iou_thrs, + map_points=self.map_points, + ) box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( - include_metrics_per_category=False, all_metrics_per_category=False) + include_metrics_per_category=False, all_metrics_per_category=False + ) box_metrics.update(box_per_category_ap) - box_metrics = { - 'DetectionBoxes_' + key: value - for key, value in iter(box_metrics.items()) - } + box_metrics = {"DetectionBoxes_" + key: value for key, value in iter(box_metrics.items())} return box_metrics[self.map_key] -@metric_registry('mAP', 'tensorflow, tensorflow_itex, onnxrt_qlinearops, onnxrt_integerops') + +@metric_registry("mAP", "tensorflow, tensorflow_itex, onnxrt_qlinearops, onnxrt_integerops") class TensorflowMAP(BaseMetric): """Computes mean average precision.""" - - def __init__(self, - anno_path=None, - iou_thrs=0.5, - map_points=0, - map_key='DetectionBoxes_Precision/mAP'): + + def __init__(self, anno_path=None, iou_thrs=0.5, map_points=0, map_key="DetectionBoxes_Precision/mAP"): """Initialize the metric. - + Args: anno_path: The path of annotation file. iou_thrs: Minimal value for intersection over union that allows to make decision that prediction bounding box is true positive. You can specify one float value between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. - map_points: The way to calculate mAP. 
101 for 101-point interpolated AP, 11 for + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for 11-point interpolated AP, 0 for area under PR curve. - map_key: The key that mapping to pycocotools COCOeval. + map_key: The key that mapping to pycocotools COCOeval. Defaults to 'DetectionBoxes_Precision/mAP'. """ from .coco_label_map import category_map + if anno_path: import os + import yaml - assert os.path.exists(anno_path), 'Annotation path does not exists!' - with open(anno_path, 'r') as f: + + assert os.path.exists(anno_path), "Annotation path does not exists!" + with open(anno_path, "r") as f: label_map = yaml.safe_load(f.read()) - self.category_map_reverse = {k: v for k,v in label_map.items()} + self.category_map_reverse = {k: v for k, v in label_map.items()} else: # label: index self.category_map_reverse = {v: k for k, v in category_map.items()} @@ -1206,13 +1225,11 @@ def __init__(self, self.detection_list = [] self.annotation_id = 1 self.category_map = category_map - self.category_id_set = set( - [cat for cat in self.category_map]) #index + self.category_id_set = set([cat for cat in self.category_map]) # index self.iou_thrs = iou_thrs self.map_points = map_points self.map_key = map_key - def update(self, predicts, labels, sample_weight=None): """Add the predictions and labels. @@ -1221,70 +1238,70 @@ def update(self, predicts, labels, sample_weight=None): labels: The labels corresponding to the predictions. sample_weight: The sample weight. """ - if getattr(self, '_hvd', None) is not None: # pragma: no cover + if getattr(self, "_hvd", None) is not None: # pragma: no cover raise NotImplementedError("Metric TensorflowMAP currently do not support distribued inference.") - from .coco_tools import ExportSingleImageGroundtruthToCoco,\ - ExportSingleImageDetectionBoxesToCoco + from .coco_tools import ExportSingleImageDetectionBoxesToCoco, ExportSingleImageGroundtruthToCoco + detections = [] if len(predicts) == 3: for bbox, score, cls in zip(*predicts): detection = {} - detection['boxes'] = np.asarray(bbox) - detection['scores'] = np.asarray(score) - detection['classes'] = np.asarray(cls) + detection["boxes"] = np.asarray(bbox) + detection["scores"] = np.asarray(score) + detection["classes"] = np.asarray(cls) detections.append(detection) elif len(predicts) == 4: for num, bbox, score, cls in zip(*predicts): detection = {} num = int(num) - detection['boxes'] = np.asarray(bbox)[0:num] - detection['scores'] = np.asarray(score)[0:num] - detection['classes'] = np.asarray(cls)[0:num] + detection["boxes"] = np.asarray(bbox)[0:num] + detection["scores"] = np.asarray(score)[0:num] + detection["classes"] = np.asarray(cls)[0:num] detections.append(detection) else: raise ValueError("Unsupported prediction format!") - bboxes, str_labels,int_labels, image_ids = labels + bboxes, str_labels, int_labels, image_ids = labels labels = [] if len(int_labels[0]) == 0: for str_label in str_labels: - str_label = [ - x if type(x) == 'str' else x.decode('utf-8') - for x in str_label - ] + str_label = [x if type(x) == "str" else x.decode("utf-8") for x in str_label] labels.append([self.category_map_reverse[x] for x in str_label]) elif len(str_labels[0]) == 0: for int_label in int_labels: labels.append([x for x in int_label]) for idx, image_id in enumerate(image_ids): - image_id = image_id if type( - image_id) == 'str' else image_id.decode('utf-8') + image_id = image_id if type(image_id) == "str" else image_id.decode("utf-8") if image_id in self.image_ids: continue 
self.image_ids.append(image_id) ground_truth = {} - ground_truth['boxes'] = np.asarray(bboxes[idx]) - ground_truth['classes'] = np.asarray(labels[idx]) + ground_truth["boxes"] = np.asarray(bboxes[idx]) + ground_truth["classes"] = np.asarray(labels[idx]) self.ground_truth_list.extend( ExportSingleImageGroundtruthToCoco( image_id=image_id, next_annotation_id=self.annotation_id, category_id_set=self.category_id_set, - groundtruth_boxes=ground_truth['boxes'], - groundtruth_classes=ground_truth['classes'])) - self.annotation_id += ground_truth['boxes'].shape[0] + groundtruth_boxes=ground_truth["boxes"], + groundtruth_classes=ground_truth["classes"], + ) + ) + self.annotation_id += ground_truth["boxes"].shape[0] self.detection_list.extend( ExportSingleImageDetectionBoxesToCoco( image_id=image_id, category_id_set=self.category_id_set, - detection_boxes=detections[idx]['boxes'], - detection_scores=detections[idx]['scores'], - detection_classes=detections[idx]['classes'])) + detection_boxes=detections[idx]["boxes"], + detection_scores=detections[idx]["scores"], + detection_classes=detections[idx]["classes"], + ) + ) def reset(self): """Reset the prediction and labels.""" @@ -1299,49 +1316,40 @@ def result(self): Returns: The mean average precision score. """ - from .coco_tools import COCOWrapper, COCOEvalWrapper + from .coco_tools import COCOEvalWrapper, COCOWrapper + if len(self.ground_truth_list) == 0: logger.warning("Sample num during evaluation is 0.") return 0 else: groundtruth_dict = { - 'annotations': - self.ground_truth_list, - 'images': [{ - 'id': image_id - } for image_id in self.image_ids], - 'categories': [{ - 'id': k, - 'name': v - } for k, v in self.category_map.items()] + "annotations": self.ground_truth_list, + "images": [{"id": image_id} for image_id in self.image_ids], + "categories": [{"id": k, "name": v} for k, v in self.category_map.items()], } coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict) - coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations( - self.detection_list) - box_evaluator = COCOEvalWrapper(coco_wrapped_groundtruth, - coco_wrapped_detections, - agnostic_mode=False, - iou_thrs = self.iou_thrs, - map_points = self.map_points) + coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations(self.detection_list) + box_evaluator = COCOEvalWrapper( + coco_wrapped_groundtruth, + coco_wrapped_detections, + agnostic_mode=False, + iou_thrs=self.iou_thrs, + map_points=self.map_points, + ) box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( - include_metrics_per_category=False, all_metrics_per_category=False) + include_metrics_per_category=False, all_metrics_per_category=False + ) box_metrics.update(box_per_category_ap) - box_metrics = { - 'DetectionBoxes_' + key: value - for key, value in iter(box_metrics.items()) - } + box_metrics = {"DetectionBoxes_" + key: value for key, value in iter(box_metrics.items())} return box_metrics[self.map_key] -@metric_registry('COCOmAP', 'tensorflow, tensorflow_itex, onnxrt_qlinearops, onnxrt_integerops') + +@metric_registry("COCOmAP", "tensorflow, tensorflow_itex, onnxrt_qlinearops, onnxrt_integerops") class TensorflowCOCOMAP(TensorflowMAP): """Computes mean average precision using algorithm in COCO.""" - - def __init__(self, - anno_path=None, - iou_thrs=None, - map_points=None, - map_key='DetectionBoxes_Precision/mAP'): + + def __init__(self, anno_path=None, iou_thrs=None, map_points=None, map_key="DetectionBoxes_Precision/mAP"): """Initialize the iou threshold and max points. 
Args: @@ -1349,24 +1357,21 @@ def __init__(self, iou_thrs: Minimal value for intersection over union that allows to make decision that prediction bounding box is true positive. You can specify one float value between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. - map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for 11-point interpolated AP, 0 for area under PR curve. - map_key: The key that mapping to pycocotools COCOeval. + map_key: The key that mapping to pycocotools COCOeval. Defaults to 'DetectionBoxes_Precision/mAP'. """ super(TensorflowCOCOMAP, self).__init__(anno_path, iou_thrs, map_points, map_key) - self.iou_thrs = '0.5:0.05:0.95' + self.iou_thrs = "0.5:0.05:0.95" self.map_points = 101 -@metric_registry('VOCmAP', 'tensorflow, tensorflow_itex, onnxrt_qlinearops, onnxrt_integerops') + +@metric_registry("VOCmAP", "tensorflow, tensorflow_itex, onnxrt_qlinearops, onnxrt_integerops") class TensorflowVOCMAP(TensorflowMAP): """Computes mean average precision using algorithm in VOC.""" - - def __init__(self, - anno_path=None, - iou_thrs=None, - map_points=None, - map_key='DetectionBoxes_Precision/mAP'): + + def __init__(self, anno_path=None, iou_thrs=None, map_points=None, map_key="DetectionBoxes_Precision/mAP"): """Initialize the iou threshold and max points. Args: @@ -1374,9 +1379,9 @@ def __init__(self, iou_thrs: Minimal value for intersection over union that allows to make decision that prediction bounding box is true positive. You can specify one float value between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. - map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for 11-point interpolated AP, 0 for area under PR curve. - map_key: The key that mapping to pycocotools COCOeval. + map_key: The key that mapping to pycocotools COCOeval. Defaults to 'DetectionBoxes_Precision/mAP'. """ super(TensorflowVOCMAP, self).__init__(anno_path, iou_thrs, map_points, map_key) @@ -1384,14 +1389,14 @@ def __init__(self, self.map_points = 0 -@metric_registry('SquadF1', 'tensorflow, tensorflow_itex') +@metric_registry("SquadF1", "tensorflow, tensorflow_itex") class SquadF1(BaseMetric): """Evaluate for v1.1 of the SQuAD dataset.""" - + def __init__(self): """Initialize the score list.""" - self._score_list = [] # squad metric only work when all data preds collected - + self._score_list = [] # squad metric only work when all data preds collected + def update(self, preds, labels, sample_weight=None): """Add the predictions and labels. @@ -1402,7 +1407,8 @@ def update(self, preds, labels, sample_weight=None): """ if preds: from .evaluate_squad import evaluate - if getattr(self, '_hvd', None) is not None: # pragma: no cover + + if getattr(self, "_hvd", None) is not None: # pragma: no cover gathered_preds_list = self._hvd.allgather_object(preds) gathered_labels_list = self._hvd.allgather_object(labels) temp_preds_list, temp_labels_list = [], [] @@ -1413,21 +1419,22 @@ def update(self, preds, labels, sample_weight=None): labels = temp_labels_list result = evaluate(labels, preds) self._score_list.append(result["f1"]) - + def reset(self): - """Reset the score list.""" - self._score_list = [] - + """Reset the score list.""" + self._score_list = [] + def result(self): """Compute F1 score.""" if len(self._score_list) == 0: - return 0. 
+ return 0.0 return np.array(self._score_list).mean() - -@metric_registry('mIOU', 'tensorflow, tensorflow_itex') + + +@metric_registry("mIOU", "tensorflow, tensorflow_itex") class mIOU(BaseMetric): """Compute the mean IOU(Intersection over Union) score.""" - + def __init__(self, num_classes=21): """Initialize the number of classes. @@ -1448,19 +1455,18 @@ def update(self, preds, labels): labels = labels.flatten() p_dtype = preds.dtype l_dtype = labels.dtype - if getattr(self, '_hvd', None) is not None: # pragma: no cover + if getattr(self, "_hvd", None) is not None: # pragma: no cover preds = self._hvd.allgather_object(preds) labels = self._hvd.allgather_object(labels) - preds_list, labels_list = np.array([], dtype = p_dtype), np.array([], dtype = l_dtype) + preds_list, labels_list = np.array([], dtype=p_dtype), np.array([], dtype=l_dtype) for i in range(self._hvd.size()): preds_list = np.append(preds_list, preds[i]) labels_list = np.append(labels_list, labels[i]) preds, labels = preds_list, labels_list mask = (labels >= 0) & (labels < self.num_classes) self.hist += np.bincount( - self.num_classes * labels[mask].astype(int) + - preds[mask], minlength=self.num_classes ** 2).reshape(self.num_classes, - self.num_classes) + self.num_classes * labels[mask].astype(int) + preds[mask], minlength=self.num_classes**2 + ).reshape(self.num_classes, self.num_classes) def reset(self): """Reset the hist.""" @@ -1472,24 +1478,23 @@ def result(self): Returns: The mean IOU score. """ - iu = np.diag(self.hist) / (self.hist.sum(axis=1) + self.hist.sum(axis=0) - - np.diag(self.hist)) + iu = np.diag(self.hist) / (self.hist.sum(axis=1) + self.hist.sum(axis=0) - np.diag(self.hist)) mean_iu = np.nanmean(iu) return mean_iu -@metric_registry('GLUE', 'onnxrt_qlinearops, onnxrt_integerops') + +@metric_registry("GLUE", "onnxrt_qlinearops, onnxrt_integerops") class ONNXRTGLUE(BaseMetric): """Compute the GLUE score.""" - - def __init__(self, task='mrpc'): + + def __init__(self, task="mrpc"): """Initialize the metric. Args: task:The name of the task (Choices: mrpc, qqp, qnli, rte, sts-b, cola, mnli, wnli.). """ - assert task in ['mrpc', 'qqp', 'qnli', 'rte', 'sts-b', 'cola', \ - 'mnli', 'wnli', 'sst-2'], 'Unsupported task type' + assert task in ["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", "mnli", "wnli", "sst-2"], "Unsupported task type" self.pred_list = None self.label_list = None self.task = task @@ -1502,7 +1507,7 @@ def __init__(self, task='mrpc'): "qnli": "acc", "rte": "acc", "wnli": "acc", - "sst-2": "acc" + "sst-2": "acc", } def update(self, preds, labels): @@ -1512,7 +1517,7 @@ def update(self, preds, labels): preds: The predictions. labels: The labels corresponding to the predictions. 
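A small worked example for the mIOU metric above, using three classes so the bincount-based confusion matrix is easy to follow (illustrative only, not part of this patch):

import numpy as np

from neural_compressor.experimental.metric.metric import mIOU

miou = mIOU(num_classes=3)
miou.update(preds=np.array([0, 1, 2, 2]), labels=np.array([0, 1, 1, 2]))
# hist is a 3x3 confusion matrix; per-class IoU = diag / (row sum + col sum - diag)
print(miou.result())   # per-class IoU [1.0, 0.5, 0.5] averaged -> 0.666...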
""" - if getattr(self, '_hvd', None) is not None: + if getattr(self, "_hvd", None) is not None: raise NotImplementedError("Metric ONNXRTGLUE currently do not support distribued inference.") if isinstance(preds, list) and len(preds) == 1: preds = preds[0] @@ -1536,23 +1541,23 @@ def result(self): if output_mode == "classification": processed_preds = np.argmax(self.pred_list, axis=1) - elif output_mode == "regression": # pragma: no cover + elif output_mode == "regression": # pragma: no cover processed_preds = np.squeeze(self.pred_list) - result = transformers.glue_compute_metrics(\ - self.task, processed_preds, self.label_list) + result = transformers.glue_compute_metrics(self.task, processed_preds, self.label_list) return result[self.return_key[self.task]] -@metric_registry('ROC', 'pytorch') + +@metric_registry("ROC", "pytorch") class ROC(BaseMetric): """Computes ROC score.""" - - def __init__(self, task='dlrm'): + + def __init__(self, task="dlrm"): """Initialize the metric. Args: task:The name of the task (Choices: dlrm, dien, wide_deep.). """ - assert task in ['dlrm', 'dien', 'wide_deep'], 'Unsupported task type' + assert task in ["dlrm", "dien", "wide_deep"], "Unsupported task type" self.pred_list = None self.label_list = None self.task = task @@ -1588,6 +1593,7 @@ def reset(self): def result(self): """Compute the ROC score.""" import sklearn.metrics + scores = np.squeeze(self.pred_list) targets = np.squeeze(self.label_list) roc_auc = sklearn.metrics.roc_auc_score(targets, scores) diff --git a/neural_compressor/experimental/mixed_precision.py b/neural_compressor/experimental/mixed_precision.py index 448e3bab6a8..6f61a43dbc4 100644 --- a/neural_compressor/experimental/mixed_precision.py +++ b/neural_compressor/experimental/mixed_precision.py @@ -20,16 +20,19 @@ import pickle import random import sys + import numpy as np + from ..conf.config import MixedPrecision_Conf -from ..conf.pythonic_config import Config from ..conf.dotdict import deep_get -from .strategy import EXP_STRATEGIES +from ..conf.pythonic_config import Config +from ..model import BaseModel from ..utils import logger from ..utils.create_obj_from_config import create_dataloader from ..utils.utility import CpuInfo, time_limit -from ..model import BaseModel from .graph_optimization import GraphOptimization +from .strategy import EXP_STRATEGIES + class MixedPrecision(GraphOptimization): """Class used for generating low precision model. @@ -50,7 +53,7 @@ def __init__(self, conf_fname_or_obj=None): self._model = None self._eval_dataloader = None self._eval_func = None - self._precisions = 'fp32' + self._precisions = "fp32" self._input = None self._output = None self.conf = None @@ -63,10 +66,10 @@ def __init__(self, conf_fname_or_obj=None): else: self.conf = MixedPrecision_Conf(conf_fname_or_obj) cfg = self.conf.usr_cfg - if cfg.model.framework != 'NA': + if cfg.model.framework != "NA": self.framework = cfg.model.framework.lower() - cfg.tuning.strategy.name = 'automixedprecision' + cfg.tuning.strategy.name = "automixedprecision" seed = cfg.tuning.random_seed random.seed(seed) np.random.seed(seed) @@ -97,51 +100,52 @@ def __call__(self): Returns: converted model: best converted model found, otherwise return None - """ - assert isinstance(self._model, BaseModel), 'need set your Model for mixed precision....' - if 'onnx' in self.framework and 'bf16' in self._precisions: + assert isinstance(self._model, BaseModel), "need set your Model for mixed precision...." 
+ if "onnx" in self.framework and "bf16" in self._precisions: logger.warning("Mixed precision doesn't support bf16 for ONNX models.") - self._precisions.remove('bf16') - - if 'bf16' in self._precisions and not CpuInfo().bf16: # pragma: no cover - if os.getenv('FORCE_BF16') == '1': - logger.warning("Mixed precision will generate bf16 graph although " \ - "the hardware doesn't support bf16 instruction.") + self._precisions.remove("bf16") + + if "bf16" in self._precisions and not CpuInfo().bf16: # pragma: no cover + if os.getenv("FORCE_BF16") == "1": + logger.warning( + "Mixed precision will generate bf16 graph although " + "the hardware doesn't support bf16 instruction." + ) else: - logger.warning("Mixed precision exits due to the hardware " \ - "doesn't support bf16 instruction.") - self._precisions.remove('bf16') - - if 'fp16' in self._precisions and 'gpu' not in self.conf.usr_cfg.device: - if os.getenv('FORCE_FP16') == '1': - logger.warning("Mixed precision will generate fp16 graph although " \ - "the hardware doesn't support fp16 instruction.") + logger.warning("Mixed precision exits due to the hardware " "doesn't support bf16 instruction.") + self._precisions.remove("bf16") + + if "fp16" in self._precisions and "gpu" not in self.conf.usr_cfg.device: + if os.getenv("FORCE_FP16") == "1": + logger.warning( + "Mixed precision will generate fp16 graph although " + "the hardware doesn't support fp16 instruction." + ) else: - logger.warning("Mixed precision exits due to the hardware " \ - "doesn't support fp16 instruction.") - self._precisions.remove('fp16') + logger.warning("Mixed precision exits due to the hardware " "doesn't support fp16 instruction.") + self._precisions.remove("fp16") - if self._precisions == ['fp32'] or len(self._precisions) == 0: + if self._precisions == ["fp32"] or len(self._precisions) == 0: sys.exit(0) cfg = self.conf.usr_cfg - if self.framework == 'tensorflow': + if self.framework == "tensorflow": self._model.name = cfg.model.name - self._model.output_tensor_names = cfg.model.outputs if \ - not self._output else self._output - self._model.input_tensor_names = cfg.model.inputs if \ - not self._input else self._input + self._model.output_tensor_names = cfg.model.outputs if not self._output else self._output + self._model.input_tensor_names = cfg.model.inputs if not self._input else self._input self._model.workspace_path = cfg.tuning.workspace.path - if 'bf16' in self._precisions or \ - (cfg.mixed_precision and 'bf16' in cfg.mixed_precision.precisions) or \ - (cfg.graph_optimization and 'bf16' in cfg.graph_optimization.precisions): + if ( + "bf16" in self._precisions + or (cfg.mixed_precision and "bf16" in cfg.mixed_precision.precisions) + or (cfg.graph_optimization and "bf16" in cfg.graph_optimization.precisions) + ): cfg.use_bf16 = True # when eval_func is set, will be directly used and eval_dataloader can be None if self._eval_func is None: if self._eval_dataloader is None: - eval_dataloader_cfg = deep_get(cfg, 'evaluation.accuracy.dataloader') + eval_dataloader_cfg = deep_get(cfg, "evaluation.accuracy.dataloader") if eval_dataloader_cfg is None: self._eval_func = None else: @@ -154,43 +158,43 @@ def __call__(self): _resume = None # check if interrupted tuning procedure exists. if yes, it will resume the # whole auto tune process. 
- self.resume_file = os.path.abspath(os.path.expanduser(cfg.tuning.workspace.resume)) \ - if cfg.tuning.workspace and cfg.tuning.workspace.resume else None - if self.resume_file: # pragma: no cover - assert os.path.exists(self.resume_file), \ - "The specified resume file {} doesn't exist!".format(self.resume_file) - with open(self.resume_file, 'rb') as f: + self.resume_file = ( + os.path.abspath(os.path.expanduser(cfg.tuning.workspace.resume)) + if cfg.tuning.workspace and cfg.tuning.workspace.resume + else None + ) + if self.resume_file: # pragma: no cover + assert os.path.exists(self.resume_file), "The specified resume file {} doesn't exist!".format( + self.resume_file + ) + with open(self.resume_file, "rb") as f: _resume = pickle.load(f).__dict__ self.strategy = EXP_STRATEGIES[strategy]( - self._model, - self.conf, - None, - None, - self._eval_dataloader, - self._eval_func, - _resume) + self._model, self.conf, None, None, self._eval_dataloader, self._eval_func, _resume + ) try: with time_limit(self.conf.usr_cfg.tuning.exit_policy.timeout): self.strategy.traverse() - except KeyboardInterrupt: # pragma: no cover + except KeyboardInterrupt: # pragma: no cover pass - except Exception as e: # pragma: no cover + except Exception as e: # pragma: no cover logger.info("Unexpected exception {} happened during turing.".format(repr(e))) - finally: + finally: if self.strategy.best_qmodel: logger.info( "Specified timeout or max trials is reached! " - "Found a converted model which meet accuracy goal. Exit.") + "Found a converted model which meet accuracy goal. Exit." + ) self.strategy.deploy_config() - else: # pragma: no cover + else: # pragma: no cover logger.info( "Specified timeout or max trials is reached! " - "Not found any converted model which meet accuracy goal. Exit.") + "Not found any converted model which meet accuracy goal. Exit." + ) - logger.info("Mixed Precision is done. Please invoke model.save() to save " \ - "optimized model to disk.") + logger.info("Mixed Precision is done. Please invoke model.save() to save " "optimized model to disk.") return self.strategy.best_qmodel @@ -207,7 +211,7 @@ def precisions(self, customized_precisions): if isinstance(customized_precisions, list): self._precisions = sorted([i.strip() for i in customized_precisions]) elif isinstance(customized_precisions, str): - self._precisions = sorted([i.strip() for i in customized_precisions.split(',')]) + self._precisions = sorted([i.strip() for i in customized_precisions.split(",")]) self.conf.usr_cfg.mixed_precision.precisions = self._precisions def set_config_by_model(self, model_obj): @@ -216,11 +220,11 @@ def set_config_by_model(self, model_obj): if self._input: self.conf.usr_cfg.model.inputs = self._input if self._output: - if isinstance(self._output, str) and ',' in self._output: - self.conf.usr_cfg.model.outputs = [s.strip() for s in self._output.split(',')] + if isinstance(self._output, str) and "," in self._output: + self.conf.usr_cfg.model.outputs = [s.strip() for s in self._output.split(",")] else: self.conf.usr_cfg.model.outputs = self._output def __repr__(self): """Return 'MixedPrecision'.""" - return 'MixedPrecision' + return "MixedPrecision" diff --git a/neural_compressor/experimental/model_conversion.py b/neural_compressor/experimental/model_conversion.py index ae83df88e00..e77290fb7d5 100644 --- a/neural_compressor/experimental/model_conversion.py +++ b/neural_compressor/experimental/model_conversion.py @@ -14,21 +14,24 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. - """Helps convert one model format to another.""" -import tempfile import datetime +import tempfile + import yaml + from neural_compressor.adaptor import FRAMEWORKS + from ..conf.config import Conf -from ..conf.dotdict import deep_get, deep_set, DotDict +from ..conf.dotdict import DotDict, deep_get, deep_set +from ..model import BaseModel from ..utils import logger from ..utils.create_obj_from_config import create_dataloader, create_eval_func from .common import Model as NCModel -from ..model import BaseModel -class ModelConversion(): # pragma: no cover + +class ModelConversion: # pragma: no cover """ModelConversion class is used to convert one model format to another. Currently Neural Compressor only supports Quantization-aware training TensorFlow model to Default @@ -45,7 +48,6 @@ class ModelConversion(): # pragma: no cover Args: conf_fname_or_obj (string or obj): Optional. The path to the YAML configuration file or Conf class containing model conversion and evaluation setting if not specifed by code. - """ def __init__(self, conf_fname_or_obj=None): @@ -57,7 +59,7 @@ def __init__(self, conf_fname_or_obj=None): """ self.conf_name = conf_fname_or_obj self._model = None - self.framework = 'tensorflow' + self.framework = "tensorflow" self._eval_dataloader = None self._eval_func = None @@ -73,8 +75,9 @@ def __init__(self, conf_fname_or_obj=None): elif isinstance(conf_fname_or_obj, Conf): self.conf = conf_fname_or_obj else: # pragma: no cover - assert False, \ - "Please pass a YAML configuration file path or \ + assert ( + False + ), "Please pass a YAML configuration file path or \ Conf class to model_conversion" else: self.conf = None @@ -95,41 +98,41 @@ def __call__(self): framework_specific_info = {} cfg = self.conf.usr_cfg framework_specific_info.update( - {'name': cfg.model.name, - 'backend': 'default', - 'format': 'default', - 'device': cfg.device, - 'fake_quant': True, - 'inputs': cfg.model.inputs, - 'outputs': cfg.model.outputs, - 'workspace_path': cfg.tuning.workspace.path}) + { + "name": cfg.model.name, + "backend": "default", + "format": "default", + "device": cfg.device, + "fake_quant": True, + "inputs": cfg.model.inputs, + "outputs": cfg.model.outputs, + "workspace_path": cfg.tuning.workspace.path, + } + ) self.adaptor = FRAMEWORKS[self.framework](framework_specific_info) q_model = self.adaptor.convert(self._model, self._source, self._destination) # when eval_func is None but metric or _eval_dataloader is set by yaml or code, # it means Neural Compressor will create the eval_func from these info. 
- metric_cfg = [self._metric] if self._metric else \ - deep_get(cfg, 'evaluation.accuracy.metric') - postprocess_cfg = deep_get(cfg, 'evaluation.accuracy.postprocess') + metric_cfg = [self._metric] if self._metric else deep_get(cfg, "evaluation.accuracy.metric") + postprocess_cfg = deep_get(cfg, "evaluation.accuracy.postprocess") if self._eval_func is None and metric_cfg: - eval_dataloader_cfg = deep_get(cfg, 'evaluation.accuracy.dataloader') + eval_dataloader_cfg = deep_get(cfg, "evaluation.accuracy.dataloader") if self._eval_dataloader is None and eval_dataloader_cfg: self._eval_dataloader = create_dataloader(self.framework, eval_dataloader_cfg) - assert self._eval_dataloader, 'either "eval_dataloader" property or evaluation' \ - '.accuracy.dataloader field in yaml should be set when metric is set' - - self._eval_func = create_eval_func(self.framework, \ - self.eval_dataloader, \ - self.adaptor, \ - metric_cfg, \ - postprocess_cfg, \ - fp32_baseline = True) + assert self._eval_dataloader, ( + 'either "eval_dataloader" property or evaluation' + ".accuracy.dataloader field in yaml should be set when metric is set" + ) + + self._eval_func = create_eval_func( + self.framework, self.eval_dataloader, self.adaptor, metric_cfg, postprocess_cfg, fp32_baseline=True + ) if self._eval_func: baseline_score = self._eval_func(self._model) qmodel_score = self._eval_func(q_model) - logger.info("The score of Quantization-Aware Training model is {}.". - format(str(baseline_score))) + logger.info("The score of Quantization-Aware Training model is {}.".format(str(baseline_score))) logger.info("Converted model score is {}.".format(str(qmodel_score))) return q_model @@ -137,13 +140,15 @@ def __call__(self): fit = __call__ def _gen_yaml(self): - random_name = '{}'.format(datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')) - default_yaml_template = {'model': {'framework': self.framework, 'name': random_name}, - 'device': 'cpu', - 'model_conversion': {'source': 'QAT', 'destination': 'default'}} - - temp_yaml_path = tempfile.mkstemp(suffix='.yaml')[1] - with open(temp_yaml_path, 'w', encoding='utf-8') as f: + random_name = "{}".format(datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")) + default_yaml_template = { + "model": {"framework": self.framework, "name": random_name}, + "device": "cpu", + "model_conversion": {"source": "QAT", "destination": "default"}, + } + + temp_yaml_path = tempfile.mkstemp(suffix=".yaml")[1] + with open(temp_yaml_path, "w", encoding="utf-8") as f: yaml.dump(default_yaml_template, f) self.conf = Conf(temp_yaml_path) @@ -157,6 +162,7 @@ def dataset(self, dataset_type, *args, **kwargs): class: dataset class """ from .data import Datasets + return Datasets(self.framework)[dataset_type](*args, **kwargs) @property @@ -167,8 +173,9 @@ def source(self): @source.setter def source(self, _source): """Set source.""" - assert _source.lower() == 'qat', 'Model conversion now only supports TensorFlow ' \ - 'QAT model to default quantized model' + assert _source.lower() == "qat", ( + "Model conversion now only supports TensorFlow " "QAT model to default quantized model" + ) self._source = _source.lower() @property @@ -179,8 +186,9 @@ def destination(self): @destination.setter def destination(self, _destination): """Set destination.""" - assert _destination.lower() == 'default', 'Model conversion now only supports ' \ - 'TensorFlow QAT model to default quantized model' + assert _destination.lower() == "default", ( + "Model conversion now only supports " "TensorFlow QAT model to default quantized 
model" + ) self._destination = _destination.lower() @property @@ -192,9 +200,9 @@ def eval_dataloader(self): def eval_dataloader(self, dataloader): """Set Data loader for evaluation. - It is iterable and the batched data should consists of a tuple like (input, label), - when eval_dataloader is set, user should configure postprocess(optional) and metric - in yaml file or set postprocess and metric cls. Notice evaluation dataloader will be + It is iterable and the batched data should consists of a tuple like (input, label), + when eval_dataloader is set, user should configure postprocess(optional) and metric + in yaml file or set postprocess and metric cls. Notice evaluation dataloader will be used to generate data for model inference, make sure the input data can be feed to model. Args: @@ -211,11 +219,10 @@ def eval_dataloader(self, dataloader): and only after the Quantization object created then framework infomation can be known. Future we will support creating iterable dataloader from neural_compressor.common.DataLoader - """ from .common import _generate_common_dataloader - self._eval_dataloader = _generate_common_dataloader( - dataloader, self.framework) + + self._eval_dataloader = _generate_common_dataloader(dataloader, self.framework) @property def model(self): @@ -235,7 +242,6 @@ def model(self, user_model): set them manually in config yaml file. Another corner case is slim model of tensorflow, be careful of the name of model configured in yaml file, make sure the name is in supported slim model list. - """ if not isinstance(user_model, BaseModel): logger.warning("Force convert framework model to neural_compressor model.") @@ -243,21 +249,20 @@ def model(self, user_model): else: self._model = user_model - assert self.framework == 'tensorflow', \ - 'Model conversion only supports Tensorflow at current stage.' + assert self.framework == "tensorflow", "Model conversion only supports Tensorflow at current stage." if not self.conf: self._gen_yaml() cfg = self.conf.usr_cfg - if self.framework == 'tensorflow': + if self.framework == "tensorflow": self._model.name = cfg.model.name self._model.workspace_path = cfg.tuning.workspace.path @property def metric(self): """Return metric.""" - assert False, 'Should not try to get the value of `metric` attribute.' + assert False, "Should not try to get the value of `metric` attribute." return None @metric.setter @@ -277,30 +282,32 @@ def metric(self, user_metric): neural_compressor.common.Metric, in this method the user_metric.metric_cls will be registered to specific frameworks and initialized. - """ if deep_get(self.conf.usr_cfg, "evaluation.accuracy.metric"): - logger.warning("Override the value of `metric` field defined in yaml file" \ - " as user defines the value of `metric` attribute by code.") - + logger.warning( + "Override the value of `metric` field defined in yaml file" + " as user defines the value of `metric` attribute by code." 
+ ) + from .common import Metric as NCMetric + if isinstance(user_metric, NCMetric): - metric_cfg = {user_metric.name : {**user_metric.kwargs}} + metric_cfg = {user_metric.name: {**user_metric.kwargs}} deep_set(self.conf.usr_cfg, "evaluation.accuracy.metric", metric_cfg) self.conf.usr_cfg = DotDict(self.conf.usr_cfg) from .metric import METRICS + metrics = METRICS(self.framework) metrics.register(user_metric.name, user_metric.metric_cls) else: - for i in ['reset', 'update', 'result']: - assert hasattr(user_metric, i), 'Please realise {} function' \ - 'in user defined metric'.format(i) + for i in ["reset", "update", "result"]: + assert hasattr(user_metric, i), "Please realise {} function" "in user defined metric".format(i) self._metric = user_metric @property def postprocess(self): """Check postprocess.""" - assert False, 'Should not try to get the value of `postprocess` attribute.' + assert False, "Should not try to get the value of `postprocess` attribute." return None @postprocess.setter @@ -312,28 +319,32 @@ def postprocess(self, user_postprocess): user_postprocess.postprocess_cls should be sub_class of neural_compressor.data.BaseTransform. Args: - user_postprocess(neural_compressor.common.Postprocess):user_postprocess should be + user_postprocess(neural_compressor.common.Postprocess):user_postprocess should be object initialized from neural_compressor.common.Postprocess, in this method the user_postprocess.postprocess_cls will be registered to specific frameworks and initialized. - """ from .common import Postprocess as NCPostprocess - assert isinstance(user_postprocess, NCPostprocess), \ - 'please initialize a neural_compressor.common.Postprocess and set....' - postprocess_cfg = {user_postprocess.name : {**user_postprocess.kwargs}} + + assert isinstance( + user_postprocess, NCPostprocess + ), "please initialize a neural_compressor.common.Postprocess and set...." + postprocess_cfg = {user_postprocess.name: {**user_postprocess.kwargs}} if deep_get(self.conf.usr_cfg, "evaluation.accuracy.postprocess"): - logger.warning("Override the value of `postprocess` field defined in yaml file" \ - " as user defines the value of `postprocess` attribute by code.") + logger.warning( + "Override the value of `postprocess` field defined in yaml file" + " as user defines the value of `postprocess` attribute by code." + ) deep_set(self.conf.usr_cfg, "evaluation.accuracy.postprocess.transform", postprocess_cfg) from .data import TRANSFORMS - postprocesses = TRANSFORMS(self.framework, 'postprocess') + + postprocesses = TRANSFORMS(self.framework, "postprocess") postprocesses.register(user_postprocess.name, user_postprocess.postprocess_cls) @property def eval_func(self): """Return eval_func.""" - assert False, 'Should not try to get the value of `eval_func` attribute.' + assert False, "Should not try to get the value of `eval_func` attribute." 
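The metric setter in the hunks above accepts either a neural_compressor.common.Metric wrapper or any object that implements reset/update/result; a minimal object meeting that contract could look like the sketch below (the class name and its accuracy logic are invented for illustration, only the three method names are dictated by the hasattr checks).

    # Invented example metric; only the reset/update/result method names are
    # required by the hasattr checks in the setter above.
    class SimpleAccuracy:
        def __init__(self):
            self.correct = 0
            self.total = 0

        def reset(self):
            self.correct = 0
            self.total = 0

        def update(self, preds, labels):
            for pred, label in zip(preds, labels):
                self.correct += int(pred == label)
                self.total += 1

        def result(self):
            return self.correct / self.total if self.total else 0.0

    # conversion.metric = SimpleAccuracy()   # assigned on a ModelConversion instance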
return None @eval_func.setter @@ -351,4 +362,4 @@ def eval_func(self, user_eval_func): def __repr__(self): """Return representation.""" - return 'ModelConversion' + return "ModelConversion" diff --git a/neural_compressor/experimental/nas/basic_nas.py b/neural_compressor/experimental/nas/basic_nas.py index d9ab7af2ee5..2f73f69e8fb 100644 --- a/neural_compressor/experimental/nas/basic_nas.py +++ b/neural_compressor/experimental/nas/basic_nas.py @@ -19,13 +19,14 @@ import os -from .nas import NASBase -from .nas_utils import nas_registry from neural_compressor.adaptor import FRAMEWORKS from neural_compressor.conf.config import Conf, NASConfig from neural_compressor.experimental.component import Component -from neural_compressor.utils.create_obj_from_config import \ - create_dataloader, create_train_func, create_eval_func +from neural_compressor.utils.create_obj_from_config import create_dataloader, create_eval_func, create_train_func + +from .nas import NASBase +from .nas_utils import nas_registry + @nas_registry("Basic") class BasicNAS(NASBase, Component): @@ -62,8 +63,7 @@ def estimate(self, model): Returns: Evaluated metrics of the model. """ - assert self._train_func is not None and self._eval_func is not None, \ - "train_func and eval_func must be set." + assert self._train_func is not None and self._eval_func is not None, "train_func and eval_func must be set." self._train_func(model) return self._eval_func(model) @@ -72,19 +72,15 @@ def init_by_cfg(self, conf_fname_or_obj): if isinstance(conf_fname_or_obj, str): if os.path.isfile(conf_fname_or_obj): self.conf = Conf(conf_fname_or_obj) - else: # pragma: no cover + else: # pragma: no cover raise FileNotFoundError( - "{} is not a file, please provide a NAS config file path.".format( - conf_fname_or_obj - ) + "{} is not a file, please provide a NAS config file path.".format(conf_fname_or_obj) ) elif isinstance(conf_fname_or_obj, NASConfig): conf_fname_or_obj.validate() self.conf = conf_fname_or_obj - else: # pragma: no cover - raise NotImplementedError( - "Please provide a str path to the config file or an object of NASConfig." - ) + else: # pragma: no cover + raise NotImplementedError("Please provide a str path to the config file or an object of NASConfig.") self._init_with_conf() assert self.cfg.nas is not None, "nas section must be set" # search related config @@ -92,50 +88,53 @@ def init_by_cfg(self, conf_fname_or_obj): def pre_process(self): """Initialize the train and evaluation settings.""" - framework_specific_info = {'device': self.cfg.device, - 'random_seed': self.cfg.tuning.random_seed, - 'workspace_path': self.cfg.tuning.workspace.path, - 'q_dataloader': None} + framework_specific_info = { + "device": self.cfg.device, + "random_seed": self.cfg.tuning.random_seed, + "workspace_path": self.cfg.tuning.workspace.path, + "q_dataloader": None, + } - if self.framework == 'tensorflow' or self.framework == 'tensorflow_itex': - framework_specific_info.update( - {"inputs": self.cfg.model.inputs, "outputs": self.cfg.model.outputs}) + if self.framework == "tensorflow" or self.framework == "tensorflow_itex": + framework_specific_info.update({"inputs": self.cfg.model.inputs, "outputs": self.cfg.model.outputs}) self.adaptor = FRAMEWORKS[self.framework](framework_specific_info) # create dataloaders if self._train_dataloader is None and self._train_func is None: train_dataloader_cfg = self.cfg.train.dataloader - assert train_dataloader_cfg is not None, \ - 'No training dataloader setting in current component. 
Please check ' \ - 'dataloader field of train field in yaml file. Or manually pass ' \ - 'dataloader to component.' + assert train_dataloader_cfg is not None, ( + "No training dataloader setting in current component. Please check " + "dataloader field of train field in yaml file. Or manually pass " + "dataloader to component." + ) self._train_dataloader = create_dataloader(self.framework, train_dataloader_cfg) if self._eval_dataloader is None and self._eval_func is None: eval_dataloader_cfg = self.cfg.evaluation.accuracy.dataloader - assert eval_dataloader_cfg is not None, \ - 'No evaluation dataloader setting in current component. Please check ' \ - 'dataloader field of evaluation field in yaml file. Or manually pass ' \ - 'dataloader to component.' + assert eval_dataloader_cfg is not None, ( + "No evaluation dataloader setting in current component. Please check " + "dataloader field of evaluation field in yaml file. Or manually pass " + "dataloader to component." + ) self._eval_dataloader = create_dataloader(self.framework, eval_dataloader_cfg) # create functions if self._train_func is None: - self._train_func = create_train_func(self.framework, - self._train_dataloader, - self.adaptor, - self.cfg.train, - hooks=self.hooks) + self._train_func = create_train_func( + self.framework, self._train_dataloader, self.adaptor, self.cfg.train, hooks=self.hooks + ) if self._eval_func is None: metric = [self._metric] if self._metric else self.cfg.evaluation.accuracy.metric - self._eval_func = create_eval_func(self.framework, - self._eval_dataloader, - self.adaptor, - metric, - self.cfg.evaluation.accuracy.postprocess, - fp32_baseline = False) + self._eval_func = create_eval_func( + self.framework, + self._eval_dataloader, + self.adaptor, + metric, + self.cfg.evaluation.accuracy.postprocess, + fp32_baseline=False, + ) def __repr__(self): """Class representation.""" - return 'BasicNAS' # pragma: no cover \ No newline at end of file + return "BasicNAS" # pragma: no cover diff --git a/neural_compressor/experimental/nas/dynas.py b/neural_compressor/experimental/nas/dynas.py index 60c6610a2a3..5c3bdf71968 100644 --- a/neural_compressor/experimental/nas/dynas.py +++ b/neural_compressor/experimental/nas/dynas.py @@ -23,7 +23,7 @@ from .nas import NASBase from .nas_utils import nas_registry -DyNASManager = LazyImport('dynast.dynast_manager.DyNAS') +DyNASManager = LazyImport("dynast.dynast_manager.DyNAS") @nas_registry("DyNAS") @@ -47,7 +47,7 @@ def __init__(self, conf_fname_or_obj): supernet=self.supernet, optimization_metrics=self.metrics, measurements=self.metrics, - search_tactic='linas', + search_tactic="linas", num_evals=self.num_evals, results_path=self.results_csv_path, dataset_path=self.dataset_path, @@ -68,7 +68,7 @@ def search(self): """ return self.dynas_manager.search() - def select_model_arch(self): # pragma: no cover + def select_model_arch(self): # pragma: no cover """Select the model architecture.""" # model_arch_proposition intrinsically contained in # pymoo.minimize API of search_manager.run_search method, @@ -77,7 +77,7 @@ def select_model_arch(self): # pragma: no cover def init_cfg(self, conf_fname_or_obj): """Initialize the configuration.""" - logger.info('init_cfg') + logger.info("init_cfg") if isinstance(conf_fname_or_obj, str): if os.path.isfile(conf_fname_or_obj): self.conf = Conf(conf_fname_or_obj).usr_cfg @@ -85,11 +85,9 @@ def init_cfg(self, conf_fname_or_obj): conf_fname_or_obj.validate() self.conf = conf_fname_or_obj.usr_cfg else: # pragma: no cover - raise NotImplementedError( 
- "Please provide a str path to the config file or an object of NASConfig." - ) + raise NotImplementedError("Please provide a str path to the config file or an object of NASConfig.") # self.init_search_cfg(self.conf.nas) - assert 'dynas' in self.conf.nas, "Must specify dynas section." + assert "dynas" in self.conf.nas, "Must specify dynas section." dynas_config = self.conf.nas.dynas self.seed = self.conf.nas.search.seed self.search_algo = self.conf.nas.search.search_algorithm @@ -103,8 +101,6 @@ def init_cfg(self, conf_fname_or_obj): self.batch_size = dynas_config.batch_size self.num_workers = dynas_config.num_workers if dynas_config.population < 10: # pragma: no cover - raise NotImplementedError( - "Please specify a population size >= 10" - ) + raise NotImplementedError("Please specify a population size >= 10") else: self.population = dynas_config.population diff --git a/neural_compressor/experimental/nas/nas.py b/neural_compressor/experimental/nas/nas.py index 029333c3fa6..d670e2b22aa 100644 --- a/neural_compressor/experimental/nas/nas.py +++ b/neural_compressor/experimental/nas/nas.py @@ -21,18 +21,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import os import shutil - from collections.abc import Iterable -from .nas_utils import find_pareto_front, NASMethods -from .search_algorithms import BayesianOptimizationSearcher, GridSearcher, RandomSearcher + +import numpy as np + from neural_compressor.conf.config import Conf, NASConfig from neural_compressor.conf.pythonic_config import Config -from neural_compressor.utils.utility import logger, LazyImport +from neural_compressor.utils.utility import LazyImport, logger + +from .nas_utils import NASMethods, find_pareto_front +from .search_algorithms import BayesianOptimizationSearcher, GridSearcher, RandomSearcher -torch = LazyImport('torch') +torch = LazyImport("torch") class NAS(object): @@ -57,18 +59,13 @@ def __new__(self, conf_fname_or_obj, *args, **kwargs): self.conf = NASConfig() self.conf.map_pyconfig_to_cfg(conf_fname_or_obj) else: # pragma: no cover - raise NotImplementedError( - "Please provide a str path to the config file." - ) + raise NotImplementedError("Please provide a str path to the config file.") assert self.conf.usr_cfg.nas is not None, "nas section must be set" - if isinstance(self.conf.usr_cfg.nas.approach, str) and \ - self.conf.usr_cfg.nas.approach.lower() in NASMethods: + if isinstance(self.conf.usr_cfg.nas.approach, str) and self.conf.usr_cfg.nas.approach.lower() in NASMethods: method = self.conf.usr_cfg.nas.approach.lower() else: - logger.warning( - "NAS approach not set in config, use default NAS approach, i.e. Basic." - ) - method = 'basic' + logger.warning("NAS approach not set in config, use default NAS approach, i.e. Basic.") + method = "basic" if isinstance(self.conf, NASConfig): return NASMethods[method](self.conf, *args, **kwargs) return NASMethods[method](conf_fname_or_obj, *args, **kwargs) @@ -100,9 +97,11 @@ def select_model_arch(self): Model architecture description. """ model_arch_paras = self._search_algorithm.suggest() - assert self.search_space_keys and isinstance(model_arch_paras, dict) and \ - self.search_space_keys == list(model_arch_paras.keys()), \ - "Keys of model_arch_paras should be the same with search_space_keys." 
+ assert ( + self.search_space_keys + and isinstance(model_arch_paras, dict) + and self.search_space_keys == list(model_arch_paras.keys()) + ), "Keys of model_arch_paras should be the same with search_space_keys." return model_arch_paras def search(self, res_save_path=None): @@ -111,11 +110,12 @@ def search(self, res_save_path=None): Returns: Best model architecture found in search process. """ - assert self.model_builder is not None, \ - "Must specify model_builder for generating model instance by model architecture." + assert ( + self.model_builder is not None + ), "Must specify model_builder for generating model instance by model architecture." if res_save_path is None or not os.path.isdir(res_save_path): res_save_path = os.getcwd() - save_path = os.path.join(res_save_path, 'NASResults') + save_path = os.path.join(res_save_path, "NASResults") self.model_paras_num = {} self.load_search_results(save_path) os.makedirs(save_path, exist_ok=True) @@ -123,65 +123,39 @@ def search(self, res_save_path=None): for i in range(self.max_trials): logger.info( "{fix} Trial {n} starts, {r} trials to go {fix}".format( - n=i+1, r=self.max_trials-i-1, fix="="*30 + n=i + 1, r=self.max_trials - i - 1, fix="=" * 30 ) ) model_arch_paras = self.select_model_arch() - logger.info( - "Model architecture {} proposed.".format(model_arch_paras)) + logger.info("Model architecture {} proposed.".format(model_arch_paras)) model = self._model_builder(model_arch_paras) model_paras = self.count_model_parameters(model) - logger.info( - "***** Number of model parameters: {:.2f}M *****".format( - model_paras / 10**6) - ) - self.model_paras_num[tuple( - model_arch_paras.values())] = model_paras + logger.info("***** Number of model parameters: {:.2f}M *****".format(model_paras / 10**6)) + self.model_paras_num[tuple(model_arch_paras.values())] = model_paras if tuple(model_arch_paras.values()) in self.search_results: - logger.info( - "Skip evaluated model architecture {}.".format(model_arch_paras)) + logger.info("Skip evaluated model architecture {}.".format(model_arch_paras)) continue if tuple(model_arch_paras.values()) in self.resumed_search_results: - logger.info( - "Find previous results of model architecture: {}.".format( - model_arch_paras) - ) - metrics = self.resumed_search_results[tuple( - model_arch_paras.values())] + logger.info("Find previous results of model architecture: {}.".format(model_arch_paras)) + metrics = self.resumed_search_results[tuple(model_arch_paras.values())] else: - logger.info( - "Assessing model architecture: {}.".format(model_arch_paras)) + logger.info("Assessing model architecture: {}.".format(model_arch_paras)) metrics = self.estimate(model) - logger.info( - "Metrics of model architecture {} is {}.".format( - model_arch_paras, metrics) - ) + logger.info("Metrics of model architecture {} is {}.".format(model_arch_paras, metrics)) self.search_results[tuple(model_arch_paras.values())] = metrics - self._search_algorithm.get_feedback( - sum(self.metrics_conversion(metrics))) - self.dump_search_results( - os.path.join(save_path, 'Trial_{}_results.txt'.format(i+1)) - ) + self._search_algorithm.get_feedback(sum(self.metrics_conversion(metrics))) + self.dump_search_results(os.path.join(save_path, "Trial_{}_results.txt".format(i + 1))) for model_arch_vec in self.resumed_search_results: if model_arch_vec not in self.search_results: - self.search_results[model_arch_vec] = \ - self.resumed_search_results[model_arch_vec] - model = self._model_builder( - self.params_vec2params_dict(model_arch_vec)) - 
self.model_paras_num[model_arch_vec] = self.count_model_parameters( - model) - self.dump_search_results(os.path.join( - save_path, 'Final_results.txt')) + self.search_results[model_arch_vec] = self.resumed_search_results[model_arch_vec] + model = self._model_builder(self.params_vec2params_dict(model_arch_vec)) + self.model_paras_num[model_arch_vec] = self.count_model_parameters(model) + self.dump_search_results(os.path.join(save_path, "Final_results.txt")) self.find_best_model_archs() - logger.info( - "{fix} Found {n} best model architectures {fix}".format( - n=len(self.best_model_archs), fix="="*30 - ) - ) + logger.info("{fix} Found {n} best model architectures {fix}".format(n=len(self.best_model_archs), fix="=" * 30)) for i, model_arch in enumerate(self.best_model_archs): - logger.info( - "Best model architecture {}: {}".format(i+1, model_arch)) + logger.info("Best model architecture {}: {}".format(i + 1, model_arch)) return self.best_model_archs def estimate(self, model): # pragma: no cover @@ -201,49 +175,40 @@ def count_model_parameters(self, model): if isinstance(model, torch.nn.Module): return sum(p.numel() for p in model.parameters()) else: - raise NotImplementedError( - "Only support torch model now.") # pragma: no cover + raise NotImplementedError("Only support torch model now.") # pragma: no cover def load_search_results(self, path): """Load previous search results if exist.""" self.resumed_search_results = {} - lastest_results_record = os.path.join(path, 'lastest_results.npy') + lastest_results_record = os.path.join(path, "lastest_results.npy") if not os.path.exists(path) or not os.path.exists(lastest_results_record): return - self.resumed_search_results = np.load( - lastest_results_record, allow_pickle=True).item() - os.makedirs(os.path.join(path, 'previous_results'), exist_ok=True) + self.resumed_search_results = np.load(lastest_results_record, allow_pickle=True).item() + os.makedirs(os.path.join(path, "previous_results"), exist_ok=True) for f in os.listdir(path): if os.path.isfile(os.path.join(path, f)): - shutil.move(os.path.join(path, f), os.path.join( - path, 'previous_results', f)) + shutil.move(os.path.join(path, f), os.path.join(path, "previous_results", f)) logger.info("Loaded previous results.") def dump_search_results(self, path): """Save search results.""" - lastest_results_record = os.path.join(os.path.dirname(path), 'lastest_results.npy') + lastest_results_record = os.path.join(os.path.dirname(path), "lastest_results.npy") np.save(lastest_results_record, self.search_results, allow_pickle=True) - write_contents = '=' * 30 + ' All Search Results ' + '=' * 30 + '\n\n' + write_contents = "=" * 30 + " All Search Results " + "=" * 30 + "\n\n" for model_arch_vec in self.search_results: - tmp = ','.join(['{}_{}'.format(k, v) - for k, v in zip(self.search_space_keys, model_arch_vec)]) - write_contents += '{}: {} Paras: {}M\n'.format( - tmp, self.search_results[model_arch_vec], - self.model_paras_num[model_arch_vec] / 10**6 + tmp = ",".join(["{}_{}".format(k, v) for k, v in zip(self.search_space_keys, model_arch_vec)]) + write_contents += "{}: {} Paras: {}M\n".format( + tmp, self.search_results[model_arch_vec], self.model_paras_num[model_arch_vec] / 10**6 ) - write_contents += '\n\n\n' + '=' * 30 + \ - ' Best Search Results ' + '=' * 30 + '\n\n' + write_contents += "\n\n\n" + "=" * 30 + " Best Search Results " + "=" * 30 + "\n\n" self.find_best_model_archs() for i, model_arch in enumerate(self.best_model_archs): model_arch_vec = tuple(model_arch.values()) - tmp = 
','.join(['{}_{}'.format(k, v) - for k, v in zip(self.search_space_keys, model_arch_vec)]) - write_contents += \ - '{}. {}: {} Paras: {}M\n'.format( - i+1, tmp, self.search_results[model_arch_vec], - self.model_paras_num[model_arch_vec] / 10**6 - ) - with open(path, mode='w') as f: + tmp = ",".join(["{}_{}".format(k, v) for k, v in zip(self.search_space_keys, model_arch_vec)]) + write_contents += "{}. {}: {} Paras: {}M\n".format( + i + 1, tmp, self.search_results[model_arch_vec], self.model_paras_num[model_arch_vec] / 10**6 + ) + with open(path, mode="w") as f: f.write(write_contents) def params_vec2params_dict(self, paras_vec): @@ -254,8 +219,9 @@ def params_vec2params_dict(self, paras_vec): Returns: Parameters dictionary defining the model architecture. """ - assert len(paras_vec) == len(self.search_space_keys), \ - "Length of paras_vec and search_space_keys should be the same." + assert len(paras_vec) == len( + self.search_space_keys + ), "Length of paras_vec and search_space_keys should be the same." return {k: v for k, v in zip(self.search_space_keys, paras_vec)} def find_best_model_archs(self): @@ -265,11 +231,9 @@ def find_best_model_archs(self): """ assert len(self.search_results) > 0, "Zero result in search_results." model_arches = list(self.search_results.keys()) - metrics = [self.metrics_conversion( - self.search_results[ma]) for ma in model_arches] + metrics = [self.metrics_conversion(self.search_results[ma]) for ma in model_arches] pareto_front_indices = find_pareto_front(metrics) - self.best_model_archs = [self.params_vec2params_dict(model_arches[i]) - for i in pareto_front_indices] + self.best_model_archs = [self.params_vec2params_dict(model_arches[i]) for i in pareto_front_indices] def metrics_conversion(self, metrics): """Convert the metrics to specific format. @@ -282,15 +246,21 @@ def metrics_conversion(self, metrics): if isinstance(metrics, dict): if self.metrics is None: self.metrics = list(metrics.keys()) - assert list(metrics.keys()) == list(self.metrics), \ - "Keys of metrics not match with metrics in the configuration." + assert list(metrics.keys()) == list( + self.metrics + ), "Keys of metrics not match with metrics in the configuration." metrics = list(metrics.values()) if self.higher_is_better is None: - self.higher_is_better = [True, ] * len(metrics) - logger.warning("higher_is_better not set in the configuration, " + - "set it to all True for every metric entry by default.") - converted_metrics = [metric if higher_is_better else -metric - for metric, higher_is_better in zip(metrics, self.higher_is_better)] + self.higher_is_better = [ + True, + ] * len(metrics) + logger.warning( + "higher_is_better not set in the configuration, " + + "set it to all True for every metric entry by default." + ) + converted_metrics = [ + metric if higher_is_better else -metric for metric, higher_is_better in zip(metrics, self.higher_is_better) + ] return converted_metrics def init_search_cfg(self, config): @@ -302,42 +272,35 @@ def init_search_cfg(self, config): else: logger.warning( "Use user provided search space {}, instead of search space " - "defined in the config, i.e. {}.".format( - self._search_space, self.search_cfg.search_space - ) + "defined in the config, i.e. {}.".format(self._search_space, self.search_cfg.search_space) ) - assert isinstance(self._search_space, dict) and len(self._search_space) > 0, \ - "Must provide a dict as search_space for NAS." 
+ assert ( + isinstance(self._search_space, dict) and len(self._search_space) > 0 + ), "Must provide a dict as search_space for NAS." self.search_space_keys = sorted(self.search_space.keys()) for k in self.search_space_keys: - assert isinstance(self.search_space[k], (list, tuple)), \ - "Value of key \'{}\' must be a list or tuple".format(k) + assert isinstance(self.search_space[k], (list, tuple)), "Value of key '{}' must be a list or tuple".format( + k + ) - self.metrics = self.search_cfg.metrics \ - if self.search_cfg.metrics else None - self.higher_is_better = self.search_cfg.higher_is_better \ - if self.search_cfg.higher_is_better else None + self.metrics = self.search_cfg.metrics if self.search_cfg.metrics else None + self.higher_is_better = self.search_cfg.higher_is_better if self.search_cfg.higher_is_better else None self.seed = self.search_cfg.seed - self.max_trials = self.search_cfg.max_trials \ - if self.search_cfg.max_trials is not None else 3 # set default 3 for max_trials - self.search_algorithm_type = self.search_cfg.search_algorithm \ - if self.search_cfg.search_algorithm else None + self.max_trials = ( + self.search_cfg.max_trials if self.search_cfg.max_trials is not None else 3 + ) # set default 3 for max_trials + self.search_algorithm_type = self.search_cfg.search_algorithm if self.search_cfg.search_algorithm else None if not self.search_algorithm_type: - self._search_algorithm = BayesianOptimizationSearcher( - self.search_space, self.seed) - elif self.search_algorithm_type.lower() == 'grid': + self._search_algorithm = BayesianOptimizationSearcher(self.search_space, self.seed) + elif self.search_algorithm_type.lower() == "grid": self._search_algorithm = GridSearcher(self.search_space) - elif self.search_algorithm_type.lower() == 'random': - self._search_algorithm = RandomSearcher( - self.search_space, self.seed) - elif self.search_algorithm_type.lower() == 'bo': - self._search_algorithm = BayesianOptimizationSearcher( - self.search_space, self.seed) + elif self.search_algorithm_type.lower() == "random": + self._search_algorithm = RandomSearcher(self.search_space, self.seed) + elif self.search_algorithm_type.lower() == "bo": + self._search_algorithm = BayesianOptimizationSearcher(self.search_space, self.seed) else: # pragma: no cover logger.warning( - 'Please be aware that \'{}\' is not a built-in search algorithm.'.format( - self.search_algorithm_type - ) + "Please be aware that '{}' is not a built-in search algorithm.".format(self.search_algorithm_type) ) @property @@ -384,4 +347,4 @@ def model_builder(self, model_builder): def __repr__(self): """Class representation.""" - return 'Base Class of NAS' # pragma: no cover + return "Base Class of NAS" # pragma: no cover diff --git a/neural_compressor/experimental/nas/nas_utils.py b/neural_compressor/experimental/nas/nas_utils.py index 72fe884c38b..ec7610384b5 100644 --- a/neural_compressor/experimental/nas/nas_utils.py +++ b/neural_compressor/experimental/nas/nas_utils.py @@ -19,13 +19,12 @@ import numpy as np - NASMethods = {} def nas_registry(nas_method): """Decorate the NAS subclasses. - + The class decorator used to register all NAS subclasses. 
Args: @@ -39,6 +38,7 @@ def nas_registry(nas_method): def decorator(cls): NASMethods[nas_method.lower()] = cls return cls + return decorator @@ -58,7 +58,7 @@ def create_search_space_pool(search_space, idx=0): key = search_space_keys[idx] search_space_pool = [] for v in search_space[key]: - sub_search_space_pool = create_search_space_pool(search_space, idx+1) + sub_search_space_pool = create_search_space_pool(search_space, idx + 1) search_space_pool += [[v] + item for item in sub_search_space_pool] return search_space_pool @@ -82,5 +82,5 @@ def find_pareto_front(metrics): # Remove points being dominated by current point pareto_front_point_indices = pareto_front_point_indices[nondominated_points] metrics = metrics[nondominated_points] - next_point_idx = np.sum(nondominated_points[:next_point_idx+1]) + next_point_idx = np.sum(nondominated_points[: next_point_idx + 1]) return pareto_front_point_indices diff --git a/neural_compressor/experimental/nas/search_algorithms.py b/neural_compressor/experimental/nas/search_algorithms.py index 72ef8a0c9c2..cfe07724e22 100644 --- a/neural_compressor/experimental/nas/search_algorithms.py +++ b/neural_compressor/experimental/nas/search_algorithms.py @@ -18,10 +18,12 @@ # limitations under the License. import random -from .nas_utils import create_search_space_pool + from neural_compressor.strategy.bayesian import BayesianOptimization from neural_compressor.utils import logger +from .nas_utils import create_search_space_pool + class Searcher(object): """Base class for defining the common methods of different search algorithms. @@ -32,18 +34,17 @@ class Searcher(object): def __init__(self, search_space) -> None: """Initialize the attributes.""" - assert isinstance(search_space, dict) and search_space, \ - "Expect search_space to be a dict." + assert isinstance(search_space, dict) and search_space, "Expect search_space to be a dict." self.search_space = search_space self.search_space_keys = sorted(search_space.keys()) for k in self.search_space_keys: - assert isinstance(self.search_space[k], (list, tuple)), \ - "Value of key \'{}\' must be a list or tuple to specify choices".format( - k) + assert isinstance( + self.search_space[k], (list, tuple) + ), "Value of key '{}' must be a list or tuple to specify choices".format(k) def suggest(self): """Suggest the model architecture.""" - raise NotImplementedError('Depends on specific search algorithm.') # pragma: no cover + raise NotImplementedError("Depends on specific search algorithm.") # pragma: no cover def get_feedback(self, metric): """Get metric feedback for the search algorithm.""" @@ -57,8 +58,9 @@ def params_vec2params_dict(self, para_vec): Returns: Parameters dictionary defining the model architecture. """ - assert len(para_vec) == len(self.search_space_keys), \ - "Length of para_vec and search_space_keys should be the same." + assert len(para_vec) == len( + self.search_space_keys + ), "Length of para_vec and search_space_keys should be the same." 
return {k: para_vec[i] for i, k in enumerate(self.search_space_keys)} @@ -130,10 +132,8 @@ class BayesianOptimizationSearcher(Searcher): def __init__(self, search_space, seed=42) -> None: """Initialize the attributes.""" super(BayesianOptimizationSearcher, self).__init__(search_space) - idx_search_space = { - k: (0, len(search_space[k])-1) for k in self.search_space_keys} - self.bo_agent = BayesianOptimization( - idx_search_space, random_seed=seed) + idx_search_space = {k: (0, len(search_space[k]) - 1) for k in self.search_space_keys} + self.bo_agent = BayesianOptimization(idx_search_space, random_seed=seed) self.last_param_indices = None def suggest(self): @@ -148,8 +148,9 @@ def suggest(self): def get_feedback(self, metric): """Get metric feedback and register this metric.""" - assert self.last_param_indices is not None, "Need run suggest first " + \ - "to get parameters and the input metric is corresponding to this parameters." + assert self.last_param_indices is not None, ( + "Need run suggest first " + "to get parameters and the input metric is corresponding to this parameters." + ) try: self.bo_agent._space.register(self.last_param_indices, metric) except KeyError: # pragma: no cover @@ -162,6 +163,6 @@ def indices2params_vec(self, indices): res = [] for key, ind in indices.items(): # keep ind within the index range of self.search_space[key] - ind = int(min(max(round(ind), 0), len(self.search_space[key])-1)) + ind = int(min(max(round(ind), 0), len(self.search_space[key]) - 1)) res.append(self.search_space[key][ind]) return res diff --git a/neural_compressor/experimental/pruner_legacy/__init__.py b/neural_compressor/experimental/pruner_legacy/__init__.py index acd42f3f451..87f63033d12 100644 --- a/neural_compressor/experimental/pruner_legacy/__init__.py +++ b/neural_compressor/experimental/pruner_legacy/__init__.py @@ -14,7 +14,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Legacy pruner module.""" from os.path import dirname, basename, isfile, join @@ -24,7 +23,7 @@ modules = glob.glob(join(dirname(__file__), "*.py")) for f in modules: - if isfile(f) and not f.startswith('__') and not f.endswith('__init__.py'): + if isfile(f) and not f.startswith("__") and not f.endswith("__init__.py"): __import__(basename(f)[:-3], globals(), locals(), level=1) __all__ = ["PRUNERS"] diff --git a/neural_compressor/experimental/pruner_legacy/gradient_sensitivity.py b/neural_compressor/experimental/pruner_legacy/gradient_sensitivity.py index d986b9a7ce7..2c535261290 100644 --- a/neural_compressor/experimental/pruner_legacy/gradient_sensitivity.py +++ b/neural_compressor/experimental/pruner_legacy/gradient_sensitivity.py @@ -14,14 +14,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
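The indices2params_vec hunk above maps the Bayesian optimizer's continuous suggestions back onto discrete search-space choices by rounding and clamping each index; in isolation the mapping behaves like the sketch below (the search space and suggestion values are invented for illustration).

    # Standalone sketch of the rounding-and-clamping mapping reformatted above;
    # the search space and the suggested indices are invented for illustration.
    search_space = {"num_layers": [2, 4, 6, 8], "width": [64, 128, 256]}

    def indices_to_choices(indices, space):
        choices = {}
        for key, ind in indices.items():
            # keep the rounded suggestion within the valid index range
            ind = int(min(max(round(ind), 0), len(space[key]) - 1))
            choices[key] = space[key][ind]
        return choices

    print(indices_to_choices({"num_layers": 2.7, "width": -0.4}, search_space))
    # {'num_layers': 8, 'width': 64}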
- """Gradient sensitivity pruner.""" +import re +from heapq import heappop, heappush + import numpy as np -from .pruner import pruner_registry, Pruner -from heapq import heappush, heappop + from neural_compressor.utils import logger -import re + +from .pruner import Pruner, pruner_registry + @pruner_registry class GradientSensitivityPruner(Pruner): @@ -47,8 +50,7 @@ def on_epoch_begin(self, epoch): self.model.register_forward_pre_hook() if self.elementwise_prune: self.sparsity = self.update_sparsity(epoch) - logger.debug("Start pruning in epoch {} with sparsity {}.". - format(str(epoch), str(self.sparsity))) + logger.debug("Start pruning in epoch {} with sparsity {}.".format(str(epoch), str(self.sparsity))) self.is_last_epoch = epoch == self.end_epoch if epoch >= self.start_epoch and epoch <= self.end_epoch: self.compute_mask() @@ -58,9 +60,9 @@ def on_step_begin(self, batch_id): if self.elementwise_prune: for weight_name in self.weights: if weight_name in self.masks: - new_weight = self.masks[weight_name].reshape(\ - np.array(self.model.get_weight(weight_name).shape)) * \ - np.array(self.model.get_weight(weight_name)) + new_weight = self.masks[weight_name].reshape( + np.array(self.model.get_weight(weight_name).shape) + ) * np.array(self.model.get_weight(weight_name)) self.model.update_weights(weight_name, new_weight) def on_epoch_end(self): @@ -71,22 +73,20 @@ def on_epoch_end(self): if weight_name in self.masks: logger.info( "Set {} sparsity with mask {} {} {}.".format( - weight_name, str( - self.masks[weight_name].size), str( - self.masks[weight_name].sum()), str( - 1 - self.masks[weight_name].sum() / - self.masks[weight_name].size))) - new_weight = self.masks[weight_name].reshape(\ - np.array(self.model.get_weight(weight_name).shape)) * \ - np.array(self.model.get_weight(weight_name)) + weight_name, + str(self.masks[weight_name].size), + str(self.masks[weight_name].sum()), + str(1 - self.masks[weight_name].sum() / self.masks[weight_name].size), + ) + ) + new_weight = self.masks[weight_name].reshape( + np.array(self.model.get_weight(weight_name).shape) + ) * np.array(self.model.get_weight(weight_name)) self.model.update_weights(weight_name, new_weight) else: for weight_name_raw in self.weights: for weight_name in self.parse_weight_name(weight_name_raw): - self.prune_weight(self.model, - self.importance, - weight_name, - self.parameters) + self.prune_weight(self.model, self.importance, weight_name, self.parameters) if self.is_last_epoch: # remove hooks for FWK model to ensure model saving self.model.remove_hooks() @@ -94,50 +94,42 @@ def on_epoch_end(self): def parse_weight_name(self, weight_name_pattern): """Parse weight name.""" # check if asterisk is used to match bert layer indexes - if '*' not in weight_name_pattern: + if "*" not in weight_name_pattern: yield weight_name_pattern else: weight_all_names = self.model.get_all_weight_names() - importance_inputs = self.parameters['importance_inputs'] + importance_inputs = self.parameters["importance_inputs"] for single_weight_name in weight_all_names: - index_group = re.match( - weight_name_pattern.replace('*', '(\d+)'), single_weight_name) + index_group = re.match(weight_name_pattern.replace("*", "(\d+)"), single_weight_name) if index_group is not None: index = index_group.group(1) if self.parameters.get(index) is None: - self.parameters['index'] = int(index) + self.parameters["index"] = int(index) # dynamic change importance_inputs with matched index - self.parameters['importance_inputs'] = [ - x.replace('*', index) for x in 
self.parameters['importance_inputs']] + self.parameters["importance_inputs"] = [ + x.replace("*", index) for x in self.parameters["importance_inputs"] + ] yield single_weight_name # change importance_inputs back - self.parameters['importance_inputs'] = importance_inputs + self.parameters["importance_inputs"] = importance_inputs def on_step_end(self): """Update importance tensor.""" if self.elementwise_prune: for weight_name in self.weights: - self.update_importance_elementwise(self.model, - self.importance, - weight_name) + self.update_importance_elementwise(self.model, self.importance, weight_name) if weight_name in self.masks: - new_weight = self.masks[weight_name].reshape(\ - np.array(self.model.get_weight(weight_name).shape)) * \ - np.array(self.model.get_weight(weight_name)) + new_weight = self.masks[weight_name].reshape( + np.array(self.model.get_weight(weight_name).shape) + ) * np.array(self.model.get_weight(weight_name)) self.model.update_weights(weight_name, new_weight) else: for weight_name_raw in self.weights: for weight_name in self.parse_weight_name(weight_name_raw): - if self.parameters['importance_metric'] == 'abs_gradient': - self.update_importance_abs(self.model, - self.importance, - weight_name, - self.parameters) - elif self.parameters['importance_metric'] == 'weighted_gradient': - self.update_importance_weighted(self.model, - self.importance, - weight_name, - self.parameters) + if self.parameters["importance_metric"] == "abs_gradient": + self.update_importance_abs(self.model, self.importance, weight_name, self.parameters) + elif self.parameters["importance_metric"] == "weighted_gradient": + self.update_importance_weighted(self.model, self.importance, weight_name, self.parameters) def compute_mask(self): """Compute masks according to absolute values.""" @@ -161,21 +153,17 @@ def compute_mask(self): def prune_weight(self, model, importance, weight_name, parameters): """Prune the specified weight by importance.""" - if parameters['normalize']: + if parameters["normalize"]: exponent = 2 - norm_by_layer = np.power( - np.power(importance[weight_name], exponent).sum(-1), 1 / exponent) + norm_by_layer = np.power(np.power(importance[weight_name], exponent).sum(-1), 1 / exponent) importance[weight_name] /= np.expand_dims(norm_by_layer, -1) + 1e-20 importance = importance[weight_name] weight_tensor = np.array(model.get_weight(weight_name)) - if parameters['transpose']: + if parameters["transpose"]: weight_tensor = weight_tensor.transpose((1, 0)) - weight_tensor = self.prune_by_importance(weight_tensor, - importance, - parameters['target'], - parameters['stride']) - if parameters['transpose']: + weight_tensor = self.prune_by_importance(weight_tensor, importance, parameters["target"], parameters["stride"]) + if parameters["transpose"]: weight_tensor = weight_tensor.transpose((1, 0)) model.update_weights(weight_name, weight_tensor) @@ -183,25 +171,25 @@ def prune_weight(self, model, importance, weight_name, parameters): def update_importance_elementwise(self, model, importance, weight_name): """Update importance tensor elementwisely.""" if importance.get(weight_name) is not None: - importance[weight_name] += np.absolute( - np.array(np.array(model.get_gradient(weight_name)) * np.array(model.get_weight(weight_name)))) + importance[weight_name] += np.absolute( + np.array(np.array(model.get_gradient(weight_name)) * np.array(model.get_weight(weight_name))) + ) else: importance[weight_name] = np.absolute( - np.array(model.get_gradient(weight_name) * 
np.array(model.get_weight(weight_name)))) + np.array(model.get_gradient(weight_name) * np.array(model.get_weight(weight_name))) + ) def update_importance_abs(self, model, importance, weight_name, parameters): """Update importance tensor with absolute gradient.""" - head_mask = model.get_inputs( - input_name=parameters['importance_inputs'][0]) + head_mask = model.get_inputs(input_name=parameters["importance_inputs"][0]) if importance.get(weight_name) is not None: - importance[weight_name] += np.absolute( - np.array(model.get_gradient(head_mask)))[parameters['index']] + importance[weight_name] += np.absolute(np.array(model.get_gradient(head_mask)))[parameters["index"]] else: - importance[weight_name] = np.absolute( - np.array(model.get_gradient(head_mask)))[parameters['index']] + importance[weight_name] = np.absolute(np.array(model.get_gradient(head_mask)))[parameters["index"]] def update_importance_weighted(self, model, importance, weight_name, parameters): """Update importance tensor with weighted gradient.""" + def weighted_grad(input_weight): """Compute weighted gradient.""" weight_grad = np.array(model.get_gradient(input_weight)) @@ -212,8 +200,7 @@ def weighted_grad(input_weight): weighted_grad = weighted_grad.sum(1) return weighted_grad - accumulated_grad = sum([weighted_grad(input_weight) for input_weight \ - in parameters['importance_inputs']]) + accumulated_grad = sum([weighted_grad(input_weight) for input_weight in parameters["importance_inputs"]]) if importance.get(weight_name) is not None: importance[weight_name] += np.absolute(accumulated_grad) @@ -234,11 +221,11 @@ def prune_by_importance(self, tensor, importance, num_instances, stride): head_to_add = heappop(importance_ordered)[1] if sorted_tensor_to_concat is None: sorted_tensor_to_concat = ( - tensor[int(head_to_add * stride): int(head_to_add * stride) + - int(stride), ...], ) + tensor[int(head_to_add * stride) : int(head_to_add * stride) + int(stride), ...], + ) else: sorted_tensor_to_concat += ( - tensor[int(head_to_add * stride): int(head_to_add * stride) + - int(stride), ...], ) + tensor[int(head_to_add * stride) : int(head_to_add * stride) + int(stride), ...], + ) i += 1 return np.concatenate(sorted_tensor_to_concat) diff --git a/neural_compressor/experimental/pruner_legacy/group_lasso.py b/neural_compressor/experimental/pruner_legacy/group_lasso.py index 3d5dfd21a37..5508be46611 100644 --- a/neural_compressor/experimental/pruner_legacy/group_lasso.py +++ b/neural_compressor/experimental/pruner_legacy/group_lasso.py @@ -14,16 +14,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Group Lasso pruner.""" -import re import copy +import re + import numpy as np -from .pruner import pruner_registry, Pruner -from .magnitude import BasicMagnitudePruner + from neural_compressor.utils import logger +from .magnitude import BasicMagnitudePruner +from .pruner import Pruner, pruner_registry + + @pruner_registry class GroupLassoPruner(BasicMagnitudePruner): """Group Lasso pruner class. 
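The update_importance_elementwise hunk above accumulates |gradient * weight| per named tensor across training steps; stripped of the model plumbing, the computation reduces to the sketch below (the arrays are toy values, not real pruner inputs, which come from model.get_gradient() and model.get_weight()).

    import numpy as np

    # Toy standalone version of the |grad * weight| accumulation reformatted above;
    # in the pruner, grad and weight come from model.get_gradient()/get_weight().
    def accumulate_importance(importance, name, grad, weight):
        contribution = np.absolute(np.array(grad) * np.array(weight))
        if importance.get(name) is not None:
            importance[name] += contribution
        else:
            importance[name] = contribution
        return importance

    importance = {}
    accumulate_importance(importance, "fc.weight", [[0.1, -0.2]], [[1.0, 3.0]])
    accumulate_importance(importance, "fc.weight", [[0.3, 0.1]], [[1.0, 3.0]])
    print(importance["fc.weight"])  # [[0.4 0.9]]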
@@ -39,7 +42,7 @@ def __init__(self, model, local_config, global_config): super(GroupLassoPruner, self).__init__(model, local_config, global_config) self.cur_weights = copy.deepcopy(self.weights) self.is_masks_set = False - self.alpha = local_config.parameters['alpha'] + self.alpha = local_config.parameters["alpha"] def on_before_optimizer_step(self): """Update gradient to prune the weights by back propagation.""" @@ -48,7 +51,7 @@ def on_before_optimizer_step(self): weight_grad = self.model.get_gradient(weight_name) weight = np.array(self.model.get_weight(weight_name)) reshaped_weight = self.pattern.reshape(weight) - coeff = self.alpha / np.linalg.norm(reshaped_weight, 2, axis=(1,3)) + coeff = self.alpha / np.linalg.norm(reshaped_weight, 2, axis=(1, 3)) coeff[np.isinf(coeff)] = 0 coeff = self.pattern.repeat_mask(coeff).reshape(weight.shape) weight_grad += coeff * weight diff --git a/neural_compressor/experimental/pruner_legacy/magnitude.py b/neural_compressor/experimental/pruner_legacy/magnitude.py index 38408e1ea1f..3910cfb359b 100644 --- a/neural_compressor/experimental/pruner_legacy/magnitude.py +++ b/neural_compressor/experimental/pruner_legacy/magnitude.py @@ -14,13 +14,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Magnitude pruner.""" import numpy as np -from .pruner import pruner_registry, Pruner + from neural_compressor.utils import logger +from .pruner import Pruner, pruner_registry + + @pruner_registry class BasicMagnitudePruner(Pruner): """Magnitude pruner class. @@ -38,8 +40,7 @@ def __init__(self, model, local_config, global_config): def on_epoch_begin(self, epoch): """Update target sparsity according to the schedule and compute mask accordingly.""" self.sparsity = self.update_sparsity(epoch) - logger.debug("Start pruning in epoch {} with sparsity {}.". 
- format(str(epoch), str(self.sparsity))) + logger.debug("Start pruning in epoch {} with sparsity {}.".format(str(epoch), str(self.sparsity))) self.is_last_epoch = epoch == self.end_epoch if epoch >= self.start_epoch and epoch <= self.end_epoch: self.compute_mask() @@ -50,8 +51,7 @@ def on_step_begin(self, batch_id): for weight in self.weights: if weight in self.masks: - new_weight = self.masks[weight] * \ - np.array(self.model.get_weight(weight)) + new_weight = self.masks[weight] * np.array(self.model.get_weight(weight)) self.model.update_weights(weight, new_weight) res[weight] = new_weight return res @@ -79,17 +79,17 @@ def on_epoch_end(self): """Sparsity ratio summary and apply mask to the weight.""" res = dict() if self.is_last_epoch: - for weight in self.weights: if weight in self.masks: logger.info( "Set {} sparsity with mask {} {} {}.".format( - weight, str( - self.masks[weight].size), str( - self.masks[weight].sum()), str( - 1 - self.masks[weight].sum() / self.masks[weight].size))) - new_weight = self.masks[weight] * \ - np.array(self.model.get_weight(weight)) + weight, + str(self.masks[weight].size), + str(self.masks[weight].sum()), + str(1 - self.masks[weight].sum() / self.masks[weight].size), + ) + ) + new_weight = self.masks[weight] * np.array(self.model.get_weight(weight)) self.model.update_weights(weight, new_weight) res[weight] = new_weight return res @@ -99,8 +99,7 @@ def on_step_end(self): res = dict() for weight in self.weights: if weight in self.masks: - new_weight = self.masks[weight] * \ - np.array(self.model.get_weight(weight)) + new_weight = self.masks[weight] * np.array(self.model.get_weight(weight)) self.model.update_weights(weight, new_weight) res[weight] = new_weight return res diff --git a/neural_compressor/experimental/pruner_legacy/pattern_lock.py b/neural_compressor/experimental/pruner_legacy/pattern_lock.py index 6c4d1428fec..c91c606ec29 100644 --- a/neural_compressor/experimental/pruner_legacy/pattern_lock.py +++ b/neural_compressor/experimental/pruner_legacy/pattern_lock.py @@ -14,10 +14,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Pattern lock pruner.""" -from .pruner import pruner_registry, Pruner +from .pruner import Pruner, pruner_registry @pruner_registry @@ -56,7 +55,7 @@ def compute_mask(self): for weight in self.weights: tensor = self.model.get_weight(weight) if len(tensor.shape) in self.tensor_dims: - self.masks[weight] = tensor == 0. + self.masks[weight] = tensor == 0.0 def update_weights(self): """Update weights according to the masks.""" diff --git a/neural_compressor/experimental/pruner_legacy/pruner.py b/neural_compressor/experimental/pruner_legacy/pruner.py index 3af3a395dba..974834272d4 100644 --- a/neural_compressor/experimental/pruner_legacy/pruner.py +++ b/neural_compressor/experimental/pruner_legacy/pruner.py @@ -14,13 +14,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Pattern lock pruner.""" from neural_compressor.experimental.pruning_recipes.patterns import patterns PRUNERS = {} + def pruner_registry(cls): """The class decorator used to register all Pruners subclasses. @@ -30,14 +30,13 @@ def pruner_registry(cls): Returns: cls: The class of register. 
""" - assert cls.__name__.endswith( - 'Pruner' - ), "The name of subclass of Pruner should end with \'Pruner\' substring." - if cls.__name__[:-len('Pruner')].lower() in PRUNERS: - raise ValueError('Cannot have two pruner with the same name') - PRUNERS[cls.__name__[:-len('Pruner')]] = cls + assert cls.__name__.endswith("Pruner"), "The name of subclass of Pruner should end with 'Pruner' substring." + if cls.__name__[: -len("Pruner")].lower() in PRUNERS: + raise ValueError("Cannot have two pruner with the same name") + PRUNERS[cls.__name__[: -len("Pruner")]] = cls return cls + class Pruner: """The base clase of Pruner. @@ -50,7 +49,7 @@ class Pruner: def __init__(self, model, local_config, global_config): """Initialize the attributes.""" self.model = model - #2 for linear weight, 4 for conv weight + # 2 for linear weight, 4 for conv weight self.tensor_dims = [2, 4] if local_config.method is not None: @@ -86,10 +85,10 @@ def __init__(self, model, local_config, global_config): self.is_last_epoch = False # TBD, add pattern in config - if hasattr(local_config, 'pattern'): + if hasattr(local_config, "pattern"): self.pattern = patterns[local_config.pattern]() else: - self.pattern = patterns['tile_pattern_1x1']() + self.pattern = patterns["tile_pattern_1x1"]() self.masks = {} def on_epoch_begin(self, epoch): @@ -134,5 +133,5 @@ def update_sparsity(self, epoch): if self.start_epoch == self.end_epoch or epoch > self.end_epoch: return self.target_sparsity return self.initial_sparsity + (self.target_sparsity - self.initial_sparsity) * ( - (epoch - self.start_epoch + 1) // self.freq) * self.freq / \ - (self.end_epoch - self.start_epoch + 1) + (epoch - self.start_epoch + 1) // self.freq + ) * self.freq / (self.end_epoch - self.start_epoch + 1) diff --git a/neural_compressor/experimental/pruner_legacy/util/block_mask.py b/neural_compressor/experimental/pruner_legacy/util/block_mask.py index a92e744c398..9a11d608175 100644 --- a/neural_compressor/experimental/pruner_legacy/util/block_mask.py +++ b/neural_compressor/experimental/pruner_legacy/util/block_mask.py @@ -14,5 +14,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -"""Block mask.""" \ No newline at end of file +"""Block mask.""" diff --git a/neural_compressor/experimental/pruning.py b/neural_compressor/experimental/pruning.py index d01f4c6d1be..7503ded2c01 100644 --- a/neural_compressor/experimental/pruning.py +++ b/neural_compressor/experimental/pruning.py @@ -1,4 +1,4 @@ -"""pruning module.""" +"""Pruning module.""" #!/usr/bin/env python # -*- coding: utf-8 -*- # @@ -16,17 +16,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from .component import Component -from .pruner_legacy import PRUNERS -from ..utils import logger -from ..utils.utility import GLOBAL_STATE, MODE -from ..utils.create_obj_from_config import create_dataloader, create_train_func, create_eval_func -from ..model import BaseModel +from deprecated import deprecated + from ..adaptor import FRAMEWORKS from ..conf.config import PruningConf from ..conf.pythonic_config import Config - -from deprecated import deprecated +from ..model import BaseModel +from ..utils import logger +from ..utils.create_obj_from_config import create_dataloader, create_eval_func, create_train_func +from ..utils.utility import GLOBAL_STATE, MODE +from .component import Component +from .pruner_legacy import PRUNERS class Pruning(Component): @@ -45,7 +45,6 @@ class Pruning(Component): Attributes: conf: A config dict object. Contains pruning setting parameters. pruners: A list of Pruner object. - """ def __init__(self, conf_fname_or_obj=None): @@ -66,7 +65,7 @@ def __init__(self, conf_fname_or_obj=None): def update_items_for_all_pruners(self, **kwargs): """Functions which add User-defined arguments to the original configurations. - The original config of pruning is read from a file. + The original config of pruning is read from a file. However, users can still modify configurations by passing key-value arguments in this function. Please note that the key-value arguments' keys are analysable in current configuration. """ @@ -142,19 +141,20 @@ def prepare(self): def pre_process(self): """Functions called before pruning begins, usually set up pruners.""" - assert isinstance(self._model, BaseModel), 'need set neural_compressor Model for pruning....' + assert isinstance(self._model, BaseModel), "need set neural_compressor Model for pruning...." GLOBAL_STATE.STATE = MODE.PRUNING - framework_specific_info = {'device': self.cfg.device, - 'random_seed': self.cfg.tuning.random_seed, - 'workspace_path': self.cfg.tuning.workspace.path, - 'q_dataloader': None, - 'format': 'default', - 'backend': 'default'} - - if self.framework == 'tensorflow': - framework_specific_info.update( - {"inputs": self.cfg.model.inputs, "outputs": self.cfg.model.outputs}) + framework_specific_info = { + "device": self.cfg.device, + "random_seed": self.cfg.tuning.random_seed, + "workspace_path": self.cfg.tuning.workspace.path, + "q_dataloader": None, + "format": "default", + "backend": "default", + } + + if self.framework == "tensorflow": + framework_specific_info.update({"inputs": self.cfg.model.inputs, "outputs": self.cfg.model.outputs}) self.adaptor = FRAMEWORKS[self.framework](framework_specific_info) @@ -162,44 +162,53 @@ def pre_process(self): if self._train_dataloader is None and self._train_func is None: train_dataloader_cfg = self.cfg.pruning.train.dataloader - assert train_dataloader_cfg is not None, \ - 'dataloader field of train field of pruning section ' \ - 'in yaml file should be configured as train_dataloader property is NOT set!' + assert train_dataloader_cfg is not None, ( + "dataloader field of train field of pruning section " + "in yaml file should be configured as train_dataloader property is NOT set!" 
+ ) train_dataloader_cfg.distributed = self.train_distributed self._train_dataloader = create_dataloader(self.framework, train_dataloader_cfg) if self._eval_dataloader is None and self._eval_func is None: eval_dataloader_cfg = self.cfg.evaluation.accuracy.dataloader - assert eval_dataloader_cfg is not None, \ - 'dataloader field of evaluation ' \ - 'in yaml file should be configured as eval_dataloader property is NOT set!' + assert eval_dataloader_cfg is not None, ( + "dataloader field of evaluation " + "in yaml file should be configured as eval_dataloader property is NOT set!" + ) eval_dataloader_cfg.distributed = self.evaluation_distributed self._eval_dataloader = create_dataloader(self.framework, eval_dataloader_cfg) if self._train_func is None: # train section of pruning section in yaml file should be configured. train_cfg = self.cfg.pruning.train - assert train_cfg, "train field of pruning section in yaml file must " \ - "be configured for pruning if pruning_func is NOT set." - self._train_func = create_train_func(self.framework, \ - self.train_dataloader, \ - self.adaptor, \ - train_cfg, \ - hooks=self.hooks, \ - callbacks=self.callbacks) + assert train_cfg, ( + "train field of pruning section in yaml file must " + "be configured for pruning if pruning_func is NOT set." + ) + self._train_func = create_train_func( + self.framework, + self.train_dataloader, + self.adaptor, + train_cfg, + hooks=self.hooks, + callbacks=self.callbacks, + ) if self._eval_func is None: # eval section in yaml file should be configured. eval_cfg = self.cfg.evaluation - assert eval_cfg, "eval field of pruning section in yaml file must " \ - "be configured for pruning if eval_func is NOT set." - self._eval_func = create_eval_func(self.framework, \ - self.eval_dataloader, \ - self.adaptor, \ - eval_cfg.accuracy.metric, \ - eval_cfg.accuracy.postprocess, \ - fp32_baseline = False) - if getattr(self.train_dataloader, 'distributed', False): - self.register_hook('on_train_begin', self.adaptor._pre_hook_for_hvd) + assert eval_cfg, ( + "eval field of pruning section in yaml file must " "be configured for pruning if eval_func is NOT set." + ) + self._eval_func = create_eval_func( + self.framework, + self.eval_dataloader, + self.adaptor, + eval_cfg.accuracy.metric, + eval_cfg.accuracy.postprocess, + fp32_baseline=False, + ) + if getattr(self.train_dataloader, "distributed", False): + self.register_hook("on_train_begin", self.adaptor._pre_hook_for_hvd) def execute(self): """Functions that execute the pruning process. @@ -207,70 +216,71 @@ def execute(self): Follow the working flow: evaluate the dense model -> train/prune the model, evaluate the sparse model. """ logger.info("Start to get the baseline model's score before pruning.") - self.baseline_score = self._eval_func(self._model if getattr(self._eval_func, 'builtin', None) \ - else self._model.model) + self.baseline_score = self._eval_func( + self._model if getattr(self._eval_func, "builtin", None) else self._model.model + ) logger.info("Baseline model's score is {}.".format(str(self.baseline_score))) logger.info("Model pruning begins.") - self._train_func(self._model if getattr(self._train_func, 'builtin', None) \ - else self._model.model) + self._train_func(self._model if getattr(self._train_func, "builtin", None) else self._model.model) logger.info("Model pruning is done. 
Start to evaluate the pruned model.") - self.last_score = self._eval_func(self._model if getattr(self._eval_func, 'builtin', None) \ - else self._model.model) + self.last_score = self._eval_func( + self._model if getattr(self._eval_func, "builtin", None) else self._model.model + ) logger.info("Pruned model score is {}.".format(str(self.last_score))) return self._model def generate_hooks(self): """Register hooks for pruning.""" - self.register_hook('on_train_begin', self._on_train_begin) - self.register_hook('on_train_end', self._on_train_end) - self.register_hook('on_epoch_begin', self._on_epoch_begin) - self.register_hook('on_epoch_end', self._on_epoch_end) - self.register_hook('on_step_begin', self._on_step_begin) - self.register_hook('on_step_end', self._on_step_end) - self.register_hook('on_before_optimizer_step', self._on_before_optimizer_step) - self.register_hook('on_after_optimizer_step', self._on_after_optimizer_step) - self.register_hook('on_before_eval', self._on_before_eval) - self.register_hook('on_after_eval', self._on_after_eval) + self.register_hook("on_train_begin", self._on_train_begin) + self.register_hook("on_train_end", self._on_train_end) + self.register_hook("on_epoch_begin", self._on_epoch_begin) + self.register_hook("on_epoch_end", self._on_epoch_end) + self.register_hook("on_step_begin", self._on_step_begin) + self.register_hook("on_step_end", self._on_step_end) + self.register_hook("on_before_optimizer_step", self._on_before_optimizer_step) + self.register_hook("on_after_optimizer_step", self._on_after_optimizer_step) + self.register_hook("on_before_eval", self._on_before_eval) + self.register_hook("on_after_eval", self._on_after_eval) def generate_pruners(self): """Functions that generate pruners and set up self.pruners.""" for name in self.cfg.pruning.approach: - assert name == 'weight_compression' or name == "weight_compression_pytorch", \ - 'now we only support weight_compression and weight_compression_pytorch' + assert ( + name == "weight_compression" or name == "weight_compression_pytorch" + ), "now we only support weight_compression and weight_compression_pytorch" - if self.cfg.pruning.approach.weight_compression_pytorch != None: + if self.cfg.pruning.approach.weight_compression_pytorch is not None: from .pytorch_pruner.pruning import Pruning as PytorchPruning + self.pytorch_pruner = PytorchPruning(self.cfg) - self.pytorch_pruner.model = self.model._model # extract their pytorch model + self.pytorch_pruner.model = self.model._model # extract their pytorch model self.pytorch_pruner.prepare() self.pytorch_pruner.on_train_begin() self.pruners += self.pytorch_pruner.pruners - if self.cfg.pruning.approach.weight_compression != None: + if self.cfg.pruning.approach.weight_compression is not None: for pruner in self.cfg.pruning.approach.weight_compression.pruners: - if pruner.prune_type == 'basic_magnitude': - self.pruners.append(PRUNERS['BasicMagnitude'](\ - self._model, \ - pruner, - self.cfg.pruning.approach.weight_compression)) - elif pruner.prune_type == 'pattern_lock': - self.pruners.append(PRUNERS['PatternLock'](\ - self._model, \ - pruner, - self.cfg.pruning.approach.weight_compression)) - elif pruner.prune_type == 'gradient_sensitivity': - self.pruners.append(PRUNERS['GradientSensitivity'](\ - self._model, \ - pruner, - self.cfg.pruning.approach.weight_compression)) - elif pruner.prune_type == 'group_lasso': - self.pruners.append(PRUNERS['GroupLasso'](\ - self._model, \ - pruner, - self.cfg.pruning.approach.weight_compression)) + if pruner.prune_type == 
"basic_magnitude": + self.pruners.append( + PRUNERS["BasicMagnitude"](self._model, pruner, self.cfg.pruning.approach.weight_compression) + ) + elif pruner.prune_type == "pattern_lock": + self.pruners.append( + PRUNERS["PatternLock"](self._model, pruner, self.cfg.pruning.approach.weight_compression) + ) + elif pruner.prune_type == "gradient_sensitivity": + self.pruners.append( + PRUNERS["GradientSensitivity"]( + self._model, pruner, self.cfg.pruning.approach.weight_compression + ) + ) + elif pruner.prune_type == "group_lasso": + self.pruners.append( + PRUNERS["GroupLasso"](self._model, pruner, self.cfg.pruning.approach.weight_compression) + ) else: ##print(pruner.prune_type) - assert False, 'now only support {}'.format(PRUNERS.keys()) + assert False, "now only support {}".format(PRUNERS.keys()) def __call__(self): """Entry point of pruning. @@ -307,7 +317,6 @@ def __call__(self): Returns: pruned model: best pruned model found, otherwise return None - """ return super(Pruning, self).__call__() @@ -317,11 +326,11 @@ def __call__(self): @property def pruning_func(self): """Not support get pruning_func.""" - assert False, 'Should not try to get the value of `pruning_func` attribute.' + assert False, "Should not try to get the value of `pruning_func` attribute." return None @pruning_func.setter - @deprecated(version='2.0', reason="please use `train_func` instead") + @deprecated(version="2.0", reason="please use `train_func` instead") def pruning_func(self, user_pruning_func): """Training function for pruning. @@ -339,7 +348,7 @@ def pruning_func(self, user_pruning_func): def evaluation_distributed(self): """Getter to know whether need distributed evaluation dataloader.""" eval_dataloader_cfg = self.cfg.evaluation.accuracy.dataloader - yaml_distributed = eval_dataloader_cfg.get('distributed', False) + yaml_distributed = eval_dataloader_cfg.get("distributed", False) return self._evaluation_distributed or yaml_distributed @evaluation_distributed.setter @@ -351,7 +360,7 @@ def evaluation_distributed(self, distributed): def train_distributed(self): """Getter to know whether need distributed training dataloader.""" train_dataloader_cfg = self.cfg.pruning.train.dataloader - yaml_distributed = train_dataloader_cfg.get('distributed', False) + yaml_distributed = train_dataloader_cfg.get("distributed", False) return self._train_distributed or yaml_distributed @train_distributed.setter @@ -361,7 +370,7 @@ def train_distributed(self, distributed): def __repr__(self): """Return the class's string representation.""" - return 'Pruning' + return "Pruning" def get_sparsity_ratio(self): """Functions that calculate a modules/layers sparsity. @@ -373,6 +382,7 @@ def get_sparsity_ratio(self): blockwise_over_matmul_gemm_conv refers to all-zero blocks' ratio in pruning layers. 
""" import torch + pattern_sparsity_cnt = 0 element_sparsity_cnt = 0 for pruner in self.pruners: @@ -395,8 +405,7 @@ def get_sparsity_ratio(self): param_cnt += param.numel() blockwise_over_matmul_gemm_conv = float(pattern_sparsity_cnt) / linear_conv_cnt elementwise_over_matmul_gemm_conv = float(element_sparsity_cnt) / linear_conv_cnt - elementwise_over_all = float( - element_sparsity_cnt) / param_cnt + elementwise_over_all = float(element_sparsity_cnt) / param_cnt return elementwise_over_matmul_gemm_conv, elementwise_over_all, blockwise_over_matmul_gemm_conv @@ -429,18 +438,18 @@ def _set_weights(self): def on_train_begin(self, logs=None, dataloader=None): """Call the same-name function from hooks.""" - self.hooks['on_train_begin'](dataloader) + self.hooks["on_train_begin"](dataloader) def on_train_end(self, logs=None): """Call the same-name function from hooks.""" - self.hooks['on_train_end']() + self.hooks["on_train_end"]() - @deprecated(version='2.0', reason="please use `on_train_begin` instead") + @deprecated(version="2.0", reason="please use `on_train_begin` instead") def pre_epoch_begin(self, logs=None, dataloader=None): # pragma: no cover """Call the same-name function from hooks.""" self.on_train_begin(logs, dataloader) - @deprecated(version='2.0', reason="please use `on_train_end` instead") + @deprecated(version="2.0", reason="please use `on_train_end` instead") def post_epoch_end(self, logs=None): # pragma: no cover """Call the same-name function from hooks.""" self.on_train_end(logs) @@ -448,12 +457,12 @@ def post_epoch_end(self, logs=None): # pragma: no cover def on_epoch_begin(self, epoch, logs=None): """Call the same-name function from hooks.""" self._set_weights() - self.hooks['on_epoch_begin'](epoch) + self.hooks["on_epoch_begin"](epoch) def on_epoch_end(self, logs=None): """Call the same-name function from hooks.""" self._set_weights() - res = self.hooks['on_epoch_end']() + res = self.hooks["on_epoch_end"]() for layer_index, weights in res[0][0].items(): get_weights = self.model.layers[layer_index].get_weights() get_weights[0] = weights @@ -462,32 +471,31 @@ def on_epoch_end(self, logs=None): def on_step_begin(self, batch, logs=None): """Call the same-name function from hooks.""" self._set_weights() - res = self.hooks['on_step_begin'](batch) + res = self.hooks["on_step_begin"](batch) for layer_index, weights in res[0][0].items(): get_weights = self.model.layers[layer_index].get_weights() get_weights[0] = weights self.model.layers[layer_index].set_weights(get_weights) - @deprecated(version='2.0', reason="please use `on_step_begin` instead") + @deprecated(version="2.0", reason="please use `on_step_begin` instead") def on_batch_begin(self, batch, logs=None): # pragma: no cover """Call the same-name function from hooks.""" self.on_step_begin(batch, logs) def on_after_compute_loss(self, input, s_outputs, s_loss, t_outputs=None): """Call the same-name function from hooks.""" - return self.hooks['on_after_compute_loss'](input, s_outputs, s_loss, t_outputs) + return self.hooks["on_after_compute_loss"](input, s_outputs, s_loss, t_outputs) def on_step_end(self, logs=None): """Call the same-name function from hooks.""" self._set_weights() - res = self.hooks['on_step_end']() + res = self.hooks["on_step_end"]() for layer_index, weights in res[0][0].items(): get_weights = self.model.layers[layer_index].get_weights() get_weights[0] = weights self.model.layers[layer_index].set_weights(get_weights) - @deprecated(version='2.0', reason="please use `on_step_end` instead") + 
@deprecated(version="2.0", reason="please use `on_step_end` instead") def on_batch_end(self, logs=None): # pragma: no cover """Call the same-name function from hooks.""" self.on_step_end(logs) - diff --git a/neural_compressor/experimental/pruning_recipes/__init__.py b/neural_compressor/experimental/pruning_recipes/__init__.py index 86a06645734..118c7b5b2f0 100644 --- a/neural_compressor/experimental/pruning_recipes/__init__.py +++ b/neural_compressor/experimental/pruning_recipes/__init__.py @@ -21,4 +21,5 @@ __all__ = [ "PATTERNS", - "patterns",] + "patterns", +] diff --git a/neural_compressor/experimental/pruning_recipes/patterns/__init__.py b/neural_compressor/experimental/pruning_recipes/patterns/__init__.py index 98382014bfe..897ffe49ed3 100644 --- a/neural_compressor/experimental/pruning_recipes/patterns/__init__.py +++ b/neural_compressor/experimental/pruning_recipes/patterns/__init__.py @@ -1,4 +1,4 @@ -"""different patterns.""" +"""Different patterns.""" #!/usr/bin/env python # -*- coding: utf-8 -*- # @@ -23,7 +23,7 @@ modules = glob.glob(join(dirname(__file__), "*.py")) for f in modules: - if isfile(f) and not f.startswith('__') and not f.endswith('__init__.py'): + if isfile(f) and not f.startswith("__") and not f.endswith("__init__.py"): __import__(basename(f)[:-3], globals(), locals(), level=1) patterns = PATTERNS() diff --git a/neural_compressor/experimental/pruning_recipes/patterns/pattern.py b/neural_compressor/experimental/pruning_recipes/patterns/pattern.py index 280deea799f..9df37685e7c 100644 --- a/neural_compressor/experimental/pruning_recipes/patterns/pattern.py +++ b/neural_compressor/experimental/pruning_recipes/patterns/pattern.py @@ -1,4 +1,4 @@ -"""pattern classes.""" +"""Pattern classes.""" #!/usr/bin/env python # -*- coding: utf-8 -*- # @@ -17,10 +17,12 @@ # limitations under the License. from abc import abstractmethod + import numpy as np registry_patterns = {} + def pattern_registry(pattern_type): """Class decorator used to register all Pruning Pattern subclasses. @@ -31,16 +33,19 @@ def pattern_registry(pattern_type): Returns: cls: The class of register. """ + def decorator_pattern(cls): if pattern_type in registry_patterns: - raise ValueError('Cannot have two pattern with the same name') + raise ValueError("Cannot have two pattern with the same name") registry_patterns[pattern_type] = cls return cls + return decorator_pattern + class PATTERNS(object): """Class that contain all registered pattern types. - + Attributes: patterns: A dict which stores registered Pruning Pattern subclasses. 
""" @@ -49,8 +54,7 @@ class PATTERNS(object): def __getitem__(self, pattern_type): """Obtain a Pruning Pattern subclass.""" - assert pattern_type in self.patterns, "pattern type only support {}".\ - format(self.patterns.keys()) + assert pattern_type in self.patterns, "pattern type only support {}".format(self.patterns.keys()) return self.patterns[pattern_type] @classmethod @@ -58,6 +62,7 @@ def support_pattern(self): """Support patterns.""" return set(self.patterns.keys()) + class PatternBase: """Base class of pruning pattern.""" @@ -70,7 +75,7 @@ def compute_sparsity(self, tensor): """To be implemented in subclasses.""" raise NotImplementedError - def reduce(self, tensor, method='abs_sum'): + def reduce(self, tensor, method="abs_sum"): """Reshaped tensor, support 'abs_max', 'abs_sum'.""" if len(tensor.shape) in [2, 4]: reshaped_tensor = self.reshape(tensor) @@ -80,9 +85,9 @@ def reduce(self, tensor, method='abs_sum'): reduced_tensor = new_tensor.reshape(new_shape[:-3] + [new_shape[-2], -1]) else: assert False, "tile-pattern pruning now only support 2d & 4d tensor" - if method == 'abs_max': + if method == "abs_max": return np.abs(reduced_tensor).max(-1).values - elif method == 'abs_sum': + elif method == "abs_sum": return np.abs(reduced_tensor).sum(-1) else: raise NotImplementedError @@ -91,9 +96,9 @@ def reshape(self, tensor): """Reshape tensor into dims+2.""" if len(tensor.shape) == 4: tensor = tensor.reshape(tensor.shape[0], -1) - assert tensor.shape[-1] % self.mask_shape[-1] == 0 and \ - tensor.shape[-2] % self.mask_shape[-2] == 0, \ - 'tensor shape {} cannot be divided by mask {}'.format(tensor.shape, self.mask_shape) + assert ( + tensor.shape[-1] % self.mask_shape[-1] == 0 and tensor.shape[-2] % self.mask_shape[-2] == 0 + ), "tensor shape {} cannot be divided by mask {}".format(tensor.shape, self.mask_shape) new_shape = list(tensor.shape)[:-2] new_shape.append(tensor.shape[-2] // self.mask_shape[-2]) diff --git a/neural_compressor/experimental/pruning_recipes/patterns/tile_pattern.py b/neural_compressor/experimental/pruning_recipes/patterns/tile_pattern.py index 31fbb2f233e..94a30d97d6a 100644 --- a/neural_compressor/experimental/pruning_recipes/patterns/tile_pattern.py +++ b/neural_compressor/experimental/pruning_recipes/patterns/tile_pattern.py @@ -17,8 +17,10 @@ # limitations under the License. 
import numpy as np + from .pattern import PatternBase, pattern_registry + class TilePatternBase(PatternBase): """Parent class for all NxM tile patterns.""" @@ -33,14 +35,14 @@ def compute_sparsity(self, tensor): def repeat_mask(self, mask, ori_shape=None): """Repeat mask in 2 dimensions.""" - flatten_mask = np.repeat(np.repeat(mask, self.mask_shape[0], axis=-2), \ - self.mask_shape[1], axis=-1) + flatten_mask = np.repeat(np.repeat(mask, self.mask_shape[0], axis=-2), self.mask_shape[1], axis=-1) if ori_shape: return flatten_mask.reshape(ori_shape) else: return flatten_mask -@pattern_registry(pattern_type='tile_pattern_1x1') + +@pattern_registry(pattern_type="tile_pattern_1x1") class TilePattern_1x1(TilePatternBase): """1x1 tile pattern (unstructured).""" @@ -48,7 +50,8 @@ def __init__(self): """Element wise sparsity.""" super(TilePattern_1x1, self).__init__([1, 1]) -@pattern_registry(pattern_type='tile_pattern_2x2') + +@pattern_registry(pattern_type="tile_pattern_2x2") class TilePattern_2x2(TilePatternBase): """2x2 tile pattern (unstructured).""" @@ -56,7 +59,8 @@ def __init__(self): """2x2 tile wise sparsity.""" super(TilePattern_2x2, self).__init__([2, 2]) -@pattern_registry(pattern_type='tile_pattern_1x16') + +@pattern_registry(pattern_type="tile_pattern_1x16") class TilePattern_1x16(TilePatternBase): """1x16 tile pattern (unstructured).""" @@ -64,15 +68,17 @@ def __init__(self): """1x16 tile wise sparsity.""" super(TilePattern_1x16, self).__init__([1, 16]) -@pattern_registry(pattern_type='tile_pattern_4x1') + +@pattern_registry(pattern_type="tile_pattern_4x1") class TilePattern_4x1(TilePatternBase): """4x1 tile pattern (unstructured).""" def __init__(self): """4x1 tile wise vnni-aware sparsity.""" super(TilePattern_4x1, self).__init__([4, 1]) - -@pattern_registry(pattern_type='tile_pattern_1x2') + + +@pattern_registry(pattern_type="tile_pattern_1x2") class TilePattern_1x2(TilePatternBase): """1x2 tile pattern (unstructured).""" diff --git a/neural_compressor/experimental/pruning_v2.py b/neural_compressor/experimental/pruning_v2.py index cd5a02f45a0..ce4e15134fd 100644 --- a/neural_compressor/experimental/pruning_v2.py +++ b/neural_compressor/experimental/pruning_v2.py @@ -1,4 +1,4 @@ -"""pruning module.""" +"""Pruning module.""" # !/usr/bin/env python # -*- coding: utf-8 -*- # @@ -16,28 +16,32 @@ # See the License for the specific language governing permissions and # limitations under the License. 
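The repeat_mask() helper above goes the other way: a per-tile 0/1 mask is stretched back to the full weight shape with np.repeat. A small hedged example for the 4x1 (VNNI-aware) tile:

import numpy as np

tile_mask = np.array([[1, 0, 1],
                      [0, 1, 1]])                        # one keep/prune decision per 4x1 tile
weight_mask = np.repeat(np.repeat(tile_mask, 4, axis=-2), 1, axis=-1)
assert weight_mask.shape == (8, 3)                       # each decision now covers 4 consecutive rows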
-from .component import Component -from ..utils import logger -from ..utils.utility import GLOBAL_STATE, MODE -from ..utils.create_obj_from_config import create_dataloader, create_train_func, create_eval_func -from ..model import BaseModel from ..adaptor import FRAMEWORKS +from ..compression.pruner.pruners import get_pruner +from ..compression.pruner.utils import ( + check_config, + generate_pruner_config, + parse_to_prune, + process_config, + update_params, +) from ..conf.config import PruningConf from ..conf.pythonic_config import Config from ..config import WeightPruningConfig - +from ..model import BaseModel +from ..utils import logger +from ..utils.create_obj_from_config import create_dataloader, create_eval_func, create_train_func +from ..utils.utility import GLOBAL_STATE, MODE, LazyImport +from .component import Component from .pruner_legacy import PRUNERS -from ..compression.pruner.utils import generate_pruner_config -from ..compression.pruner.utils import process_config, parse_to_prune, check_config, update_params -from ..utils.utility import LazyImport -from ..compression.pruner.pruners import get_pruner -LazyImport('torch.nn') -torch = LazyImport('torch') +LazyImport("torch.nn") +torch = LazyImport("torch") -from deprecated import deprecated import re +from deprecated import deprecated + class Pruning(Component): """This is base class of pruning object. @@ -55,7 +59,6 @@ class Pruning(Component): Attributes: conf: A config dict object. Contains pruning setting parameters. pruners: A list of Pruner object. - """ def __init__(self, conf_fname_or_obj=None): @@ -119,21 +122,22 @@ def get_sparsity_ratio(self): linear_conv_cnt = 0 param_cnt = 0 for name, module in self._model.model.named_modules(): - if type(module).__name__ in ["Linear"] or re.search(r'Conv.d', type(module).__name__) != None: + if type(module).__name__ in ["Linear"] or re.search(r"Conv.d", type(module).__name__) is not None: linear_conv_cnt += module.weight.numel() for n, param in self._model.model.named_parameters(): param_cnt += param.numel() - + blockwise_over_matmul_gemm_conv = float(pattern_sparsity_cnt) / linear_conv_cnt if linear_conv_cnt != 0 else 0 elementwise_over_matmul_gemm_conv = float(element_sparsity_cnt) / linear_conv_cnt if linear_conv_cnt != 0 else 0 - + elementwise_over_all = float(element_sparsity_cnt) / param_cnt if param_cnt != 0 else 0 logger.info( f"elementwise_over_matmul_gemm_conv:{elementwise_over_matmul_gemm_conv}," f" elementwise_over_all:{elementwise_over_all}," - f"blockwise_over_matmul_gemm_conv:{blockwise_over_matmul_gemm_conv}") + f"blockwise_over_matmul_gemm_conv:{blockwise_over_matmul_gemm_conv}" + ) return elementwise_over_matmul_gemm_conv, elementwise_over_all, blockwise_over_matmul_gemm_conv @@ -209,19 +213,20 @@ def prepare(self): def pre_process(self): """Functions called before pruning begins, usually set up pruners.""" - assert isinstance(self._model, BaseModel), 'need set neural_compressor Model for pruning....' + assert isinstance(self._model, BaseModel), "need set neural_compressor Model for pruning...." 
GLOBAL_STATE.STATE = MODE.PRUNING - framework_specific_info = {'device': self.cfg.device, - 'random_seed': self.cfg.tuning.random_seed, - 'workspace_path': self.cfg.tuning.workspace.path, - 'q_dataloader': None, - 'format': 'default', - 'backend': 'default'} - - if self.framework == 'tensorflow': - framework_specific_info.update( - {"inputs": self.cfg.model.inputs, "outputs": self.cfg.model.outputs}) + framework_specific_info = { + "device": self.cfg.device, + "random_seed": self.cfg.tuning.random_seed, + "workspace_path": self.cfg.tuning.workspace.path, + "q_dataloader": None, + "format": "default", + "backend": "default", + } + + if self.framework == "tensorflow": + framework_specific_info.update({"inputs": self.cfg.model.inputs, "outputs": self.cfg.model.outputs}) self.adaptor = FRAMEWORKS[self.framework](framework_specific_info) @@ -229,44 +234,53 @@ def pre_process(self): if self._train_dataloader is None and self._train_func is None: train_dataloader_cfg = self.cfg.pruning.train.dataloader - assert train_dataloader_cfg is not None, \ - 'dataloader field of train field of pruning section ' \ - 'in yaml file should be configured as train_dataloader property is NOT set!' + assert train_dataloader_cfg is not None, ( + "dataloader field of train field of pruning section " + "in yaml file should be configured as train_dataloader property is NOT set!" + ) train_dataloader_cfg.distributed = self.train_distributed self._train_dataloader = create_dataloader(self.framework, train_dataloader_cfg) if self._eval_dataloader is None and self._eval_func is None: eval_dataloader_cfg = self.cfg.evaluation.accuracy.dataloader - assert eval_dataloader_cfg is not None, \ - 'dataloader field of evaluation ' \ - 'in yaml file should be configured as eval_dataloader property is NOT set!' + assert eval_dataloader_cfg is not None, ( + "dataloader field of evaluation " + "in yaml file should be configured as eval_dataloader property is NOT set!" + ) eval_dataloader_cfg.distributed = self.evaluation_distributed self._eval_dataloader = create_dataloader(self.framework, eval_dataloader_cfg) if self._train_func is None: # train section of pruning section in yaml file should be configured. train_cfg = self.cfg.pruning.train - assert train_cfg, "train field of pruning section in yaml file must " \ - "be configured for pruning if pruning_func is NOT set." - self._train_func = create_train_func(self.framework, \ - self.train_dataloader, \ - self.adaptor, \ - train_cfg, \ - hooks=self.hooks, \ - callbacks=self.callbacks) + assert train_cfg, ( + "train field of pruning section in yaml file must " + "be configured for pruning if pruning_func is NOT set." + ) + self._train_func = create_train_func( + self.framework, + self.train_dataloader, + self.adaptor, + train_cfg, + hooks=self.hooks, + callbacks=self.callbacks, + ) if self._eval_func is None: # eval section in yaml file should be configured. eval_cfg = self.cfg.evaluation - assert eval_cfg, "eval field of pruning section in yaml file must " \ - "be configured for pruning if eval_func is NOT set." - self._eval_func = create_eval_func(self.framework, \ - self.eval_dataloader, \ - self.adaptor, \ - eval_cfg.accuracy.metric, \ - eval_cfg.accuracy.postprocess, \ - fp32_baseline=False) - if getattr(self.train_dataloader, 'distributed', False): - self.register_hook('on_train_begin', self.adaptor._pre_hook_for_hvd) + assert eval_cfg, ( + "eval field of pruning section in yaml file must " "be configured for pruning if eval_func is NOT set." 
+ ) + self._eval_func = create_eval_func( + self.framework, + self.eval_dataloader, + self.adaptor, + eval_cfg.accuracy.metric, + eval_cfg.accuracy.postprocess, + fp32_baseline=False, + ) + if getattr(self.train_dataloader, "distributed", False): + self.register_hook("on_train_begin", self.adaptor._pre_hook_for_hvd) def execute(self): """Functions that execute the pruning process. @@ -274,30 +288,31 @@ def execute(self): Follow the working flow: evaluate the dense model -> train/prune the model, evaluate the sparse model. """ logger.info("Start to get the baseline model's score before pruning.") - self.baseline_score = self._eval_func(self._model if getattr(self._eval_func, 'builtin', None) \ - else self._model.model) + self.baseline_score = self._eval_func( + self._model if getattr(self._eval_func, "builtin", None) else self._model.model + ) logger.info("Baseline model's score is {}.".format(str(self.baseline_score))) logger.info("Model pruning begins.") - self._train_func(self._model if getattr(self._train_func, 'builtin', None) \ - else self._model.model) + self._train_func(self._model if getattr(self._train_func, "builtin", None) else self._model.model) logger.info("Model pruning is done. Start to evaluate the pruned model.") - self.last_score = self._eval_func(self._model if getattr(self._eval_func, 'builtin', None) \ - else self._model.model) + self.last_score = self._eval_func( + self._model if getattr(self._eval_func, "builtin", None) else self._model.model + ) logger.info("Pruned model score is {}.".format(str(self.last_score))) return self._model def generate_hooks(self): """Register hooks for pruning.""" - self.register_hook('on_train_begin', self._on_train_begin) - self.register_hook('on_train_end', self._on_train_end) - self.register_hook('on_epoch_begin', self._on_epoch_begin) - self.register_hook('on_epoch_end', self._on_epoch_end) - self.register_hook('on_step_begin', self._on_step_begin) - self.register_hook('on_step_end', self._on_step_end) - self.register_hook('on_before_optimizer_step', self._on_before_optimizer_step) - self.register_hook('on_after_optimizer_step', self._on_after_optimizer_step) - self.register_hook('on_before_eval', self._on_before_eval) - self.register_hook('on_after_eval', self._on_after_eval) + self.register_hook("on_train_begin", self._on_train_begin) + self.register_hook("on_train_end", self._on_train_end) + self.register_hook("on_epoch_begin", self._on_epoch_begin) + self.register_hook("on_epoch_end", self._on_epoch_end) + self.register_hook("on_step_begin", self._on_step_begin) + self.register_hook("on_step_end", self._on_step_end) + self.register_hook("on_before_optimizer_step", self._on_before_optimizer_step) + self.register_hook("on_after_optimizer_step", self._on_after_optimizer_step) + self.register_hook("on_before_eval", self._on_before_eval) + self.register_hook("on_after_eval", self._on_after_eval) def _generate_pruners(self): """Obtain Pruner objects.""" @@ -308,37 +323,25 @@ def _generate_pruners(self): logger.warning("one pruner hooks no layers, please have a check") self.pruners.append(get_pruner(info, modules)) - info['modules'] = [key for key in modules.keys()] - info['len_of_modules'] = len(info['modules']) + info["modules"] = [key for key in modules.keys()] + info["len_of_modules"] = len(info["modules"]) logger.info(info) else: for info in self.pruners_info: pruner = generate_pruner_config(info) - if info.prune_type == 'magnitude': - self.pruners.append(PRUNERS['BasicMagnitude'](\ - self._model, \ - pruner, - None)) - elif 
info.prune_type == 'pattern_lock': - self.pruners.append(PRUNERS['PatternLock'](\ - self._model, \ - pruner, - None)) - elif info.prune_type == 'gradient_sensitivity': - self.pruners.append(PRUNERS['GradientSensitivity'](\ - self._model, \ - pruner, - None)) - elif info.prune_type == 'group_lasso': - self.pruners.append(PRUNERS['GroupLasso'](\ - self._model, \ - pruner, - None)) + if info.prune_type == "magnitude": + self.pruners.append(PRUNERS["BasicMagnitude"](self._model, pruner, None)) + elif info.prune_type == "pattern_lock": + self.pruners.append(PRUNERS["PatternLock"](self._model, pruner, None)) + elif info.prune_type == "gradient_sensitivity": + self.pruners.append(PRUNERS["GradientSensitivity"](self._model, pruner, None)) + elif info.prune_type == "group_lasso": + self.pruners.append(PRUNERS["GroupLasso"](self._model, pruner, None)) else: ##print(pruner.prune_type) - assert False, 'now only support {}'.format(PRUNERS.keys()) + assert False, "now only support {}".format(PRUNERS.keys()) logger.info(info) - + def __call__(self): """Entry point of pruning. @@ -374,7 +377,6 @@ def __call__(self): Returns: pruned model: best pruned model found, otherwise return None - """ return super(Pruning, self).__call__() @@ -384,11 +386,11 @@ def __call__(self): @property def pruning_func(self): """Not support get pruning_func.""" - assert False, 'Should not try to get the value of `pruning_func` attribute.' + assert False, "Should not try to get the value of `pruning_func` attribute." return None @pruning_func.setter - @deprecated(version='2.0', reason="please use `train_func` instead") + @deprecated(version="2.0", reason="please use `train_func` instead") def pruning_func(self, user_pruning_func): """Training function for pruning. @@ -406,7 +408,7 @@ def pruning_func(self, user_pruning_func): def evaluation_distributed(self): """Getter to know whether need distributed evaluation dataloader.""" eval_dataloader_cfg = self.cfg.evaluation.accuracy.dataloader - yaml_distributed = eval_dataloader_cfg.get('distributed', False) + yaml_distributed = eval_dataloader_cfg.get("distributed", False) return self._evaluation_distributed or yaml_distributed @evaluation_distributed.setter @@ -418,7 +420,7 @@ def evaluation_distributed(self, distributed): def train_distributed(self): """Getter to know whether need distributed training dataloader.""" train_dataloader_cfg = self.cfg.pruning.train.dataloader - yaml_distributed = train_dataloader_cfg.get('distributed', False) + yaml_distributed = train_dataloader_cfg.get("distributed", False) return self._train_distributed or yaml_distributed @train_distributed.setter @@ -428,7 +430,7 @@ def train_distributed(self, distributed): def __repr__(self): """Return the class's string representation.""" - return 'Pruning' + return "Pruning" class TfPruningCallback(object): @@ -459,18 +461,18 @@ def _set_weights(self): def on_train_begin(self, logs=None, dataloader=None): """Call the same-name function from hooks.""" - self.hooks['on_train_begin'](dataloader) + self.hooks["on_train_begin"](dataloader) def on_train_end(self, logs=None): """Call the same-name function from hooks.""" - self.hooks['on_train_end']() + self.hooks["on_train_end"]() - @deprecated(version='2.0', reason="please use `on_train_begin` instead") + @deprecated(version="2.0", reason="please use `on_train_begin` instead") def pre_epoch_begin(self, logs=None, dataloader=None): # pragma: no cover """Call the same-name function from hooks.""" self.on_train_begin(logs, dataloader) - @deprecated(version='2.0', 
reason="please use `on_train_end` instead") + @deprecated(version="2.0", reason="please use `on_train_end` instead") def post_epoch_end(self, logs=None): # pragma: no cover """Call the same-name function from hooks.""" self.on_train_end(logs) @@ -478,12 +480,12 @@ def post_epoch_end(self, logs=None): # pragma: no cover def on_epoch_begin(self, epoch, logs=None): """Call the same-name function from hooks.""" self._set_weights() - self.hooks['on_epoch_begin'](epoch) + self.hooks["on_epoch_begin"](epoch) def on_epoch_end(self, logs=None): """Call the same-name function from hooks.""" self._set_weights() - res = self.hooks['on_epoch_end']() + res = self.hooks["on_epoch_end"]() for layer_index, weights in res[0][0].items(): get_weights = self.model.layers[layer_index].get_weights() get_weights[0] = weights @@ -492,32 +494,31 @@ def on_epoch_end(self, logs=None): def on_step_begin(self, batch, logs=None): """Call the same-name function from hooks.""" self._set_weights() - res = self.hooks['on_step_begin'](batch) + res = self.hooks["on_step_begin"](batch) for layer_index, weights in res[0][0].items(): get_weights = self.model.layers[layer_index].get_weights() get_weights[0] = weights self.model.layers[layer_index].set_weights(get_weights) - @deprecated(version='2.0', reason="please use `on_step_begin` instead") + @deprecated(version="2.0", reason="please use `on_step_begin` instead") def on_batch_begin(self, batch, logs=None): # pragma: no cover """Call the same-name function from hooks.""" self.on_step_begin(batch, logs) def on_after_compute_loss(self, input, s_outputs, s_loss, t_outputs=None): """Call the same-name function from hooks.""" - return self.hooks['on_after_compute_loss'](input, s_outputs, s_loss, t_outputs) + return self.hooks["on_after_compute_loss"](input, s_outputs, s_loss, t_outputs) def on_step_end(self, logs=None): """Call the same-name function from hooks.""" self._set_weights() - res = self.hooks['on_step_end']() + res = self.hooks["on_step_end"]() for layer_index, weights in res[0][0].items(): get_weights = self.model.layers[layer_index].get_weights() get_weights[0] = weights self.model.layers[layer_index].set_weights(get_weights) - @deprecated(version='2.0', reason="please use `on_step_end` instead") + @deprecated(version="2.0", reason="please use `on_step_end` instead") def on_batch_end(self, logs=None): # pragma: no cover """Call the same-name function from hooks.""" self.on_step_end(logs) - diff --git a/neural_compressor/experimental/pytorch_pruner/logger.py b/neural_compressor/experimental/pytorch_pruner/logger.py index b74fdcd7245..bc89905b305 100644 --- a/neural_compressor/experimental/pytorch_pruner/logger.py +++ b/neural_compressor/experimental/pytorch_pruner/logger.py @@ -1,4 +1,4 @@ -"""logger module.""" +"""Logger module.""" #!/usr/bin/env python # -*- coding: utf-8 -*- # diff --git a/neural_compressor/experimental/pytorch_pruner/patterns.py b/neural_compressor/experimental/pytorch_pruner/patterns.py index 21979b5c16a..530f0c0130f 100644 --- a/neural_compressor/experimental/pytorch_pruner/patterns.py +++ b/neural_compressor/experimental/pytorch_pruner/patterns.py @@ -1,4 +1,4 @@ -"""pattern module.""" +"""Pattern module.""" #!/usr/bin/env python # -*- coding: utf-8 -*- # @@ -19,6 +19,7 @@ import logging import torch + from .logger import logger PATTERNS = {} @@ -27,7 +28,7 @@ def register_pattern(name): """Class decorator used to register a Pattern subclass to the registry. - Decorator function used before a Pattern subclasses. 
+ Decorator function used before a Pattern subclasses. Make sure that this Pattern class can be registered in PATTERNS. Args: @@ -36,7 +37,6 @@ def register_pattern(name): Returns: cls: The class of register. - """ def register(pattern): @@ -61,7 +61,7 @@ def get_pattern(config): AssertionError: Currently only support patterns which have been registered in PATTERNS. """ name = config.pattern - name = name.split('_')[-1] + name = name.split("_")[-1] if "x" in name: return PATTERNS["NxM"](config) if ":" in name: @@ -100,8 +100,7 @@ def get_masks(self, scores, target_sparsity_ratio, pre_masks, max_sparsity_ratio max_sparsity_ratio_per_layer: A float. The maximum sparsity that one layer can reach. Returns: - A dict with the identical size as pre_masks. Update the 0/1 values in it. - + A dict with the identical size as pre_masks. Update the 0/1 values in it. """ if self.is_global: return self.get_masks_global(scores, target_sparsity_ratio, pre_masks, max_sparsity_ratio_per_layer) @@ -117,7 +116,7 @@ def get_mask_single(self, score, exact_sparsity_ratio): Args: score: A Tensor. Store the pruning scores of one layer. - exact_sparsity_ratio: A float. After pruning, the layer's sparsity will reach this value. + exact_sparsity_ratio: A float. After pruning, the layer's sparsity will reach this value. Returns: A Tensor with the identical size as score. a new mask. @@ -126,8 +125,8 @@ def get_mask_single(self, score, exact_sparsity_ratio): k = int(exact_sparsity_ratio * flattern_score.numel()) threshold, _ = torch.kthvalue(flattern_score, k) if not k < 1: - zero = torch.tensor([0.]).to(score.device) - one = torch.tensor([1.]).to(score.device) + zero = torch.tensor([0.0]).to(score.device) + one = torch.tensor([1.0]).to(score.device) mask = torch.where(score <= threshold, zero, one) else: mask = torch.ones(score.shape, device=score.device) @@ -147,7 +146,7 @@ def get_masks_local(self, scores, target_sparsity_ratio, pre_masks, max_sparsity max_sparsity_ratio_per_layer: A float. The maximum sparsity that one layer can reach. Returns: - A dict with the identical size as pre_masks. Update the 0/1 values in it. + A dict with the identical size as pre_masks. Update the 0/1 values in it. """ masks = {} if isinstance(self, PatternNxM) and not isinstance(self.block_size, dict): @@ -161,10 +160,10 @@ def get_masks_local(self, scores, target_sparsity_ratio, pre_masks, max_sparsity def get_sparsity_ratio(self, pre_masks): """Calulate the zero elements' ration in pre_masks. - + Args: pre_masks: Dict{"layer_name": Tensor}. The masks generated after the last pruning step. - + Returns: A float. The zero elements' ratio in pre_masks. """ @@ -195,7 +194,7 @@ def get_pattern_lock_masks(self, modules): return pattern_lock_masks -@register_pattern('NxM') +@register_pattern("NxM") class PatternNxM(Pattern): """Pruning Pattern. 
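The get_mask_single() logic above picks a threshold with torch.kthvalue so that the requested fraction of scores falls below it. A hedged sketch (the helper name is illustrative, not part of the package):

import torch

def magnitude_mask(score, sparsity_ratio):
    # Zero out the lowest-scoring fraction of entries; keep everything above the k-th value.
    k = int(sparsity_ratio * score.numel())
    if k < 1:                                    # nothing to prune at this ratio
        return torch.ones_like(score)
    threshold, _ = torch.kthvalue(score.flatten(), k)
    zero, one = torch.zeros_like(score), torch.ones_like(score)
    return torch.where(score <= threshold, zero, one)

mask = magnitude_mask(torch.rand(4, 8), sparsity_ratio=0.5)  # roughly half the entries become 0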
@@ -214,27 +213,27 @@ class PatternNxM(Pattern): def __init__(self, config): """Initialize.""" super(PatternNxM, self).__init__(config) - pattern = self.pattern.split('_')[-1] - self.N = pattern.split('x')[0] - self.M = pattern.split('x')[1] + pattern = self.pattern.split("_")[-1] + self.N = pattern.split("x")[0] + self.M = pattern.split("x")[1] if self.N == "channel": ##channel-wise pruning mode self.block_size = ["channel", int(self.M)] elif self.M == "channel": ##channel-wise pruning mode self.block_size = [int(self.N), "channel"] else: - self.block_size = [int(pattern.split('x')[0]), int(pattern.split('x')[1])] + self.block_size = [int(pattern.split("x")[0]), int(pattern.split("x")[1])] def get_block_size_dict(self, data): """Calulate the zero elements' ration in pre_masks. - + Args: data: Dict{"layer_name": Tensor}. Store weights or scores. - + Returns: A dict. Dict{"layer_name": [block_size_1, block_size_2]}. Containing layers' corresponding pruning pattern's block shape. - Please be aware that because in channel-wise pruning, - different layers can have different pruning patterns. + Please be aware that because in channel-wise pruning, + different layers can have different pruning patterns. """ block_sizes_dict = {} if self.N == "channel" or self.M == "channel": @@ -254,10 +253,10 @@ def get_block_size_dict(self, data): def get_sparsity_ratio(self, pre_masks): """Calulate the zero elements' ration in pre_masks. - + Args: pre_masks: Dict{"layer_name": Tensor}. The masks generated after the last pruning step. - + Returns: A float. Calculate the zero elements' ratio in pre_masks. """ @@ -282,8 +281,9 @@ def get_sparsity_ratio(self, pre_masks): total_cnt += pre_mask_sum.numel() return float(zero_cnt) / total_cnt - def get_masks_global(self, scores, target_sparsity_ratio, pre_masks, max_sparsity_ratio_per_layer, - keep_pre_mask=False): + def get_masks_global( + self, scores, target_sparsity_ratio, pre_masks, max_sparsity_ratio_per_layer, keep_pre_mask=False + ): """Generate masks for layers. Gather all layer's scores together and calculate a common threshold. @@ -297,7 +297,7 @@ def get_masks_global(self, scores, target_sparsity_ratio, pre_masks, max_sparsit keep_pre_masks: A bool. If True, keep the masks unchanged. Returns: - A dict with the identical size as pre_masks. Update the 0/1 values in it. + A dict with the identical size as pre_masks. Update the 0/1 values in it. 
""" if isinstance(self.block_size, list): self.block_size = self.get_block_size_dict(scores) @@ -314,11 +314,11 @@ def get_masks_global(self, scores, target_sparsity_ratio, pre_masks, max_sparsit not_divided_keys.append(key) continue - new_shape = [shape[0] // block_size[0], block_size[0], shape[1] // block_size[1], - block_size[1]] + new_shape = [shape[0] // block_size[0], block_size[0], shape[1] // block_size[1], block_size[1]] current_score = current_score.reshape(new_shape) current_score_sum = current_score.mean(-1).mean( - 1) ##TODO sum or mean is quite different for per channel pruning + 1 + ) ##TODO sum or mean is quite different for per channel pruning new_scores[key] = current_score_sum global_scores = torch.cat([torch.flatten(v) for v in new_scores.values()]) k = int(target_sparsity_ratio * global_scores.numel()) @@ -328,8 +328,8 @@ def get_masks_global(self, scores, target_sparsity_ratio, pre_masks, max_sparsit for key in new_scores.keys(): block_size = self.block_size[key] score = new_scores[key] - zero = torch.tensor([0.]).to(score.device) - one = torch.tensor([1.]).to(score.device) + zero = torch.tensor([0.0]).to(score.device) + one = torch.tensor([1.0]).to(score.device) mask = torch.where(score <= threshold, zero, one) mask = mask.repeat_interleave(block_size[0], dim=0).repeat_interleave(block_size[1], dim=-1) if torch.sum(mask) / mask.numel() < 1.0 - max_sparsity_ratio_per_layer: @@ -360,8 +360,9 @@ def get_masks_global(self, scores, target_sparsity_ratio, pre_masks, max_sparsit for key in masks.keys(): if len(scores[key].shape) == 4 and len(masks[key].shape) == 2: ## need to permute mask = masks[key] - mask = mask.reshape(scores[key].shape[0], scores[key].shape[2], scores[key].shape[3], - scores[key].shape[1]) + mask = mask.reshape( + scores[key].shape[0], scores[key].shape[2], scores[key].shape[3], scores[key].shape[1] + ) mask = mask.permute(0, 3, 1, 2) masks[key] = mask return masks @@ -403,12 +404,12 @@ def get_pattern_lock_masks(self, modules): return pattern_lock_masks -@register_pattern('N:M') +@register_pattern("N:M") class PatternNInM(Pattern): """Pruning Pattern. A Pattern class derived from Pattern. In this pattern, N out of every M continuous weights will be pruned. - For more info of this pattern, please refer to + For more info of this pattern, please refer to https://github.com/intel/neural-compressor/blob/master/docs/pruning.md Args: @@ -417,22 +418,21 @@ class PatternNInM(Pattern): Attributes: N: The number of elements to be prune in a weight sequence. M: The size of the weight sequence. - """ def __init__(self, config): """Initialize.""" super(PatternNInM, self).__init__(config) - pattern = self.pattern.split('_')[-1] - self.N = int(pattern.split(':')[0]) - self.M = int(pattern.split(':')[1]) ##m is bigger + pattern = self.pattern.split("_")[-1] + self.N = int(pattern.split(":")[0]) + self.M = int(pattern.split(":")[1]) ##m is bigger def get_sparsity_ratio(self, pre_masks): """Calulate the zero elements' ration in pre_masks. - + Args: pre_masks: Dict{"layer_name": Tensor}. The masks generated after the last pruning step. - + Returns: A float. Calculate the zero elements' ratio in pre_masks. """ @@ -457,7 +457,7 @@ def get_masks_global(self, scores, target_sparsity_ratio, pre_masks, max_sparsit max_sparsity_ratio_per_layer: A float. The maximum sparsity that one layer can reach. Returns: - A dict with the identical size as pre_masks. Update the 0/1 values in it. + A dict with the identical size as pre_masks. Update the 0/1 values in it. 
""" N = self.N M = self.M @@ -484,8 +484,8 @@ def get_masks_global(self, scores, target_sparsity_ratio, pre_masks, max_sparsit threshold = threshold.expand(shape[0], shape[1] // M, M) threshold = threshold.reshape((shape[0], shape[1])) - one = torch.tensor([1.]).to(current_score.device) - zero = torch.tensor([0.]).to(current_score.device) + one = torch.tensor([1.0]).to(current_score.device) + zero = torch.tensor([0.0]).to(current_score.device) mask = torch.where(current_score <= threshold, zero, one) current_score_new = current_score_new.reshape((shape[0], shape[1])) ##to get the sum of N scores in each block with M @@ -502,19 +502,19 @@ def get_masks_global(self, scores, target_sparsity_ratio, pre_masks, max_sparsit threshold, _ = torch.kthvalue(global_scores, k) for key in new_scores.keys(): score = new_scores[key] - zero = torch.tensor([0.]).to(score.device) - one = torch.tensor([1.]).to(score.device) + zero = torch.tensor([0.0]).to(score.device) + one = torch.tensor([1.0]).to(score.device) mask = torch.where(score <= threshold, zero, one) mask = mask.repeat_interleave(M, dim=-1) ## both zero will be zero - mask = (mask + all_nm_masks[key]) + mask = mask + all_nm_masks[key] mask = torch.where(mask <= 0, zero, one) if torch.sum(mask) / mask.numel() < 1.0 - max_sparsity_ratio_per_layer: ##trick, to prevent some layer not be purned too much masks[key] = self.get_mask_single(new_scores[key], max_sparsity_ratio_per_layer) masks[key] = masks[key].repeat_interleave(M, dim=-1) ## both zero will be zero - masks[key] = (masks[key] + all_nm_masks[key]) + masks[key] = masks[key] + all_nm_masks[key] masks[key] = torch.where(masks[key] <= 0, zero, one) else: masks[key] = mask @@ -530,8 +530,9 @@ def get_masks_global(self, scores, target_sparsity_ratio, pre_masks, max_sparsit for key in masks.keys(): if len(scores[key].shape) == 4 and len(masks[key].shape) == 2: ## need to permute mask = masks[key] - mask = mask.reshape(scores[key].shape[0], scores[key].shape[2], scores[key].shape[3], - scores[key].shape[1]) + mask = mask.reshape( + scores[key].shape[0], scores[key].shape[2], scores[key].shape[3], scores[key].shape[1] + ) mask = mask.permute(0, 3, 1, 2) masks[key] = mask @@ -564,8 +565,8 @@ def get_pattern_lock_masks(self, modules): mask1[weight_new == 0] = 0.0 mask2[zeros >= N] = 0.0 mask3 = mask1 + mask2 # zero in mask3 means its block has been completely pruned. - zero = torch.tensor([0.]).to(weight.device) - one = torch.tensor([1.]).to(weight.device) + zero = torch.tensor([0.0]).to(weight.device) + one = torch.tensor([1.0]).to(weight.device) mask = torch.where(mask3 == 0, zero, one) mask = mask.reshape(shape) orig_shape = modules[key].weight.shape diff --git a/neural_compressor/experimental/pytorch_pruner/prune_utils.py b/neural_compressor/experimental/pytorch_pruner/prune_utils.py index 2c4223f3576..b0302839f06 100644 --- a/neural_compressor/experimental/pytorch_pruner/prune_utils.py +++ b/neural_compressor/experimental/pytorch_pruner/prune_utils.py @@ -1,4 +1,4 @@ -"""prune utils.""" +"""Prune utils.""" #!/usr/bin/env python # -*- coding: utf-8 -*- # @@ -17,12 +17,14 @@ # limitations under the License. import re + import yaml try: from ...conf.dotdict import DotDict except: from .dot_dict import DotDict ##TODO + from .logger import logger @@ -38,20 +40,22 @@ def check_config(prune_config): Raises: AssertionError. 
""" - assert prune_config['start_step'] >= 0, "start_step should be greater than 0" - assert prune_config['end_step'] >= -1, "end_step should be greater than 0" - assert prune_config['end_step'] >= prune_config['start_step'], \ - "end_step should be greater than start_step" - assert prune_config['target_sparsity'] >= 0 and prune_config['target_sparsity'] < 1.0, \ - "begin_pruning_step should be in range [0,1)" - assert prune_config['update_frequency_on_step'] > 0, "update_frequency_on_step should be greater than 0" - assert prune_config['max_sparsity_ratio_per_layer'] >= 0 and prune_config['max_sparsity_ratio_per_layer'] < 1, \ - "update_frequency_on_step should be greater than 0" - assert prune_config['prune_domain'] == "global" or prune_config['prune_domain'] == "local", \ - "only support 'global' and 'local' prune domain" + assert prune_config["start_step"] >= 0, "start_step should be greater than 0" + assert prune_config["end_step"] >= -1, "end_step should be greater than 0" + assert prune_config["end_step"] >= prune_config["start_step"], "end_step should be greater than start_step" + assert ( + prune_config["target_sparsity"] >= 0 and prune_config["target_sparsity"] < 1.0 + ), "begin_pruning_step should be in range [0,1)" + assert prune_config["update_frequency_on_step"] > 0, "update_frequency_on_step should be greater than 0" + assert ( + prune_config["max_sparsity_ratio_per_layer"] >= 0 and prune_config["max_sparsity_ratio_per_layer"] < 1 + ), "update_frequency_on_step should be greater than 0" + assert ( + prune_config["prune_domain"] == "global" or prune_config["prune_domain"] == "local" + ), "only support 'global' and 'local' prune domain" if "x" in prune_config["pattern"]: - pattern = prune_config["pattern"].split('_')[-1].split('x') - if pattern[0]=="channel" or pattern[1]=="channel": + pattern = prune_config["pattern"].split("_")[-1].split("x") + if pattern[0] == "channel" or pattern[1] == "channel": pass else: try: @@ -62,7 +66,7 @@ def check_config(prune_config): assert N > 0, "N should be greater than 0" assert M > 0, "M should be greater than 0" if ":" in prune_config["pattern"]: - pattern = prune_config["pattern"].split('_')[-1].split(':') + pattern = prune_config["pattern"].split("_")[-1].split(":") try: N = int(pattern[0]) M = int(pattern[1]) @@ -71,31 +75,33 @@ def check_config(prune_config): assert N > 0, "N should be greater than 0" assert M > N, "M should be greater than N" max_ratio = float(N) / M - assert prune_config['target_sparsity'] <= max_ratio, \ - "in N:M pattern, the max sparsity is N/M={}".format(max_ratio) - prune_config['max_sparsity_ratio_per_layer'] = min(max_ratio, prune_config['max_sparsity_ratio_per_layer']) + assert prune_config["target_sparsity"] <= max_ratio, "in N:M pattern, the max sparsity is N/M={}".format( + max_ratio + ) + prune_config["max_sparsity_ratio_per_layer"] = min(max_ratio, prune_config["max_sparsity_ratio_per_layer"]) + def reset_non_value_to_default(obj, key, default): - """Functions that add up undefined configurations. - - If some configurations are not defined in the configuration, set it to a default value. - - Args: - obj: A dict{key: value} - key: A string. Key in obj. - default: When the key is not in obj, Add key: default item in original obj. - - """ - if isinstance(obj, dict): - if (not key in obj.keys()) or obj[key] == None: + """Functions that add up undefined configurations. + + If some configurations are not defined in the configuration, set it to a default value. 
+ + Args: + obj: A dict{key: value} + key: A string. Key in obj. + default: When the key is not in obj, Add key: default item in original obj. + """ + if isinstance(obj, dict): + if (key not in obj.keys()) or obj[key] is None: return default else: return obj[key] - else: - if not hasattr(obj, key) or getattr(obj, key) == None: - return default - else: - return getattr(obj, key) + else: + if not hasattr(obj, key) or getattr(obj, key) is None: + return default + else: + return getattr(obj, key) + def process_and_check_config(val): """Functions which converts a initial configuration object to a Pruning configuration. @@ -108,11 +114,11 @@ def process_and_check_config(val): Returns: A dict whose contents which are regularized for a Pruning obejct. """ - val = val["pruning"]['approach']['weight_compression_pytorch'] + val = val["pruning"]["approach"]["weight_compression_pytorch"] start_step = reset_non_value_to_default(val, "start_step", 0) end_step = reset_non_value_to_default(val, "end_step", 0) excluded_names = reset_non_value_to_default(val, "excluded_names", []) - prune_layer_type = reset_non_value_to_default(val, "prune_layer_type", ['Conv2d', 'Linear']) + prune_layer_type = reset_non_value_to_default(val, "prune_layer_type", ["Conv2d", "Linear"]) target_sparsity = reset_non_value_to_default(val, "target_sparsity", 0.0) ## be care of this val update_frequency_on_step = int(reset_non_value_to_default(val, "update_frequency_on_step", 1)) prune_domain = reset_non_value_to_default(val, "prune_domain", "global") @@ -124,25 +130,25 @@ def process_and_check_config(val): pattern = reset_non_value_to_default(val, "pattern", "tile_pattern_4x1") pruners_info = [] - for info in val['pruners']: + for info in val["pruners"]: pruner = {} - pruner['start_step'] = reset_non_value_to_default(info, 'start_step', start_step) - pruner['end_step'] = reset_non_value_to_default(info, 'end_step', end_step) - pruner['excluded_names'] = reset_non_value_to_default(info, 'excluded_names', excluded_names) - pruner['prune_layer_type'] = reset_non_value_to_default(info, 'prune_layer_type', prune_layer_type) - pruner['target_sparsity'] = reset_non_value_to_default(info, 'target_sparsity', target_sparsity) - pruner['update_frequency_on_step'] = reset_non_value_to_default(info, 'update_frequency_on_step', \ - update_frequency_on_step) - pruner['prune_domain'] = reset_non_value_to_default(info, 'prune_domain', prune_domain) - pruner['prune_type'] = reset_non_value_to_default(info, 'prune_type', prune_type) - pruner['sparsity_decay_type'] = reset_non_value_to_default(info, 'sparsity_decay_type', sparsity_decay_type) - pruner['max_sparsity_ratio_per_layer'] = reset_non_value_to_default(info, 'max_sparsity_ratio_per_layer', \ - max_sparsity_ratio_per_layer) - pruner['names'] = reset_non_value_to_default(info, 'names', names) - pruner['extra_excluded_names'] = reset_non_value_to_default(info, 'extra_excluded_names', - extra_excluded_names) - pruner['pattern'] = reset_non_value_to_default(info, 'pattern', - pattern) + pruner["start_step"] = reset_non_value_to_default(info, "start_step", start_step) + pruner["end_step"] = reset_non_value_to_default(info, "end_step", end_step) + pruner["excluded_names"] = reset_non_value_to_default(info, "excluded_names", excluded_names) + pruner["prune_layer_type"] = reset_non_value_to_default(info, "prune_layer_type", prune_layer_type) + pruner["target_sparsity"] = reset_non_value_to_default(info, "target_sparsity", target_sparsity) + pruner["update_frequency_on_step"] = 
reset_non_value_to_default( + info, "update_frequency_on_step", update_frequency_on_step + ) + pruner["prune_domain"] = reset_non_value_to_default(info, "prune_domain", prune_domain) + pruner["prune_type"] = reset_non_value_to_default(info, "prune_type", prune_type) + pruner["sparsity_decay_type"] = reset_non_value_to_default(info, "sparsity_decay_type", sparsity_decay_type) + pruner["max_sparsity_ratio_per_layer"] = reset_non_value_to_default( + info, "max_sparsity_ratio_per_layer", max_sparsity_ratio_per_layer + ) + pruner["names"] = reset_non_value_to_default(info, "names", names) + pruner["extra_excluded_names"] = reset_non_value_to_default(info, "extra_excluded_names", extra_excluded_names) + pruner["pattern"] = reset_non_value_to_default(info, "pattern", pattern) check_config(pruner) pruner_info = DotDict(pruner) pruners_info.append(pruner_info) @@ -160,7 +166,7 @@ def process_config(config): """ if isinstance(config, str): try: - with open(config, 'r') as f: + with open(config, "r") as f: content = f.read() try: from .schema_check import schema @@ -172,14 +178,10 @@ def process_config(config): schema.validate(val) except FileNotFoundError as f: logger.error("{}.".format(f)) - raise RuntimeError( - "The yaml file is not exist. Please check the file name or path." - ) + raise RuntimeError("The yaml file is not exist. Please check the file name or path.") except Exception as e: logger.error("{}.".format(e)) - raise RuntimeError( - "The yaml file format is not correct. Please refer to document." - ) + raise RuntimeError("The yaml file format is not correct. Please refer to document.") elif isinstance(config, DotDict): val = config @@ -192,7 +194,7 @@ def process_config(config): def parse_to_prune(model, config): """Keep target pruned layers.""" modules = {} - if config["names"] == None or config["names"] == []: + if config["names"] is None or config["names"] == []: config["names"] = [".*"] for raw in config["names"]: try: diff --git a/neural_compressor/experimental/pytorch_pruner/pruner.py b/neural_compressor/experimental/pytorch_pruner/pruner.py index fac15c68a56..cc3b4a9713b 100644 --- a/neural_compressor/experimental/pytorch_pruner/pruner.py +++ b/neural_compressor/experimental/pytorch_pruner/pruner.py @@ -1,4 +1,4 @@ -"""pruner module.""" +"""Pruner module.""" # !/usr/bin/env python # -*- coding: utf-8 -*- # @@ -17,10 +17,10 @@ # limitations under the License. import torch -from .patterns import get_pattern -from .scheduler import get_scheduler from .logger import logger +from .patterns import get_pattern +from .scheduler import get_scheduler PRUNERS = {} @@ -50,14 +50,14 @@ def get_pruner(modules, config): """Get registered pruner class. Get a Pruner object from PRUNERS. - + Args: modules: A dict {"module_name": Tensor}. Store the pruning modules' weights. config: A config dict object. Contains the pruner information. Returns: A Pruner object. - + Raises: AssertionError: Cuurently only support pruners which have been registered in PRUNERS. """ name = config["prune_type"] @@ -89,7 +89,7 @@ class Pruner: global_step: A integer. The total steps the model has run. start_step: A integer. When to trigger pruning process. end_step: A integer. When to end pruning process. - update_frequency_on_step: A integer. The pruning frequency, which's valid when iterative + update_frequency_on_step: A integer. The pruning frequency, which's valid when iterative pruning is enabled. target_sparsity_ratio: A float. The final sparsity after pruning. max_sparsity_ratio_per_layer: A float. 
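As an aside on the pattern assertions in check_config() above: they accept either an NxM block pattern such as "tile_pattern_4x1" (one side may be the literal string "channel") or an N:M pattern such as "2:4", whose reachable sparsity is capped at N/M. A minimal sketch of that string handling, using a hypothetical helper name that is not part of the library:

# Hypothetical helper, for illustration only: splits pattern strings the same way check_config() does.
def parse_pattern(pattern):
    if "x" in pattern:
        n, m = pattern.split("_")[-1].split("x")
        return ("NxM", n, m)  # n or m may be the literal string "channel"
    if ":" in pattern:
        n, m = pattern.split("_")[-1].split(":")
        return ("N:M", int(n), int(m))  # target_sparsity may not exceed n / m
    raise ValueError("unrecognized pattern: {}".format(pattern))

print(parse_pattern("tile_pattern_4x1"))  # ('NxM', '4', '1')
print(parse_pattern("2:4"))               # ('N:M', 2, 4)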
Sparsity ratio maximum for every module. @@ -110,21 +110,20 @@ def __init__(self, modules, config): def _init(self): """Auxiliary function for initializing.""" self.global_step = -1 - self.start_step = self.config['start_step'] - self.end_step = self.config['end_step'] - self.update_frequency_on_step = self.config['update_frequency_on_step'] + self.start_step = self.config["start_step"] + self.end_step = self.config["end_step"] + self.update_frequency_on_step = self.config["update_frequency_on_step"] ##this is different with original code - self.total_prune_cnt = (self.end_step - self.start_step + 1) \ - // self.update_frequency_on_step + self.total_prune_cnt = (self.end_step - self.start_step + 1) // self.update_frequency_on_step self.completed_pruned_cnt = 0 self.masks = {} for key in self.modules.keys(): module = self.modules[key] self.masks[key] = torch.ones(module.weight.shape).to(module.weight.device) ##TODO support bias or others - self.target_sparsity_ratio = self.config['target_sparsity'] + self.target_sparsity_ratio = self.config["target_sparsity"] - self.max_sparsity_ratio_per_layer = self.config['max_sparsity_ratio_per_layer'] + self.max_sparsity_ratio_per_layer = self.config["max_sparsity_ratio_per_layer"] def on_epoch_begin(self, epoch): """Functions called in the beginning of each epoch.""" @@ -154,16 +153,17 @@ def on_step_begin(self, local_step): if self.current_sparsity_ratio > self.target_sparsity_ratio: return - current_target_sparsity_ratio = self.scheduler.update_sparsity_ratio(self.target_sparsity_ratio, - self.completed_pruned_cnt, - self.total_prune_cnt, self.masks) + current_target_sparsity_ratio = self.scheduler.update_sparsity_ratio( + self.target_sparsity_ratio, self.completed_pruned_cnt, self.total_prune_cnt, self.masks + ) logger.info(f"current target ratio is {current_target_sparsity_ratio}") self.update_scores() self.completed_pruned_cnt += 1 if self.scores == {}: return - self.masks = self.pattern.get_masks(self.scores, current_target_sparsity_ratio, self.masks, - self.max_sparsity_ratio_per_layer) + self.masks = self.pattern.get_masks( + self.scores, current_target_sparsity_ratio, self.masks, self.max_sparsity_ratio_per_layer + ) self.mask_weights() self.current_sparsity_ratio = self.pattern.get_sparsity_ratio(self.masks) @@ -183,12 +183,12 @@ def on_before_optimizer_step(self): def on_after_optimizer_step(self): """Functions called after the optimizer.step(). - + Prune the model after optimization. """ self.mask_weights() - def on_train_begin(self, dataloader = None): + def on_train_begin(self, dataloader=None): """Functions called in the beginning of training.""" pass @@ -217,7 +217,7 @@ def update_scores(self): pass -@register_pruners('magnitude') +@register_pruners("magnitude") class MagnitudePruner(Pruner): """Pruning Pruner. @@ -244,12 +244,12 @@ def update_scores(self): self.scores[key] = p -@register_pruners('snip') +@register_pruners("snip") class SnipPruner(Pruner): """Pruning Pruner. A Pruner class derived from Pruner. In this pruner, the scores are calculated based on SNIP. - Please refer to SNIP: Single-shot Network Pruning based on Connection Sensitivity + Please refer to SNIP: Single-shot Network Pruning based on Connection Sensitivity (https://arxiv.org/abs/1810.02340) Args: @@ -268,7 +268,7 @@ def __init__(self, modules, config): def on_after_optimizer_step(self): """Functions called after the optimizer.step(). - + Prune the model after optimization and update the scores based on weights and gradients. 
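A self-contained sketch of the magnitude criterion used by MagnitudePruner above: scores are the absolute weights, and the lowest-scoring fraction of elements is zeroed out. This is an element-wise simplification; the pattern object's get_masks() additionally honors the block/N:M pattern and the per-layer sparsity cap.

# Element-wise simplification, for illustration: magnitude scoring plus hard thresholding.
import torch

def magnitude_mask(weight, sparsity_ratio):
    scores = torch.abs(weight)
    k = int(weight.numel() * sparsity_ratio)  # number of elements to prune
    if k == 0:
        return torch.ones_like(weight)
    threshold = torch.kthvalue(scores.flatten(), k).values
    return (scores > threshold).float()

w = torch.randn(4, 4)
mask = magnitude_mask(w, 0.5)
w.data = w.data * mask  # roughly what mask_weights() does to each module.weight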
""" self.mask_weights() @@ -278,7 +278,7 @@ def on_after_optimizer_step(self): self.scores[key] = torch.abs(p * p.grad) -@register_pruners('snip_momentum') +@register_pruners("snip_momentum") class SnipMomentumPruner(Pruner): """Pruning Pruner. @@ -304,7 +304,7 @@ def __init__(self, modules, config): def on_after_optimizer_step(self): """Functions called after the optimizer.step(). - + Prune the model after optimization and update the scores based on weights and gradients. """ self.mask_weights() @@ -315,7 +315,7 @@ def on_after_optimizer_step(self): self.scores[key] += 1.0 * torch.abs(p * p.grad) -@register_pruners('pattern_lock') +@register_pruners("pattern_lock") class PatternLockPruner(Pruner): """Pruning Pruner. diff --git a/neural_compressor/experimental/pytorch_pruner/pruning.py b/neural_compressor/experimental/pytorch_pruner/pruning.py index 055ae6c0cb0..39d5749de31 100644 --- a/neural_compressor/experimental/pytorch_pruner/pruning.py +++ b/neural_compressor/experimental/pytorch_pruner/pruning.py @@ -1,4 +1,4 @@ -"""pruning module.""" +"""Pruning module.""" #!/usr/bin/env python # -*- coding: utf-8 -*- # @@ -18,9 +18,9 @@ import torch.nn -from .prune_utils import process_config, parse_to_prune, parse_not_to_prune -from .pruner import get_pruner from .logger import logger +from .prune_utils import parse_not_to_prune, parse_to_prune, process_config +from .pruner import get_pruner class Pruning: @@ -32,14 +32,14 @@ class Pruning: Args: config: a string. The path to a config file. For config file template, please refer to https://github.com/intel/neural-compressor/tree/master/examples/pytorch/nlp/huggingface_models/text-classification/pruning/pytorch_pruner/eager/ - + Attributes: model: The model object to prune. config_file_path: A string. The path to a config file. pruners: A list. A list of Pruner objects. - pruner_info: A config dict object. Contains pruners' information. + pruner_info: A config dict object. Contains pruners' information. """ - + def __init__(self, config): """Initialize.""" self.model = None @@ -50,7 +50,7 @@ def __init__(self, config): def update_items_for_all_pruners(self, **kwargs): """Functions which add User-defined arguments to the original configurations. - The original config of pruning is read from a file. + The original config of pruning is read from a file. However, users can still modify configurations by passing key-value arguments in this function. Please note that the key-value arguments' keys are analysable in current configuration. """ @@ -94,8 +94,7 @@ def get_sparsity_ratio(self): param_cnt += param.numel() blockwise_over_matmul_gemm_conv = float(pattern_sparsity_cnt) / linear_conv_cnt elementwise_over_matmul_gemm_conv = float(element_sparsity_cnt) / linear_conv_cnt - elementwise_over_all = float( - element_sparsity_cnt) / param_cnt + elementwise_over_all = float(element_sparsity_cnt) / param_cnt return elementwise_over_matmul_gemm_conv, elementwise_over_all, blockwise_over_matmul_gemm_conv @@ -110,8 +109,8 @@ def _generate_pruners(self): logger.warning("one pruner hooks no layers, please have a check") self.pruners.append(get_pruner(modules, info)) - info['modules'] = [key for key in modules.keys()] - info['len_of_modules'] = len(info['modules']) + info["modules"] = [key for key in modules.keys()] + info["len_of_modules"] = len(info["modules"]) logger.info(info) def on_train_begin(self): @@ -120,7 +119,7 @@ def on_train_begin(self): Before training, ensure that pruners are generated. 
""" self._generate_pruners() ##TODO is there better place to place - + def on_epoch_begin(self, epoch): """Functions called in the beginning of every epoch.""" for pruner in self.pruners: diff --git a/neural_compressor/experimental/pytorch_pruner/scheduler.py b/neural_compressor/experimental/pytorch_pruner/scheduler.py index 915022a5a6e..38c1ca17de7 100644 --- a/neural_compressor/experimental/pytorch_pruner/scheduler.py +++ b/neural_compressor/experimental/pytorch_pruner/scheduler.py @@ -1,4 +1,4 @@ -"""scheduler module.""" +"""Scheduler module.""" #!/usr/bin/env python # -*- coding: utf-8 -*- # @@ -26,7 +26,7 @@ def register_scheduler(name): Decorator function used before a Scheduler subclass. Make sure that the Scheduler class decorated by this function can be registered in SCHEDULERS. - + Args: cls (class): The class of register. name: A string. Define the scheduler type. @@ -83,7 +83,7 @@ def update_sparsity_ratio(self, aggressive_ratio, current_prune_step, total_prun raise NotImplementedError -@register_scheduler('oneshot') +@register_scheduler("oneshot") class OneshotScheduler(Scheduler): """Pruning Scheduler. @@ -106,7 +106,7 @@ def update_sparsity_ratio(self, aggressive_ratio, current_prune_step, total_prun return aggressive_ratio -@register_scheduler('iterative') +@register_scheduler("iterative") class IterativeScheduler(Scheduler): """Pruning Scheduler. @@ -133,7 +133,7 @@ def update_sparsity_ratio(self, target_ratio, current_prune_step, total_prune_st current_prune_step: A integer. The current pruning step. total_prune_steps: A integer. The total steps included in the pruning progress. masks: A dict{"module_name": Tensor}. The masks for modules' weights. - + Returns: A float. the target sparsity ratio the model will reach after the next pruning step. 
""" @@ -141,16 +141,16 @@ def update_sparsity_ratio(self, target_ratio, current_prune_step, total_prune_st # if self.config.prune_domain == "global": # aggressive_ratio += 0.02 - aggressive_ratio = min(self.config.max_sparsity_ratio_per_layer, - aggressive_ratio) ##lagacy issue + aggressive_ratio = min(self.config.max_sparsity_ratio_per_layer, aggressive_ratio) ##lagacy issue decay_type = self.config.sparsity_decay_type if decay_type == "cos": current_target_sparsity = (aggressive_ratio) * ( - 1.0 - math.cos(float(current_prune_step) / total_prune_steps * (math.pi / 2))) + 1.0 - math.cos(float(current_prune_step) / total_prune_steps * (math.pi / 2)) + ) elif decay_type == "exp": target_dense_change_ratio = (1.0 - aggressive_ratio) ** (1 / total_prune_steps) - current_target_sparsity = 1.0 - target_dense_change_ratio ** current_prune_step + current_target_sparsity = 1.0 - target_dense_change_ratio**current_prune_step elif decay_type == "linear": current_target_sparsity = (aggressive_ratio) * float(current_prune_step) / total_prune_steps diff --git a/neural_compressor/experimental/quantization.py b/neural_compressor/experimental/quantization.py index 3ee91151d3d..d08e7159997 100644 --- a/neural_compressor/experimental/quantization.py +++ b/neural_compressor/experimental/quantization.py @@ -20,19 +20,22 @@ import os import pickle import random + import numpy as np -from .component import Component -from ..conf.dotdict import deep_get, deep_set, DotDict -from .strategy import EXP_STRATEGIES -from ..utils import logger -from ..utils.utility import time_limit -from ..utils.create_obj_from_config import create_dataloader -from ..model import BaseModel -from ..model.tensorflow_model import TensorflowQATModel -from ..model.model import get_model_fwk_name +from deprecated import deprecated + from ..conf.config import QuantConf +from ..conf.dotdict import DotDict, deep_get, deep_set from ..conf.pythonic_config import Config -from deprecated import deprecated +from ..model import BaseModel +from ..model.model import get_model_fwk_name +from ..model.tensorflow_model import TensorflowQATModel +from ..utils import logger +from ..utils.create_obj_from_config import create_dataloader +from ..utils.utility import time_limit +from .component import Component +from .strategy import EXP_STRATEGIES + class Quantization(Component): """This class provides easy use API for quantization. @@ -51,7 +54,6 @@ class Quantization(Component): conf_fname_or_obj (string or obj): The path to the YAML configuration file or QuantConf class containing accuracy goal, tuning objective and preferred calibration & quantization tuning space etc. 
- """ def __init__(self, conf_fname_or_obj=None): @@ -77,50 +79,60 @@ def _create_eval_dataloader(self, cfg): # when eval_func is set, will be directly used and eval_dataloader can be None if self._eval_func is None: if self._eval_dataloader is None: - eval_dataloader_cfg = deep_get(cfg, 'evaluation.accuracy.dataloader') + eval_dataloader_cfg = deep_get(cfg, "evaluation.accuracy.dataloader") if eval_dataloader_cfg is None: - logger.info("Because both eval_dataloader_cfg and user-defined eval_func are None," \ - " automatically setting 'tuning.exit_policy.performance_only = True'.") - deep_set(cfg, 'tuning.exit_policy.performance_only', True) - logger.info("The cfg.tuning.exit_policy.performance_only is: {}".format(\ - cfg.tuning.exit_policy.performance_only)) + logger.info( + "Because both eval_dataloader_cfg and user-defined eval_func are None," + " automatically setting 'tuning.exit_policy.performance_only = True'." + ) + deep_set(cfg, "tuning.exit_policy.performance_only", True) + logger.info( + "The cfg.tuning.exit_policy.performance_only is: {}".format( + cfg.tuning.exit_policy.performance_only + ) + ) else: - if deep_get(cfg, 'evaluation.accuracy.iteration') == -1 and 'dummy_v2' \ - in deep_get(cfg, 'evaluation.accuracy.dataloader.dataset', {}): - deep_set(cfg, 'evaluation.accuracy.iteration', 10) + if deep_get(cfg, "evaluation.accuracy.iteration") == -1 and "dummy_v2" in deep_get( + cfg, "evaluation.accuracy.dataloader.dataset", {} + ): + deep_set(cfg, "evaluation.accuracy.iteration", 10) - self._eval_dataloader = create_dataloader(self.framework, \ - eval_dataloader_cfg) - if os.environ.get("PERFORMANCE_ONLY") in ['0', '1']: + self._eval_dataloader = create_dataloader(self.framework, eval_dataloader_cfg) + if os.environ.get("PERFORMANCE_ONLY") in ["0", "1"]: performance_only = bool(int(os.environ.get("PERFORMANCE_ONLY"))) - deep_set(cfg, 'tuning.exit_policy.performance_only', performance_only) - logger.info("Get environ 'PERFORMANCE_ONLY={}'," \ - " force setting 'tuning.exit_policy.performance_only = True'.".format(performance_only)) + deep_set(cfg, "tuning.exit_policy.performance_only", performance_only) + logger.info( + "Get environ 'PERFORMANCE_ONLY={}'," + " force setting 'tuning.exit_policy.performance_only = True'.".format(performance_only) + ) def _create_calib_dataloader(self, cfg): """Create default calibration dataloader if train_func is not set.""" - approach_cfg = deep_get(cfg, 'quantization.approach') + approach_cfg = deep_get(cfg, "quantization.approach") if self._calib_dataloader is None and self._calib_func is None: - if approach_cfg in ['post_training_static_quant', 'post_training_auto_quant']: - calib_dataloader_cfg = deep_get(cfg, 'quantization.calibration.dataloader') - - if approach_cfg == "post_training_auto_quant" and calib_dataloader_cfg == None: - logger.error("dataloader is required for 'post_training_auto_quant'. " - "use 'post_training_dynamic_quant' instead if no dataloader provided.") - assert calib_dataloader_cfg is not None, \ - 'dataloader field of calibration field of quantization section ' \ - 'in yaml file should be configured as calib_dataloader property is NOT set!' 
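The dataloader handling here leans on the deep_get/deep_set helpers, which read and write nested configuration entries through dotted paths. A short usage sketch, assuming an installed neural_compressor (the import path follows the relative import at the top of this file):

# Usage sketch of the dotted-path config helpers used throughout this hunk.
from neural_compressor.conf.dotdict import deep_get, deep_set

cfg = {"tuning": {"exit_policy": {"performance_only": False}}}
print(deep_get(cfg, "tuning.exit_policy.performance_only"))  # False
deep_set(cfg, "tuning.exit_policy.performance_only", True)   # force performance-only tuning
print(deep_get(cfg, "tuning.exit_policy.performance_only"))  # True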
- - if deep_get(calib_dataloader_cfg, 'shuffle'): - logger.warning("Reset `shuffle` field to False when post_training_static_quant" - " is selected.") - deep_set(calib_dataloader_cfg, 'shuffle', False) - elif approach_cfg == 'quant_aware_training': - calib_dataloader_cfg = deep_get(cfg, 'quantization.train.dataloader') - assert calib_dataloader_cfg is not None, \ - 'dataloader field of train field of quantization section ' \ - 'in yaml file should be configured as calib_dataloader property is NOT set!' + if approach_cfg in ["post_training_static_quant", "post_training_auto_quant"]: + calib_dataloader_cfg = deep_get(cfg, "quantization.calibration.dataloader") + + if approach_cfg == "post_training_auto_quant" and calib_dataloader_cfg is None: + logger.error( + "dataloader is required for 'post_training_auto_quant'. " + "use 'post_training_dynamic_quant' instead if no dataloader provided." + ) + assert calib_dataloader_cfg is not None, ( + "dataloader field of calibration field of quantization section " + "in yaml file should be configured as calib_dataloader property is NOT set!" + ) + + if deep_get(calib_dataloader_cfg, "shuffle"): + logger.warning("Reset `shuffle` field to False when post_training_static_quant" " is selected.") + deep_set(calib_dataloader_cfg, "shuffle", False) + elif approach_cfg == "quant_aware_training": + calib_dataloader_cfg = deep_get(cfg, "quantization.train.dataloader") + assert calib_dataloader_cfg is not None, ( + "dataloader field of train field of quantization section " + "in yaml file should be configured as calib_dataloader property is NOT set!" + ) else: calib_dataloader_cfg = None @@ -130,17 +142,17 @@ def _create_calib_dataloader(self, cfg): def pre_process(self): """Prepare dataloaders, qfuncs for Component.""" cfg = self.conf.usr_cfg - assert isinstance(self._model, BaseModel), 'need set your Model for quantization....' + assert isinstance(self._model, BaseModel), "need set your Model for quantization...." self._create_eval_dataloader(cfg) self._create_calib_dataloader(cfg) strategy = cfg.tuning.strategy.name.lower() if cfg.quantization.quant_level == 0: strategy = "conservative" - logger.info(f"On the premise that the accuracy meets the conditions, improve the performance.") + logger.info("On the premise that the accuracy meets the conditions, improve the performance.") if strategy == "mse_v2": - if not (self.framework.startswith("tensorflow") or self.framework == 'pytorch_fx'): + if not (self.framework.startswith("tensorflow") or self.framework == "pytorch_fx"): strategy = "basic" logger.warning(f"MSE_v2 does not support {self.framework} now, use basic instead.") logger.warning("Only tensorflow, pytorch_fx is supported by MSE_v2 currently.") @@ -149,12 +161,16 @@ def pre_process(self): _resume = None # check if interrupted tuning procedure exists. if yes, it will resume the # whole auto tune process. 
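Condensed, the strategy selection in pre_process() applies two overrides: quant_level 0 forces the conservative strategy, and mse_v2 falls back to basic on any framework other than TensorFlow or pytorch_fx. A small stand-alone restatement of just that logic:

# Stand-alone restatement of the strategy overrides applied in pre_process().
def choose_strategy(configured_name, quant_level, framework):
    strategy = configured_name.lower()
    if quant_level == 0:
        strategy = "conservative"
    if strategy == "mse_v2" and not (framework.startswith("tensorflow") or framework == "pytorch_fx"):
        strategy = "basic"
    return strategy

print(choose_strategy("MSE_v2", 1, "onnxruntime"))  # basic
print(choose_strategy("basic", 0, "pytorch_fx"))    # conservative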
- self.resume_file = os.path.abspath(os.path.expanduser(cfg.tuning.workspace.resume)) \ - if cfg.tuning.workspace and cfg.tuning.workspace.resume else None + self.resume_file = ( + os.path.abspath(os.path.expanduser(cfg.tuning.workspace.resume)) + if cfg.tuning.workspace and cfg.tuning.workspace.resume + else None + ) if self.resume_file: - assert os.path.exists(self.resume_file), \ - "The specified resume file {} doesn't exist!".format(self.resume_file) - with open(self.resume_file, 'rb') as f: + assert os.path.exists(self.resume_file), "The specified resume file {} doesn't exist!".format( + self.resume_file + ) + with open(self.resume_file, "rb") as f: _resume = pickle.load(f).__dict__ self.strategy = EXP_STRATEGIES[strategy]( @@ -165,10 +181,11 @@ def pre_process(self): self._eval_dataloader, self._eval_func, _resume, - self.hooks) + self.hooks, + ) - if getattr(self._calib_dataloader, 'distributed', False): - self.register_hook('on_train_begin', self.strategy.adaptor._pre_hook_for_hvd) + if getattr(self._calib_dataloader, "distributed", False): + self.register_hook("on_train_begin", self.strategy.adaptor._pre_hook_for_hvd) def execute(self): """Quantization execute routine based on strategy design.""" @@ -182,21 +199,23 @@ def execute(self): except Exception as e: logger.error("Unexpected exception {} happened during tuning.".format(repr(e))) import traceback + traceback.print_exc() finally: if self.strategy.best_qmodel: logger.info( "Specified timeout or max trials is reached! " - "Found a quantized model which meet accuracy goal. Exit.") + "Found a quantized model which meet accuracy goal. Exit." + ) self.strategy.deploy_config() else: logger.error( "Specified timeout or max trials is reached! " - "Not found any quantized model which meet accuracy goal. Exit.") + "Not found any quantized model which meet accuracy goal. Exit." + ) return self.strategy.best_qmodel - def __call__(self): """Automatic quantization tuning main entry point. @@ -236,7 +255,6 @@ def __call__(self): Returns: quantized model: best qanitized model found, otherwise return None - """ return super(Quantization, self).__call__() @@ -245,6 +263,7 @@ def __call__(self): def dataset(self, dataset_type, *args, **kwargs): """Get dataset according to dataset_type.""" from ..data import Datasets + return Datasets(self.framework)[dataset_type](*args, **kwargs) @property @@ -281,13 +300,13 @@ def calib_dataloader(self, dataloader): from neural_compressor.experimental.common.DataLoader """ from .common import _generate_common_dataloader - self._calib_dataloader = _generate_common_dataloader( - dataloader, self.framework) + + self._calib_dataloader = _generate_common_dataloader(dataloader, self.framework) @property def metric(self): """Get `metric` attribute.""" - assert False, 'Should not try to get the value of `metric` attribute.' + assert False, "Should not try to get the value of `metric` attribute." return None @metric.setter @@ -308,21 +327,23 @@ def metric(self, user_metric): specific frameworks and initialized. """ if deep_get(self.conf.usr_cfg, "evaluation.accuracy.metric"): - logger.warning("Override the value of `metric` field defined in yaml file" \ - " as user defines the value of `metric` attribute by code.") + logger.warning( + "Override the value of `metric` field defined in yaml file" + " as user defines the value of `metric` attribute by code." 
+ ) - from .common import Metric as NCMetric from ..metric import METRICS + from .common import Metric as NCMetric + if isinstance(user_metric, NCMetric): name = user_metric.name metric_cls = user_metric.metric_cls metric_cfg = {name: {**user_metric.kwargs}} else: - for i in ['reset', 'update', 'result']: - assert hasattr(user_metric, i), 'Please realise {} function' \ - 'in user defined metric'.format(i) + for i in ["reset", "update", "result"]: + assert hasattr(user_metric, i), "Please realise {} function" "in user defined metric".format(i) metric_cls = type(user_metric).__name__ - name = 'user_' + metric_cls + name = "user_" + metric_cls metric_cfg = {name: id(user_metric)} deep_set(self.conf.usr_cfg, "evaluation.accuracy.metric", metric_cfg) self.conf.usr_cfg = DotDict(self.conf.usr_cfg) @@ -333,7 +354,7 @@ def metric(self, user_metric): @property def objective(self): """Get `objective` attribute.""" - assert False, 'Should not try to get the value of `objective` attribute.' + assert False, "Should not try to get the value of `objective` attribute." return None @objective.setter @@ -342,14 +363,21 @@ def objective(self, user_objective): The built-in objectives include Accuracy, Performance, Footprint and ModelSize. """ - if deep_get(self.conf.usr_cfg, "tuning.multi_objectives.objective") or \ - deep_get(self.conf.usr_cfg, "tuning.objective"): - logger.warning("Override the value of `objective` field defined in yaml file" \ - " as user defines the value of `objective` attribute by code.") - - user_obj_cfg = "tuning.objective" if deep_get(self.conf.usr_cfg, "tuning.objective") \ + if deep_get(self.conf.usr_cfg, "tuning.multi_objectives.objective") or deep_get( + self.conf.usr_cfg, "tuning.objective" + ): + logger.warning( + "Override the value of `objective` field defined in yaml file" + " as user defines the value of `objective` attribute by code." + ) + + user_obj_cfg = ( + "tuning.objective" + if deep_get(self.conf.usr_cfg, "tuning.objective") else "tuning.multi_objectives.objective" + ) from ..objective import objective_custom_registry + objective_cls = type(user_objective) name = user_objective.__class__.__name__ objective_cfg = name if deep_get(self.conf.usr_cfg, "tuning.objective") else [name] @@ -360,7 +388,7 @@ def objective(self, user_objective): @property def postprocess(self, user_postprocess): """Get `postprocess` attribute.""" - assert False, 'Should not try to get the value of `postprocess` attribute.' + assert False, "Should not try to get the value of `postprocess` attribute." return None @postprocess.setter @@ -379,23 +407,27 @@ def postprocess(self, user_postprocess): registered to specific frameworks and initialized. """ from .common import Postprocess as NCPostprocess - assert isinstance(user_postprocess, NCPostprocess), \ - 'please initialize a neural_compressor.experimental.common.Postprocess and set....' - postprocess_cfg = {user_postprocess.name : {**user_postprocess.kwargs}} + + assert isinstance( + user_postprocess, NCPostprocess + ), "please initialize a neural_compressor.experimental.common.Postprocess and set...." 
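The metric setter above accepts any object that exposes reset/update/result and registers it under "user_" plus its class name. A hypothetical user-defined metric satisfying that contract:

# Hypothetical user metric; any object with reset/update/result passes the checks above
# and would be registered as "user_Top1Metric".
class Top1Metric:
    def __init__(self):
        self.reset()

    def reset(self):
        self.correct, self.total = 0, 0

    def update(self, preds, labels):
        self.correct += sum(int(p == l) for p, l in zip(preds, labels))
        self.total += len(labels)

    def result(self):
        return self.correct / self.total if self.total else 0.0

# quantizer.metric = Top1Metric()  # assuming `quantizer` is a Quantization instance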
+ postprocess_cfg = {user_postprocess.name: {**user_postprocess.kwargs}} if deep_get(self.conf.usr_cfg, "evaluation.accuracy.postprocess"): - logger.warning("Override the value of `postprocess` field defined in yaml file" \ - " as user defines the value of `postprocess` attribute by code.") - deep_set( - self.conf.usr_cfg, "evaluation.accuracy.postprocess.transform", postprocess_cfg) + logger.warning( + "Override the value of `postprocess` field defined in yaml file" + " as user defines the value of `postprocess` attribute by code." + ) + deep_set(self.conf.usr_cfg, "evaluation.accuracy.postprocess.transform", postprocess_cfg) from neural_compressor.data import TRANSFORMS - postprocesses = TRANSFORMS(self.framework, 'postprocess') + + postprocesses = TRANSFORMS(self.framework, "postprocess") postprocesses.register(user_postprocess.name, user_postprocess.postprocess_cls) # BELOW API TO BE DEPRECATED! @property def q_func(self): """Get `q_func` attribute.""" - assert False, 'Should not try to get the value of `q_func` attribute.' + assert False, "Should not try to get the value of `q_func` attribute." return None @q_func.setter @@ -436,10 +468,10 @@ def model(self, user_model): be careful of the name of model configured in yaml file, make sure the name is in supported slim model list. """ - approach_cfg = deep_get(self.cfg, 'quantization.approach') + approach_cfg = deep_get(self.cfg, "quantization.approach") if not self.framework: self.framework = get_model_fwk_name(user_model) - if self.framework == 'tensorflow' and approach_cfg == 'quant_aware_training': + if self.framework == "tensorflow" and approach_cfg == "quant_aware_training": if type(user_model) == str: self._model = TensorflowQATModel(user_model) else: @@ -449,4 +481,4 @@ def model(self, user_model): def __repr__(self): """Return the class string.""" - return 'Quantization' + return "Quantization" diff --git a/neural_compressor/experimental/scheduler.py b/neural_compressor/experimental/scheduler.py index d0ef980caa7..a44462391f7 100644 --- a/neural_compressor/experimental/scheduler.py +++ b/neural_compressor/experimental/scheduler.py @@ -19,28 +19,21 @@ import os -from ..utils import logger -from .common import Model as NCModel +from ..conf.dotdict import DotDict, deep_set from ..model import BaseModel from ..model.model import get_model_fwk_name - -from .quantization import Quantization -from .pruning import Pruning -from .distillation import Distillation -from .model_conversion import ModelConversion -from .graph_optimization import Graph_Optimization +from ..utils import logger from .benchmark import Benchmark +from .common import Model as NCModel from .component import Component -from ..conf.dotdict import DotDict, deep_set +from .distillation import Distillation +from .graph_optimization import Graph_Optimization +from .model_conversion import ModelConversion +from .pruning import Pruning +from .quantization import Quantization + +SUPPORTED_COMPONENTS = [Quantization, Pruning, Graph_Optimization, ModelConversion, Benchmark, Component] -SUPPORTED_COMPONENTS = [ - Quantization, - Pruning, - Graph_Optimization, - ModelConversion, - Benchmark, - Component -] class Scheduler(object): """Scheduler for neural_compressor component pipeline execution. @@ -83,7 +76,6 @@ class Scheduler(object): scheduler.append(quantizer) opt_model = scheduler() opt_model.save() - """ def __init__(self): @@ -119,8 +111,7 @@ def append(self, *args): kwargs (named arguments): Reserved for interface extension. 
""" for item in args: - assert any([isinstance(item, supported_component) \ - for supported_component in SUPPORTED_COMPONENTS]) + assert any([isinstance(item, supported_component) for supported_component in SUPPORTED_COMPONENTS]) self.components.append(item) def __call__(self): @@ -131,20 +122,15 @@ def __call__(self): Returns: optimized model: best optimized model generated, otherwise return None - """ - assert self.model, "Scheduler class's model property should be set " \ - "before invoking this __call__() function" + assert self.model, "Scheduler class's model property should be set " "before invoking this __call__() function" model = self.model assert len(self.components) > 0 logger.info("Start sequential pipeline execution.") for i, component in enumerate(self.components): # print appropriate ordinal number representation (1st, 2nd, 3rd) for each step - ordinal = lambda n: "%d%s" % (n,"tsnrhtdd"[(n//10%10!=1)*(n%10<4)*n%10::4]) - logger.info("The {} step being executing is {}.".format( - ordinal(i), - repr(component).upper() - )) + ordinal = lambda n: "%d%s" % (n, "tsnrhtdd"[(n // 10 % 10 != 1) * (n % 10 < 4) * n % 10 :: 4]) + logger.info("The {} step being executing is {}.".format(ordinal(i), repr(component).upper())) component.model = model if self._train_func is not None: @@ -176,8 +162,7 @@ def combine(self, *args): # create component for the combination combination = [] for arg in args: - combination += [arg.__class__.__name__] \ - if arg.combination is None else arg.combination + combination += [arg.__class__.__name__] if arg.combination is None else arg.combination new_component = Component(combination=combination) self._combine_components(*args, dist_component=new_component) @@ -185,16 +170,14 @@ def combine(self, *args): def _combination_sanity_check(self, *args): """Check sanity of the combination.""" - TEMP_SUPPORTED_COMPONENTS = ['Quantization', 'Pruning', 'Distillation'] + TEMP_SUPPORTED_COMPONENTS = ["Quantization", "Pruning", "Distillation"] checked_components = [] for component in args: component_class = component.__class__.__name__ - if component_class in TEMP_SUPPORTED_COMPONENTS and \ - component_class not in checked_components : + if component_class in TEMP_SUPPORTED_COMPONENTS and component_class not in checked_components: checked_components.append(component_class) else: - logger.error("The combination of {} is not supported.".format( - checked_components + [component_class])) + logger.error("The combination of {} is not supported.".format(checked_components + [component_class])) def _combine_components(self, *args, dist_component=None): """Actual implementation of combine(). @@ -222,31 +205,36 @@ def _combine_components(self, *args, dist_component=None): for combine_component in args: # check if config is valid - assert combine_component.framework == framework, "Combined components should have " \ - "same framework. Detect different frameworks: {} and {} are used.".format( - framework, combine_component.framework ) - assert combine_component.cfg.device == device, "Combined components should have " \ - "same device. Detect different device: {} and {} are used.".format( - device, combine_component.cfg.device ) + assert combine_component.framework == framework, ( + "Combined components should have " + "same framework. Detect different frameworks: {} and {} are used.".format( + framework, combine_component.framework + ) + ) + assert ( + combine_component.cfg.device == device + ), "Combined components should have " "same device. 
Detect different device: {} and {} are used.".format( + device, combine_component.cfg.device + ) # sync configs component_name = combine_component.__class__.__name__.lower() - component_cfg = getattr(combine_component.cfg, - component_name, - None) - assert combine_component is not None, "Please ensure field {} is configured " \ - "in input yaml".format(component_name) + component_cfg = getattr(combine_component.cfg, component_name, None) + assert combine_component is not None, "Please ensure field {} is configured " "in input yaml".format( + component_name + ) # in case of key train/evaluation not exist, return an empty DotDict - component_train_cfg = component_cfg.get('train', DotDict()) + component_train_cfg = component_cfg.get("train", DotDict()) # TODO: Assumption here: train phase is defined inside component yaml field. # But eval is defined at root yaml field. - component_eval_cfg = combine_component.cfg.get('evaluation', DotDict()) - component_tuning_cfg = combine_component.cfg.get('tuning', DotDict()) - component_model_cfg = combine_component.cfg.get('model', DotDict()) - component_quantization_cfg = combine_component.cfg.get('quantization', DotDict()) \ - if component_name == 'quantization' else DotDict() + component_eval_cfg = combine_component.cfg.get("evaluation", DotDict()) + component_tuning_cfg = combine_component.cfg.get("tuning", DotDict()) + component_model_cfg = combine_component.cfg.get("model", DotDict()) + component_quantization_cfg = ( + combine_component.cfg.get("quantization", DotDict()) if component_name == "quantization" else DotDict() + ) combine_component._model = self._model if component_eval_cfg and component_train_cfg: @@ -290,12 +278,12 @@ def _combine_components(self, *args, dist_component=None): # sync to dist component dist_component_cfg = DotDict() if dist_component is not None: - deep_set(dist_component_cfg, 'train', train_cfg) - deep_set(dist_component_cfg, 'evaluation', eval_cfg) - deep_set(dist_component_cfg, 'tuning', tuning_cfg) - deep_set(dist_component_cfg, 'device', device) - deep_set(dist_component_cfg, 'model', model_cfg) - deep_set(dist_component_cfg, 'quantization', quantization_cfg) + deep_set(dist_component_cfg, "train", train_cfg) + deep_set(dist_component_cfg, "evaluation", eval_cfg) + deep_set(dist_component_cfg, "tuning", tuning_cfg) + deep_set(dist_component_cfg, "device", device) + deep_set(dist_component_cfg, "model", model_cfg) + deep_set(dist_component_cfg, "quantization", quantization_cfg) dist_component._model = self._model dist_component.framework = framework dist_component.cfg = dist_component_cfg @@ -322,10 +310,10 @@ def _sync_config(self, dist_config, src_config): if isinstance(dist_config[key], dict) and isinstance(src_config[key], dict): self._sync_config(dist_config[key], src_config[key]) elif dist_config[key] != src_config[key]: - logger.warning("Find different value {} and {} on key {}.".format( - dist_config[key], src_config[key], key) + \ - " Use first key-value ({}: {}) pair as default".format( - key, dist_config[key])) + logger.warning( + "Find different value {} and {} on key {}.".format(dist_config[key], src_config[key], key) + + " Use first key-value ({}: {}) pair as default".format(key, dist_config[key]) + ) # update src config to dist if dist is empty. elif not dist_config and src_config: dist_config.update(src_config) @@ -333,7 +321,7 @@ def _sync_config(self, dist_config, src_config): @property def model(self): """Getter of model. - + Returns: The model used in the Scheduler process. 
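A note on the compact ordinal lambda in Scheduler.__call__ above: the string "tsnrhtdd" packs the suffixes "th", "st", "nd", "rd" column-wise, and the index expression selects the right one (teens always map to "th"). Since enumerate() starts at 0, the first component is logged as the "0th" step unless i + 1 is passed instead:

ordinal = lambda n: "%d%s" % (n, "tsnrhtdd"[(n // 10 % 10 != 1) * (n % 10 < 4) * n % 10 :: 4])
print([ordinal(i) for i in (0, 1, 2, 3, 4, 11, 21)])
# ['0th', '1st', '2nd', '3rd', '4th', '11th', '21st']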
""" @@ -355,7 +343,6 @@ def model(self, user_model): Another corner case is slim model of tensorflow, be careful of the name of model configured in yaml file, make sure the name is in supported slim model list. - """ if not isinstance(user_model, BaseModel): logger.warning("Force convert framework model to neural_compressor model.") @@ -366,7 +353,7 @@ def model(self, user_model): @property def train_func(self): """Do not support get train_func.""" - assert False, 'Should not try to get the value of `train_func` attribute.' + assert False, "Should not try to get the value of `train_func` attribute." return None @train_func.setter @@ -387,7 +374,7 @@ def train_func(self, user_train_func): @property def eval_func(self): """Do not support get eval_func.""" - assert False, 'Should not try to get the value of `eval_func` attribute.' + assert False, "Should not try to get the value of `eval_func` attribute." return None @eval_func.setter @@ -398,7 +385,7 @@ def eval_func(self, user_eval_func): user_eval_func: This function takes "model" as input parameter and executes entire evaluation process with self contained metrics. If eval_func set, - an evaluation process must be triggered + an evaluation process must be triggered to make evaluation of the model executed. """ self._eval_func = user_eval_func diff --git a/neural_compressor/experimental/strategy/__init__.py b/neural_compressor/experimental/strategy/__init__.py index f4a137cb792..4a9c0bd8253 100644 --- a/neural_compressor/experimental/strategy/__init__.py +++ b/neural_compressor/experimental/strategy/__init__.py @@ -14,7 +14,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Intel Neural Compressor Strategy.""" from .strategy import EXP_STRATEGIES @@ -24,7 +23,7 @@ modules = glob.glob(join(dirname(__file__), "*.py")) for f in modules: - if isfile(f) and not f.startswith('__') and not f.endswith('__init__.py'): + if isfile(f) and not f.startswith("__") and not f.endswith("__init__.py"): __import__(basename(f)[:-3], globals(), locals(), level=1) __all__ = ["EXP_STRATEGIES"] diff --git a/neural_compressor/experimental/strategy/auto_mixed_precision.py b/neural_compressor/experimental/strategy/auto_mixed_precision.py index 76ef7c8bb7e..41a9c399730 100644 --- a/neural_compressor/experimental/strategy/auto_mixed_precision.py +++ b/neural_compressor/experimental/strategy/auto_mixed_precision.py @@ -14,31 +14,31 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """The auto-mixed precision strategy.""" import copy -import numpy as np from collections import OrderedDict -from .strategy import strategy_registry, TuneStrategy -from ...utils import logger -from .utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler +import numpy as np + +from ...utils import logger +from .strategy import TuneStrategy, strategy_registry +from .utils.tuning_sampler import FallbackTuningSampler, OpTypeWiseTuningSampler from .utils.tuning_structs import OpTuningConfig @strategy_registry class AutoMixedPrecisionTuneStrategy(TuneStrategy): """Tuning strategy for auto mixed precision.""" - + def next_tune_cfg(self): """Generate the next tuning config. - + Tuning configurations are generated according to the following rules: 1. 
First, it tries to convert all ops into target date type as many as possible. - 2. If the accuracy does not meets the requirements, it starts the stage of fallback + 2. If the accuracy does not meets the requirements, it starts the stage of fallback which converts ops into higher precision. - + Yields: tune_config (dict): A dict containing the tuning configuration. """ @@ -46,65 +46,79 @@ def next_tune_cfg(self): # filter quantization dtype # TODO align with the old mixed-precison - target_dtypes = self.cfg.graph_optimization.precisions if self.cfg.graph_optimization \ + target_dtypes = ( + self.cfg.graph_optimization.precisions + if self.cfg.graph_optimization else self.cfg.mixed_precision.precisions - target_dtypes = list(set(target_dtypes) - set(['fp32'])) + ) + target_dtypes = list(set(target_dtypes) - set(["fp32"])) tuning_space = self.tuning_space initial_op_tuning_cfg = {} for item in tuning_space.root_item.options: - if item.item_type == 'op': + if item.item_type == "op": op_name, op_type = item.name - initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space) + initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, "fp32", tuning_space) if not target_dtypes: - target_dtypes = ['bf16'] + target_dtypes = ["bf16"] # step1. target_dtype AMAP, collect the ops that support target_dtype bf16_items_name = [] op_tuning_cfg = {} for idx, target_dtype in enumerate(target_dtypes): bf16_items = tuning_space.query_items_by_quant_mode(target_dtype) - if len(bf16_items) == 0 and \ - not (idx == len(target_dtypes) - 1 and len(bf16_items_name) == 0): + if len(bf16_items) == 0 and not (idx == len(target_dtypes) - 1 and len(bf16_items_name) == 0): continue bf16_items_name = [item.name for item in bf16_items] op_tuning_cfg = deepcopy(initial_op_tuning_cfg) for op_name_type in bf16_items_name: - op_tuning_cfg[op_name_type] = \ - OpTuningConfig(op_name_type[0], op_name_type[1], target_dtype, tuning_space) + op_tuning_cfg[op_name_type] = OpTuningConfig( + op_name_type[0], op_name_type[1], target_dtype, tuning_space + ) calib_sampling_size = 1 - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size yield op_tuning_cfg # step2. 
fallback - target_dtype = 'fp32' + target_dtype = "fp32" fallback_items_name_lst = bf16_items_name[::-1] if fallback_items_name_lst: logger.info(f"Start to fallback op to {target_dtype} one by one.") self._fallback_started() op_dtypes = OrderedDict(zip(fallback_items_name_lst, [target_dtype] * len(fallback_items_name_lst))) initial_op_tuning_cfg = deepcopy(op_tuning_cfg) - fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=False) + fallback_sampler = FallbackTuningSampler( + tuning_space, + tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, + accumulate=False, + ) op_fallback_acc_impact = OrderedDict() for op_index, op_tuning_cfg in enumerate(fallback_sampler): - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size yield op_tuning_cfg acc, _ = self.last_tune_result op_fallback_acc_impact[fallback_items_name_lst[op_index]] = acc # do accumulated fallback according to the order in the previous stage if len(op_fallback_acc_impact) > 0: - ordered_ops = sorted(op_fallback_acc_impact.keys(), key=lambda key: op_fallback_acc_impact[key], - reverse=self.higher_is_better) + ordered_ops = sorted( + op_fallback_acc_impact.keys(), + key=lambda key: op_fallback_acc_impact[key], + reverse=self.higher_is_better, + ) op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(fallback_items_name_lst))) logger.info("Start to accumulate fallback to {target_dtype}.") initial_op_tuning_cfg = deepcopy(op_tuning_cfg) - fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=True) + fallback_sampler = FallbackTuningSampler( + tuning_space, + tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, + accumulate=True, + ) for op_tuning_cfg in fallback_sampler: - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size yield op_tuning_cfg def traverse(self): @@ -119,15 +133,14 @@ def traverse(self): trials_count += 1 tuning_history = self._find_tuning_history(tune_cfg) if tuning_history and trials_count < self.cfg.tuning.exit_policy.max_trials: - self.last_tune_result = tuning_history['last_tune_result'] - self.best_tune_result = tuning_history['best_tune_result'] + self.last_tune_result = tuning_history["last_tune_result"] + self.best_tune_result = tuning_history["best_tune_result"] logger.warn("Find evaluated tuning config, skip.") continue logger.debug("Dump current mixed precision configuration:") logger.debug(tune_cfg) - self.last_qmodel = self.adaptor.quantize( - tune_cfg, self.model, self.calib_dataloader, self.q_func) + self.last_qmodel = self.adaptor.quantize(tune_cfg, self.model, self.calib_dataloader, self.q_func) assert self.last_qmodel # Return the last quantized model as a result. if performance only. if self.cfg.tuning.exit_policy.performance_only: @@ -152,5 +165,3 @@ def traverse(self): if need_stop: break - - diff --git a/neural_compressor/experimental/strategy/basic.py b/neural_compressor/experimental/strategy/basic.py index 1e906b61027..c3dd734a551 100644 --- a/neural_compressor/experimental/strategy/basic.py +++ b/neural_compressor/experimental/strategy/basic.py @@ -16,26 +16,28 @@ # limitations under the License. 
"""The basic tuning strategy.""" import copy -import numpy as np from collections import OrderedDict -from .strategy import strategy_registry, TuneStrategy -from ...utils import logger -from .utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler, ModelWiseTuningSampler -from .utils.tuning_structs import OpTuningConfig +import numpy as np + +from ...utils import logger +from .strategy import TuneStrategy, strategy_registry from .utils.constant import TUNING_ITEMS_LST +from .utils.tuning_sampler import FallbackTuningSampler, ModelWiseTuningSampler, OpTypeWiseTuningSampler +from .utils.tuning_structs import OpTuningConfig + @strategy_registry class BasicTuneStrategy(TuneStrategy): """The basic tuning strategy. - + There are three stages executed by Basic strategy sequentially, and the tuning process ends once the condition meets the exit policy. """ def next_tune_cfg(self): """Generate and yield the next tuning config with below order. - + 1. OP Type Wise Tuning: tries to quantize the OPs as many as possible and traverse all OP type wise tuning configs 2. Fallback OP One by One: it performs high-precision OP (FP32, BF16 ...) @@ -49,22 +51,24 @@ def next_tune_cfg(self): tune_config (dict): A dict containing the tuning configuration for quantization. """ from copy import deepcopy + tuning_space = self.tuning_space - calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options + calib_sampling_size_lst = tuning_space.root_item.get_option_by_name("calib_sampling_size").options for calib_sampling_size in calib_sampling_size_lst: # Initialize the tuning config for each op according to the quantization approach. op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() # Optype-wise tuning tuning items: the algorithm/scheme/granularity of activation(weight) early_stop_tuning = False stage1_cnt = 0 - quant_ops = quant_mode_wise_items.get('static', []) - quant_ops += quant_mode_wise_items.get('dynamic', []) + quant_ops = quant_mode_wise_items.get("static", []) + quant_ops += quant_mode_wise_items.get("dynamic", []) stage1_max = 1e9 # TODO set a more appropriate value op_type_wise_tuning_sampler = OpTypeWiseTuningSampler( - tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg) + tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg + ) for index, op_tuning_cfg in enumerate(op_type_wise_tuning_sampler): logger.debug(f"[OP TYPE WISE STAGE], Trial {index + 1}") - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size # Apply all recipes, if not got the qmodel that meet the requirements, discard it. if index == 1 and not self.applied_all_recipes_flag: logger.info("Apply all recipes.") @@ -75,18 +79,21 @@ def next_tune_cfg(self): logger.info("Early stopping the stage 1.") break yield op_tuning_cfg - + # Apply all recipes, if not got the qmodel that meet the requirements, discard it. 
if stage1_cnt == 1 and not self.applied_all_recipes_flag: logger.info("Apply all recipes.") self.applied_all_recipes_flag = True yield self.apply_all_tuning_recipes(deepcopy(self.cur_best_tuning_cfg)) - + # Fallback the ops supported both static and dynamic from static to dynamic # Tuning items: None - if self.cfg.quantization.approach == 'post_training_auto_quant': - static_dynamic_items = [item for item in tuning_space.query_items_by_quant_mode('static') if - item in tuning_space.query_items_by_quant_mode('dynamic')] + if self.cfg.quantization.approach == "post_training_auto_quant": + static_dynamic_items = [ + item + for item in tuning_space.query_items_by_quant_mode("static") + if item in tuning_space.query_items_by_quant_mode("dynamic") + ] if static_dynamic_items: logger.info("Fallback all ops that support both dynamic and static to dynamic.") else: @@ -95,8 +102,9 @@ def next_tune_cfg(self): new_op_tuning_cfg = deepcopy(self.cur_best_tuning_cfg) for item in static_dynamic_items: new_op_tuning_cfg[item.name] = self._initial_dynamic_cfg_based_on_static_cfg( - new_op_tuning_cfg[item.name]) - new_op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + new_op_tuning_cfg[item.name] + ) + new_op_tuning_cfg["calib_sampling_size"] = calib_sampling_size yield new_op_tuning_cfg logger.info("Apply recipe one by one.") @@ -105,57 +113,68 @@ def next_tune_cfg(self): best_op_tuning_cfg_stage1 = deepcopy(self.cur_best_tuning_cfg) # Fallback - for target_dtype in ['bf16', 'fp32']: + for target_dtype in ["bf16", "fp32"]: target_type_lst = set(tuning_space.query_items_by_quant_mode(target_dtype)) fallback_items_lst = [item for item in quant_ops if item in target_type_lst] if fallback_items_lst: logger.info(f"Start to fallback op to {target_dtype} one by one.") self._fallback_started() - fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up + fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up op_dtypes = OrderedDict(zip(fallback_items_name_lst, [target_dtype] * len(fallback_items_name_lst))) initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) - fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=False) + fallback_sampler = FallbackTuningSampler( + tuning_space, + tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, + accumulate=False, + ) op_fallback_acc_impact = OrderedDict() for op_index, op_tuning_cfg in enumerate(fallback_sampler): - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size yield op_tuning_cfg acc, _ = self.last_tune_result op_fallback_acc_impact[fallback_items_name_lst[op_index]] = acc - # Fallback OPs accumulated according to the order in the previous stage if len(op_fallback_acc_impact) > 0: - ordered_ops = sorted(op_fallback_acc_impact.keys(), - key=lambda key: op_fallback_acc_impact[key], - reverse=self.higher_is_better) + ordered_ops = sorted( + op_fallback_acc_impact.keys(), + key=lambda key: op_fallback_acc_impact[key], + reverse=self.higher_is_better, + ) op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(fallback_items_name_lst))) logger.info(f"Start to accumulate fallback to {target_dtype}.") initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) - fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - 
initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=True) + fallback_sampler = FallbackTuningSampler( + tuning_space, + tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, + accumulate=True, + ) for op_tuning_cfg in fallback_sampler: - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size yield op_tuning_cfg - - def _initial_dynamic_cfg_based_on_static_cfg(self, op_static_cfg:OpTuningConfig): + + def _initial_dynamic_cfg_based_on_static_cfg(self, op_static_cfg: OpTuningConfig): op_state = op_static_cfg.get_state() op_name = op_static_cfg.op_name op_type = op_static_cfg.op_type op_name_type = (op_name, op_type) - op_quant_mode = 'dynamic' + op_quant_mode = "dynamic" tuning_space = self.tuning_space dynamic_state = {} - for att in ['weight', 'activation']: - if att not in op_state: continue + for att in ["weight", "activation"]: + if att not in op_state: + continue # Add dtype full_path = self.tuning_space.get_op_default_path_by_pattern(op_name_type, op_quant_mode) - dynamic_state[att + '_dtype'] = self.tuning_space.ops_data_type[op_name_type][full_path[att]] + dynamic_state[att + "_dtype"] = self.tuning_space.ops_data_type[op_name_type][full_path[att]] for method_name, method_val in op_state[att].items(): att_and_method_name = (att, method_name) - if att_and_method_name not in TUNING_ITEMS_LST: continue + if att_and_method_name not in TUNING_ITEMS_LST: + continue if tuning_space.query_item_option(op_name_type, full_path[att], att_and_method_name, method_val): dynamic_state[att_and_method_name] = method_val else: diff --git a/neural_compressor/experimental/strategy/bayesian.py b/neural_compressor/experimental/strategy/bayesian.py index 3f684f2c102..547d69c20a3 100644 --- a/neural_compressor/experimental/strategy/bayesian.py +++ b/neural_compressor/experimental/strategy/bayesian.py @@ -14,31 +14,30 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
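Both the auto-mixed-precision and basic strategies above order their accumulated fallback stage by the accuracy observed when each op was fallen back on its own. Reduced to its core, that ordering is a sort keyed on the recorded accuracies (the values below are invented):

# The accumulated-fallback ordering in miniature: sort ops by the accuracy recorded
# during the one-by-one fallback stage (made-up numbers).
op_fallback_acc_impact = {
    ("conv1", "Conv2d"): 0.71,
    ("fc", "Linear"): 0.74,
    ("conv2", "Conv2d"): 0.69,
}
higher_is_better = True
ordered_ops = sorted(
    op_fallback_acc_impact.keys(),
    key=lambda key: op_fallback_acc_impact[key],
    reverse=higher_is_better,
)
print(ordered_ops)  # [('fc', 'Linear'), ('conv1', 'Conv2d'), ('conv2', 'Conv2d')]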
- """The Bayesian tuning strategy.""" import warnings +from copy import deepcopy + import numpy as np from scipy.optimize import minimize -from sklearn.gaussian_process.kernels import Matern from sklearn.gaussian_process import GaussianProcessRegressor - -from copy import deepcopy +from sklearn.gaussian_process.kernels import Matern from ...utils import logger -from .strategy import strategy_registry, TuneStrategy +from .strategy import TuneStrategy, strategy_registry from .utils.tuning_sampler import OpWiseTuningSampler @strategy_registry class BayesianTuneStrategy(TuneStrategy): """The Bayesian tuning strategy.""" - - def __init__(self, model, conf, q_dataloader, q_func=None, eval_dataloader=None, - eval_func=None, dicts=None, q_hooks=None): + + def __init__( + self, model, conf, q_dataloader, q_func=None, eval_dataloader=None, eval_func=None, dicts=None, q_hooks=None + ): """Init the BaySian tuning strategy.""" - super().__init__(model, conf, q_dataloader, q_func, eval_dataloader, - eval_func, dicts, q_hooks) + super().__init__(model, conf, q_dataloader, q_func, eval_dataloader, eval_func, dicts, q_hooks) self.bayes_opt = None def __getstate__(self): @@ -48,57 +47,55 @@ def __getstate__(self): dict: Saved dict for resuming """ for history in self.tuning_history: - if self._same_yaml(history['cfg'], self.cfg): - history['bayes_opt'] = self.bayes_opt + if self._same_yaml(history["cfg"], self.cfg): + history["bayes_opt"] = self.bayes_opt save_dict = super().__getstate__() return save_dict def _params_to_tune_configs(self, params): op_tuning_cfg = {} - calib_sampling_size_lst = self.tuning_space.root_item.get_option_by_name('calib_sampling_size').options + calib_sampling_size_lst = self.tuning_space.root_item.get_option_by_name("calib_sampling_size").options for op_name_type, configs in self.op_configs.items(): if len(configs) == 1: op_tuning_cfg[op_name_type] = configs[0] else: op_tuning_cfg[op_name_type] = configs[min(len(configs) - 1, int(params[op_name_type[0]]))] if len(calib_sampling_size_lst) > 1: - calib_sampling_size = calib_sampling_size_lst[min(len(configs) - 1, int(params['calib_sampling_size']))] + calib_sampling_size = calib_sampling_size_lst[min(len(configs) - 1, int(params["calib_sampling_size"]))] else: calib_sampling_size = calib_sampling_size_lst[0] - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size return op_tuning_cfg def next_tune_cfg(self): """Generate the next tuning config according to bayesian search algorithm. - + This strategy comes from the Bayesian optimization package and changed it to a discrete version. - It uses Gaussian processes to define the prior/posterior distribution over the black-box - function with the tuning history and then finds the tuning configuration that maximizes + It uses Gaussian processes to define the prior/posterior distribution over the black-box + function with the tuning history and then finds the tuning configuration that maximizes the expected improvement. Returns: tune_config (dict): A dict containing the tuning configuration for quantization. 
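In _params_to_tune_configs() above, each continuous value sampled by the optimizer is mapped back to a discrete per-op choice by truncating to an integer and clamping to the last valid index:

# How a continuous sample from the Bayesian optimizer becomes a discrete config choice.
def to_index(value, num_options):
    return min(num_options - 1, int(value))

configs = ["cfg_a", "cfg_b", "cfg_c"]  # candidate configs for one op
for sampled in (0.2, 1.9, 3.0):        # pbounds for this op would be (0, 3)
    print(sampled, "->", configs[to_index(sampled, len(configs))])
# 0.2 -> cfg_a, 1.9 -> cfg_b, 3.0 -> cfg_c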
""" params = None - pbounds = {} + pbounds = {} tuning_space = self.tuning_space - calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options + calib_sampling_size_lst = tuning_space.root_item.get_option_by_name("calib_sampling_size").options op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() - op_wise_pool = OpWiseTuningSampler(tuning_space, [], [], - op_item_dtype_dict, initial_op_tuning_cfg) + op_wise_pool = OpWiseTuningSampler(tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg) self.op_configs = op_wise_pool.get_opwise_candidate() for op_name_type, configs in self.op_configs.items(): if len(configs) > 1: pbounds[op_name_type[0]] = (0, len(configs)) if len(calib_sampling_size_lst) > 1: - pbounds['calib_sampling_size'] = (0, len(calib_sampling_size_lst)) + pbounds["calib_sampling_size"] = (0, len(calib_sampling_size_lst)) if len(pbounds) == 0: yield self._params_to_tune_configs(params) return if self.bayes_opt is None: - self.bayes_opt = BayesianOptimization( - pbounds=pbounds, random_seed=self.cfg.tuning.random_seed) + self.bayes_opt = BayesianOptimization(pbounds=pbounds, random_seed=self.cfg.tuning.random_seed) while True: params = self.bayes_opt.gen_next_params() logger.debug("Dump current bayesian params:") @@ -110,13 +107,14 @@ def next_tune_cfg(self): logger.debug("Find registered params, skip it.") pass + # Util part # Bayesian opt acq function def acq_max(ac, gp, y_max, bounds, random_seed, n_warmup=10000, n_iter=10): """Find the maximum of the acquisition function parameters. - + Args: ac: The acquisition function object that return its point-wise value. gp: A gaussian process fitted to the relevant data. @@ -125,31 +123,28 @@ def acq_max(ac, gp, y_max, bounds, random_seed, n_warmup=10000, n_iter=10): random_seed: instance of np.RandomState random number generator n_warmup: number of times to randomly sample the acquisition function n_iter: number of times to run scipy.minimize - + Returns: x_max: The arg max of the acquisition function. """ # Warm up with random points - x_tries = np.random.uniform(bounds[:, 0], bounds[:, 1], - size=(n_warmup, bounds.shape[0])) + x_tries = np.random.uniform(bounds[:, 0], bounds[:, 1], size=(n_warmup, bounds.shape[0])) ys = ac(x_tries, gp=gp, y_max=y_max) x_max = x_tries[ys.argmax()] max_acq = ys.max() # Explore the parameter space more thoroughly - x_seeds = np.random.uniform(bounds[:, 0], bounds[:, 1], - size=(n_iter, bounds.shape[0])) + x_seeds = np.random.uniform(bounds[:, 0], bounds[:, 1], size=(n_iter, bounds.shape[0])) for x_try in x_seeds: # Find the minimum of minus the acquisition function - res = minimize(lambda x: -ac(x.reshape(1, -1), gp=gp, y_max=y_max), - x_try.reshape(1, -1), - bounds=bounds, - method="L-BFGS-B") + res = minimize( + lambda x: -ac(x.reshape(1, -1), gp=gp, y_max=y_max), x_try.reshape(1, -1), bounds=bounds, method="L-BFGS-B" + ) # See if success if not res.success: continue - + if isinstance(res.fun, float): res.fun = np.array([res.fun]) # Store it if better than previous minimum(maximum). @@ -166,16 +161,17 @@ def _hashable(x): """Ensure that an point is hashable by a python dict.""" return tuple(map(float, x)) + # Target space part class TargetSpace(object): """Holds the param-space coordinates (X) and target values (Y). - + Allows for constant-time appends while ensuring no duplicates are added. """ def __init__(self, pbounds, random_seed=9527): """Construct a TargetSpace. 
- + Args: target_func (function): Function to be maximized. pbounds (dict): Dictionary with parameters names as keys and a tuple with minimum and maximum values. @@ -186,10 +182,7 @@ def __init__(self, pbounds, random_seed=9527): names = list(pbounds.keys()) self._keys = deepcopy(names) # Create an array with parameters bounds - self._bounds = np.array( - [pbounds[name] for name in names], - dtype=np.float32 - ) + self._bounds = np.array([pbounds[name] for name in names], dtype=np.float32) # preallocated memory for X and Y points self._params = np.empty(shape=(0, self.dim)) @@ -241,18 +234,18 @@ def params_to_array(self, params): """Generate an array from params. Args: - params (Dict): The dict contains keys in `self.keys`, and + params (Dict): The dict contains keys in `self.keys`, and corresponding param. Returns: - np.array: An array contains all params. + np.array: An array contains all params. """ try: assert set(params) == set(self.keys) except AssertionError: raise ValueError( - "Parameters' keys ({}) do ".format(list(params.keys())) + - "not match the expected set of keys ({}).".format(self.keys) + "Parameters' keys ({}) do ".format(list(params.keys())) + + "not match the expected set of keys ({}).".format(self.keys) ) return np.asarray([params[key] for key in self.keys]) @@ -269,8 +262,8 @@ def array_to_params(self, x): assert len(x) == len(self.keys) except AssertionError: raise ValueError( - "Size of array ({}) is different than the ".format(len(x)) + - "expected number of parameters ({}).".format(len(self.keys)) + "Size of array ({}) is different than the ".format(len(x)) + + "expected number of parameters ({}).".format(len(self.keys)) ) return dict(zip(self.keys, x)) @@ -285,26 +278,26 @@ def _as_array(self, x): assert x.size == self.dim except AssertionError: raise ValueError( - "Size of array ({}) is different than the ".format(len(x)) + - "expected number of parameters ({}).".format(len(self.keys)) + "Size of array ({}) is different than the ".format(len(x)) + + "expected number of parameters ({}).".format(len(self.keys)) ) return x def register(self, params, target): """Append a point and its target value to the known data. - + Runs in amortized constant time. - + Args: params (ndarray): a single point, with len(params) == self.dim target (float): target function value - + Raises: KeyError: if the point is not unique """ x = self._as_array(params) if x in self: - raise KeyError('Params point {} is not unique'.format(x)) + raise KeyError("Params point {} is not unique".format(x)) # Insert data into unique dictionary self._cache[_hashable(x.ravel())] = target @@ -314,10 +307,10 @@ def register(self, params, target): def get_target(self, params): """Get the target value of params. - + Args: params (ndarray): a single point, with len(params) == self.dim - + Returns: target (float): target function value. """ @@ -327,26 +320,20 @@ def get_target(self, params): def random_sample(self): """Create random points within the bounds of the space. 
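A compact stand-in for the duplicate check that TargetSpace.register performs: points are keyed by a hashable tuple, so re-registering the same parameters raises. The class and parameter names below are invented for illustration.

class MiniSpace:
    def __init__(self, keys):
        self.keys = list(keys)
        self._cache = {}

    def register(self, params, target):
        # params_to_array + _hashable in one step: a fixed key order, cast to floats.
        x = tuple(float(params[k]) for k in self.keys)
        if x in self._cache:
            raise KeyError(f"Params point {x} is not unique")
        self._cache[x] = target

space = MiniSpace(["calib_sampling_size", "conv1"])
space.register({"calib_sampling_size": 0.0, "conv1": 2.0}, target=0.761)
try:
    space.register({"conv1": 2.0, "calib_sampling_size": 0.0}, target=0.762)
except KeyError as err:
    print("duplicate rejected:", err)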
- + Returns: data (ndarray): [num x dim] array points with dimensions corresponding to `self._keys` """ # TODO: support integer, category, and basic scipy.optimize constraints data = np.empty((1, self.dim)) for col, (lower, upper) in enumerate(self._bounds): - data.T[col] = np.random.uniform( # pylint: disable=unsupported-assignment-operation - lower, upper, size=1) + data.T[col] = np.random.uniform(lower, upper, size=1) # pylint: disable=unsupported-assignment-operation return data.ravel() def max(self): """Get maximum target value found and corresponding parametes.""" try: - res = { - 'target': self.target.max(), - 'params': dict( - zip(self.keys, self.params[self.target.argmax()]) - ) - } + res = {"target": self.target.max(), "params": dict(zip(self.keys, self.params[self.target.argmax()]))} except ValueError: res = {} return res @@ -355,26 +342,24 @@ def res(self): """Get all target values found and corresponding parametes.""" params = [dict(zip(self.keys, p)) for p in self.params] - return [ - {"target": target, "params": param} - for target, param in zip(self.target, params) - ] + return [{"target": target, "params": param} for target, param in zip(self.target, params)] + # Tuning part -class BayesianOptimization(): +class BayesianOptimization: """The class for bayesian optimization. - - This class takes the parameters bounds in order to find which values for + + This class takes the parameters bounds in order to find which values for the parameters yield the maximum value using bayesian optimization. """ - + def __init__(self, pbounds, random_seed=9527, verbose=2): """Init bayesian optimization. Args: pbounds (dict): Dictionary with parameters names as keys and a tuple with minimum and maximum values. - random_seed (int, optional): The seed for random searching. Default to 9527. + random_seed (int, optional): The seed for random searching. Default to 9527. verbose (int, optional): The level of verbosity. Default to 2. """ self._random_seed = random_seed @@ -431,7 +416,7 @@ def suggest(self): gp=self._gp, y_max=self._space.target.max(), bounds=self._space.bounds, - random_seed=self._random_seed + random_seed=self._random_seed, ) return self._space.array_to_params(suggestion) diff --git a/neural_compressor/experimental/strategy/exhaustive.py b/neural_compressor/experimental/strategy/exhaustive.py index 6cbfdd985bd..318ed8edf6b 100644 --- a/neural_compressor/experimental/strategy/exhaustive.py +++ b/neural_compressor/experimental/strategy/exhaustive.py @@ -15,31 +15,33 @@ # See the License for the specific language governing permissions and # limitations under the License. """The exhaustive tuning strategy.""" -from .strategy import strategy_registry, TuneStrategy +from .strategy import TuneStrategy, strategy_registry from .utils.tuning_sampler import OpWiseTuningSampler + @strategy_registry class ExhaustiveTuneStrategy(TuneStrategy): """The exhaustive tuning strategy.""" def next_tune_cfg(self): """Generate and yield the next tuning config using exhaustive search in tuning space. - + It sequentially traverse all possible quantization tuning configurations in a tuning space. From the perspective of the impact on performance, we currently only traverse all possible quantization tuning configs. Same reason as Bayesian, fallback datatypes are not included for now. - + Returns: tune_config (dict): A dict containing the tuning configuration for quantization. 
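The exhaustive traversal described in the docstring above boils down to a Cartesian product over per-op candidate configs, paired with each calibration sampling size. A standalone sketch with made-up option values:

import itertools

op_options = {
    ("conv1", "Conv2D"): ["int8_per_channel", "int8_per_tensor", "fp32"],
    ("fc", "MatMul"): ["int8", "fp32"],
}
calib_sampling_sizes = [100, 500]

def exhaustive_cfgs():
    names = list(op_options)
    for calib_size in calib_sampling_sizes:
        # Visit every combination of per-op options exactly once.
        for combo in itertools.product(*(op_options[n] for n in names)):
            cfg = dict(zip(names, combo))
            cfg["calib_sampling_size"] = calib_size
            yield cfg

print(sum(1 for _ in exhaustive_cfgs()))  # 2 * 3 * 2 = 12 candidate configs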
""" tuning_space = self.tuning_space - calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options + calib_sampling_size_lst = tuning_space.root_item.get_option_by_name("calib_sampling_size").options for calib_sampling_size in calib_sampling_size_lst: op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() - op_wise_tuning_sampler = OpWiseTuningSampler(tuning_space, [], [], - op_item_dtype_dict, initial_op_tuning_cfg) + op_wise_tuning_sampler = OpWiseTuningSampler( + tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg + ) for op_tuning_cfg in op_wise_tuning_sampler: - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size yield op_tuning_cfg return diff --git a/neural_compressor/experimental/strategy/mse.py b/neural_compressor/experimental/strategy/mse.py index 55955774e74..6a85752ec19 100644 --- a/neural_compressor/experimental/strategy/mse.py +++ b/neural_compressor/experimental/strategy/mse.py @@ -15,17 +15,19 @@ # See the License for the specific language governing permissions and # limitations under the License. """MSE tuning strategy.""" +from collections import OrderedDict from copy import deepcopy +from time import time +from typing import Any, Dict, List + import numpy as np -from collections import OrderedDict -from typing import Dict, Any, List -from .strategy import strategy_registry, TuneStrategy -from ...utils import logger -from time import time -from .utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler +from ...utils import logger +from .strategy import TuneStrategy, strategy_registry +from .utils.tuning_sampler import FallbackTuningSampler, OpTypeWiseTuningSampler from .utils.tuning_structs import OpTuningConfig + @strategy_registry class MSETuneStrategy(TuneStrategy): """The tuning strategy using MSE policy in tuning space. @@ -34,15 +36,14 @@ class MSETuneStrategy(TuneStrategy): the best model-wise tuning configuration. It then calculates the MSE (Mean Squared Error) for each OP, sorts those OPs according to the MSE value, and performs the op-wise fallback in this order. """ - - def __init__(self, model, conf, q_dataloader, q_func=None, eval_dataloader=None, - eval_func=None, dicts=None, q_hooks=None): + + def __init__( + self, model, conf, q_dataloader, q_func=None, eval_dataloader=None, eval_func=None, dicts=None, q_hooks=None + ): """Init an mse tuning strategy.""" - super().__init__(model, conf, q_dataloader, q_func, eval_dataloader, - eval_func, dicts, q_hooks) + super().__init__(model, conf, q_dataloader, q_func, eval_dataloader, eval_func, dicts, q_hooks) self.ordered_ops = None - def __getstate__(self): """Magic method for pickle saving. 
@@ -50,8 +51,8 @@ def __getstate__(self): save_dict: Saved dict for resuming """ for history in self.tuning_history: - if self._same_yaml(history['cfg'], self.cfg): - history['ordered_ops'] = self.ordered_ops + if self._same_yaml(history["cfg"], self.cfg): + history["ordered_ops"] = self.ordered_ops save_dict = super().__getstate__() return save_dict @@ -67,79 +68,95 @@ def _mse_metric_gap(self, fp32_tensor, dequantize_tensor): dequantize_max = np.max(dequantize_tensor) dequantize_min = np.min(dequantize_tensor) fp32_tensor = (fp32_tensor - fp32_min) / (fp32_max - fp32_min) - dequantize_tensor = (dequantize_tensor - dequantize_min) / \ - (dequantize_max - dequantize_min) + dequantize_tensor = (dequantize_tensor - dequantize_min) / (dequantize_max - dequantize_min) diff_tensor = fp32_tensor - dequantize_tensor - euclidean_dist = np.sum(diff_tensor ** 2) + euclidean_dist = np.sum(diff_tensor**2) return euclidean_dist / fp32_tensor.size - def mse_impact_lst(self, op_list: List, fp32_model, best_qmodel): + def mse_impact_lst(self, op_list: List, fp32_model, best_qmodel): """Calculate and generate the MSE impact list. Args: op_list (List[Tuple(str, str)]): List of ops in format of [(op_name, op_type), ...]. fp32_model (Model): The original FP32 model before quantization. current_best_model (Model): The currently best quantized model. - + Returns: ordered_op_name_types (List[Tuple(str, str)]): The sorted list of ops by its MSE - impaction, in the same format of 'op_list'. + impaction, in the same format of 'op_list'. """ - op_name_lst = [element[0] for element in op_list ] + op_name_lst = [element[0] for element in op_list] op_mapping = {} - for (op_name, op_type) in list(op_list): + for op_name, op_type in list(op_list): op_mapping[op_name] = (op_name, op_type) current_best_tune_cfg = self._tune_cfg_converter(self.cur_best_tuning_cfg) - fp32_dump_content = self.adaptor.inspect_tensor(fp32_model, - self.calib_dataloader, op_name_lst, [1], inspect_type='activation', - save_to_disk=True, save_path="./nc_workspace/", - quantization_cfg=current_best_tune_cfg) - fp32_tensor_dict = fp32_dump_content['activation'][0] + fp32_dump_content = self.adaptor.inspect_tensor( + fp32_model, + self.calib_dataloader, + op_name_lst, + [1], + inspect_type="activation", + save_to_disk=True, + save_path="./nc_workspace/", + quantization_cfg=current_best_tune_cfg, + ) + fp32_tensor_dict = fp32_dump_content["activation"][0] best_qmodel = self.adaptor.quantize(current_best_tune_cfg, self.model, self.calib_dataloader, self.q_func) - quant_dump_content = self.adaptor.inspect_tensor(best_qmodel, - self.calib_dataloader, op_name_lst, [1], inspect_type='activation', - save_to_disk=True, save_path="./nc_workspace/", - quantization_cfg=current_best_tune_cfg) - dequantize_tensor_dict = quant_dump_content['activation'][0] + quant_dump_content = self.adaptor.inspect_tensor( + best_qmodel, + self.calib_dataloader, + op_name_lst, + [1], + inspect_type="activation", + save_to_disk=True, + save_path="./nc_workspace/", + quantization_cfg=current_best_tune_cfg, + ) + dequantize_tensor_dict = quant_dump_content["activation"][0] ops_mse = { op: self._mse_metric_gap( - list(fp32_tensor_dict[op].values())[0], - list(dequantize_tensor_dict[op].values())[0]) for op in fp32_tensor_dict} + list(fp32_tensor_dict[op].values())[0], list(dequantize_tensor_dict[op].values())[0] + ) + for op in fp32_tensor_dict + } ordered_op_names = sorted(ops_mse.keys(), key=lambda key: ops_mse[key], reverse=self.higher_is_better) - + ordered_op_name_types = 
[op_mapping[name] for name in ordered_op_names] return ordered_op_name_types - def next_tune_cfg(self): """Generate and yield the next tuning config. - + Returns: tune_config (dict): A dict containing the tuning configuration for quantization. """ tuning_space = self.tuning_space - calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options + calib_sampling_size_lst = tuning_space.root_item.get_option_by_name("calib_sampling_size").options for calib_sampling_size in calib_sampling_size_lst: op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() - # Optype-wise tuning + # Optype-wise tuning early_stop_tuning = True - stage1_cnt = 0 - int8_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else [] - int8_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] + stage1_cnt = 0 + int8_ops = quant_mode_wise_items["static"] if "static" in quant_mode_wise_items else [] + int8_ops += quant_mode_wise_items["dynamic"] if "dynamic" in quant_mode_wise_items else [] stage1_max = min(5, len(int8_ops)) # TODO set a more appropriate value - op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], - op_item_dtype_dict, initial_op_tuning_cfg) + op_wise_tuning_sampler = OpTypeWiseTuningSampler( + tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg + ) for op_tuning_cfg in op_wise_tuning_sampler: stage1_cnt += 1 if early_stop_tuning and stage1_cnt > stage1_max: logger.info("Early stopping the stage 1.") break - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size yield op_tuning_cfg # Fallback the ops supported both static and dynamic from static to dynamic - static_dynamic_items = [item for item in tuning_space.query_items_by_quant_mode('static') if - item in tuning_space.query_items_by_quant_mode('dynamic')] + static_dynamic_items = [ + item + for item in tuning_space.query_items_by_quant_mode("static") + if item in tuning_space.query_items_by_quant_mode("dynamic") + ] if static_dynamic_items: logger.info("Fallback all ops that support both dynamic and static to dynamic.") else: @@ -147,21 +164,22 @@ def next_tune_cfg(self): def dynamic_op_tuning_cfg_from_static(op_tuning_cfg: OpTuningConfig): new_op_tuning_cfg = deepcopy(op_tuning_cfg) - new_op_tuning_cfg.op_quant_mode = 'dynamic' + new_op_tuning_cfg.op_quant_mode = "dynamic" return new_op_tuning_cfg new_op_tuning_cfg = deepcopy(self.cur_best_tuning_cfg) for item in static_dynamic_items: new_op_tuning_cfg[item.name] = dynamic_op_tuning_cfg_from_static(new_op_tuning_cfg[item.name]) - new_op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + new_op_tuning_cfg["calib_sampling_size"] = calib_sampling_size yield new_op_tuning_cfg best_op_tuning_cfg_stage1 = deepcopy(self.cur_best_tuning_cfg) # Fallback to float point datatypes ('bf16' or 'fp32') - for target_dtype in ['bf16', 'fp32']: - fallback_items_lst = [item for item in int8_ops if - item in tuning_space.query_items_by_quant_mode(target_dtype)] + for target_dtype in ["bf16", "fp32"]: + fallback_items_lst = [ + item for item in int8_ops if item in tuning_space.query_items_by_quant_mode(target_dtype) + ] if fallback_items_lst: logger.info(f"Start to fallback op to {target_dtype} one by one.") # Replace it with sorted items list @@ -171,27 +189,37 @@ def dynamic_op_tuning_cfg_from_static(op_tuning_cfg: OpTuningConfig): self.ordered_ops = [op_name for (op_name, op_type) in 
ordered_op_name_types] op_dtypes = OrderedDict(zip(ordered_op_name_types, [target_dtype] * len(fallback_items_name_lst))) initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) - fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=False) + fallback_sampler = FallbackTuningSampler( + tuning_space, + tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, + accumulate=False, + ) op_fallback_acc_impact = OrderedDict() for op_index, op_tuning_cfg in enumerate(fallback_sampler): - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size yield op_tuning_cfg acc, _ = self.last_tune_result op_fallback_acc_impact[fallback_items_name_lst[op_index]] = acc # Do accumulated fallback according to the order in the previous stage if len(op_fallback_acc_impact) > 0: - ordered_ops = sorted(op_fallback_acc_impact.keys(), - key=lambda key: op_fallback_acc_impact[key], - reverse=self.higher_is_better) + ordered_ops = sorted( + op_fallback_acc_impact.keys(), + key=lambda key: op_fallback_acc_impact[key], + reverse=self.higher_is_better, + ) op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(fallback_items_name_lst))) logger.info(f"Start to accumulate fallback to {target_dtype}.") initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) - fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=True) + fallback_sampler = FallbackTuningSampler( + tuning_space, + tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, + accumulate=True, + ) for op_tuning_cfg in fallback_sampler: - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size yield op_tuning_cfg diff --git a/neural_compressor/experimental/strategy/mse_v2.py b/neural_compressor/experimental/strategy/mse_v2.py index ed76b22598d..de5acc1575c 100644 --- a/neural_compressor/experimental/strategy/mse_v2.py +++ b/neural_compressor/experimental/strategy/mse_v2.py @@ -16,33 +16,35 @@ # limitations under the License. """The MSE_V2 tuning strategy.""" import copy -import numpy as np from collections import OrderedDict -from typing import Dict, Any, List -from .strategy import strategy_registry, TuneStrategy -from ...utils import logger -from time import time +from time import time +from typing import Any, Dict, List + +import numpy as np +from ...utils import logger +from .strategy import TuneStrategy, strategy_registry from .utils.tuning_sampler import OpTypeWiseTuningSampler from .utils.tuning_structs import OpTuningConfig + @strategy_registry class MSE_V2TuneStrategy(TuneStrategy): """The `mse_v2` tuning strategy. - - MSE_v2 is a strategy with a two stages fallback and revert fallback. + + MSE_v2 is a strategy with a two stages fallback and revert fallback. Note that, only tensorflow framework and pytorch FX backend is currently supported for mse_v2 tuning strategy. """ - + def _tuning_record_msg(self, records): records_str_lst = [[str(e) for e in record] for record in records] - record_msg = '\n'.join(','.join(record) for record in records_str_lst) + record_msg = "\n".join(",".join(record) for record in records_str_lst) return record_msg def next_tune_cfg(self): """Generate and yield the next tuning config with below order. - + 1. 
In the fallback stage, it uses multi-batch data to score the op impact and then fallback the op with the highest score util found the quantized model that meets accuracy criteria. @@ -50,29 +52,31 @@ def next_tune_cfg(self): the impact of fallback OPs in the previous stage and selects the op with the lowest score to revert the fallback until the quantized model that does not meets accuracy criteria. - + Returns: tune_config (dict): A dict containing the tuning configuration for quantization. """ best_op_tuning_cfg = None if len(self.metric_name) == 1 or self.metric_weight is not None: - best_acc = float('-inf') if self.higher_is_better else float('inf') + best_acc = float("-inf") if self.higher_is_better else float("inf") else: - best_acc = [float('-inf') if higher_is_better else float('inf') for \ - higher_is_better in self.metric_criterion] + best_acc = [ + float("-inf") if higher_is_better else float("inf") for higher_is_better in self.metric_criterion + ] from copy import deepcopy + tuning_space = self.tuning_space initial_op_tuning_cfg = {} for item in tuning_space.root_item.options: - if item.item_type == 'op': + if item.item_type == "op": op_name, op_type = item.name - initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space) - calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options + initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, "fp32", tuning_space) + calib_sampling_size_lst = tuning_space.root_item.get_option_by_name("calib_sampling_size").options for calib_sampling_size in calib_sampling_size_lst: # Collect the ops that support static and dynamic quant_mode_wise_items = OrderedDict() - query_order = ['static', 'dynamic', 'bf16', 'fp16', 'fp32'] + query_order = ["static", "dynamic", "bf16", "fp16", "fp32"] pre_items = set() for quant_mode in query_order: items = tuning_space.query_items_by_quant_mode(quant_mode) @@ -88,24 +92,28 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): for quant_mode, quant_mode_items in quant_mode_wise_items.items(): initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) - # Optype-wise tuning + # Optype-wise tuning early_stop_tuning = True stage1_cnt = 0 - int8_ops = quant_mode_wise_items['dynamic'] + quant_mode_wise_items['static'] + int8_ops = quant_mode_wise_items["dynamic"] + quant_mode_wise_items["static"] stage1_max = 2 # TODO set a more appropriate value - op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], - op_item_dtype_dict, initial_op_tuning_cfg) + op_wise_tuning_sampler = OpTypeWiseTuningSampler( + tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg + ) for op_tuning_cfg in op_wise_tuning_sampler: stage1_cnt += 1 if early_stop_tuning and stage1_cnt > stage1_max: logger.info("Early stopping the stage 1.") break - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size yield op_tuning_cfg # Fallback the ops supported both static and dynamic from static to dynamic - static_dynamic_items = [item for item in tuning_space.query_items_by_quant_mode('static') if - item in tuning_space.query_items_by_quant_mode('dynamic')] + static_dynamic_items = [ + item + for item in tuning_space.query_items_by_quant_mode("static") + if item in tuning_space.query_items_by_quant_mode("dynamic") + ] if static_dynamic_items: logger.info("Fallback all ops that support both dynamic and static to dynamic.") else: @@ -113,89 +121,100 
@@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): def dynamic_op_tuning_cfg_from_static(op_tuning_cfg: OpTuningConfig): new_op_tuning_cfg = deepcopy(op_tuning_cfg) - new_op_tuning_cfg.op_quant_mode = 'dynamic' + new_op_tuning_cfg.op_quant_mode = "dynamic" return new_op_tuning_cfg new_op_tuning_cfg = deepcopy(self.cur_best_tuning_cfg) for item in static_dynamic_items: new_op_tuning_cfg[item.name] = dynamic_op_tuning_cfg_from_static(new_op_tuning_cfg[item.name]) - new_op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + new_op_tuning_cfg["calib_sampling_size"] = calib_sampling_size yield new_op_tuning_cfg # Fallback one by one by op sensitivity(mse) # 1. while the accuracy requirements not met: # to improve the accuracy - # 1) calculate the sensitivity of int8 ops in current state. + # 1) calculate the sensitivity of int8 ops in current state. # 2) fallback the op with higher sensitivity accumulatively - # 2. after the accuracy requirements met: # to improve the performance + # 2. after the accuracy requirements met: # to improve the performance # 1) calculate the sensitivity of fp32 ops in the current state # 2) re-quantize the op with lower sensitivity accumulatively tune_cfg = deepcopy(self.cur_best_tuning_cfg) requantize_cfg = deepcopy(self._tune_cfg_converter(self.cur_best_tuning_cfg)) self.output_op_names = self.adaptor.get_output_op_names(self.last_qmodel) - self.confidence_batches = (self.cfg.tuning.strategy.confidence_batches - if self.cfg.tuning.strategy.confidence_batches != None else 2) + self.confidence_batches = ( + self.cfg.tuning.strategy.confidence_batches + if self.cfg.tuning.strategy.confidence_batches is not None + else 2 + ) tune_cfg_backup = deepcopy(tune_cfg) - quant_ops_in_tune_cfg = self._collect_ops_by_quant_mode(tune_cfg, 'dynamic') + \ - self._collect_ops_by_quant_mode(tune_cfg, 'static') + quant_ops_in_tune_cfg = self._collect_ops_by_quant_mode( + tune_cfg, "dynamic" + ) + self._collect_ops_by_quant_mode(tune_cfg, "static") op_quant_cfgs = {op_info: tune_cfg_backup[op_info] for op_info in quant_ops_in_tune_cfg} fallback_records = [] self.re_quant = True while not self.objectives.compare(self.last_tune_result, self.baseline): # Record the time of calcutating the sensitivity start = time() - ops_lst = self.adaptor.calculate_op_sensitivity(self.model, - self.calib_dataloader, - deepcopy(self._tune_cfg_converter(tune_cfg)), - self.output_op_names, - self.confidence_batches, - fallback=True) + ops_lst = self.adaptor.calculate_op_sensitivity( + self.model, + self.calib_dataloader, + deepcopy(self._tune_cfg_converter(tune_cfg)), + self.output_op_names, + self.confidence_batches, + fallback=True, + ) logger.debug(f"*** The op sensitivity analysis took {time() - start:.2f}s.") select_op_info = ops_lst[0] - logger.info(f"*** The op {select_op_info} have the highest sensitivity in the current state, \ - fallback it to fp32.") - tune_cfg[select_op_info] = OpTuningConfig(select_op_info[0], - select_op_info[1], - 'fp32', - self.tuning_space) + logger.info( + f"*** The op {select_op_info} have the highest sensitivity in the current state, \ + fallback it to fp32." 
+ ) + tune_cfg[select_op_info] = OpTuningConfig( + select_op_info[0], select_op_info[1], "fp32", self.tuning_space + ) # Record the fallback history - if not fallback_records: + if not fallback_records: fallback_records = [[select_op_info]] else: fallback_records.append(fallback_records[-1] + [select_op_info]) logger.debug(f"*** The fallback ops record: \n{self._tuning_record_msg(fallback_records)}") yield tune_cfg - logger.info(f"*** The accuracy meeting the accuracy requirements, stop fallback ops.") + logger.info("*** The accuracy meeting the accuracy requirements, stop fallback ops.") while self.objectives.compare(self.last_tune_result, self.baseline): if len(fallback_records) == 0 or len(fallback_records[-1]) <= 1: - logger.info(f"*** Stop re-quant due to no int8 op or only 1 int8 op left.") + logger.info("*** Stop re-quant due to no int8 op or only 1 int8 op left.") break - logger.info(f"*** Start to re-quant the fallback op in the previous stage.") + logger.info("*** Start to re-quant the fallback op in the previous stage.") # Track the current fallback ops - tmp_fallback_ops = fallback_records[-1] if fallback_records else [] + tmp_fallback_ops = fallback_records[-1] if fallback_records else [] start = time() - ops_lst = self.adaptor.calculate_op_sensitivity(self.model, - self.calib_dataloader, - deepcopy(self._tune_cfg_converter(tune_cfg)), - self.output_op_names, - self.confidence_batches, - fallback=False, - requantize_cfgs=requantize_cfg['op']) + ops_lst = self.adaptor.calculate_op_sensitivity( + self.model, + self.calib_dataloader, + deepcopy(self._tune_cfg_converter(tune_cfg)), + self.output_op_names, + self.confidence_batches, + fallback=False, + requantize_cfgs=requantize_cfg["op"], + ) logger.debug(f"*** The op sensitivity analysis took {time() - start:.2f}s.") - if not ops_lst: + if not ops_lst: logger.warning("No op to be requantized") break for select_op_info in ops_lst: - #assert select_op_info in tmp_fallback_ops, f"{select_op_info} not in fallback list." + # assert select_op_info in tmp_fallback_ops, f"{select_op_info} not in fallback list." if select_op_info not in tmp_fallback_ops: logger.debug(f"{select_op_info} not in fallback list.") continue - + new_fallback_ops = deepcopy(tmp_fallback_ops) new_fallback_ops.remove(select_op_info) if new_fallback_ops not in fallback_records: - logger.info(f"*** The op {select_op_info} have the lowest sensitivity in the current state, \ - re-quantize it.") + logger.info( + f"*** The op {select_op_info} have the lowest sensitivity in the current state, \ + re-quantize it." 
+ ) tune_cfg[select_op_info] = op_quant_cfgs[select_op_info] fallback_records.append(new_fallback_ops) logger.debug(f"*** The fallback ops record: \n{self._tuning_record_msg(fallback_records)}") @@ -205,4 +224,4 @@ def dynamic_op_tuning_cfg_from_static(op_tuning_cfg: OpTuningConfig): logger.debug(f"*** Skip re-qaunt {select_op_info}, due the config has been evallated.") continue self.re_quant = False - logger.info(f"*** The accuracy not meeting the accuracy requirements, stop re-quantize ops.") \ No newline at end of file + logger.info("*** The accuracy not meeting the accuracy requirements, stop re-quantize ops.") diff --git a/neural_compressor/experimental/strategy/random.py b/neural_compressor/experimental/strategy/random.py index 7148100a76a..7f97f7c3442 100644 --- a/neural_compressor/experimental/strategy/random.py +++ b/neural_compressor/experimental/strategy/random.py @@ -15,13 +15,15 @@ # See the License for the specific language governing permissions and # limitations under the License. """The random tuning strategy.""" -import numpy as np -from .strategy import strategy_registry, TuneStrategy from collections import OrderedDict -from .utils.tuning_sampler import OpWiseTuningSampler, FallbackTuningSampler -from .utils.tuning_structs import OpTuningConfig +import numpy as np + from ...utils import logger +from .strategy import TuneStrategy, strategy_registry +from .utils.tuning_sampler import FallbackTuningSampler, OpWiseTuningSampler +from .utils.tuning_structs import OpTuningConfig + @strategy_registry class RandomTuneStrategy(TuneStrategy): @@ -29,7 +31,7 @@ class RandomTuneStrategy(TuneStrategy): def next_tune_cfg(self): """Generate and yield the next tuning config by random searching in tuning space. - + Random strategy is used to randomly choose quantization tuning configurations from the tuning space. As with the Exhaustive strategy, it also only considers quantization tuning configs to generate a better-performance quantized model. 
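The random strategy's sampling step, sketched with invented candidate lists: each trial draws one of the pre-enumerated op-wise configs and one calibration sampling size.

import numpy as np

op_tuning_cfg_lst = [
    {("conv1", "Conv2D"): "int8_per_channel", ("fc", "MatMul"): "int8"},
    {("conv1", "Conv2D"): "int8_per_tensor", ("fc", "MatMul"): "fp32"},
]
calib_sampling_size_lst = [100, 500, 1000]

rng = np.random.default_rng(9527)

def random_cfgs(max_trials=5):
    for _ in range(max_trials):
        cfg = dict(op_tuning_cfg_lst[rng.integers(len(op_tuning_cfg_lst))])
        cfg["calib_sampling_size"] = calib_sampling_size_lst[rng.integers(len(calib_sampling_size_lst))]
        yield cfg

for cfg in random_cfgs():
    print(cfg)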
@@ -39,17 +41,16 @@ def next_tune_cfg(self): """ tuning_space = self.tuning_space op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() - op_wise_tuning_sampler = OpWiseTuningSampler(tuning_space, [], [], - op_item_dtype_dict, initial_op_tuning_cfg) + op_wise_tuning_sampler = OpWiseTuningSampler(tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg) op_tuning_cfg_lst = list(op_wise_tuning_sampler) op_tuning_cfg_cnt = len(op_tuning_cfg_lst) - calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options + calib_sampling_size_lst = tuning_space.root_item.get_option_by_name("calib_sampling_size").options calib_sampling_size_cnt = len(calib_sampling_size_lst) while True: calib_index = np.random.choice(calib_sampling_size_cnt) calib_sampling_size = calib_sampling_size_lst[calib_index] op_tuning_cfg_index = np.random.choice(op_tuning_cfg_cnt) op_tuning_cfg = op_tuning_cfg_lst[op_tuning_cfg_index] - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size yield op_tuning_cfg return diff --git a/neural_compressor/experimental/strategy/strategy.py b/neural_compressor/experimental/strategy/strategy.py index c1da699f5d3..e9073ae1505 100644 --- a/neural_compressor/experimental/strategy/strategy.py +++ b/neural_compressor/experimental/strategy/strategy.py @@ -17,44 +17,44 @@ """The base class for tuning strategy.""" -from abc import abstractmethod -from enum import EnumMeta -import os -import math import copy -from copy import deepcopy +import math +import os import pickle +import sys +from abc import abstractmethod from collections import OrderedDict, defaultdict +from copy import deepcopy +from enum import EnumMeta from pathlib import Path -import yaml -import numpy as np +from time import time from typing import OrderedDict as T_OrderedDict +import numpy as np +import yaml + from neural_compressor.adaptor.tensorflow import TensorFlowAdaptor from neural_compressor.config import options -from ...objective import MultiObjective + from ...adaptor import FRAMEWORKS -from ...utils.utility import Statistics, dump_data_to_local -from ...utils.utility import fault_tolerant_file, equal_dicts, GLOBAL_STATE, MODE -from ...utils.create_obj_from_config import create_eval_func, create_train_func -from ...utils.utility import LazyImport -from ...utils import logger -from ...version import __version__ +from ...algorithm import ALGORITHMS, AlgorithmScheduler from ...conf.dotdict import DotDict, deep_get, deep_set -from ...algorithm import AlgorithmScheduler, ALGORITHMS - -import copy -import numpy as np -from collections import OrderedDict -from time import time +from ...objective import MultiObjective from ...utils import logger -import sys - - +from ...utils.create_obj_from_config import create_eval_func, create_train_func +from ...utils.utility import ( + GLOBAL_STATE, + MODE, + LazyImport, + Statistics, + dump_data_to_local, + equal_dicts, + fault_tolerant_file, +) +from ...version import __version__ +from .utils.constant import FALLBACK_RECIPES_SET from .utils.tuning_space import TuningItem, TuningSpace from .utils.tuning_structs import OpTuningConfig -from .utils.constant import FALLBACK_RECIPES_SET - EXP_STRATEGIES = {} @@ -69,19 +69,29 @@ def strategy_registry(cls): cls: The class of register. """ assert cls.__name__.endswith( - 'TuneStrategy' - ), "The name of subclass of TuneStrategy should end with \'TuneStrategy\' substring." 
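The registration pattern being reformatted in this hunk, reduced to a self-contained form: strip the 'TuneStrategy' suffix, lower-case the rest, and refuse duplicate names. The example class is hypothetical.

EXP_STRATEGIES = {}

def strategy_registry(cls):
    # Register a strategy class under its lower-cased name without the suffix.
    assert cls.__name__.endswith("TuneStrategy"), "Strategy class names must end with 'TuneStrategy'."
    name = cls.__name__[: -len("TuneStrategy")].lower()
    if name in EXP_STRATEGIES:
        raise ValueError("Cannot have two strategies with the same name")
    EXP_STRATEGIES[name] = cls
    return cls

@strategy_registry
class BasicTuneStrategy:
    pass

print(EXP_STRATEGIES)  # {'basic': <class '...BasicTuneStrategy'>}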
- if cls.__name__[:-len('TuneStrategy')].lower() in EXP_STRATEGIES: - raise ValueError('Cannot have two strategies with the same name') - EXP_STRATEGIES[cls.__name__[:-len('TuneStrategy')].lower()] = cls + "TuneStrategy" + ), "The name of subclass of TuneStrategy should end with 'TuneStrategy' substring." + if cls.__name__[: -len("TuneStrategy")].lower() in EXP_STRATEGIES: + raise ValueError("Cannot have two strategies with the same name") + EXP_STRATEGIES[cls.__name__[: -len("TuneStrategy")].lower()] = cls return cls + @strategy_registry class TuneStrategy(object): """Basic class for tuning strategy.""" - def __init__(self, model, conf, q_dataloader=None, q_func=None, eval_dataloader=None, - eval_func=None, resume=None, q_hooks=None): + def __init__( + self, + model, + conf, + q_dataloader=None, + q_func=None, + eval_dataloader=None, + eval_func=None, + resume=None, + q_hooks=None, + ): """Init the TuneStrategy. Args: @@ -90,8 +100,8 @@ def __init__(self, model, conf, q_dataloader=None, q_func=None, eval_dataloader= q_dataloader: Data loader for calibration, mandatory for post-training quantization. Defaults to None. q_func: Training function for quantization aware training. Defaults to None. Defaults to None. eval_dataloader: Data loader for evaluation. Defaults to None. - eval_func: The evaluation function provided by user. This function takes model as parameter, and - evaluation dataset and metrics should be encapsulated in this function implementation and + eval_func: The evaluation function provided by user. This function takes model as parameter, and + evaluation dataset and metrics should be encapsulated in this function implementation and outputs a higher-is-better accuracy scalar value. resume: The dict containing resume information. Defaults to None. 
q_hooks: The dict of training hooks, supported keys are: on_epoch_begin, on_epoch_end, on_step_begin, @@ -102,8 +112,8 @@ def __init__(self, model, conf, q_dataloader=None, q_func=None, eval_dataloader= self.model = model self.cfg = conf.usr_cfg self.cfg_bk = copy.deepcopy(self.cfg) - self.history_path = self._create_path(self.cfg.tuning.workspace.path, './history.snapshot') - self.deploy_path = self._create_path(self.cfg.tuning.workspace.path, 'deploy.yaml') + self.history_path = self._create_path(self.cfg.tuning.workspace.path, "./history.snapshot") + self.deploy_path = self._create_path(self.cfg.tuning.workspace.path, "deploy.yaml") self.eval_dataloader = eval_dataloader self.calib_dataloader = q_dataloader self.q_func = q_func @@ -147,18 +157,18 @@ def __init__(self, model, conf, q_dataloader=None, q_func=None, eval_dataloader= self.last_tune_result = None self.last_qmodel = None self.last_tune_cfg = None - self.best_qmodel = None + self.best_qmodel = None self.best_tune_result = None - self.best_tuning_cfg = None # track the best tuning config correspondence to the best quantized model - self.cur_best_acc = self.initial_best_acc() # track the current best accuracy - self.cur_best_tuning_cfg = {} # track tuning cfg with the current best accuracy + self.best_tuning_cfg = None # track the best tuning config correspondence to the best quantized model + self.cur_best_acc = self.initial_best_acc() # track the current best accuracy + self.cur_best_tuning_cfg = {} # track tuning cfg with the current best accuracy self.re_quant = False self.capability = self.adaptor.query_fw_capability(model) logger.debug(self.capability) self.set_tuning_space(conf) - - #For algo scheduler + + # For algo scheduler self.algo_scheduler = AlgorithmScheduler(self.cfg.quantization.recipes) self.algo_scheduler.dataloader = self.calib_dataloader # reuse the calibration iteration self.algo_scheduler.origin_model = self.model @@ -170,7 +180,7 @@ def __init__(self, model, conf, q_dataloader=None, q_func=None, eval_dataloader= self.tuning_times = 0 self.fallback_start_point = 0 self.metric_met_point = 0 - + # for recipes # {recipe name: the list of supported value} self._tuning_recipes = OrderedDict() @@ -180,8 +190,8 @@ def __init__(self, model, conf, q_dataloader=None, q_func=None, eval_dataloader= self._not_tuning_recipes_values = {} self._initialize_recipe() self.applied_all_recipes_flag = False - if resume is not None: self.setup_resume(resume) - + if resume is not None: + self.setup_resume(resume) @abstractmethod def next_tune_cfg(self): @@ -196,18 +206,19 @@ def next_tune_cfg(self): tune_config (dict): It's a dict containing the tuning configuration to traverse. """ raise NotImplementedError - + def _initialize_recipe(self): """Divide the recipe into two categories tuning/not tuning.""" - from .utils.utility import get_adaptor_name from ...utils.constant import RECIPES as fwk_recipes from ...utils.constant import RECIPES_PRIORITY as fwk_recipes_priority + from .utils.utility import get_adaptor_name + # get all recipes supported by adaptor. adaptor_name = get_adaptor_name(self.adaptor) - adaptor_recipes = fwk_recipes['common'] + adaptor_recipes = fwk_recipes["common"] # TODO WA due to smooth quant only supported by ort/pt currently. 
- if not adaptor_name not in ['onnx', 'pytorch']: - adaptor_recipes.pop('smooth_quant', None) + if not adaptor_name not in ["onnx", "pytorch"]: + adaptor_recipes.pop("smooth_quant", None) for adaptor_name_key, adaptor_recipes_val in fwk_recipes.items(): if adaptor_name_key.startswith(adaptor_name): adaptor_recipes.update(adaptor_recipes_val) @@ -225,55 +236,62 @@ def _initialize_recipe(self): for recipe_name in fwk_recipes_priority: if recipe_name in adaptor_recipes and recipe_name not in self._not_tuning_recipes_values: # TODO skip tuning smooth_quant first - if recipe_name == 'smooth_quant': continue + if recipe_name == "smooth_quant": + continue self._tuning_recipes[recipe_name] = adaptor_recipes[recipe_name] self._tuning_recipes_default_values[recipe_name] = adaptor_recipes[recipe_name][0] logger.info(f"{len(self._not_tuning_recipes_values)} recipes specified by user.") logger.debug(self._not_tuning_recipes_values) logger.info(f"{len(self._tuning_recipes)} recipes require future tuning.") logger.debug(self._tuning_recipes) - + def _fallback_ops(self, tune_cfg, recipe_op_lst, tuning_space): """Fallback ops in recipe op list.""" for op_name_type in recipe_op_lst: - tune_cfg.update({op_name_type: OpTuningConfig(op_name_type[0], \ - op_name_type[1],'fp32', tuning_space)}) + tune_cfg.update({op_name_type: OpTuningConfig(op_name_type[0], op_name_type[1], "fp32", tuning_space)}) return tune_cfg - + def apply_all_tuning_recipes(self, tune_cfg): """Apply all tunable recipes with their value.""" - tune_cfg['recipe_cfgs'] = tune_cfg.get('recipe_cfgs', {}) + tune_cfg["recipe_cfgs"] = tune_cfg.get("recipe_cfgs", {}) for recipe_name, recipe_val_lst in self._tuning_recipes.items(): - tune_cfg['recipe_cfgs'][recipe_name] = recipe_val_lst[-1] - if recipe_name in FALLBACK_RECIPES_SET and 'recipes_ops' in self.capability and \ - len(self.capability['recipes_ops'].get(recipe_name, [])) > 0: + tune_cfg["recipe_cfgs"][recipe_name] = recipe_val_lst[-1] + if ( + recipe_name in FALLBACK_RECIPES_SET + and "recipes_ops" in self.capability + and len(self.capability["recipes_ops"].get(recipe_name, [])) > 0 + ): logger.info(f"Applied recipe {recipe_name}.") - tune_cfg = self._fallback_ops(tune_cfg, self.capability['recipes_ops'][recipe_name],\ - self.tuning_space) + tune_cfg = self._fallback_ops(tune_cfg, self.capability["recipes_ops"][recipe_name], self.tuning_space) return tune_cfg - + def apply_recipe_one_by_one(self, tune_cfg): """Apply the tunable recipes one by one. - + For recipes only have two options, apply the last one. For recipes with multiple values. such as alpha of smooth quant, apply it one by one. 
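A rough sketch of trying tunable recipes one at a time, as the docstring above describes: each trial switches a single recipe to its last candidate value while the others keep their defaults. The recipe names and values below are illustrative only, not the adaptor's real recipe table.

tuning_recipes = {
    "first_conv_or_matmul_quantization": [True, False],
    "smooth_quant_alpha": [0.3, 0.5, 0.7],
}
defaults = {name: values[0] for name, values in tuning_recipes.items()}

def recipe_trials():
    for name, values in tuning_recipes.items():
        cfg = dict(defaults)
        cfg[name] = values[-1]  # apply the last candidate value of this recipe
        yield cfg

for trial in recipe_trials():
    print(trial)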
""" from .utils.tuning_sampler import TuningSamplerRegistry + all_registered_samplers = TuningSamplerRegistry.sampler_dict for recipe_name, recipe_vals in self._tuning_recipes.items(): - if recipe_name in FALLBACK_RECIPES_SET and 'recipes_ops' in self.capability and \ - len(self.capability['recipes_ops'].get(recipe_name, [])) > 0: + if ( + recipe_name in FALLBACK_RECIPES_SET + and "recipes_ops" in self.capability + and len(self.capability["recipes_ops"].get(recipe_name, [])) > 0 + ): logger.info(f"Applied recipe {recipe_name} with value {recipe_vals[-1]}") - new_tune_cfg = self._fallback_ops(copy.deepcopy(tune_cfg), \ - self.capability['recipes_ops'][recipe_name], self.tuning_space) + new_tune_cfg = self._fallback_ops( + copy.deepcopy(tune_cfg), self.capability["recipes_ops"][recipe_name], self.tuning_space + ) yield new_tune_cfg if recipe_name == "smooth_quant": - sq_args = {'smooth_quant': True} - if 'recipe_cfgs' not in new_tune_cfg: - new_tune_cfg['recipe_cfgs'] = sq_args + sq_args = {"smooth_quant": True} + if "recipe_cfgs" not in new_tune_cfg: + new_tune_cfg["recipe_cfgs"] = sq_args else: - new_tune_cfg['recipe_cfgs'].update(sq_args) - new_tune_cfg['recipe_cfgs'] = sq_args + new_tune_cfg["recipe_cfgs"].update(sq_args) + new_tune_cfg["recipe_cfgs"] = sq_args yield new_tune_cfg def set_param_for_pre_quantization_algos(self, algo_scheduler, tune_cfg, fp32_model) -> None: @@ -285,28 +303,26 @@ def set_param_for_pre_quantization_algos(self, algo_scheduler, tune_cfg, fp32_mo fp32_model: the fp32 model """ algo_scheduler.origin_model = fp32_model - algo_scheduler.calib_iter = tune_cfg['calib_iteration'] + algo_scheduler.calib_iter = tune_cfg["calib_iteration"] algo_scheduler.q_model = fp32_model - recipe_cfgs = tune_cfg.get('recipe_cfgs', None) + recipe_cfgs = tune_cfg.get("recipe_cfgs", None) algo_scheduler.reset_exec_algorithms() - if recipe_cfgs and recipe_cfgs.get('smooth_quant', False): # pragma: no cover + if recipe_cfgs and recipe_cfgs.get("smooth_quant", False): # pragma: no cover # skip assign alpha to sq first. # set the alpha to 0.5 by default - smooth_quant_args = recipe_cfgs.get('smooth_quant_args', {'alpha': 0.5}) - sq_algo = ALGORITHMS()['smooth_quant'] - sq_algo.alpha = smooth_quant_args['alpha'] - if 'folding' not in smooth_quant_args: - smooth_quant_args['folding'] = True if self.framework in ['pytorch', 'pytorch_fx'] \ - else False - logger.info("SmoothQuant args 'folding' is not set, it's {} now.".format(smooth_quant_args['folding'])) - if self.framework == 'pytorch_ipex': - smooth_quant_args['folding'] = None # will reset it to True if IPEX version < 2.1. - sq_algo.folding = smooth_quant_args['folding'] + smooth_quant_args = recipe_cfgs.get("smooth_quant_args", {"alpha": 0.5}) + sq_algo = ALGORITHMS()["smooth_quant"] + sq_algo.alpha = smooth_quant_args["alpha"] + if "folding" not in smooth_quant_args: + smooth_quant_args["folding"] = True if self.framework in ["pytorch", "pytorch_fx"] else False + logger.info("SmoothQuant args 'folding' is not set, it's {} now.".format(smooth_quant_args["folding"])) + if self.framework == "pytorch_ipex": + smooth_quant_args["folding"] = None # will reset it to True if IPEX version < 2.1. 
+ sq_algo.folding = smooth_quant_args["folding"] logger.debug(f"Set smooth quant with alpha {smooth_quant_args['alpha']} as the pre-quantization algo.") - algo_scheduler.append_algorithm('pre_quantization', sq_algo) - - + algo_scheduler.append_algorithm("pre_quantization", sq_algo) + def set_param_for_post_quantization_algos(self, algo_scheduler, tune_cfg, pre_optimized_model, q_model) -> None: """Set the parameter for post-quantization algos, such as bias correction, weight correction. @@ -319,21 +335,21 @@ def set_param_for_post_quantization_algos(self, algo_scheduler, tune_cfg, pre_op algo_scheduler.origin_model = pre_optimized_model # if no pre-process algos, return the fp32 model directly. algo_scheduler.q_model = q_model - + algo_scheduler.reset_exec_algorithms() - recipe_cfgs = tune_cfg.get('recipe_cfgs', None) + recipe_cfgs = tune_cfg.get("recipe_cfgs", None) # for fast_bias_correction - if recipe_cfgs and recipe_cfgs.get('fast_bias_correction', False): - fbc_algo = ALGORITHMS()['fast_bias_correction'] + if recipe_cfgs and recipe_cfgs.get("fast_bias_correction", False): + fbc_algo = ALGORITHMS()["fast_bias_correction"] fbc_algo.quantization_cfg = deepcopy(tune_cfg) - algo_scheduler.append_algorithm('post_quantization', fbc_algo) - logger.debug(f"Add fast bias correction as the post quantization algo.") + algo_scheduler.append_algorithm("post_quantization", fbc_algo) + logger.debug("Add fast bias correction as the post quantization algo.") # for weight correction - if recipe_cfgs and recipe_cfgs.get('weight_correction', False): - w_algo = ALGORITHMS()['weight_correction'] + if recipe_cfgs and recipe_cfgs.get("weight_correction", False): + w_algo = ALGORITHMS()["weight_correction"] w_algo.quantization_cfg = deepcopy(tune_cfg) - algo_scheduler.append_algorithm('post_quantization', w_algo) - logger.debug(f"Add weight correction as the post quantization algo.") + algo_scheduler.append_algorithm("post_quantization", w_algo) + logger.debug("Add weight correction as the post quantization algo.") def traverse(self): """Traverse the tuning space. 
@@ -349,8 +365,8 @@ def traverse(self): trials_count += 1 tuning_history = self._find_tuning_history(tune_cfg) if tuning_history and trials_count < self.cfg.tuning.exit_policy.max_trials: - self.last_tune_result = tuning_history['last_tune_result'] - self.best_tune_result = tuning_history['best_tune_result'] + self.last_tune_result = tuning_history["last_tune_result"] + self.best_tune_result = tuning_history["best_tune_result"] logger.warn("Find evaluated tuning config, skip.") continue self._remove_redundant_qmodel() @@ -359,14 +375,15 @@ def traverse(self): self.tuning_times += 1 # set the parameter for pre quantization algos and run self.set_param_for_pre_quantization_algos(self.algo_scheduler, tune_cfg, self.model) - self.model = self.algo_scheduler('pre_quantization') + self.model = self.algo_scheduler("pre_quantization") # quantize q_model = self.adaptor.quantize(copy.deepcopy(tune_cfg), self.model, self.calib_dataloader, self.q_func) assert self.adaptor.pre_optimized_model # set the parameter for post quantization algos and run - self.set_param_for_post_quantization_algos(self.algo_scheduler, tune_cfg, self.adaptor.pre_optimized_model, - q_model) - self.last_qmodel = self.algo_scheduler('post_quantization') + self.set_param_for_post_quantization_algos( + self.algo_scheduler, tune_cfg, self.adaptor.pre_optimized_model, q_model + ) + self.last_qmodel = self.algo_scheduler("post_quantization") self.last_tune_cfg = copy.deepcopy(tune_cfg) # Remove the reference to model self.algo_scheduler.reset_exec_algorithms() @@ -383,9 +400,7 @@ def traverse(self): # record the tuning history saved_tune_cfg = copy.deepcopy(tune_cfg) saved_last_tune_result = copy.deepcopy(self.last_tune_result) - self._add_tuning_history(saved_tune_cfg, - saved_last_tune_result, - q_config=q_model.q_config) + self._add_tuning_history(saved_tune_cfg, saved_last_tune_result, q_config=q_model.q_config) self.tune_result_record.append(copy.deepcopy(self.last_tune_result)) self.tune_cfg = tune_cfg now_time = time() @@ -407,26 +422,29 @@ def traverse(self): # recover the best quantized model from tuning config self._recover_best_qmodel_from_tuning_cfg() if self.cfg.tuning.diagnosis: - logger.debug(f'*** Start to do diagnosis (inspect tensor).') + logger.debug("*** Start to do diagnosis (inspect tensor).") self._diagnosis() - if self.use_multi_objective and len(self.tune_result_record) > 1 and \ - self.best_tune_result is not None: - best_trail, best_result = self.objectives.best_result(self.tune_result_record, - copy.deepcopy(self.baseline)) + if self.use_multi_objective and len(self.tune_result_record) > 1 and self.best_tune_result is not None: + best_trail, best_result = self.objectives.best_result( + self.tune_result_record, copy.deepcopy(self.baseline) + ) if best_result != self.best_tune_result: from neural_compressor.utils.utility import recover - self.best_qmodel = recover(self.model.model, - os.path.join(self.cfg.tuning.workspace.path, 'history.snapshot'), - best_trail) - logger.debug(f"*** Update the best qmodel by recovering from history.") + + self.best_qmodel = recover( + self.model.model, + os.path.join(self.cfg.tuning.workspace.path, "history.snapshot"), + best_trail, + ) + logger.debug("*** Update the best qmodel by recovering from history.") self.best_tune_result = best_result self._dump_tuning_process_statistics() break self._recover_best_qmodel_from_tuning_cfg() - + def _remove_redundant_qmodel(self): """Remove the redundant quantized model to reduce memory use. 
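The traverse loop above, reduced to its skeleton with dummy quantize/evaluate stand-ins and made-up accuracy numbers: generate a config, build a candidate model, evaluate it, and stop once the accuracy criterion or the trial budget is hit.

def next_tune_cfg():
    for scheme in ("int8_all", "int8_most", "mixed", "fp32"):
        yield {"scheme": scheme}

def quantize(cfg):
    return f"model[{cfg['scheme']}]"

def evaluate(model):
    # Fake accuracy table standing in for the real evaluation function.
    return {"model[int8_all]": 0.71, "model[int8_most]": 0.74,
            "model[mixed]": 0.775, "model[fp32]": 0.78}[model]

baseline, criterion, max_trials = 0.78, 0.01, 10
best = None
for trials, cfg in enumerate(next_tune_cfg(), start=1):
    qmodel = quantize(cfg)
    acc = evaluate(qmodel)
    if baseline - acc <= criterion:   # objectives met: keep this config and stop
        best = (cfg, acc)
        break
    if trials >= max_trials:          # trial budget exhausted
        break
print(best)  # ({'scheme': 'mixed'}, 0.775)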
- + During the tuning process, the strategy only keeps the best tuning config instead of the best quantized model to reduce memory use. """ @@ -439,26 +457,31 @@ def _can_create_eval_func_from_cfg(self): Returns: Returns True if the eval func can be created from config, False otherwise. """ - if self.cfg.evaluation and self.cfg.evaluation.accuracy and \ - (self.cfg.evaluation.accuracy.metric or self.cfg.evaluation.accuracy.multi_metrics)\ - and self.eval_dataloader: - return True + if ( + self.cfg.evaluation + and self.cfg.evaluation.accuracy + and (self.cfg.evaluation.accuracy.metric or self.cfg.evaluation.accuracy.multi_metrics) + and self.eval_dataloader + ): + return True return False - + def _eval_baseline(self): """Evaluate the fp32 model if needed.""" if not self._can_create_eval_func_from_cfg() and not self.eval_func: - logger.info("Neither evaluation function nor metric is defined." \ - " Generate a quantized model with default quantization configuration.") + logger.info( + "Neither evaluation function nor metric is defined." + " Generate a quantized model with default quantization configuration." + ) self.cfg.tuning.exit_policy.performance_only = True logger.info("Force setting 'tuning.exit_policy.performance_only = True'.") - + if not self.cfg.tuning.exit_policy.performance_only: # get fp32 model baseline if self.baseline is None: logger.info("Get FP32 model baseline.") self._fp32_model = self.model - self.baseline = self._evaluate(self.model) + self.baseline = self._evaluate(self.model) self.objectives.baseline = self.baseline # record the FP32 baseline self._add_tuning_history() @@ -467,23 +490,23 @@ def _eval_baseline(self): def _recover_best_qmodel_from_tuning_cfg(self): """Recover the best quantized model from tuning config.""" if self.best_tuning_cfg and not self.best_qmodel: - self.best_qmodel = self.adaptor.quantize(copy.deepcopy(self.best_tuning_cfg), self.model, - self.calib_dataloader, self.q_func) + self.best_qmodel = self.adaptor.quantize( + copy.deepcopy(self.best_tuning_cfg), self.model, self.calib_dataloader, self.q_func + ) def _fallback_started(self): self.fallback_start_point = self.tuning_times def _update_optype_statistics(self): - self._optype_statistics = defaultdict(lambda:defaultdict(int)) + self._optype_statistics = defaultdict(lambda: defaultdict(int)) - for op_name_type, op_tune_cfg in self.tune_cfg['op'].items(): + for op_name_type, op_tune_cfg in self.tune_cfg["op"].items(): optype = op_name_type[1] - quant_mode = op_tune_cfg['activation']['quant_mode'] + quant_mode = op_tune_cfg["activation"]["quant_mode"] if isinstance(quant_mode, tuple) or isinstance(quant_mode, list): quant_mode = quant_mode[0] - dtype = 'INT8' if quant_mode in ('static', 'dynamic') \ - else quant_mode.upper() - self._optype_statistics[optype]['Total'] += 1 + dtype = "INT8" if quant_mode in ("static", "dynamic") else quant_mode.upper() + self._optype_statistics[optype]["Total"] += 1 self._optype_statistics[optype][dtype] += 1 return @@ -496,7 +519,7 @@ def _dump_tuning_process_statistics(self): logger.debug("Objective(s) met at Tune {}".format(self.metric_met_point)) fallback_stats = self._calculate_fallback_op_count() - if self.fallback_stats_baseline == None: + if self.fallback_stats_baseline is None: self.fallback_stats_baseline = fallback_stats logger.debug(f"Fallbacked ops count: {self.fallback_stats_baseline - fallback_stats}") @@ -505,7 +528,7 @@ def _dump_tuning_process_statistics(self): return - def _calculate_fallback_op_count(self, target_dtype='INT8'): + def 
_calculate_fallback_op_count(self, target_dtype="INT8"): fallback_stats = defaultdict(int) for optype in self._optype_statistics: @@ -514,11 +537,9 @@ def _calculate_fallback_op_count(self, target_dtype='INT8'): return fallback_stats[target_dtype] - - def _compare_optype_statistics(self, fields=None, optypes=None, - skip_fields=None, skip_optypes=None): - assert(fields == None or skip_fields == None) - assert(optypes == None or skip_optypes == None) + def _compare_optype_statistics(self, fields=None, optypes=None, skip_fields=None, skip_optypes=None): + assert fields is None or skip_fields is None + assert optypes is None or skip_optypes is None if not isinstance(self.adaptor, TensorFlowAdaptor): logger.debug("OpType statistics comparation is only available for TensorFlow adaptor.") return @@ -526,22 +547,22 @@ def _compare_optype_statistics(self, fields=None, optypes=None, adaptor_statistics = self.adaptor.optype_statistics def _field_skipped(field): - if fields != None: + if fields is not None: return field not in fields - elif skip_fields != None: + elif skip_fields is not None: return field in skip_fields def _optype_skipped(optype): - if optypes != None: + if optypes is not None: return optype not in optypes - elif skip_optypes != None: + elif skip_optypes is not None: return optype in skip_optypes - field_names = adaptor_statistics[0][1:] adaptor_data = { - line[0].lower() : {dtype : count for dtype, count in zip(field_names, line[1:])} - for line in adaptor_statistics[1]} + line[0].lower(): {dtype: count for dtype, count in zip(field_names, line[1:])} + for line in adaptor_statistics[1] + } strategy_data = self._optype_statistics # compare adaptor statistics to strategy statistics @@ -549,17 +570,20 @@ def _optype_skipped(optype): has_difference = False difference_count = 0 for optype in adaptor_data: - if optype not in strategy_data or _optype_skipped(optype): continue + if optype not in strategy_data or _optype_skipped(optype): + continue for field in field_names: - if _field_skipped(field): continue + if _field_skipped(field): + continue adaptor_count = adaptor_data[optype][field] strategy_count = strategy_data[optype][field] if adaptor_count != strategy_count: has_difference = True - if field == 'INT8': + if field == "INT8": difference_count += abs(strategy_count - adaptor_count) - logger.debug("\t{}: [adaptor: {} | tune_cfg: {}]".format( - (optype, field), adaptor_count, strategy_count)) + logger.debug( + "\t{}: [adaptor: {} | tune_cfg: {}]".format((optype, field), adaptor_count, strategy_count) + ) if not has_difference: logger.debug("\tNone") logger.debug(f"\tDifference(s) in total: {difference_count}") @@ -567,7 +591,7 @@ def _optype_skipped(optype): def initial_tuning_cfg(self): """Init the tuning config. - + Initialize the tuning config according to the quantization approach. Returns: @@ -575,19 +599,20 @@ def initial_tuning_cfg(self): quant_mode_wise_items (OrderedDict): key is quant_mode/precision; value is item list. initial_op_tuning_cfg (OrderedDict): key is (op_name, op_type); value is the initialized tuning config. 
""" - from .utils.constant import auto_query_order, static_query_order, dynamic_query_order + from .utils.constant import auto_query_order, dynamic_query_order, static_query_order from .utils.tuning_space import initial_tuning_cfg_with_quant_mode - if self.cfg.quantization.approach == 'post_training_auto_quant': + + if self.cfg.quantization.approach == "post_training_auto_quant": query_order = auto_query_order - elif self.cfg.quantization.approach == 'post_training_dynamic_quant': + elif self.cfg.quantization.approach == "post_training_dynamic_quant": query_order = dynamic_query_order - elif self.cfg.quantization.approach == 'post_training_static_quant': + elif self.cfg.quantization.approach == "post_training_static_quant": query_order = static_query_order - elif self.cfg.quantization.approach == 'quant_aware_training': + elif self.cfg.quantization.approach == "quant_aware_training": logger.info("!!! Currently, the qat tuning is not supported by strategy.") query_order = auto_query_order - quant_mode_wise_items = OrderedDict() # mode, op_item_lst + quant_mode_wise_items = OrderedDict() # mode, op_item_lst pre_items = set() # Collect op items supported the specified mode. for quant_mode in query_order: @@ -606,35 +631,43 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): initial_op_tuning_cfg = {} for op_name_type, quant_mode in op_item_dtype_dict.items(): - initial_op_tuning_cfg[op_name_type] = initial_tuning_cfg_with_quant_mode(op_name_type, - quant_mode, - self.tuning_space) + initial_op_tuning_cfg[op_name_type] = initial_tuning_cfg_with_quant_mode( + op_name_type, quant_mode, self.tuning_space + ) return op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg def show_baseline_info(self): """Display the accuracy and duration of the the baseline model.""" if self.baseline: - self.tune_data['baseline'] = self.baseline[0] if \ - isinstance(self.baseline[0], list) else [self.baseline[0]] - for name, data in zip(self.metric_name, self.tune_data['baseline']): + self.tune_data["baseline"] = self.baseline[0] if isinstance(self.baseline[0], list) else [self.baseline[0]] + for name, data in zip(self.metric_name, self.tune_data["baseline"]): self.tune_data[name] = [data] if self.metric_weight: # baseline is weighted accuracy - self.tune_data['Weighted accuracy'] = \ - [np.mean(np.array(self.tune_data['baseline']) * self.metric_weight)] - self.tune_data['baseline'] = self.tune_data['Weighted accuracy'] - baseline_msg = '[Accuracy:' + \ - ''.join([' {:.4f}'.format(i) for i in self.tune_data['baseline']]) + \ - ''.join([', {}: {:.4f}'.format(x,y) for x,y in zip( \ - self.objectives.representation, self.baseline[1]) if x != 'Accuracy']) + ']' - else: # pragma: no cover + self.tune_data["Weighted accuracy"] = [ + np.mean(np.array(self.tune_data["baseline"]) * self.metric_weight) + ] + self.tune_data["baseline"] = self.tune_data["Weighted accuracy"] + baseline_msg = ( + "[Accuracy:" + + "".join([" {:.4f}".format(i) for i in self.tune_data["baseline"]]) + + "".join( + [ + ", {}: {:.4f}".format(x, y) + for x, y in zip(self.objectives.representation, self.baseline[1]) + if x != "Accuracy" + ] + ) + + "]" + ) + else: # pragma: no cover if self.metric_weight: - self.tune_data['Weighted accuracy'] = ['n/a'] - self.tune_data['baseline'] = ['n/a'] + self.tune_data["Weighted accuracy"] = ["n/a"] + self.tune_data["baseline"] = ["n/a"] - for name, data in zip(self.metric_name, self.tune_data['baseline']): - self.tune_data[name] = ['n/a'] - baseline_msg = 'n/a' + for name, data 
in zip(self.metric_name, self.tune_data["baseline"]): + self.tune_data[name] = ["n/a"] + baseline_msg = "n/a" logger.info("FP32 baseline is: {}".format(baseline_msg)) def initial_best_acc(self): @@ -644,10 +677,11 @@ def initial_best_acc(self): The initial value of best accuracy. """ if len(self.metric_name) == 1 or self.metric_weight is not None: - best_acc = float('-inf') if self.higher_is_better else float('inf') + best_acc = float("-inf") if self.higher_is_better else float("inf") else: - best_acc = [float('-inf') if higher_is_better else float('inf') for \ - higher_is_better in self.metric_criterion] + best_acc = [ + float("-inf") if higher_is_better else float("inf") for higher_is_better in self.metric_criterion + ] return best_acc def _tune_cfg_converter(self, op_tuning_cfg): @@ -656,48 +690,50 @@ def _tune_cfg_converter(self, op_tuning_cfg): Args: op_tuning_cfg (Dict): the op tuning config. """ - tune_cfg = {'op': OrderedDict()} + tune_cfg = {"op": OrderedDict()} for op_name_type, op_config in op_tuning_cfg.items(): if isinstance(op_config, OpTuningConfig): - tune_cfg['op'][op_name_type] = op_config.get_state() - op_cap_lst = self.capability['opwise'][op_name_type] + tune_cfg["op"][op_name_type] = op_config.get_state() + op_cap_lst = self.capability["opwise"][op_name_type] # Add pattern for diagnosis for op_cap in op_cap_lst: - if 'pattern' in op_cap: + if "pattern" in op_cap: op_pattern = {} - op_pattern['sequence'] = op_cap['pattern']['sequence'][0] if\ - 'sequence' in op_cap['pattern'] else None - op_pattern['precision'] = op_cap['pattern']['precision'][0] if\ - 'precision' in op_cap['pattern'] else None - tune_cfg['op'][op_name_type]['pattern'] = op_pattern + op_pattern["sequence"] = ( + op_cap["pattern"]["sequence"][0] if "sequence" in op_cap["pattern"] else None + ) + op_pattern["precision"] = ( + op_cap["pattern"]["precision"][0] if "precision" in op_cap["pattern"] else None + ) + tune_cfg["op"][op_name_type]["pattern"] = op_pattern else: tune_cfg[op_name_type] = op_config - tune_cfg['calib_sampling_size'] = op_tuning_cfg['calib_sampling_size'] + tune_cfg["calib_sampling_size"] = op_tuning_cfg["calib_sampling_size"] if self.calib_dataloader is not None: # For the accelerate's DataLoaderShard, use total_batch_size instead of batch_size - bs = getattr(self.calib_dataloader, 'batch_size') or getattr(self.calib_dataloader, 'total_batch_size') + bs = getattr(self.calib_dataloader, "batch_size") or getattr(self.calib_dataloader, "total_batch_size") assert bs > 0, f"Calibration dataloader's batch size should be greater than one but got {bs}" - tune_cfg['calib_iteration'] = math.ceil(int(tune_cfg['calib_sampling_size']) / bs) + tune_cfg["calib_iteration"] = math.ceil(int(tune_cfg["calib_sampling_size"]) / bs) else: - tune_cfg['calib_iteration'] = 1 - tune_cfg['advance'] = self.cfg.quantization.advance - tune_cfg['approach'] = self.cfg.quantization.approach + tune_cfg["calib_iteration"] = 1 + tune_cfg["advance"] = self.cfg.quantization.advance + tune_cfg["approach"] = self.cfg.quantization.approach # Add the recipe config - tune_cfg['recipe_cfgs'] = tune_cfg.get('recipe_cfgs', {}) + tune_cfg["recipe_cfgs"] = tune_cfg.get("recipe_cfgs", {}) # For not tuning recipe, tune cfg use it directly - tune_cfg['recipe_cfgs'].update(self._not_tuning_recipes_values) + tune_cfg["recipe_cfgs"].update(self._not_tuning_recipes_values) # WA for get the smooth quant args - if 'smooth_quant_args' in self.cfg_bk.quantization.recipes: - tune_cfg['recipe_cfgs']['smooth_quant_args'] = 
self.cfg_bk.quantization.recipes['smooth_quant_args'] + if "smooth_quant_args" in self.cfg_bk.quantization.recipes: + tune_cfg["recipe_cfgs"]["smooth_quant_args"] = self.cfg_bk.quantization.recipes["smooth_quant_args"] # For tuning recipe, use the default value if it not specified by recipe tuning sampler. for recipe_name, recipe_val in self._tuning_recipes_default_values.items(): - if recipe_name not in tune_cfg['recipe_cfgs']: - tune_cfg['recipe_cfgs'][recipe_name] = recipe_val + if recipe_name not in tune_cfg["recipe_cfgs"]: + tune_cfg["recipe_cfgs"][recipe_name] = recipe_val return tune_cfg def set_tuning_space(self, conf): """Create the tuning space. - + Create the tuning space based on the framework capability and user configuration. Args: @@ -707,16 +743,13 @@ def set_tuning_space(self, conf): calib_sampling_size_lst = [int(calib_sampling_size) for calib_sampling_size in calib_sampling_size_lst] if self.calib_dataloader: # For the accelerate's DataLoaderShard, use total_batch_size instead of batch_size - bs = getattr(self.calib_dataloader, 'batch_size') or getattr(self.calib_dataloader, 'total_batch_size') + bs = getattr(self.calib_dataloader, "batch_size") or getattr(self.calib_dataloader, "total_batch_size") assert bs > 0, f"Calibration dataloader's batch size should be greater than one but got {bs}" self.calib_iter = [math.ceil(int(x) / bs) for x in calib_sampling_size_lst] else: self.calib_iter = 1 # create tuning space - adaptor_cap = { - 'calib': {'calib_sampling_size': calib_sampling_size_lst}, - 'op': self.capability['opwise'] - } + adaptor_cap = {"calib": {"calib_sampling_size": calib_sampling_size_lst}, "op": self.capability["opwise"]} self.tuning_space = TuningSpace(adaptor_cap, conf=conf, framework=self.framework) def setup_resume(self, resume): @@ -727,18 +760,16 @@ def setup_resume(self, resume): """ self.__dict__.update(resume) for history in self.tuning_history: - if self._same_yaml(history['cfg'], self.cfg): - self.__dict__.update({k: v for k, v in history.items() \ - if k not in ['version', 'history']}) + if self._same_yaml(history["cfg"], self.cfg): + self.__dict__.update({k: v for k, v in history.items() if k not in ["version", "history"]}) logger.info("Start to resume tuning process.") # resume the best tuning model if needed try: - index = history['id'] - 1 - resume_tuning_cfg = history['history'][index]['tune_cfg'] - self.best_qmodel = self.adaptor.quantize(resume_tuning_cfg, - self.model, - self.calib_dataloader, - self.q_func) + index = history["id"] - 1 + resume_tuning_cfg = history["history"][index]["tune_cfg"] + self.best_qmodel = self.adaptor.quantize( + resume_tuning_cfg, self.model, self.calib_dataloader, self.q_func + ) except: logger.debug("Can not resume the best quantize model from history.") @@ -746,123 +777,141 @@ def setup_resume(self, resume): def set_q_func(self): """Set the training function for quantization aware training.""" - if self.q_func == None and self.cfg.quantization.approach == 'quant_aware_training': + if self.q_func is None and self.cfg.quantization.approach == "quant_aware_training": train_cfg = self.cfg.quantization.train - assert train_cfg, "train field of quantization section in yaml file must " \ - "be configured for quantization aware training if q_func is NOT set." - assert self.calib_dataloader, "dataloader field of train field of quantization " \ - "section in yaml file must be configured." 
- self.q_func = create_train_func(self.framework, self.calib_dataloader, \ - self.adaptor, train_cfg, hooks=self.q_hooks) + assert train_cfg, ( + "train field of quantization section in yaml file must " + "be configured for quantization aware training if q_func is NOT set." + ) + assert self.calib_dataloader, ( + "dataloader field of train field of quantization " "section in yaml file must be configured." + ) + self.q_func = create_train_func( + self.framework, self.calib_dataloader, self.adaptor, train_cfg, hooks=self.q_hooks + ) def _create_path(self, custom_path, filename): - new_path = os.path.join(os.path.abspath(os.path.expanduser(custom_path)),filename) + new_path = os.path.join(os.path.abspath(os.path.expanduser(custom_path)), filename) path = Path(os.path.dirname(new_path)) path.mkdir(exist_ok=True, parents=True) return new_path def _set_framework_info(self, q_dataloader, q_func=None): - framework_specific_info = {'device': self.cfg.device, - 'approach': self.cfg.quantization.approach, - 'random_seed': self.cfg.tuning.random_seed, - 'performance_only': self.cfg.tuning.exit_policy.performance_only,} + framework_specific_info = { + "device": self.cfg.device, + "approach": self.cfg.quantization.approach, + "random_seed": self.cfg.tuning.random_seed, + "performance_only": self.cfg.tuning.exit_policy.performance_only, + } framework = self.cfg.model.framework.lower() - framework_specific_info.update({'backend': self.cfg.model.get('backend', 'default')}) - framework_specific_info.update({'format': self.cfg.model.get('quant_format', 'default')}) - framework_specific_info.update({'domain': self.cfg.model.get('domain', 'auto')}) + framework_specific_info.update({"backend": self.cfg.model.get("backend", "default")}) + framework_specific_info.update({"format": self.cfg.model.get("quant_format", "default")}) + framework_specific_info.update({"domain": self.cfg.model.get("domain", "auto")}) - self.mixed_precision_mode = bool('mixed_precision' in self.cfg) or \ - bool('graph_optimization' in self.cfg) + self.mixed_precision_mode = bool("mixed_precision" in self.cfg) or bool("graph_optimization" in self.cfg) - if 'tensorflow' in framework: + if "tensorflow" in framework: + framework_specific_info.update( + { + "inputs": self.cfg.model.inputs, + "outputs": self.cfg.model.outputs, + "workspace_path": self.cfg.tuning.workspace.path, + "recipes": self.cfg.quantization.recipes, + "use_bf16": self.cfg.use_bf16 if self.cfg.use_bf16 is not None else False, + } + ) + for item in ["scale_propagation_max_pooling", "scale_propagation_concat"]: + if item not in framework_specific_info["recipes"]: + framework_specific_info["recipes"].update({item: True}) + if self.cfg.model.backend == "itex": + self.cfg.model.framework = "tensorflow_itex" + framework = "tensorflow_itex" + if "keras" in framework: framework_specific_info.update( - {"inputs": self.cfg.model.inputs, - "outputs": self.cfg.model.outputs, - 'workspace_path': self.cfg.tuning.workspace.path, - 'recipes': self.cfg.quantization.recipes, - 'use_bf16': self.cfg.use_bf16 if self.cfg.use_bf16 is not None else False}) - for item in ['scale_propagation_max_pooling', 'scale_propagation_concat']: - if item not in framework_specific_info['recipes']: - framework_specific_info['recipes'].update({item: True}) - if self.cfg.model.backend == 'itex': - self.cfg.model.framework = 'tensorflow_itex' - framework = 'tensorflow_itex' - if 'keras' in framework: - framework_specific_info.update({ - 'workspace_path': self.cfg.tuning.workspace.path, }) - if framework == 
'mxnet': + { + "workspace_path": self.cfg.tuning.workspace.path, + } + ) + if framework == "mxnet": framework_specific_info.update({"q_dataloader": q_dataloader}) - if 'onnx' in framework.lower(): + if "onnx" in framework.lower(): if self.mixed_precision_mode: framework_specific_info.update({"approach": "post_training_dynamic_quant"}) framework_specific_info.update({"deploy_path": os.path.dirname(self.deploy_path)}) - framework_specific_info.update({'workspace_path': self.cfg.tuning.workspace.path}) - framework_specific_info.update({'recipes': self.cfg.quantization.recipes}) - framework_specific_info.update({'reduce_range': self.cfg.reduce_range}) - framework_specific_info.update({'recipes': self.cfg.quantization.get('recipes', {})}) - if framework.lower() == 'onnxrt_qdq' or \ - framework_specific_info['backend'] == 'onnxrt_trt_ep': - framework_specific_info.update({'format': 'QDQ'}) - framework = 'onnxrt_qdq' - if framework == 'pytorch_ipex' or framework == 'pytorch' or framework == 'pytorch_fx': - if self.cfg.model.backend == 'ipex': - self.cfg.model.framework = 'pytorch_ipex' - framework = 'pytorch_ipex' - elif self.cfg.model.backend == 'default': - self.cfg.model.framework = 'pytorch_fx' - framework = 'pytorch_fx' + framework_specific_info.update({"workspace_path": self.cfg.tuning.workspace.path}) + framework_specific_info.update({"recipes": self.cfg.quantization.recipes}) + framework_specific_info.update({"reduce_range": self.cfg.reduce_range}) + framework_specific_info.update({"recipes": self.cfg.quantization.get("recipes", {})}) + if framework.lower() == "onnxrt_qdq" or framework_specific_info["backend"] == "onnxrt_trt_ep": + framework_specific_info.update({"format": "QDQ"}) + framework = "onnxrt_qdq" + if framework == "pytorch_ipex" or framework == "pytorch" or framework == "pytorch_fx": + if self.cfg.model.backend == "ipex": + self.cfg.model.framework = "pytorch_ipex" + framework = "pytorch_ipex" + elif self.cfg.model.backend == "default": + self.cfg.model.framework = "pytorch_fx" + framework = "pytorch_fx" if self.mixed_precision_mode: framework_specific_info.update({"approach": "post_training_dynamic_quant"}) framework_specific_info.update({"q_dataloader": q_dataloader}) - framework_specific_info.update({"use_bf16": self.cfg.use_bf16 \ - if self.cfg.use_bf16 is not None else True}) - framework_specific_info.update( - {"workspace_path": os.path.dirname(self.deploy_path)}) - if self.cfg['quantization']['op_wise'] is not None \ - and 'default_qconfig' in self.cfg['quantization']['op_wise']: + framework_specific_info.update({"use_bf16": self.cfg.use_bf16 if self.cfg.use_bf16 is not None else True}) + framework_specific_info.update({"workspace_path": os.path.dirname(self.deploy_path)}) + if ( + self.cfg["quantization"]["op_wise"] is not None + and "default_qconfig" in self.cfg["quantization"]["op_wise"] + ): framework_specific_info.update( - {"default_qconfig": self.cfg['quantization']['op_wise']['default_qconfig']}) + {"default_qconfig": self.cfg["quantization"]["op_wise"]["default_qconfig"]} + ) framework_specific_info.update({"q_func": q_func}) framework_specific_info.update({"example_inputs": self.cfg.quantization.example_inputs}) return framework, framework_specific_info def _set_objectives(self): self.higher_is_better = bool(self.cfg.tuning.accuracy_criterion.higher_is_better) - self.use_multi_objective = deep_get(self.cfg, 'tuning.multi_objectives') and \ - len(self.cfg.tuning.multi_objectives.objective) > 1 - objectives = [i.lower() for i in 
self.cfg.tuning.multi_objectives.objective] if \ - self.use_multi_objective else [self.cfg.tuning.objective.lower()] - self.metric_weight = deep_get(self.cfg, 'evaluation.accuracy.multi_metrics.weight') - self.metric_name = ['Accuracy'] if \ - not deep_get(self.cfg, 'evaluation.accuracy.multi_metrics') else \ - self.cfg.evaluation.accuracy.multi_metrics.keys()-{'weight','higher_is_better'} + self.use_multi_objective = ( + deep_get(self.cfg, "tuning.multi_objectives") and len(self.cfg.tuning.multi_objectives.objective) > 1 + ) + objectives = ( + [i.lower() for i in self.cfg.tuning.multi_objectives.objective] + if self.use_multi_objective + else [self.cfg.tuning.objective.lower()] + ) + self.metric_weight = deep_get(self.cfg, "evaluation.accuracy.multi_metrics.weight") + self.metric_name = ( + ["Accuracy"] + if not deep_get(self.cfg, "evaluation.accuracy.multi_metrics") + else self.cfg.evaluation.accuracy.multi_metrics.keys() - {"weight", "higher_is_better"} + ) if len(self.metric_name) == 1: self.metric_criterion = [self.higher_is_better] - elif not deep_get(self.cfg, 'evaluation.accuracy.multi_metrics.higher_is_better'): + elif not deep_get(self.cfg, "evaluation.accuracy.multi_metrics.higher_is_better"): # default is True self.metric_criterion = [True] * len(self.metric_name) else: - self.metric_criterion = \ - deep_get(self.cfg, 'evaluation.accuracy.multi_metrics.higher_is_better') + self.metric_criterion = deep_get(self.cfg, "evaluation.accuracy.multi_metrics.higher_is_better") - self.objectives = MultiObjective(objectives, - self.cfg.tuning.accuracy_criterion, - self.metric_criterion, - self.metric_weight, - deep_get(self.cfg, 'tuning.multi_objectives.higher_is_better'), - deep_get(self.cfg, 'tuning.multi_objectives.weight')) + self.objectives = MultiObjective( + objectives, + self.cfg.tuning.accuracy_criterion, + self.metric_criterion, + self.metric_weight, + deep_get(self.cfg, "tuning.multi_objectives.higher_is_better"), + deep_get(self.cfg, "tuning.multi_objectives.weight"), + ) def _same_yaml(self, src_yaml, dst_yaml): """Check if the two yamls are the same. - - The check will exclude those keys which do not really impact the tuning result, such as + + The check will exclude those keys which do not really impact the tuning result, such as tensorboard, workspace, resume options under the tuning section of YAML. 
""" - if equal_dicts(src_yaml, dst_yaml, ignore_keys=['tuning']) and \ - equal_dicts(src_yaml.tuning, src_yaml.tuning, compare_keys=['objective', - 'accuracy_criterion', - 'random_seed', - 'exit_policy']): + if equal_dicts(src_yaml, dst_yaml, ignore_keys=["tuning"]) and equal_dicts( + src_yaml.tuning, + src_yaml.tuning, + compare_keys=["objective", "accuracy_criterion", "random_seed", "exit_policy"], + ): return True return False @@ -879,20 +928,26 @@ def update_best_op_tuning_cfg(self, op_tuning_cfg): acc, _ = self.last_tune_result if self.cur_best_tuning_cfg is None: self.cur_best_tuning_cfg = copy.deepcopy(op_tuning_cfg) - if not isinstance(acc, list) and ((self.higher_is_better and acc >= self.cur_best_acc) \ - or (not self.higher_is_better and acc <= self.cur_best_acc)): + if not isinstance(acc, list) and ( + (self.higher_is_better and acc >= self.cur_best_acc) + or (not self.higher_is_better and acc <= self.cur_best_acc) + ): self.cur_best_acc = acc self.cur_best_tuning_cfg = copy.deepcopy(op_tuning_cfg) elif len(self.metric_name) > 1 and self.metric_weight is not None: acc = np.mean(np.array(acc) * self.metric_weight) - if (self.higher_is_better and acc >= self.cur_best_acc) or \ - (not self.higher_is_better and acc <= self.cur_best_acc): + if (self.higher_is_better and acc >= self.cur_best_acc) or ( + not self.higher_is_better and acc <= self.cur_best_acc + ): self.cur_best_acc = acc self.cur_best_tuning_cfg = copy.deepcopy(op_tuning_cfg) elif len(self.metric_name) > 1 and self.metric_weight is None: - if all([acc_i >= best_i if higher_is_better else acc_i <= best_i for \ - acc_i, best_i, higher_is_better in \ - zip(acc, self.cur_best_acc, self.metric_criterion)]): + if all( + [ + acc_i >= best_i if higher_is_better else acc_i <= best_i + for acc_i, best_i, higher_is_better in zip(acc, self.cur_best_acc, self.metric_criterion) + ] + ): self.cur_best_acc = acc self.cur_best_tuning_cfg = copy.deepcopy(op_tuning_cfg) logger.debug(f"Best acc is {self.cur_best_acc}.") @@ -900,38 +955,38 @@ def update_best_op_tuning_cfg(self, op_tuning_cfg): def deploy_config(self): """Save the configuration locally for deployment.""" - acc_dataloader_cfg = deep_get(self.cfg, 'evaluation.accuracy.dataloader') - perf_dataloader_cfg = deep_get(self.cfg, 'evaluation.performance.dataloader') + acc_dataloader_cfg = deep_get(self.cfg, "evaluation.accuracy.dataloader") + perf_dataloader_cfg = deep_get(self.cfg, "evaluation.performance.dataloader") # use acc dataloader if perf dataloader is not configured if perf_dataloader_cfg is None: perf_dataloader_cfg = acc_dataloader_cfg self.deploy_cfg = OrderedDict() # int8 dataloader graph transform - if deep_get(perf_dataloader_cfg, 'transform.QuantizedInput') is not None \ - or deep_get(acc_dataloader_cfg, 'transform.QuantizedInput') is not None: + if ( + deep_get(perf_dataloader_cfg, "transform.QuantizedInput") is not None + or deep_get(acc_dataloader_cfg, "transform.QuantizedInput") is not None + ): self.best_qmodel, scale = self.adaptor.quantize_input(self.best_qmodel) - deep_set(perf_dataloader_cfg, 'transform.QuantizedInput.dtype', 'int8') - deep_set(perf_dataloader_cfg, 'transform.QuantizedInput.scale', scale) - deep_set(acc_dataloader_cfg, 'transform.QuantizedInput.dtype', 'int8') - deep_set(acc_dataloader_cfg, 'transform.QuantizedInput.scale', scale) + deep_set(perf_dataloader_cfg, "transform.QuantizedInput.dtype", "int8") + deep_set(perf_dataloader_cfg, "transform.QuantizedInput.scale", scale) + deep_set(acc_dataloader_cfg, "transform.QuantizedInput.dtype", 
"int8") + deep_set(acc_dataloader_cfg, "transform.QuantizedInput.scale", scale) - self.deploy_cfg['model'] = self.cfg.model - self.deploy_cfg['device'] = self.cfg.device + self.deploy_cfg["model"] = self.cfg.model + self.deploy_cfg["device"] = self.cfg.device if self.cfg.evaluation is not None: - deep_set(self.cfg, 'evaluation.performance.dataloader',\ - perf_dataloader_cfg) - deep_set(self.cfg, 'evaluation.accuracy.dataloader', \ - acc_dataloader_cfg) - self.deploy_cfg['evaluation'] = self.cfg.evaluation + deep_set(self.cfg, "evaluation.performance.dataloader", perf_dataloader_cfg) + deep_set(self.cfg, "evaluation.accuracy.dataloader", acc_dataloader_cfg) + self.deploy_cfg["evaluation"] = self.cfg.evaluation def setup_yaml(): - represent_dict_order = lambda self, \ - data: self.represent_mapping('tag:yaml.org,2002:map', data.items()) + represent_dict_order = lambda self, data: self.represent_mapping("tag:yaml.org,2002:map", data.items()) yaml.add_representer(OrderedDict, represent_dict_order) yaml.add_representer(DotDict, represent_dict_order) + setup_yaml() - with open(self.deploy_path, 'w+') as f: + with open(self.deploy_path, "w+") as f: yaml.dump(self.deploy_cfg, f) logger.info("Save deploy yaml to {}".format(self.deploy_path)) @@ -958,66 +1013,74 @@ def _evaluate(self, model): # Pytorch can insert observer to model in this hook. # Tensorflow don't support this mode for now model = self.adaptor._pre_eval_hook(model) - val = self.objectives.evaluate( - self.eval_func, model if self.framework == "pytorch_ipex" else model.model - ) + val = self.objectives.evaluate(self.eval_func, model if self.framework == "pytorch_ipex" else model.model) if self.cfg.tuning.tensorboard: # post_eval_hook to deal the tensor self.adaptor._post_eval_hook(model, accuracy=val[0]) else: - assert self.cfg.evaluation and self.cfg.evaluation.accuracy and \ - (self.cfg.evaluation.accuracy.metric or \ - self.cfg.evaluation.accuracy.multi_metrics), \ - "metric or multi_metrics field of accuracy field of evaluation" \ - " section should not be empty" + assert ( + self.cfg.evaluation + and self.cfg.evaluation.accuracy + and (self.cfg.evaluation.accuracy.metric or self.cfg.evaluation.accuracy.multi_metrics) + ), ("metric or multi_metrics field of accuracy field of evaluation" " section should not be empty") postprocess_cfg = self.cfg.evaluation.accuracy.postprocess - metric_cfg = self.cfg.evaluation.accuracy.metric if \ - self.cfg.evaluation.accuracy.metric else \ - self.cfg.evaluation.accuracy.multi_metrics - iteration = -1 if self.cfg.evaluation.accuracy.iteration is None \ - else self.cfg.evaluation.accuracy.iteration - eval_func = create_eval_func(self.framework, + metric_cfg = ( + self.cfg.evaluation.accuracy.metric + if self.cfg.evaluation.accuracy.metric + else self.cfg.evaluation.accuracy.multi_metrics + ) + iteration = -1 if self.cfg.evaluation.accuracy.iteration is None else self.cfg.evaluation.accuracy.iteration + eval_func = create_eval_func( + self.framework, self.eval_dataloader, self.adaptor, metric_cfg, postprocess_cfg, iteration, - tensorboard = self.cfg.tuning.tensorboard, - fp32_baseline = self.baseline == None) + tensorboard=self.cfg.tuning.tensorboard, + fp32_baseline=self.baseline is None, + ) - if getattr(self.eval_dataloader, 'distributed', False): - if 'tensorflow' in self.framework: + if getattr(self.eval_dataloader, "distributed", False): + if "tensorflow" in self.framework: import horovod.tensorflow as hvd - elif self.framework in ['pytorch_ipex','pytorch','pytorch_fx']: + elif self.framework 
in ["pytorch_ipex", "pytorch", "pytorch_fx"]: import horovod.torch as hvd else: - raise NotImplementedError("Currently only TensorFlow and PyTorch " - "support distributed inference in PTQ.") + raise NotImplementedError( + "Currently only TensorFlow and PyTorch " "support distributed inference in PTQ." + ) hvd.init() try: len_dataloader = len(self.eval_dataloader) except: - logger.info("The length of the distributed dataloader is unknown." - "When the iteration of evaluation dataloader in each " - "process is inconsistent, an error may occur.") + logger.info( + "The length of the distributed dataloader is unknown." + "When the iteration of evaluation dataloader in each " + "process is inconsistent, an error may occur." + ) else: list_len_dataloader = hvd.allgather_object(len_dataloader) if hvd.rank() == 0: - for i in range(len(list_len_dataloader)-1): - if list_len_dataloader[i] != list_len_dataloader[i+1]: - raise AttributeError("The evaluation dataloader's iteration is" - "different between processes, please reset " - "dataloader's batch_size.") + for i in range(len(list_len_dataloader) - 1): + if list_len_dataloader[i] != list_len_dataloader[i + 1]: + raise AttributeError( + "The evaluation dataloader's iteration is" + "different between processes, please reset " + "dataloader's batch_size." + ) val = self.objectives.evaluate(eval_func, model) if isinstance(val[0], list): - assert all([np.isscalar(i) for i in val[0]]), \ - "The eval_func should return a scalar or list of scalar, " \ - "but not {}!".format(str([type(i) for i in val[0]])) + assert all( + [np.isscalar(i) for i in val[0]] + ), "The eval_func should return a scalar or list of scalar, " "but not {}!".format( + str([type(i) for i in val[0]]) + ) else: - assert np.isscalar(val[0]), \ - "The eval_func should return a scalar or list of scalar, " \ - "but not {}!".format(str(type(val[0]))) + assert np.isscalar(val[0]), "The eval_func should return a scalar or list of scalar, " "but not {}!".format( + str(type(val[0])) + ) return val @@ -1027,7 +1090,7 @@ def __getstate__(self): Returns: dict: Saved dict for resuming """ - return {'tuning_history': self.tuning_history} + return {"tuning_history": self.tuning_history} def __setstate__(self, d): """Magic method for pickle loading. @@ -1039,15 +1102,16 @@ def __setstate__(self, d): def stop(self, timeout, trials_count): """Check if need to stop traverse. - + Check if need to stop traversing the tuning space, either accuracy goal is met or timeout is reach. 
Returns: bool: True if need stop, otherwise False """ need_stop = False - if self.cfg.tuning.exit_policy.performance_only or \ - self.objectives.compare(self.best_tune_result, self.baseline): + if self.cfg.tuning.exit_policy.performance_only or self.objectives.compare( + self.best_tune_result, self.baseline + ): self.best_tune_result = self.last_tune_result self.best_qmodel = self.last_qmodel self.best_tuning_cfg = copy.deepcopy(self.last_tune_cfg) @@ -1056,18 +1120,19 @@ def stop(self, timeout, trials_count): self.metric_met_point = self.tuning_times # track the model with highest acc - if self.best_tune_result and self.last_tune_result: # (acc, [perf]) + if self.best_tune_result and self.last_tune_result: # (acc, [perf]) if self.re_quant and self.objectives.accuracy_meets(): self.best_tune_result = self.last_tune_result self.best_qmodel = self.last_qmodel self.best_tuning_cfg = copy.deepcopy(self.last_tune_cfg) logger.debug(f"*** Update the best qmodel with the result {self.best_tune_result}.") else: - logger.debug(f"*** Accuracy not meets the requirements, do not update the best qmodel.") + logger.debug("*** Accuracy not meets the requirements, do not update the best qmodel.") if self.last_tune_result: - last_tune = self.last_tune_result[0] if \ - isinstance(self.last_tune_result[0], list) else [self.last_tune_result[0]] + last_tune = ( + self.last_tune_result[0] if isinstance(self.last_tune_result[0], list) else [self.last_tune_result[0]] + ) for name, data in zip(self.metric_name, last_tune): if len(self.tune_data[name]) == 1: @@ -1078,31 +1143,39 @@ def stop(self, timeout, trials_count): if self.metric_weight and len(last_tune) > 1: weighted_acc = np.mean(np.array(last_tune) * self.metric_weight) - if len(self.tune_data['Weighted accuracy']) == 1: - self.tune_data['Weighted accuracy'].append(weighted_acc) + if len(self.tune_data["Weighted accuracy"]) == 1: + self.tune_data["Weighted accuracy"].append(weighted_acc) else: - self.tune_data['Weighted accuracy'][1] = weighted_acc + self.tune_data["Weighted accuracy"][1] = weighted_acc last_tune = [weighted_acc] - last_tune_msg = '[Accuracy (int8|fp32):' + \ - ''.join([' {:.4f}|{:.4f}'.format(last, base) for last, base in \ - zip(last_tune, self.tune_data['baseline'])]) + \ - ''.join([', {} (int8|fp32): {:.4f}|{:.4f}'.format( \ - x, y, z) for x, y, z in zip( \ - self.objectives.representation, self.last_tune_result[1], self.baseline[1]) \ - if x != 'Accuracy']) + ']' - else: # pragma: no cover - last_tune_msg = 'n/a' - for name in self.tune_data.keys() - {'baseline'}: + last_tune_msg = ( + "[Accuracy (int8|fp32):" + + "".join( + [" {:.4f}|{:.4f}".format(last, base) for last, base in zip(last_tune, self.tune_data["baseline"])] + ) + + "".join( + [ + ", {} (int8|fp32): {:.4f}|{:.4f}".format(x, y, z) + for x, y, z in zip(self.objectives.representation, self.last_tune_result[1], self.baseline[1]) + if x != "Accuracy" + ] + ) + + "]" + ) + else: # pragma: no cover + last_tune_msg = "n/a" + for name in self.tune_data.keys() - {"baseline"}: if len(self.tune_data[name]) == 1: - self.tune_data[name].append('n/a') + self.tune_data[name].append("n/a") else: - self.tune_data[name][1] = 'n/a' + self.tune_data[name][1] = "n/a" if self.best_tune_result: - best_tune = self.best_tune_result[0] if isinstance(self.best_tune_result[0], list) \ - else [self.best_tune_result[0]] + best_tune = ( + self.best_tune_result[0] if isinstance(self.best_tune_result[0], list) else [self.best_tune_result[0]] + ) for name, data in zip(self.metric_name, best_tune): if 
len(self.tune_data[name]) == 2: @@ -1113,49 +1186,69 @@ def stop(self, timeout, trials_count): if self.metric_weight and len(best_tune) > 1: weighted_acc = np.mean(np.array(best_tune) * self.metric_weight) - if len(self.tune_data['Weighted accuracy']) == 2: - self.tune_data['Weighted accuracy'].append(weighted_acc) - else: # pragma: no cover - self.tune_data['Weighted accuracy'][2] = weighted_acc + if len(self.tune_data["Weighted accuracy"]) == 2: + self.tune_data["Weighted accuracy"].append(weighted_acc) + else: # pragma: no cover + self.tune_data["Weighted accuracy"][2] = weighted_acc best_tune = [weighted_acc] - best_tune_msg = '[Accuracy:' + ''.join([' {:.4f}'.format(best) \ - for best in best_tune]) + ''.join([', {}: {:.4f}'.format(x,y) \ - for x,y in zip(self.objectives.representation, \ - self.best_tune_result[1]) if x != 'Accuracy']) + ']' + best_tune_msg = ( + "[Accuracy:" + + "".join([" {:.4f}".format(best) for best in best_tune]) + + "".join( + [ + ", {}: {:.4f}".format(x, y) + for x, y in zip(self.objectives.representation, self.best_tune_result[1]) + if x != "Accuracy" + ] + ) + + "]" + ) else: - best_tune_msg = 'n/a' - for name in self.tune_data.keys() - {'baseline'}: + best_tune_msg = "n/a" + for name in self.tune_data.keys() - {"baseline"}: if len(self.tune_data[name]) == 2: - self.tune_data[name].append('n/a') + self.tune_data[name].append("n/a") else: - self.tune_data[name][2] = 'n/a' - - logger.info("Tune {} result is: {}, Best tune result is: {}".format(trials_count, - last_tune_msg, - best_tune_msg)) - output_data = [[info_type, - '{:.4f} '.format(self.tune_data[info_type][0]) if \ - not isinstance(self.tune_data[info_type][0], str) else self.tune_data[info_type][0], - '{:.4f} '.format(self.tune_data[info_type][1]) if \ - not isinstance(self.tune_data[info_type][1], str) else self.tune_data[info_type][1], - '{:.4f} '.format(self.tune_data[info_type][2]) if \ - not isinstance(self.tune_data[info_type][2], str) else self.tune_data[info_type][2]] \ - for info_type in self.tune_data.keys() if info_type != 'baseline'] - - output_data.extend([[obj, - '{:.4f} '.format(self.baseline[1][i]) if self.baseline else 'n/a', - '{:.4f} '.format(self.last_tune_result[1][i]) if self.last_tune_result else 'n/a', - '{:.4f} '.format(self.best_tune_result[1][i]) if self.best_tune_result else 'n/a'] \ - for i, obj in enumerate(self.objectives.representation)]) + self.tune_data[name][2] = "n/a" + + logger.info("Tune {} result is: {}, Best tune result is: {}".format(trials_count, last_tune_msg, best_tune_msg)) + output_data = [ + [ + info_type, + "{:.4f} ".format(self.tune_data[info_type][0]) + if not isinstance(self.tune_data[info_type][0], str) + else self.tune_data[info_type][0], + "{:.4f} ".format(self.tune_data[info_type][1]) + if not isinstance(self.tune_data[info_type][1], str) + else self.tune_data[info_type][1], + "{:.4f} ".format(self.tune_data[info_type][2]) + if not isinstance(self.tune_data[info_type][2], str) + else self.tune_data[info_type][2], + ] + for info_type in self.tune_data.keys() + if info_type != "baseline" + ] + + output_data.extend( + [ + [ + obj, + "{:.4f} ".format(self.baseline[1][i]) if self.baseline else "n/a", + "{:.4f} ".format(self.last_tune_result[1][i]) if self.last_tune_result else "n/a", + "{:.4f} ".format(self.best_tune_result[1][i]) if self.best_tune_result else "n/a", + ] + for i, obj in enumerate(self.objectives.representation) + ] + ) self.tuning_result_data = output_data - Statistics(output_data, - header='Tune Result Statistics', - 
field_names=['Info Type', 'Baseline', 'Tune {} result'.format(trials_count), \ - 'Best tune result']).print_stat() - + Statistics( + output_data, + header="Tune Result Statistics", + field_names=["Info Type", "Baseline", "Tune {} result".format(trials_count), "Best tune result"], + ).print_stat() if self.cfg.tuning.exit_policy.performance_only: need_stop = True @@ -1186,9 +1279,9 @@ def _find_tuning_history(self, tune_cfg): for tuning_history in self.tuning_history: # only check if a tune_cfg is evaluated under same yam config, excluding # some fields in tuning section of yaml, such as tensorboard, snapshot, resume. - if self._same_yaml(tuning_history['cfg'], self.cfg): - for history in tuning_history['history']: - if history and history['tune_cfg'] == tune_cfg: + if self._same_yaml(tuning_history["cfg"], self.cfg): + for history in tuning_history["history"]: + if history and history["tune_cfg"] == tune_cfg: return tuning_history return None @@ -1202,9 +1295,9 @@ def _find_history(self, tune_cfg): for tuning_history in self.tuning_history: # only check if a tune_cfg is evaluated under same yam config, excluding # some fields in tuning section of yaml, such as tensorboard, snapshot, resume. - if self._same_yaml(tuning_history['cfg'], self.cfg): - for history in tuning_history['history']: - if history and history['tune_cfg'] == tune_cfg: + if self._same_yaml(tuning_history["cfg"], self.cfg): + for history in tuning_history["history"]: + if history and history["tune_cfg"] == tune_cfg: return history return None @@ -1217,7 +1310,7 @@ def _find_self_tuning_history(self): for tuning_history in self.tuning_history: # only check if a tune_cfg is evaluated under same yam config, excluding # some fields in tuning section of yaml, such as tensorboard, snapshot, resume. - if self._same_yaml(tuning_history['cfg'], self.cfg): + if self._same_yaml(tuning_history["cfg"], self.cfg): return tuning_history return None @@ -1228,28 +1321,28 @@ def _add_tuning_history(self, tune_cfg=None, tune_result=None, **kwargs): Note this record is added under same yaml config. 
""" found = False - d = {'tune_cfg': tune_cfg, 'tune_result': tune_result} + d = {"tune_cfg": tune_cfg, "tune_result": tune_result} for tuning_history in self.tuning_history: - if self._same_yaml(tuning_history['cfg'], self.cfg): + if self._same_yaml(tuning_history["cfg"], self.cfg): d.update(kwargs) - tuning_history['history'].append(d) - tuning_history['last_tune_result'] = self.last_tune_result - tuning_history['best_tune_result'] = self.best_tune_result - tuning_history['cfg'] = self.cfg + tuning_history["history"].append(d) + tuning_history["last_tune_result"] = self.last_tune_result + tuning_history["best_tune_result"] = self.best_tune_result + tuning_history["cfg"] = self.cfg found = True break if not found: tuning_history = {} - tuning_history['version'] = __version__ - tuning_history['cfg'] = self.cfg - tuning_history['baseline'] = self.baseline - tuning_history['last_tune_result'] = self.last_tune_result - tuning_history['best_tune_result'] = self.best_tune_result - tuning_history['history'] = [] + tuning_history["version"] = __version__ + tuning_history["cfg"] = self.cfg + tuning_history["baseline"] = self.baseline + tuning_history["last_tune_result"] = self.last_tune_result + tuning_history["best_tune_result"] = self.best_tune_result + tuning_history["history"] = [] if tune_cfg and tune_result: d.update(kwargs) - tuning_history['history'].append(d) + tuning_history["history"].append(d) self.tuning_history.append(tuning_history) self._save() @@ -1263,37 +1356,41 @@ def _collect_ops_by_quant_mode(self, tune_cfg, quant_mode): def _diagnosis(self): import logging + logger = logging.getLogger("neural_compressor") iteration_list = [1] - inspect_type = 'all' + inspect_type = "all" save_to_disk = True - save_path = os.path.join(options.workspace, 'inspect_saved') - inspect_node_lst, updated_cfg = self.adaptor.diagnosis_helper(self._fp32_model, - self.last_qmodel, - self.tune_cfg, - save_path = save_path) + save_path = os.path.join(options.workspace, "inspect_saved") + inspect_node_lst, updated_cfg = self.adaptor.diagnosis_helper( + self._fp32_model, self.last_qmodel, self.tune_cfg, save_path=save_path + ) op_list = [] if not op_list: op_list = list(inspect_node_lst) else: op_list = list(set(op_list).intersection(inspect_node_lst)) - logger.debug(f'*** Start to inspect tensor :{op_list} in fp32 model.') - self.adaptor.inspect_tensor(self._fp32_model, - dataloader=self.calib_dataloader, - op_list=op_list, - iteration_list=iteration_list, - inspect_type=inspect_type, - save_to_disk=save_to_disk, - save_path= save_path + '/fp32/', - quantization_cfg=updated_cfg) - - logger.debug(f'*** Start to inspect tensor :{op_list} in quantized model.') - self.adaptor.inspect_tensor(self.last_qmodel, - dataloader=self.calib_dataloader, - op_list=op_list, - iteration_list=iteration_list, - inspect_type=inspect_type, - save_to_disk=save_to_disk, - save_path= save_path + '/quan/', - quantization_cfg=updated_cfg) + logger.debug(f"*** Start to inspect tensor :{op_list} in fp32 model.") + self.adaptor.inspect_tensor( + self._fp32_model, + dataloader=self.calib_dataloader, + op_list=op_list, + iteration_list=iteration_list, + inspect_type=inspect_type, + save_to_disk=save_to_disk, + save_path=save_path + "/fp32/", + quantization_cfg=updated_cfg, + ) + + logger.debug(f"*** Start to inspect tensor :{op_list} in quantized model.") + self.adaptor.inspect_tensor( + self.last_qmodel, + dataloader=self.calib_dataloader, + op_list=op_list, + iteration_list=iteration_list, + inspect_type=inspect_type, + 
save_to_disk=save_to_disk, + save_path=save_path + "/quan/", + quantization_cfg=updated_cfg, + ) diff --git a/neural_compressor/experimental/strategy/utils/constant.py b/neural_compressor/experimental/strategy/utils/constant.py index 9cbeaa00859..44ac03e79be 100644 --- a/neural_compressor/experimental/strategy/utils/constant.py +++ b/neural_compressor/experimental/strategy/utils/constant.py @@ -14,22 +14,34 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Strategy constant.""" -PRECISION_SET = {'bf16', 'fp16' , 'fp32',} -QUANT_MODE_SET = {'static', 'dynamic'} -QUNAT_BIT_SET = {'int8', 'uint8', 'int4', 'uint4'} +PRECISION_SET = { + "bf16", + "fp16", + "fp32", +} +QUANT_MODE_SET = {"static", "dynamic"} +QUNAT_BIT_SET = {"int8", "uint8", "int4", "uint4"} -TUNING_ITEMS_LST = [('activation','scheme'), ('activation','algorithm'), ('activation','granularity'), - ('weight','scheme'), ('weight','algorithm'), ('weight','granularity'), 'sampling_size'] +TUNING_ITEMS_LST = [ + ("activation", "scheme"), + ("activation", "algorithm"), + ("activation", "granularity"), + ("weight", "scheme"), + ("weight", "algorithm"), + ("weight", "granularity"), + "sampling_size", +] -PRECISION_SET_V2_0 = {'fp32', 'bf16'} +PRECISION_SET_V2_0 = {"fp32", "bf16"} -auto_query_order = ['static', 'dynamic', 'bf16', 'fp16', 'fp32'] -static_query_order = ['static', 'bf16', 'fp16', 'fp32'] -dynamic_query_order = ['dynamic', 'bf16', 'fp16', 'fp32'] +auto_query_order = ["static", "dynamic", "bf16", "fp16", "fp32"] +static_query_order = ["static", "bf16", "fp16", "fp32"] +dynamic_query_order = ["dynamic", "bf16", "fp16", "fp32"] -FALLBACK_RECIPES_SET = {'first_conv_or_matmul_quantization', 'last_conv_or_matmul_quantization' \ - 'pre_post_process_quantization'} \ No newline at end of file +FALLBACK_RECIPES_SET = { + "first_conv_or_matmul_quantization", + "last_conv_or_matmul_quantization" "pre_post_process_quantization", +} diff --git a/neural_compressor/experimental/strategy/utils/tuning_sampler.py b/neural_compressor/experimental/strategy/utils/tuning_sampler.py index 63984f600dd..80e6c348c6c 100644 --- a/neural_compressor/experimental/strategy/utils/tuning_sampler.py +++ b/neural_compressor/experimental/strategy/utils/tuning_sampler.py @@ -17,25 +17,31 @@ """Tuning sampler.""" -from itertools import product import copy -from collections import deque, OrderedDict, defaultdict -from typing import List, Dict, Any +from collections import OrderedDict, defaultdict, deque +from itertools import product +from typing import Any, Dict, List + +from ....utils import logger from .tuning_space import TuningSpace, pattern_to_internal, pattern_to_path, quant_mode_from_pattern from .tuning_structs import OpTuningConfig -from ....utils import logger - -TUNING_ITEM_PRIORITY = [('activation','scheme'), ('activation','algorithm'),('activation','granularity'), - ('activation','compute_dtype'), ('weight','scheme'), ('weight','algorithm'), \ - ('weight','granularity')] +TUNING_ITEM_PRIORITY = [ + ("activation", "scheme"), + ("activation", "algorithm"), + ("activation", "granularity"), + ("activation", "compute_dtype"), + ("weight", "scheme"), + ("weight", "algorithm"), + ("weight", "granularity"), +] class TuningSamplerRegistry: """Class decorator used to register all TuningSampler subclasses.""" - + sampler_dict = {} - + @classmethod def register(cls, name): """Register new tuning sampler. 
@@ -43,11 +49,14 @@ def register(cls, name): Args: name: the name of new tuning sampler. """ + def decorator(sampler): assert name not in cls.sampler_dict, "Cannot have two sampler with the same name." cls.sampler_dict[name] = sampler + return decorator + class TuningOrder: """Not displayed in API Docs.""" @@ -58,15 +67,17 @@ def __init__(self): class TuningSampler: """Not displayed in API Docs. - + Basic class of tuning sampler. """ - def __init__(self, - tuning_space: TuningSpace, - tuning_order_lst: List[TuningOrder], - initial_op_tuning_cfg: Dict, - kwargs: Dict = {}): + def __init__( + self, + tuning_space: TuningSpace, + tuning_order_lst: List[TuningOrder], + initial_op_tuning_cfg: Dict, + kwargs: Dict = {}, + ): """Init tuning sampler. Args: @@ -85,61 +96,64 @@ def __init__(self, def __iter__(self, tune_cfg=None): """Interface for generate the next tuning config.""" pass - + def _set_dtype(self, op_name_type, config_args): - has_weight = op_name_type in self.tuning_space.ops_attr['weight'] - path = self.op_complete_path[op_name_type].get('activation', None) - config_args['activation_dtype'] = self.tuning_space.ops_data_type[op_name_type][path] + has_weight = op_name_type in self.tuning_space.ops_attr["weight"] + path = self.op_complete_path[op_name_type].get("activation", None) + config_args["activation_dtype"] = self.tuning_space.ops_data_type[op_name_type][path] if has_weight: - path = self.op_complete_path[op_name_type].get('weight', None) - config_args['weight_dtype'] = self.tuning_space.ops_data_type[op_name_type][path] - + path = self.op_complete_path[op_name_type].get("weight", None) + config_args["weight_dtype"] = self.tuning_space.ops_data_type[op_name_type][path] + class ModelWiseTuningSampler(TuningSampler): """Not displayed in API Docs.""" - def __init__(self, - tuning_space: TuningSpace, - tuning_items_priority: List[str], - tuning_order_lst: List[TuningOrder], - op_dtype_dict: Dict[tuple, str], - initial_op_tuning_cfg: Dict[tuple, OpTuningConfig]): + def __init__( + self, + tuning_space: TuningSpace, + tuning_items_priority: List[str], + tuning_order_lst: List[TuningOrder], + op_dtype_dict: Dict[tuple, str], + initial_op_tuning_cfg: Dict[tuple, OpTuningConfig], + ): """Model type wise tuning sampler. step1. create a default tuning config for each op step2. collect all tuning items and options, and build the model-wise traverse order - step3. yield the tuning item with option one by one, query the existence of tuning item + step3. yield the tuning item with option one by one, query the existence of tuning item and specific option for one op if exist, use the default tuning config if not exist - + Args: tuning_space: Tuning space. tuning_items_priority: The priority to traverse the tuning items. tuning_order_lst: The tuning orders. op_dtype_dict: The (op name, op type) and its target data type. initial_op_tuning_cfg: The initial tuning config. 
- """ super().__init__(tuning_space, tuning_order_lst, initial_op_tuning_cfg) self.op_dtype_dict = op_dtype_dict self.tuning_space = tuning_space self.default_op_config = {} - tuning_items = defaultdict(set) # item name: options + tuning_items = defaultdict(set) # item name: options for op_name_type, quant_mode in op_dtype_dict.items(): full_path = self.tuning_space.get_op_default_path_by_pattern(op_name_type, quant_mode) self.op_complete_path[op_name_type] = copy.deepcopy(full_path) # step1, set the default config for each op self.default_op_config[op_name_type] = tuning_space.get_default_config(op_name_type, quant_mode) - if quant_mode[0] == 'precision': continue - mode_items = copy.deepcopy(full_path) # TODO refactor the initialization method + if quant_mode[0] == "precision": + continue + mode_items = copy.deepcopy(full_path) # TODO refactor the initialization method # step2, collect all tuning items and their options for att in mode_items: - if att not in full_path: continue - quant_mode_item = self.tuning_space.query_quant_mode_item_by_full_path(op_name_type ,full_path[att]) + if att not in full_path: + continue + quant_mode_item = self.tuning_space.query_quant_mode_item_by_full_path(op_name_type, full_path[att]) for tuning_item in quant_mode_item.options: tuning_items[tuning_item.name] = tuning_items[tuning_item.name].union(tuning_item.options) self.tuning_items = tuning_items - + def __iter__(self): """Yield the next tuning config. @@ -151,39 +165,41 @@ def __iter__(self): # traverse all possible combinations by model-wise level tune_cfg = copy.deepcopy(self.initial_op_tuning_cfg) for op_name_type, quant_mode in self.op_dtype_dict.items(): - if quant_mode[0] == 'precision': continue + if quant_mode[0] == "precision": + continue all_exist_flag = True for method_name, method_val in zip(keys, vals): full_path = self.op_complete_path[op_name_type] - if method_name[0] not in full_path: continue - if not self.tuning_space.query_item_option(op_name_type, - full_path[method_name[0]], - method_name, method_val): + if method_name[0] not in full_path: + continue + if not self.tuning_space.query_item_option( + op_name_type, full_path[method_name[0]], method_name, method_val + ): all_exist_flag = False tune_cfg[op_name_type] = self.default_op_config[op_name_type] break if all_exist_flag: config_args = dict(zip(keys, vals)) - self._set_dtype( op_name_type, config_args) + self._set_dtype(op_name_type, config_args) internal_pattern = pattern_to_internal(quant_mode) quant_mode = quant_mode_from_pattern(internal_pattern) - tune_cfg[op_name_type] = OpTuningConfig(op_name_type[0], - op_name_type[1], - quant_mode, - self.tuning_space, - kwargs=config_args) + tune_cfg[op_name_type] = OpTuningConfig( + op_name_type[0], op_name_type[1], quant_mode, self.tuning_space, kwargs=config_args + ) yield tune_cfg - + class OpTypeWiseTuningSampler(TuningSampler): """Not displayed in API Docs.""" - def __init__(self, - tuning_space: TuningSpace, - tuning_items_priority: List[str], - tuning_order_lst: List[TuningOrder], - op_dtype_dict: Dict[tuple, str], - initial_op_tuning_cfg: Dict[tuple, OpTuningConfig]): + def __init__( + self, + tuning_space: TuningSpace, + tuning_items_priority: List[str], + tuning_order_lst: List[TuningOrder], + op_dtype_dict: Dict[tuple, str], + initial_op_tuning_cfg: Dict[tuple, OpTuningConfig], + ): """Op type wise tuning sampler. 
Args: @@ -197,27 +213,28 @@ def __init__(self, tuning_items_priority = TUNING_ITEM_PRIORITY # (op_type, quant_mode) : {tuning_item_name : [option1, option2]} # {('activation', 'scheme'): ['sym', 'sym'], ('activation', 'algorithm'): ['minmax', 'kl', 'minmax', 'kl']} - + self.optype_quant_mode_option = {} self.optype_quant_mode_items_name = defaultdict(list) self.op_type_quant_mode_wise_combination = {} self.op_dtype_dict = op_dtype_dict self.default_op_config = {} - + for op_name_type, quant_mode in op_dtype_dict.items(): full_path = self.tuning_space.get_op_default_path_by_pattern(op_name_type, quant_mode) self.op_complete_path[op_name_type] = copy.deepcopy(full_path) self.default_op_config[op_name_type] = self.tuning_space.get_default_config(op_name_type, quant_mode) op_name, op_type = op_name_type - if quant_mode[0] == 'precision': continue - mode_items = copy.deepcopy(full_path) # TODO refactor the initialization method + if quant_mode[0] == "precision": + continue + mode_items = copy.deepcopy(full_path) # TODO refactor the initialization method op_type_quant_mode = (op_type, quant_mode) filtered_tuning_items = [] for item_name in tuning_items_priority: att, method_name = item_name if att not in mode_items: continue - quant_mode_item = self.tuning_space.query_quant_mode_item_by_full_path(op_name_type ,full_path[att]) + quant_mode_item = self.tuning_space.query_quant_mode_item_by_full_path(op_name_type, full_path[att]) item = quant_mode_item.get_option_by_name(item_name) if item: if op_type_quant_mode not in self.optype_quant_mode_option: @@ -247,41 +264,40 @@ def __iter__(self): for index, op_type_quant_mode in enumerate(self.op_type_quant_mode_wise_combination.keys()): for op_name_type, quant_mode in self.op_dtype_dict.items(): if op_name_type[1] == op_type_quant_mode[0] and quant_mode == op_type_quant_mode[1]: - op_tuning_items = [item.name for item in \ - self.optype_quant_mode_items_name[op_type_quant_mode]] + op_tuning_items = [item.name for item in self.optype_quant_mode_items_name[op_type_quant_mode]] op_tuning_item_vals = options_lst[index] all_exist_flag = True for method_name, method_val in zip(op_tuning_items, op_tuning_item_vals): full_path = self.op_complete_path[op_name_type] - if not self.tuning_space.query_item_option(op_name_type, - full_path[method_name[0]], - method_name, - method_val): + if not self.tuning_space.query_item_option( + op_name_type, full_path[method_name[0]], method_name, method_val + ): all_exist_flag = False op_tuning_config = self.default_op_config[op_name_type] break if all_exist_flag: config_args = dict(zip(op_tuning_items, op_tuning_item_vals)) - self._set_dtype( op_name_type, config_args) + self._set_dtype(op_name_type, config_args) internal_pattern = pattern_to_internal(quant_mode) quant_mode = quant_mode_from_pattern(internal_pattern) - op_tuning_config = OpTuningConfig(op_name_type[0], - op_name_type[1], - quant_mode, - self.tuning_space, - kwargs=config_args) + op_tuning_config = OpTuningConfig( + op_name_type[0], op_name_type[1], quant_mode, self.tuning_space, kwargs=config_args + ) new_tune_cfg.update({op_name_type: op_tuning_config}) yield new_tune_cfg + class OpWiseTuningSampler(TuningSampler): """Not displayed in API Docs.""" - def __init__(self, - tuning_space: TuningSpace, - tuning_items_priority: List[str], - tuning_order_lst: List[TuningOrder], - op_dtype_dict: Dict[tuple, str], - initial_op_tuning_cfg: Dict): + def __init__( + self, + tuning_space: TuningSpace, + tuning_items_priority: List[str], + tuning_order_lst: 
List[TuningOrder], + op_dtype_dict: Dict[tuple, str], + initial_op_tuning_cfg: Dict, + ): """Op wise tuning config sampler. Args: @@ -303,13 +319,14 @@ def __init__(self, mode_items = copy.deepcopy(full_path) internal_pattern = pattern_to_internal(op_quant_mode) op_quant_mode = quant_mode_from_pattern(internal_pattern) - if internal_pattern[0] == 'precision': continue + if internal_pattern[0] == "precision": + continue filtered_tuning_items = [] for item_name in tuning_items_priority: att, method_name = item_name if att not in mode_items: continue - quant_mode_item = self.tuning_space.query_quant_mode_item_by_full_path(op_name_type ,full_path[att]) + quant_mode_item = self.tuning_space.query_quant_mode_item_by_full_path(op_name_type, full_path[att]) item = quant_mode_item.get_option_by_name(item_name) if item: filtered_tuning_items.append(item) @@ -333,12 +350,12 @@ def __iter__(self): self._set_dtype(op_name_type, config_args) internal_pattern = pattern_to_internal(op_quant_mode) quant_mode = quant_mode_from_pattern(internal_pattern) - op_tuning_config = OpTuningConfig(op_name_type[0], op_name_type[1], - quant_mode, self.tuning_space, - kwargs=config_args) + op_tuning_config = OpTuningConfig( + op_name_type[0], op_name_type[1], quant_mode, self.tuning_space, kwargs=config_args + ) new_tune_cfg.update({op_name_type: op_tuning_config}) yield new_tune_cfg - + def get_opwise_candidate(self): """Collect all op-wise setting. @@ -354,12 +371,12 @@ def get_opwise_candidate(self): self.op_complete_path[op_name_type] = copy.deepcopy(full_path) op_wise_configs[op_name_type] = [] # For precision - if internal_pattern[0] == 'precision': + if internal_pattern[0] == "precision": config_args = {} self._set_dtype(op_name_type, config_args) - op_tuning_config = OpTuningConfig(op_name_type[0], op_name_type[1], - quant_mode, self.tuning_space, - kwargs=config_args) + op_tuning_config = OpTuningConfig( + op_name_type[0], op_name_type[1], quant_mode, self.tuning_space, kwargs=config_args + ) op_wise_configs[op_name_type].append(op_tuning_config) continue # For quantization @@ -368,24 +385,26 @@ def get_opwise_candidate(self): for op_tuning_item_vals in op_options: config_args = dict(zip(op_tuning_items, op_tuning_item_vals)) - self._set_dtype( op_name_type, config_args) - op_tuning_config = OpTuningConfig(op_name_type[0], op_name_type[1], - quant_mode, self.tuning_space, - kwargs=config_args) + self._set_dtype(op_name_type, config_args) + op_tuning_config = OpTuningConfig( + op_name_type[0], op_name_type[1], quant_mode, self.tuning_space, kwargs=config_args + ) op_wise_configs[op_name_type].append(op_tuning_config) return op_wise_configs + class FallbackTuningSampler(TuningSampler): """Not displayed in API Docs.""" - def __init__(self, - tuning_space: TuningSpace, - tuning_order_lst: List[TuningOrder], - initial_op_tuning_cfg: Dict[tuple, Any], - op_dtypes: Dict[str, str], - accumulate: bool, - skip_first: bool = True - ): + def __init__( + self, + tuning_space: TuningSpace, + tuning_order_lst: List[TuningOrder], + initial_op_tuning_cfg: Dict[tuple, Any], + op_dtypes: Dict[str, str], + accumulate: bool, + skip_first: bool = True, + ): """Sampler for generate the tuning config of fallback stage. 
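Unlike the model-wise and op-type-wise stages, the op-wise sampler enumerates candidates independently for each op, which is essentially what `get_opwise_candidate` returns. A rough stand-alone sketch with hypothetical ops, item names, and options (the real code pulls these from the tuning space per `(op_name, op_type)`):

```python
# Per-op candidate enumeration, sketched with made-up options.
import itertools

op_items = {
    ("conv1", "Conv2D"): {"scheme": ["sym", "asym"], "algorithm": ["minmax", "kl"]},
    ("fc", "MatMul"): {"scheme": ["sym"], "algorithm": ["minmax"]},
}

op_wise_configs = {}
for op, items in op_items.items():
    names = list(items.keys())
    op_wise_configs[op] = [
        dict(zip(names, vals)) for vals in itertools.product(*(items[n] for n in names))
    ]
print(op_wise_configs[("conv1", "Conv2D")])  # 4 candidate configs for conv1
```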
Args: @@ -419,9 +438,9 @@ def __iter__(self): self._set_dtype(op_name_type, config_args) internal_pattern = pattern_to_internal(target_dtype) quant_mode = quant_mode_from_pattern(internal_pattern) - new_op_config = OpTuningConfig(op_name_type[0], op_name_type[1], - quant_mode, self.tuning_space, - kwargs=config_args) + new_op_config = OpTuningConfig( + op_name_type[0], op_name_type[1], quant_mode, self.tuning_space, kwargs=config_args + ) new_tune_cfg.update({op_name_type: new_op_config}) if self.accumulate and skip_first: # skip the first one @@ -430,22 +449,25 @@ def __iter__(self): logger.debug(f"fallback {op_name_type} to {target_dtype}") yield new_tune_cfg # need to skip the first one + @TuningSamplerRegistry.register("smooth_quant") class SmoothQuantSampler(TuningSampler): """Sampler for the hyperparameter tuning of smooth quantization.""" - - def __init__(self, - tuning_space: TuningSpace, - tuning_order_lst: List[TuningOrder], - initial_op_tuning_cfg: Dict, - kwargs: Dict ={}): + + def __init__( + self, + tuning_space: TuningSpace, + tuning_order_lst: List[TuningOrder], + initial_op_tuning_cfg: Dict, + kwargs: Dict = {}, + ): """Initialize the sampler.""" super().__init__(tuning_space, tuning_order_lst, initial_op_tuning_cfg, kwargs) # TODO use the alpha list specified by user self._kwargs = kwargs self._alpha_lst = [0.5] - if kwargs.get('smooth_quant_agrs', {}): - self._alpha_lst = kwargs['smooth_quant_agrs'].get('alpha_lst', [0.5]) + if kwargs.get("smooth_quant_agrs", {}): + self._alpha_lst = kwargs["smooth_quant_agrs"].get("alpha_lst", [0.5]) def __iter__(self, tune_cfg=None) -> OpTuningConfig: """Yield the next tuning config with update alpha. @@ -455,9 +477,9 @@ def __iter__(self, tune_cfg=None) -> OpTuningConfig: """ for alpha in self._alpha_lst: new_tune_cfg = copy.deepcopy(self.initial_op_tuning_cfg) if not tune_cfg else copy.deepcopy(tune_cfg) - sq_args = {'smooth_quant': True, 'smooth_quant_args': {'alpha': alpha}} - if 'recipe_cfgs' not in new_tune_cfg: - new_tune_cfg['recipe_cfgs'] = sq_args + sq_args = {"smooth_quant": True, "smooth_quant_args": {"alpha": alpha}} + if "recipe_cfgs" not in new_tune_cfg: + new_tune_cfg["recipe_cfgs"] = sq_args else: - new_tune_cfg['recipe_cfgs'].update(sq_args) - yield new_tune_cfg \ No newline at end of file + new_tune_cfg["recipe_cfgs"].update(sq_args) + yield new_tune_cfg diff --git a/neural_compressor/experimental/strategy/utils/tuning_space.py b/neural_compressor/experimental/strategy/utils/tuning_space.py index ef9698dc53f..55fa63f45fa 100644 --- a/neural_compressor/experimental/strategy/utils/tuning_space.py +++ b/neural_compressor/experimental/strategy/utils/tuning_space.py @@ -17,20 +17,21 @@ """Tuning space.""" -from collections import defaultdict, OrderedDict import os import re -from typing import Dict, Tuple +from collections import OrderedDict, defaultdict from copy import deepcopy +from typing import Dict, Tuple + from ....utils import logger -from .utility import OrderedDefaultDict +from .constant import TUNING_ITEMS_LST from .tuning_structs import OpTuningConfig +from .utility import OrderedDefaultDict -from .constant import TUNING_ITEMS_LST class TuningItem: """Not displayed in API Docs.""" - + def __init__(self, name, options=[], item_type=None): """Init the tuning item. @@ -51,7 +52,7 @@ def options(self): All options. 
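The `SmoothQuantSampler` reformatted above varies only the smoothing alpha: each yielded config is a deep copy of the base tuning config with a `recipe_cfgs` entry stamped in. A minimal stand-alone version of that loop, with the surrounding `OpTuningConfig` machinery omitted and hypothetical alpha values:

```python
# Sketch of the alpha sweep: copy the base config and write the smooth-quant
# recipe into it for each candidate alpha (the library's default list is [0.5]).
import copy

base_cfg = {"calib_sampling_size": 100}
alpha_lst = [0.3, 0.5, 0.7]  # hypothetical candidates

for alpha in alpha_lst:
    cfg = copy.deepcopy(base_cfg)
    sq_args = {"smooth_quant": True, "smooth_quant_args": {"alpha": alpha}}
    cfg.setdefault("recipe_cfgs", {}).update(sq_args)
    print(cfg["recipe_cfgs"]["smooth_quant_args"])
```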
""" return self._options - + def get_options_name(self): """Return the name list of the options.""" return [o.name for o in self.options] @@ -89,14 +90,14 @@ def get_option_by_name(self, option_name): def get_details(self, depth=0): """Get the tuning item and its options recursively. - + Args: depth: recursion depth. Defaults to 0. Returns: The tuning item and its options as a string. """ - details = ['\t' * depth + f"{self.name}, {self.item_type}"] + details = ["\t" * depth + f"{self.name}, {self.item_type}"] for option in self.options: if isinstance(option, int) or isinstance(option, str): details.append("\t" * depth + str(option)) @@ -107,11 +108,10 @@ def get_details(self, depth=0): class TuningSpace: """Not displayed in API Docs. - - 1) capability -> internal format -> merge -> tuning space (tree) + 1) capability -> internal format -> merge -> tuning space (tree) """ - + def __init__(self, capability, conf, framework=None): """Init the tuning space. @@ -122,7 +122,7 @@ def __init__(self, capability, conf, framework=None): """ self.capability = capability self.conf = conf - self.root_item = TuningItem(name='root', options=[], item_type='root') + self.root_item = TuningItem(name="root", options=[], item_type="root") self.quant_mode_wise_items = defaultdict(list) # quant_mode/precision_name: {(op_name, op_type),...} self.op_type_wise_items = defaultdict(list) # op_type: {(op_name, op_type), ...} self.framework = framework @@ -131,49 +131,52 @@ def __init__(self, capability, conf, framework=None): self.op_items = {} # {(op_name, op_type): {(path): data type}} self.ops_data_type = OrderedDefaultDict() - self.ops_attr = {'activation': set(), 'weight': set()} + self.ops_attr = {"activation": set(), "weight": set()} # {(op_name, op_type): {path1, path2, ...} self.ops_path_set = defaultdict(set) - + self._create_tuning_space(capability, usr_cfg) - + def _parse_capability(self, capability: Dict) -> None: """Parse the capability and construct the tuning space(a tree). Args: capability: merged framework capability. 
""" - calib = TuningItem(name='calib_sampling_size', - options=capability['calib']['calib_sampling_size'], - item_type='calib_sampling_size') + calib = TuningItem( + name="calib_sampling_size", + options=capability["calib"]["calib_sampling_size"], + item_type="calib_sampling_size", + ) self.root_item.append(calib) + def _parse(cap, root, path, op_name_type): if isinstance(cap, dict): for key, val in cap.items(): if isinstance(val, dict): - if len(path) > 1 and path[-2] == 'precision': + if len(path) > 1 and path[-2] == "precision": self.ops_path_set[op_name_type].add(tuple(path + [key])) tuning_item = TuningItem(name=key, options=[], item_type=key) root.append(tuning_item) _parse(val, tuning_item, path + [key], op_name_type) elif isinstance(val, list): - new_key = ('activation', key) if 'activation' in path else ('weight', key) - tuning_item = TuningItem(name=new_key, options=val, item_type='method') + new_key = ("activation", key) if "activation" in path else ("weight", key) + tuning_item = TuningItem(name=new_key, options=val, item_type="method") self.ops_path_set[op_name_type].add(tuple(path)) root.append(tuning_item) else: return - for op_name_type, op_cap in capability['op'].items(): + for op_name_type, op_cap in capability["op"].items(): op_name, op_type = op_name_type - op_item = TuningItem(name=op_name_type, options=[], item_type='op') + op_item = TuningItem(name=op_name_type, options=[], item_type="op") self.op_type_wise_items[op_type].append(op_item) self.root_item.append(op_item) self.op_items[op_name_type] = op_item _parse(op_cap, op_item, [], op_name_type) for q_option in op_item.options: - if q_option and q_option.name == 'precision': - acc_item = q_option.get_option_by_name('activation') + if q_option and q_option.name == "precision": + acc_item = q_option.get_option_by_name("activation") if acc_item and acc_item.options: for dtype_item in acc_item.options: self.quant_mode_wise_items[dtype_item.name].append(op_item) @@ -182,7 +185,7 @@ def _parse(cap, root, path, op_name_type): def _merge_op_cfg(self, cur_op_cap, op_user_cfg, fw_op_cap): """Merge the op cfg with user cfg. - + op_user_cfg:{ 'activation':{ 'dtype': ['fp32'] @@ -191,92 +194,99 @@ def _merge_op_cfg(self, cur_op_cap, op_user_cfg, fw_op_cap): 'dtype': ['fp32'] } } - + Step1. merge dtype, get the intersection between fw_op_cap and op_user_cfg. Step2. merge method options. - + # if dtype and type intersection with precision set -> only keep the intersection precision # and remove the quantization. # else(no dtype, or no intersection) -> merge the method Args: cur_op_cap: current capability. - op_user_cfg: The user capability. + op_user_cfg: The user capability. fw_op_cap: The fwk capability(baseline). - + Returns: Return the merged capability. """ from .utility import extract_data_type, reverted_data_type + fw_op_cap = deepcopy(fw_op_cap) new_op_cap = deepcopy(cur_op_cap) - for att in ['activation', 'weight']: + for att in ["activation", "weight"]: if op_user_cfg.get(att, None) is not None: - user_dtype_lst = op_user_cfg[att]['dtype'] if op_user_cfg[att]['dtype'] is not None else [] + user_dtype_lst = op_user_cfg[att]["dtype"] if op_user_cfg[att]["dtype"] is not None else [] # Merge the precision part. - fwk_att_precision_cap = fw_op_cap['precision'].get(att, {}) + fwk_att_precision_cap = fw_op_cap["precision"].get(att, {}) fwk_precision_set = set(fwk_att_precision_cap.keys()) # The intersection of user cfg and fwk capability. 
valid_precision_set = set(fwk_precision_set).intersection(set(user_dtype_lst)) if len(valid_precision_set) != 0: - new_op_cap = dict(filter(lambda item: item[0] == 'precision', new_op_cap.items())) - new_op_cap['precision'][att] = dict(filter(lambda item: item[0] in valid_precision_set,\ - fw_op_cap['precision'][att].items())) + new_op_cap = dict(filter(lambda item: item[0] == "precision", new_op_cap.items())) + new_op_cap["precision"][att] = dict( + filter(lambda item: item[0] in valid_precision_set, fw_op_cap["precision"][att].items()) + ) else: # Filter the valid options for tuning item for quant_mode in fw_op_cap: if quant_mode not in new_op_cap: new_op_cap[quant_mode] = deepcopy(fw_op_cap[quant_mode]) - if quant_mode == 'precision': continue + if quant_mode == "precision": + continue for data_type in new_op_cap[quant_mode][att]: for signed_flag in new_op_cap[quant_mode][att][data_type]: cur_items = new_op_cap[quant_mode][att][data_type][signed_flag] fwk_items = fw_op_cap[quant_mode][att][data_type][signed_flag] for method_name, method_options in op_user_cfg[att].items(): - if method_name not in ['dtype', 'quant_mode'] and method_options: + if method_name not in ["dtype", "quant_mode"] and method_options: # filter the method options - options_intersection = set(fwk_items[method_name]\ - ).intersection(set(method_options)) + options_intersection = set(fwk_items[method_name]).intersection( + set(method_options) + ) # merge with fwk, if intersection -> use intersection if len(options_intersection) > 0: - cur_items[method_name] = [option for option in fwk_items[method_name] if\ - option in options_intersection] + cur_items[method_name] = [ + option + for option in fwk_items[method_name] + if option in options_intersection + ] return new_op_cap def _merge_optype_wise_cfg(self, cap: Dict, optype_wise_usr_cfg: Dict, fw_cap: Dict): for op_type, op_user_cfg in optype_wise_usr_cfg.items(): op_type_pattern = re.compile(op_type) - op_lst = [op_name_type for op_name_type in cap['op'] if op_type_pattern.fullmatch(op_name_type[1])] + op_lst = [op_name_type for op_name_type in cap["op"] if op_type_pattern.fullmatch(op_name_type[1])] for op_name_type in op_lst: - cap['op'][op_name_type] = self._merge_op_cfg(cap['op'][op_name_type], - op_user_cfg, - fw_cap['op'][op_name_type]) + cap["op"][op_name_type] = self._merge_op_cfg( + cap["op"][op_name_type], op_user_cfg, fw_cap["op"][op_name_type] + ) def _merge_model_wise_cfg(self, cap: Dict, model_wise_usr_cfg: Dict, fw_cap: Dict): - for op_name_type in cap['op'].keys(): - cap['op'][op_name_type] = self._merge_op_cfg(cap['op'][op_name_type], - model_wise_usr_cfg, - fw_cap['op'][op_name_type]) + for op_name_type in cap["op"].keys(): + cap["op"][op_name_type] = self._merge_op_cfg( + cap["op"][op_name_type], model_wise_usr_cfg, fw_cap["op"][op_name_type] + ) def _merge_op_wise_cfg(self, cap: Dict, op_wise_usr_cfg: Dict, fw_cap: Dict): - op_name_types = {key[0]: key for key in cap['op'].keys()} + op_name_types = {key[0]: key for key in cap["op"].keys()} for op_name_pattern, op_user_cfg in op_wise_usr_cfg.items(): op_name_pattern = re.compile(op_name_pattern) for op_name in op_name_types: if op_name_pattern.fullmatch(op_name): op_name_type = op_name_types[op_name] - cap['op'][op_name_type] = self._merge_op_cfg(cap['op'][op_name_type], - op_user_cfg, - fw_cap['op'][op_name_type]) - + cap["op"][op_name_type] = self._merge_op_cfg( + cap["op"][op_name_type], op_user_cfg, fw_cap["op"][op_name_type] + ) + def _merge_with_user_cfg(self, capability: Dict, user_cfg: Dict): 
"""Merge the capability with user config. - + Merge the capability queried from the adaptor with user config in the order of model-wise, optype-wise, and op-wise if needed. The optype-wise user config will override the model-wise user config for their intersection parts, the same as the op-wise and optype-wise. - + Here is an example: capability:{ ('op1','type1'): { @@ -296,7 +306,7 @@ def _merge_with_user_cfg(self, capability: Dict, user_cfg: Dict): 'item2': [item2_option1, item2_option2], } } - + user_config{ model-wise:{ 'item1': [item1_option1] @@ -375,16 +385,16 @@ def _merge_with_user_cfg(self, capability: Dict, user_cfg: Dict): :return: """ fw_capability = deepcopy(capability) - if user_cfg['model_wise'] is not None: - self._merge_model_wise_cfg(capability, user_cfg['model_wise'], fw_capability) - if user_cfg['optype_wise'] is not None: - self._merge_optype_wise_cfg(capability, user_cfg['optype_wise'], fw_capability) - if user_cfg['op_wise'] is not None: - self._merge_op_wise_cfg(capability, user_cfg['op_wise'], fw_capability) - + if user_cfg["model_wise"] is not None: + self._merge_model_wise_cfg(capability, user_cfg["model_wise"], fw_capability) + if user_cfg["optype_wise"] is not None: + self._merge_optype_wise_cfg(capability, user_cfg["optype_wise"], fw_capability) + if user_cfg["op_wise"] is not None: + self._merge_op_wise_cfg(capability, user_cfg["op_wise"], fw_capability) + def _parse_cap_helper(self, cap): """Convert the cpa to internal format. - + Parsed result: (op_name, op_type): { @@ -436,45 +446,48 @@ def _parse_cap_helper(self, cap): } """ from .utility import OrderedDefaultDict, extract_data_type + cap = deepcopy(cap) - parsed_cap = OrderedDict() # {(op_name, op_type): parsed_op_cap} + parsed_cap = OrderedDict() # {(op_name, op_type): parsed_op_cap} for op_name_type, op_cap_lst in cap.items(): - parsed_op_cap = OrderedDefaultDict() # {ptq_type/precision, {}} - parsed_op_cap['precision'] = OrderedDefaultDict() + parsed_op_cap = OrderedDefaultDict() # {ptq_type/precision, {}} + parsed_op_cap["precision"] = OrderedDefaultDict() # WA for some op have extra weight dtype. - has_weight = all(['weight' in op_cap for op_cap in op_cap_lst]) - if has_weight: self.ops_attr['weight'].add(op_name_type) + has_weight = all(["weight" in op_cap for op_cap in op_cap_lst]) + if has_weight: + self.ops_attr["weight"].add(op_name_type) for op_cap in op_cap_lst: - if 'activation' in op_cap: - self.ops_attr['activation'].add(op_name_type) - attrs_lst = ['activation', 'weight'] if has_weight else ['activation'] + if "activation" in op_cap: + self.ops_attr["activation"].add(op_name_type) + attrs_lst = ["activation", "weight"] if has_weight else ["activation"] for att in attrs_lst: # Parse the data info for item that has options. - if 'activation' in op_cap and 'quant_mode' in op_cap['activation']: - quant_mode = op_cap['activation']['quant_mode'] - att_dtype = op_cap[att]['dtype'][0] + if "activation" in op_cap and "quant_mode" in op_cap["activation"]: + quant_mode = op_cap["activation"]["quant_mode"] + att_dtype = op_cap[att]["dtype"][0] signed_flag, _data_type = extract_data_type(att_dtype) for item_name, item_options in op_cap[att].items(): - if item_name == 'dtype': + if item_name == "dtype": # The dtype should be a string, need to align with fwk.yaml. 
- self.ops_data_type[op_name_type][(quant_mode, att, _data_type, signed_flag)] = \ + self.ops_data_type[op_name_type][(quant_mode, att, _data_type, signed_flag)] = ( item_options[0] if isinstance(item_options, list) else item_options - if item_name not in ['dtype', 'quant_mode']: + ) + if item_name not in ["dtype", "quant_mode"]: parsed_op_cap[quant_mode][att][_data_type][signed_flag][item_name] = item_options else: # Parse the data info for item with unique value. - att_dtype = op_cap[att]['dtype'] + att_dtype = op_cap[att]["dtype"] if isinstance(att_dtype, list): att_dtype = att_dtype[0] - parsed_op_cap['precision'][att][att_dtype] = {'dtype': att_dtype} - self.ops_data_type[op_name_type][('precision', att, att_dtype)] = att_dtype + parsed_op_cap["precision"][att][att_dtype] = {"dtype": att_dtype} + self.ops_data_type[op_name_type][("precision", att, att_dtype)] = att_dtype parsed_cap[op_name_type] = parsed_op_cap return parsed_cap - + def _create_tuning_space(self, capability, usr_cfg): """Create tuning space. - + steo1. convert the capability into internal format. step2. merge the capability with usr_cfg step3. create the tuning space @@ -482,10 +495,10 @@ def _create_tuning_space(self, capability, usr_cfg): :param usr_cfg: :return: """ - capability['op'] = self._parse_cap_helper(deepcopy(capability['op'])) + capability["op"] = self._parse_cap_helper(deepcopy(capability["op"])) if usr_cfg: - self._merge_with_user_cfg(capability, usr_cfg['quantization']) - logger.debug(f"*********** After Merged with user cfg ***********") + self._merge_with_user_cfg(capability, usr_cfg["quantization"]) + logger.debug("*********** After Merged with user cfg ***********") logger.debug(capability) self._parse_capability(capability) @@ -502,7 +515,8 @@ def query_item_option(self, op_name_type, path, method_name, method_val): Return the query result if exist. """ mode_item = self.get_item_by_path((op_name_type, *path)) - if not mode_item: return None + if not mode_item: + return None method_item = mode_item.get_option_by_name(method_name) return method_item is not None and method_val in method_item.options @@ -517,30 +531,30 @@ def get_default_config(self, op_name_type, quant_mode): op_tuning_config: the default config according to the specified quantization mode. 
""" from .tuning_structs import OpTuningConfig + # For quant_mode static/dynamic/((static, int8), (dynamic, int4)) # set the first option as the default if the not support the required quant mode full_path = self.get_op_default_path_by_pattern(op_name_type, quant_mode) config_args = {} - has_weight = op_name_type in self.ops_attr['weight'] - config_args['activation_dtype'] = self.ops_data_type[op_name_type].get(full_path['activation']) + has_weight = op_name_type in self.ops_attr["weight"] + config_args["activation_dtype"] = self.ops_data_type[op_name_type].get(full_path["activation"]) if has_weight: - config_args['weight_dtype'] = self.ops_data_type[op_name_type].get(full_path['weight']) + config_args["weight_dtype"] = self.ops_data_type[op_name_type].get(full_path["weight"]) for att in full_path: - mode_item = self.query_quant_mode_item_by_full_path(op_name_type ,full_path[att]) + mode_item = self.query_quant_mode_item_by_full_path(op_name_type, full_path[att]) if mode_item: - method_args = {method_item.name: method_item.options[0] for method_item in mode_item.options \ - if method_item.name in TUNING_ITEMS_LST} + method_args = { + method_item.name: method_item.options[0] + for method_item in mode_item.options + if method_item.name in TUNING_ITEMS_LST + } config_args.update(method_args) quant_mode = quant_mode if isinstance(quant_mode, str) else quant_mode[0] # set the first option as the default for each tuning item - op_tuning_config = OpTuningConfig(op_name_type[0], - op_name_type[1], - quant_mode, - self, - kwargs=config_args) + op_tuning_config = OpTuningConfig(op_name_type[0], op_name_type[1], quant_mode, self, kwargs=config_args) return op_tuning_config - + def get_item_by_path(self, path, default=None): """Get the item according to the path.""" item = self.root_item @@ -564,12 +578,13 @@ def get_default_full_path(self, op_name_type, path): new_path: the complete path. """ # For precision - if path[0] == 'precision': + if path[0] == "precision": # If the path is ('precision', 'activation', dtype), return it directly. - if len(path) == 3: return path + if len(path) == 3: + return path assert len(path) == 2, f"Got the path: {path}, please provide the path include activation or weight." att_item = self.get_item_by_path((op_name_type, *path)) - if not att_item or len(att_item.options) == 0: + if not att_item or len(att_item.options) == 0: logger.debug(f"Could not found item for {op_name_type} with path {path}") return None dtype = att_item.options[0].name @@ -577,14 +592,18 @@ def get_default_full_path(self, op_name_type, path): else: # For quantization assert len(path) >= 2, f"Got the path: {path}, please provide the path include activation or weight." - if path[-1] == None: path = path[:-1] + if path[-1] is None: + path = path[:-1] item = self.get_item_by_path((op_name_type, *path)) new_path = path # For path ('static', 'activation', ...) while item: item_options = item.options - if len(item_options) > 0 and isinstance(item_options[0], TuningItem) and \ - item_options[0].item_type != 'method': + if ( + len(item_options) > 0 + and isinstance(item_options[0], TuningItem) + and item_options[0].item_type != "method" + ): new_path = new_path + (item_options[0].name,) item = item_options[0] else: @@ -596,7 +615,7 @@ def query_quant_mode_item_by_full_path(self, op_name_type, path) -> Tuple[Tuning new_path = (op_name_type, *path) item = self.get_item_by_path(new_path) return item - + def query_items_by_quant_mode(self, quant_mode): """Collect all op items that support the specified mode. 
@@ -614,23 +633,24 @@ def get_op_default_path_by_pattern(self, op_name_type, pattern): Args: op_name_type: (op_name, op_type) pattern: 'static', 'dynamic', ('static', 'int8'), ('precision', 'fp32') - + Returns: - result(Dict): The default full path of activation and weight if have. + result(Dict): The default full path of activation and weight if have. """ internal_pattern = pattern_to_internal(pattern) - full_path = {'activation': None, 'weight': None} - full_path['activation'], full_path['weight'] = pattern_to_path(internal_pattern) + full_path = {"activation": None, "weight": None} + full_path["activation"], full_path["weight"] = pattern_to_path(internal_pattern) result = {} - has_weight = op_name_type in self.ops_attr['weight'] - att_lst = ['activation', 'weight'] if has_weight else ['activation'] + has_weight = op_name_type in self.ops_attr["weight"] + att_lst = ["activation", "weight"] if has_weight else ["activation"] for att in att_lst: result[att] = self.get_default_full_path(op_name_type, full_path[att]) return result -def pattern_to_internal(pattern, default_dtype='int8'): + +def pattern_to_internal(pattern, default_dtype="int8"): """Convert pattern to internal format. - + 'static' -> ('static', (('int8'),('int8'))) 'dynamic' -> ('dynamic', (('int8'),('int8'))) 'fp32' -> ('precision', (('fp32'), ('fp32'))) @@ -641,25 +661,29 @@ def pattern_to_internal(pattern, default_dtype='int8'): #TODO to add the support for mixed data type of weight and activation """ from .constant import PRECISION_SET_V2_0 + pattern_bk = pattern if isinstance(pattern, str): - pattern = ('precision', pattern) if pattern in PRECISION_SET_V2_0 else (pattern, (None)) + pattern = ("precision", pattern) if pattern in PRECISION_SET_V2_0 else (pattern, (None)) internal_pattern = (pattern[0], ((pattern[1],), (pattern[1],))) return internal_pattern + def pattern_to_path(pattern): """Convert pattern to path.""" - act_path = (pattern[0], 'activation', *pattern[1][0]) - weight_path = (pattern[0], 'weight', *pattern[1][1]) + act_path = (pattern[0], "activation", *pattern[1][0]) + weight_path = (pattern[0], "weight", *pattern[1][1]) return act_path, weight_path + def quant_mode_from_pattern(internal_pattern): """Get quant mode from internal pattern.""" - if internal_pattern[0] == 'precision': + if internal_pattern[0] == "precision": return internal_pattern[1][0] else: return internal_pattern[0] + def initial_tuning_cfg_with_quant_mode(op_name_type, quant_mode, tuning_space: TuningSpace) -> OpTuningConfig: """Initialize the tuning cfg. @@ -667,36 +691,35 @@ def initial_tuning_cfg_with_quant_mode(op_name_type, quant_mode, tuning_space: T op_name_type: (op name, op type) quant_mode: dynamic/static/fp32/bf16/fp16 tuning_space: tuning space. - - step1, convert the quant_mode into internal format. + + step1, convert the quant_mode into internal format. step2, complete the path based. step3, get the mode item. step4, use the first option as value for method. step5, create the op tuning config. - + Returns: - The initial tuning config. + The initial tuning config. 
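The helpers in this hunk normalize the user-facing quant-mode pattern into an internal `(mode, ((act_dtype,), (weight_dtype,)))` tuple and then into activation/weight paths. A short demo of that path construction; the precision set below is an assumption standing in for the real `PRECISION_SET_V2_0` constant:

```python
# Demo of the pattern helpers, mirroring the reformatted functions above.
PRECISION_SET = {"fp32", "bf16", "fp16"}  # assumed stand-in for PRECISION_SET_V2_0

def pattern_to_internal(pattern):
    if isinstance(pattern, str):
        pattern = ("precision", pattern) if pattern in PRECISION_SET else (pattern, None)
    return (pattern[0], ((pattern[1],), (pattern[1],)))

def pattern_to_path(pattern):
    act_path = (pattern[0], "activation", *pattern[1][0])
    weight_path = (pattern[0], "weight", *pattern[1][1])
    return act_path, weight_path

print(pattern_to_path(pattern_to_internal("fp32")))
# (('precision', 'activation', 'fp32'), ('precision', 'weight', 'fp32'))
print(pattern_to_path(pattern_to_internal("static")))
# (('static', 'activation', None), ('static', 'weight', None))
```

The trailing `None` in the quantization case is later stripped by `get_default_full_path`, which then completes the path from the tuning space.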
""" internal_pattern = pattern_to_internal(quant_mode) - full_path = {'activation': None, 'weight': None} - full_path['activation'], full_path['weight'] = pattern_to_path(internal_pattern) - has_weight = op_name_type in tuning_space.ops_attr['weight'] + full_path = {"activation": None, "weight": None} + full_path["activation"], full_path["weight"] = pattern_to_path(internal_pattern) + has_weight = op_name_type in tuning_space.ops_attr["weight"] config_args = {} - att_lst = ['activation', 'weight'] if has_weight else ['activation'] + att_lst = ["activation", "weight"] if has_weight else ["activation"] for att in att_lst: att_full_path = tuning_space.get_default_full_path(op_name_type, full_path[att]) - config_args[att + '_dtype'] = tuning_space.ops_data_type[op_name_type].get(att_full_path, None) + config_args[att + "_dtype"] = tuning_space.ops_data_type[op_name_type].get(att_full_path, None) mode_item = tuning_space.get_item_by_path((op_name_type, *att_full_path)) if mode_item: - method_args = {method_item.name: method_item.options[0] for method_item in mode_item.options \ - if method_item.name in TUNING_ITEMS_LST} + method_args = { + method_item.name: method_item.options[0] + for method_item in mode_item.options + if method_item.name in TUNING_ITEMS_LST + } config_args.update(method_args) quant_mode = internal_pattern[0] # set the first option as the default for each tuning item - op_tuning_config = OpTuningConfig(op_name_type[0], - op_name_type[1], - quant_mode, - tuning_space, - kwargs=config_args) - return op_tuning_config \ No newline at end of file + op_tuning_config = OpTuningConfig(op_name_type[0], op_name_type[1], quant_mode, tuning_space, kwargs=config_args) + return op_tuning_config diff --git a/neural_compressor/experimental/strategy/utils/tuning_structs.py b/neural_compressor/experimental/strategy/utils/tuning_structs.py index c4222a2f6c3..d94b4dffe18 100644 --- a/neural_compressor/experimental/strategy/utils/tuning_structs.py +++ b/neural_compressor/experimental/strategy/utils/tuning_structs.py @@ -18,12 +18,14 @@ """Tuning structure.""" from typing import Dict -from .constant import QUANT_MODE_SET, TUNING_ITEMS_LST, PRECISION_SET + from ....utils import logger +from .constant import PRECISION_SET, QUANT_MODE_SET, TUNING_ITEMS_LST + class OpTuningConfig: """Op tuning config.""" - + def __init__(self, op_name, op_type, op_quant_mode, tuning_space, kwargs={}): """Create the tuning config. 
@@ -41,24 +43,24 @@ def __init__(self, op_name, op_type, op_quant_mode, tuning_space, kwargs={}): self.kwargs = kwargs self.act_dtype = None self.weight_dtype = None - self.has_weight = self.op_name_type in tuning_space.ops_attr['weight'] + self.has_weight = self.op_name_type in tuning_space.ops_attr["weight"] self._set_dtype() - + def _set_dtype(self): """Set the date type.""" if self.op_quant_mode in PRECISION_SET: self.act_dtype, self.weight_dtype = self.op_quant_mode, self.op_quant_mode else: - self.act_dtype = self.kwargs.get('activation_dtype', None) - self.weight_dtype = self.kwargs.get('weight_dtype', None) - assert self.act_dtype and isinstance(self.act_dtype, str),\ - (f"Didn't assign the activation data type for {self.op_name, self.op_type}", \ - f"with quant_mode {self.op_quant_mode}") + self.act_dtype = self.kwargs.get("activation_dtype", None) + self.weight_dtype = self.kwargs.get("weight_dtype", None) + assert self.act_dtype and isinstance(self.act_dtype, str), ( + f"Didn't assign the activation data type for {self.op_name, self.op_type}", + f"with quant_mode {self.op_quant_mode}", + ) # if self.has_weight: # assert self.weight_dtype, \ # (f"Didn't assign the weight data type for {self.op_name, self.op_type}", \ # f"with quant_mode {self.op_quant_mode}") - def __repr__(self) -> str: """Display the tuning config as string. @@ -66,9 +68,9 @@ def __repr__(self) -> str: Returns: msg: the tuning config as string. """ - msg = f"op name: {self.op_name}, op type : {self.op_type} \n" + msg = f"op name: {self.op_name}, op type : {self.op_type} \n" msg += f"\t activation dtype: {self.act_dtype} \n" - msg += f"\t weight dtype: {self.weight_dtype} \n" if self.has_weight else "" + msg += f"\t weight dtype: {self.weight_dtype} \n" if self.has_weight else "" for key, val in self.kwargs.items(): if key in TUNING_ITEMS_LST: msg += f"\t {key[0]} {key[1]}: {val}\n" @@ -76,19 +78,19 @@ def __repr__(self) -> str: def get_state(self): """Return the op tuning configuration. - + Returns: Dict: The op tuning state. """ result = {} if self.has_weight: - result['weight'] = { - 'dtype': self.weight_dtype, - } - result['activation'] = { - 'dtype': self.act_dtype, - 'quant_mode': self.op_quant_mode, + result["weight"] = { + "dtype": self.weight_dtype, } + result["activation"] = { + "dtype": self.act_dtype, + "quant_mode": self.op_quant_mode, + } for key, val in self.kwargs.items(): if key in TUNING_ITEMS_LST: result[key[0]][key[1]] = val diff --git a/neural_compressor/experimental/strategy/utils/utility.py b/neural_compressor/experimental/strategy/utils/utility.py index 22b95176e59..4a2e421f886 100644 --- a/neural_compressor/experimental/strategy/utils/utility.py +++ b/neural_compressor/experimental/strategy/utils/utility.py @@ -14,20 +14,21 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Tuning utility.""" from collections import OrderedDict + class OrderedDefaultDict(OrderedDict): """Ordered default dict.""" - + def __missing__(self, key): """Initialize value for the missing key.""" self[key] = value = OrderedDefaultDict() return value + def extract_data_type(data_type: str) -> str: """Extract data type and signed from data type. 
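The `OrderedDefaultDict` defined in the utility hunk auto-creates nested levels on first access, which is what lets the capability parser assign deep paths in one statement. A quick usage example (the class body is copied from the hunk; the keys are made up):

```python
# Auto-vivifying ordered dict: missing keys create another level on demand,
# so deep paths can be assigned without pre-building intermediate dicts.
from collections import OrderedDict

class OrderedDefaultDict(OrderedDict):
    def __missing__(self, key):
        self[key] = value = OrderedDefaultDict()
        return value

caps = OrderedDefaultDict()
caps[("conv1", "Conv2D")][("static", "activation", "int8", "signed")] = "int8"
print(caps[("conv1", "Conv2D")])
```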
@@ -37,11 +38,13 @@ def extract_data_type(data_type: str) -> str: Returns: (signed or unsigned, data type without signed) """ - return ('signed', data_type) if data_type[0] != 'u' else ('unsigned', data_type[1:]) + return ("signed", data_type) if data_type[0] != "u" else ("unsigned", data_type[1:]) + def reverted_data_type(signed_flag: str, data_type: str) -> str: """Revert the data type.""" - return data_type if signed_flag == 'signed' else 'u' + data_type + return data_type if signed_flag == "signed" else "u" + data_type + def get_adaptor_name(adaptor): """Get adaptor name. @@ -50,8 +53,8 @@ def get_adaptor_name(adaptor): adaptor: adaptor instance. """ adaptor_name = type(adaptor).__name__.lower() - adaptor_name_lst = ['onnx', 'tensorflow', 'pytorch'] + adaptor_name_lst = ["onnx", "tensorflow", "pytorch"] for name in adaptor_name_lst: if adaptor_name.startswith(name): return name - return "" \ No newline at end of file + return "" diff --git a/neural_compressor/metric/__init__.py b/neural_compressor/metric/__init__.py index ab53b731065..289133cbdfd 100644 --- a/neural_compressor/metric/__init__.py +++ b/neural_compressor/metric/__init__.py @@ -14,21 +14,37 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - - """Intel Neural Compressor Metric.""" -from .metric import (METRICS, Metric, BaseMetric, TensorflowTopK, metric_registry, COCOmAPv2, SquadF1, GeneralTopK, - register_customer_metric) +from .metric import ( + METRICS, + Metric, + BaseMetric, + TensorflowTopK, + metric_registry, + COCOmAPv2, + SquadF1, + GeneralTopK, + register_customer_metric, +) from os.path import dirname, basename, isfile, join import glob modules = glob.glob(join(dirname(__file__), "*.py")) for f in modules: - if isfile(f) and not f.startswith('__') and not f.endswith('__init__.py'): + if isfile(f) and not f.startswith("__") and not f.endswith("__init__.py"): __import__(basename(f)[:-3], globals(), locals(), level=1) -__all__ = ["METRICS", "Metric", "BaseMetric", "TensorflowTopK", "metric_registry", - "COCOmAPv2", "SquadF1", "GeneralTopK", "register_customer_metric"] +__all__ = [ + "METRICS", + "Metric", + "BaseMetric", + "TensorflowTopK", + "metric_registry", + "COCOmAPv2", + "SquadF1", + "GeneralTopK", + "register_customer_metric", +] diff --git a/neural_compressor/metric/bleu.py b/neural_compressor/metric/bleu.py index 9a5e09df572..c0b41be60b3 100644 --- a/neural_compressor/metric/bleu.py +++ b/neural_compressor/metric/bleu.py @@ -17,11 +17,12 @@ """Script for BLEU metric.""" import re -import six import sys import unicodedata from typing import List, Sequence +import six + from .bleu_util import compute_bleu from .metric import metric_registry @@ -31,8 +32,8 @@ class UnicodeRegex(object): Attributes: nondigit_punct_re: The compiled regular expressions to recognize - punctuation preceded with a digit. - punct_nondigit_re: The compiled regular expressions to recognize + punctuation preceded with a digit. + punct_nondigit_re: The compiled regular expressions to recognize punctuation followed by a digit. symbol_re: The compiled regular expressions to recognize symbols. """ @@ -54,8 +55,9 @@ def property_chars(self, prefix: str) -> str: punctuation: The join result of all Unicode strings starting with a specific prefix. 
""" - punctuation = "".join(six.unichr(x) for x in range(sys.maxunicode) \ - if unicodedata.category(six.unichr(x)).startswith(prefix)) + punctuation = "".join( + six.unichr(x) for x in range(sys.maxunicode) if unicodedata.category(six.unichr(x)).startswith(prefix) + ) return punctuation @@ -81,14 +83,14 @@ def bleu_tokenize(string: str) -> List[str]: return tokens -@metric_registry('BLEU', 'tensorflow, tensorflow_itex') +@metric_registry("BLEU", "tensorflow, tensorflow_itex") class BLEU(object): """Computes the BLEU (Bilingual Evaluation Understudy) score. - BLEU is an algorithm for evaluating the quality of text which has - been machine-translated from one natural language to another. - This implementent approximate the BLEU score since we do not - glue word pieces or decode the ids and tokenize the output. + BLEU is an algorithm for evaluating the quality of text which has + been machine-translated from one natural language to another. + This implementent approximate the BLEU score since we do not + glue word pieces or decode the ids and tokenize the output. By default, we use ngram order of 4 and use brevity penalty. Also, this does not have beam search. @@ -119,9 +121,11 @@ def update(self, prediction: Sequence[str], label: Sequence[str]) -> None: and label are different. """ if len(label) != len(prediction): - raise ValueError("Reference and prediction files have different number " - "of lines. If training only a few steps (100-200), the " - "translation may be empty.") + raise ValueError( + "Reference and prediction files have different number " + "of lines. If training only a few steps (100-200), the " + "translation may be empty." + ) label = [x.lower() for x in label] prediction = [x.lower() for x in prediction] label = [bleu_tokenize(x) for x in label] diff --git a/neural_compressor/metric/bleu_util.py b/neural_compressor/metric/bleu_util.py index 875321f4dd3..678c3ffbf3e 100644 --- a/neural_compressor/metric/bleu_util.py +++ b/neural_compressor/metric/bleu_util.py @@ -35,50 +35,50 @@ https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/bleu_hook.py """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import collections import math +from typing import List, Sequence, Union import numpy as np +from six.moves import xrange # pylint: disable=redefined-builtin from neural_compressor.utils.utility import LazyImport -from six.moves import xrange # pylint: disable=redefined-builtin -from typing import List, Sequence, Union -tf = LazyImport('tensorflow') +tf = LazyImport("tensorflow") + -def _get_ngrams_with_counter(segment: Sequence[str], - max_order: List[int]) -> collections.Counter: +def _get_ngrams_with_counter(segment: Sequence[str], max_order: List[int]) -> collections.Counter: """Extract all n-grams up to a given maximum order from an input segment. Args: segment: The text segment from which n-grams will be extracted. - max_order: The maximum length in tokens of the n-grams returned + max_order: The maximum length in tokens of the n-grams returned by this methods. Returns: - ngram_counts: The Counter containing all n-grams up to max_order + ngram_counts: The Counter containing all n-grams up to max_order in segment with a count of how many times each n-gram occurred. 
""" ngram_counts = collections.Counter() for order in xrange(1, max_order + 1): for i in xrange(0, len(segment) - order + 1): - ngram = tuple(segment[i:i + order]) + ngram = tuple(segment[i : i + order]) ngram_counts[ngram] += 1 return ngram_counts -def compute_bleu(reference_corpus: Union[Sequence[str], Sequence[Sequence[str]]], - translation_corpus: Sequence[str], - max_order: int = 4, - use_bp: bool = True) -> float: +def compute_bleu( + reference_corpus: Union[Sequence[str], Sequence[Sequence[str]]], + translation_corpus: Sequence[str], + max_order: int = 4, + use_bp: bool = True, +) -> float: """Compute the BLEU score of translated segments against its references. Args: - reference_corpus: List of references for each translation. + reference_corpus: List of references for each translation. Each reference should be tokenized into a list of tokens. translation_corpus: List of translations to score. Each translation should be tokenized into a list of tokens. @@ -97,21 +97,20 @@ def compute_bleu(reference_corpus: Union[Sequence[str], Sequence[Sequence[str]]] possible_matches_by_order = [0] * max_order precisions = [] - for (references, translations) in zip(reference_corpus, translation_corpus): + for references, translations in zip(reference_corpus, translation_corpus): reference_length += len(references) translation_length += len(translations) ref_ngram_counts = _get_ngrams_with_counter(references, max_order) translation_ngram_counts = _get_ngrams_with_counter(translations, max_order) - overlap = dict((ngram, - min(count, translation_ngram_counts[ngram])) - for ngram, count in ref_ngram_counts.items()) + overlap = dict( + (ngram, min(count, translation_ngram_counts[ngram])) for ngram, count in ref_ngram_counts.items() + ) for ngram in overlap: matches_by_order[len(ngram) - 1] += overlap[ngram] for ngram in translation_ngram_counts: - possible_matches_by_order[len(ngram) - 1] += translation_ngram_counts[ - ngram] + possible_matches_by_order[len(ngram) - 1] += translation_ngram_counts[ngram] precisions = [0] * max_order smooth = 1.0 @@ -120,8 +119,7 @@ def compute_bleu(reference_corpus: Union[Sequence[str], Sequence[Sequence[str]]] if possible_matches_by_order[i] > 0: precisions[i] = float(matches_by_order[i]) / possible_matches_by_order[i] if matches_by_order[i] > 0: - precisions[i] = float(matches_by_order[i]) / possible_matches_by_order[ - i] + precisions[i] = float(matches_by_order[i]) / possible_matches_by_order[i] else: smooth *= 2 precisions[i] = 1.0 / (smooth * possible_matches_by_order[i]) @@ -134,6 +132,6 @@ def compute_bleu(reference_corpus: Union[Sequence[str], Sequence[Sequence[str]]] if use_bp: ratio = translation_length / reference_length - bp = math.exp(1 - 1. / ratio) if ratio < 1.0 else 1.0 + bp = math.exp(1 - 1.0 / ratio) if ratio < 1.0 else 1.0 bleu_score = np.float32(geo_mean * bp) return bleu_score diff --git a/neural_compressor/metric/coco_label_map.py b/neural_compressor/metric/coco_label_map.py index 82327cb6ce1..724842b7d40 100644 --- a/neural_compressor/metric/coco_label_map.py +++ b/neural_compressor/metric/coco_label_map.py @@ -16,88 +16,87 @@ # limitations under the License. 
# # - """The dict mapping category IDs to its names of labels.""" category_map = { - 1: 'person', - 2: 'bicycle', - 3: 'car', - 4: 'motorcycle', - 5: 'airplane', - 6: 'bus', - 7: 'train', - 8: 'truck', - 9: 'boat', - 10: 'traffic light', - 11: 'fire hydrant', - 13: 'stop sign', - 14: 'parking meter', - 15: 'bench', - 16: 'bird', - 17: 'cat', - 18: 'dog', - 19: 'horse', - 20: 'sheep', - 21: 'cow', - 22: 'elephant', - 23: 'bear', - 24: 'zebra', - 25: 'giraffe', - 27: 'backpack', - 28: 'umbrella', - 31: 'handbag', - 32: 'tie', - 33: 'suitcase', - 34: 'frisbee', - 35: 'skis', - 36: 'snowboard', - 37: 'sports ball', - 38: 'kite', - 39: 'baseball bat', - 40: 'baseball glove', - 41: 'skateboard', - 42: 'surfboard', - 43: 'tennis racket', - 44: 'bottle', - 46: 'wine glass', - 47: 'cup', - 48: 'fork', - 49: 'knife', - 50: 'spoon', - 51: 'bowl', - 52: 'banana', - 53: 'apple', - 54: 'sandwich', - 55: 'orange', - 56: 'broccoli', - 57: 'carrot', - 58: 'hot dog', - 59: 'pizza', - 60: 'donut', - 61: 'cake', - 62: 'chair', - 63: 'couch', - 64: 'potted plant', - 65: 'bed', - 67: 'dining table', - 70: 'toilet', - 72: 'tv', - 73: 'laptop', - 74: 'mouse', - 75: 'remote', - 76: 'keyboard', - 77: 'cell phone', - 78: 'microwave', - 79: 'oven', - 80: 'toaster', - 81: 'sink', - 82: 'refrigerator', - 84: 'book', - 85: 'clock', - 86: 'vase', - 87: 'scissors', - 88: 'teddy bear', - 89: 'hair drier', - 90: 'toothbrush' + 1: "person", + 2: "bicycle", + 3: "car", + 4: "motorcycle", + 5: "airplane", + 6: "bus", + 7: "train", + 8: "truck", + 9: "boat", + 10: "traffic light", + 11: "fire hydrant", + 13: "stop sign", + 14: "parking meter", + 15: "bench", + 16: "bird", + 17: "cat", + 18: "dog", + 19: "horse", + 20: "sheep", + 21: "cow", + 22: "elephant", + 23: "bear", + 24: "zebra", + 25: "giraffe", + 27: "backpack", + 28: "umbrella", + 31: "handbag", + 32: "tie", + 33: "suitcase", + 34: "frisbee", + 35: "skis", + 36: "snowboard", + 37: "sports ball", + 38: "kite", + 39: "baseball bat", + 40: "baseball glove", + 41: "skateboard", + 42: "surfboard", + 43: "tennis racket", + 44: "bottle", + 46: "wine glass", + 47: "cup", + 48: "fork", + 49: "knife", + 50: "spoon", + 51: "bowl", + 52: "banana", + 53: "apple", + 54: "sandwich", + 55: "orange", + 56: "broccoli", + 57: "carrot", + 58: "hot dog", + 59: "pizza", + 60: "donut", + 61: "cake", + 62: "chair", + 63: "couch", + 64: "potted plant", + 65: "bed", + 67: "dining table", + 70: "toilet", + 72: "tv", + 73: "laptop", + 74: "mouse", + 75: "remote", + 76: "keyboard", + 77: "cell phone", + 78: "microwave", + 79: "oven", + 80: "toaster", + 81: "sink", + 82: "refrigerator", + 84: "book", + 85: "clock", + 86: "vase", + 87: "scissors", + 88: "teddy bear", + 89: "hair drier", + 90: "toothbrush", } diff --git a/neural_compressor/metric/coco_tools.py b/neural_compressor/metric/coco_tools.py index 265185baf10..2f9369798df 100644 --- a/neural_compressor/metric/coco_tools.py +++ b/neural_compressor/metric/coco_tools.py @@ -14,8 +14,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - - """Wrappers for third party pycocotools to be used within object_detection. 
Note that nothing in this file is tensorflow related and thus cannot @@ -41,32 +39,29 @@ evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, agnostic_mode=False) metrics = evaluator.ComputeMetrics() - """ import copy import time +from collections import OrderedDict +from typing import Any, Dict, List, Set, Union import numpy as np +from pycocotools import coco, cocoeval, mask -from collections import OrderedDict from neural_compressor.utils import logger -from pycocotools import coco -from pycocotools import cocoeval -from pycocotools import mask -from typing import Any, Dict, List, Set, Union class COCOWrapper(coco.COCO): """Wrapper for the pycocotools COCO class. - + Attributes: dataset: a dictionary holding bounding box annotations in the COCO format. detection_type: type of detections being wrapped. Can be one of ['bbox', 'segmentation'] """ - def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): + def __init__(self, dataset: Dict[str, Any], detection_type: str = "bbox"): """Construct a COCOWrapper. See http://mscoco.org/dataset/#format for a description of the format. @@ -82,11 +77,12 @@ def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): Raises: ValueError: if detection_type is unsupported. """ - supported_detection_types = ['bbox', 'segmentation'] + supported_detection_types = ["bbox", "segmentation"] if detection_type not in supported_detection_types: - raise ValueError('Unsupported detection type: {}. ' - 'Supported values are: {}'.format( - detection_type, supported_detection_types)) + raise ValueError( + "Unsupported detection type: {}. " + "Supported values are: {}".format(detection_type, supported_detection_types) + ) self._detection_type = detection_type coco.COCO.__init__(self) self.dataset = dataset @@ -109,38 +105,36 @@ def LoadAnnotations(self, annotations: list) -> coco.COCO: a coco.COCO datastructure holding object detection annotations results Raises: - ValueError: if (1) annotations is not a list or annotations do not + ValueError: if (1) annotations is not a list or annotations do not correspond to the images contained in self. 
""" results = coco.COCO() - results.dataset['images'] = [img for img in self.dataset['images']] + results.dataset["images"] = [img for img in self.dataset["images"]] logger.info("Load and prepare annotation results.") tic = time.time() if not isinstance(annotations, list): - raise ValueError('annotations is not a list of objects') - annotation_img_ids = [ann['image_id'] for ann in annotations] - if (set(annotation_img_ids) != (set(annotation_img_ids) - & set(self.getImgIds()))): - raise ValueError('Results do not correspond to current coco set') - results.dataset['categories'] = copy.deepcopy( - self.dataset['categories']) - if self._detection_type == 'bbox': + raise ValueError("annotations is not a list of objects") + annotation_img_ids = [ann["image_id"] for ann in annotations] + if set(annotation_img_ids) != (set(annotation_img_ids) & set(self.getImgIds())): + raise ValueError("Results do not correspond to current coco set") + results.dataset["categories"] = copy.deepcopy(self.dataset["categories"]) + if self._detection_type == "bbox": for idx, ann in enumerate(annotations): - bb = ann['bbox'] - ann['area'] = bb[2] * bb[3] - ann['id'] = idx + 1 - ann['iscrowd'] = 0 - elif self._detection_type == 'segmentation': + bb = ann["bbox"] + ann["area"] = bb[2] * bb[3] + ann["id"] = idx + 1 + ann["iscrowd"] = 0 + elif self._detection_type == "segmentation": for idx, ann in enumerate(annotations): - ann['area'] = mask.area(ann['segmentation']) - ann['bbox'] = mask.toBbox(ann['segmentation']) - ann['id'] = idx + 1 - ann['iscrowd'] = 0 - logger.info('DONE (t=%0.2fs)', (time.time() - tic)) + ann["area"] = mask.area(ann["segmentation"]) + ann["bbox"] = mask.toBbox(ann["segmentation"]) + ann["id"] = idx + 1 + ann["iscrowd"] = 0 + logger.info("DONE (t=%0.2fs)", (time.time() - tic)) - results.dataset['annotations'] = annotations + results.dataset["annotations"] = annotations results.createIndex() return results @@ -159,13 +153,15 @@ class COCOEvalWrapper(cocoeval.COCOeval): metrics = evaluator.ComputeMetrics() """ - def __init__(self, - groundtruth: coco.COCO = None, - detections: coco.COCO = None, - agnostic_mode = False, - iou_type: str = 'bbox', - iou_thrs: Union[str, float] = None, - map_points=None): + def __init__( + self, + groundtruth: coco.COCO = None, + detections: coco.COCO = None, + agnostic_mode=False, + iou_type: str = "bbox", + iou_thrs: Union[str, float] = None, + map_points=None, + ): """Construct a COCOEvalWrapper. Note that for the area-based metrics to be meaningful, detection and @@ -186,27 +182,20 @@ class labels, treating all detections as proposals. map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for 11-point interpolated AP, 0 for area under PR curve. 
""" - cocoeval.COCOeval.__init__(self, - groundtruth, - detections, - iouType=iou_type) + cocoeval.COCOeval.__init__(self, groundtruth, detections, iouType=iou_type) if agnostic_mode: self.params.useCats = 0 - if iou_thrs == '0.5:0.05:0.95': - self.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, \ - endpoint=True) + if iou_thrs == "0.5:0.05:0.95": + self.params.iouThrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True) elif isinstance(iou_thrs, float): - self.params.iouThrs = [iou_thrs] + self.params.iouThrs = [iou_thrs] if map_points == 101: - self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, \ - endpoint=True) + self.params.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True) if map_points == 11: - self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .1)) + 1, \ - endpoint=True) + self.params.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.1)) + 1, endpoint=True) if map_points == 0: - self.params.recThrs = [-1] - + self.params.recThrs = [-1] def GetCategory(self, category_id: int) -> dict: """Fetch dictionary holding category information given category id. @@ -229,65 +218,67 @@ def GetCategoryIdList(self) -> List[int]: def accumulate(self, p: cocoeval.Params = None): """Accumulate evaluation results per image and store it to self.eval. - + Args: p: input params for evaluation """ - print('Accumulating evaluation results...') + print("Accumulating evaluation results...") tic = time.time() if not self.evalImgs: - print('Please run evaluate() first') + print("Please run evaluate() first") # allows input customized parameters if p is None: p = self.params p.catIds = p.catIds if p.useCats == 1 else [-1] - T = len(p.iouThrs) - R = len(p.recThrs) - K = len(p.catIds) if p.useCats else 1 - A = len(p.areaRng) - M = len(p.maxDets) - precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories - recall = -np.ones((T,K,A,M)) - scores = -np.ones((T,R,K,A,M)) + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -np.ones((T, R, K, A, M)) # -1 for the precision of absent categories + recall = -np.ones((T, K, A, M)) + scores = -np.ones((T, R, K, A, M)) # create dictionary for future indexing _pe = self._paramsEval - print('-pe', _pe) + print("-pe", _pe) catIds = _pe.catIds if _pe.useCats else [-1] setK = set(catIds) setA = set(map(tuple, _pe.areaRng)) setM = set(_pe.maxDets) setI = set(_pe.imgIds) # get inds to evaluate - k_list = [n for n, k in enumerate(p.catIds) if k in setK] + k_list = [n for n, k in enumerate(p.catIds) if k in setK] m_list = [m for n, m in enumerate(p.maxDets) if m in setM] a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] - i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] I0 = len(_pe.imgIds) A0 = len(_pe.areaRng) # retrieve E at each category, area range, and max number of detections for k, k0 in enumerate(k_list): - Nk = k0*A0*I0 + Nk = k0 * A0 * I0 for a, a0 in enumerate(a_list): - Na = a0*I0 + Na = a0 * I0 for m, maxDet in enumerate(m_list): E = [self.evalImgs[Nk + Na + i] for i in i_list] - E = [e for e in E if not e is None] - if len(E) == 0: continue - dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) + E = [e for e in E if e is not None] + if len(E) == 0: + continue + dtScores = 
np.concatenate([e["dtScores"][0:maxDet] for e in E]) # different sorting method generates slightly different results. # mergesort is used to be consistent as Matlab implementation. - inds = np.argsort(-dtScores, kind='mergesort') + inds = np.argsort(-dtScores, kind="mergesort") dtScoresSorted = dtScores[inds] - dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] - dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] - gtIg = np.concatenate([e['gtIgnore'] for e in E]) - npig = np.count_nonzero(gtIg==0 ) - if npig == 0: continue - tps = np.logical_and( dtm, np.logical_not(dtIg) ) - fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) + dtm = np.concatenate([e["dtMatches"][:, 0:maxDet] for e in E], axis=1)[:, inds] + dtIg = np.concatenate([e["dtIgnore"][:, 0:maxDet] for e in E], axis=1)[:, inds] + gtIg = np.concatenate([e["gtIgnore"] for e in E]) + npig = np.count_nonzero(gtIg == 0) + if npig == 0: + continue + tps = np.logical_and(dtm, np.logical_not(dtIg)) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg)) tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) @@ -296,73 +287,72 @@ def accumulate(self, p: cocoeval.Params = None): fp = np.array(fp) nd = len(tp) rc = tp / npig - pr = tp / (fp+tp+np.spacing(1)) + pr = tp / (fp + tp + np.spacing(1)) # calculate precision if R == 1: - rc = np.concatenate(([0.], rc, [1.])) - pr = np.concatenate(([0.], pr, [0.])) - - # compute the precision envelope - for i in range(pr.size - 1, 0, -1): - pr[i - 1] = np.maximum(pr[i - 1], pr[i]) - - # to calculate area under PR curve, look for points - # where X axis (recall) changes value - change_point = np.where(rc[1:] != rc[:-1])[0] - # and sum (\Delta recall) * recall - res = np.sum((rc[change_point + 1] - rc[change_point]) \ - * pr[change_point + 1]) - precision[t,:,k,a,m] = np.array([res]) + rc = np.concatenate(([0.0], rc, [1.0])) + pr = np.concatenate(([0.0], pr, [0.0])) + + # compute the precision envelope + for i in range(pr.size - 1, 0, -1): + pr[i - 1] = np.maximum(pr[i - 1], pr[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + change_point = np.where(rc[1:] != rc[:-1])[0] + # and sum (\Delta recall) * recall + res = np.sum((rc[change_point + 1] - rc[change_point]) * pr[change_point + 1]) + precision[t, :, k, a, m] = np.array([res]) else: - q = np.zeros((R,)) + q = np.zeros((R,)) - # numpy is slow without cython optimization for accessing elements - # use python array gets significant speed improvement - pr = pr.tolist(); q = q.tolist() + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist() + q = q.tolist() - for i in range(nd-1, 0, -1): - if pr[i] > pr[i-1]: - pr[i-1] = pr[i] + for i in range(nd - 1, 0, -1): + if pr[i] > pr[i - 1]: + pr[i - 1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side="left") + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + except: + pass + precision[t, :, k, a, m] = np.array(q) - inds = np.searchsorted(rc, p.recThrs, side='left') - try: - for ri, pi in enumerate(inds): - q[ri] = pr[pi] - except: - pass - precision[t,:,k,a,m] = np.array(q) - # calculate recall if nd: - recall[t,k,a,m] = rc[-1] + recall[t, k, a, m] = rc[-1] else: - recall[t,k,a,m] = 0 + recall[t, k, a, m] = 0 # calculate score ss = np.zeros((R,)) - inds = np.searchsorted(rc, p.recThrs, side='left') + inds = 
np.searchsorted(rc, p.recThrs, side="left") try: - for ri, pi in enumerate(inds): - ss[ri] = dtScoresSorted[pi] + for ri, pi in enumerate(inds): + ss[ri] = dtScoresSorted[pi] except: - pass - scores[t,:,k,a,m] = np.array(ss) + pass + scores[t, :, k, a, m] = np.array(ss) # exit(0) self.eval = { - 'params': p, - 'counts': [T, R, K, A, M], - 'precision': precision, - 'recall': recall, - 'scores': scores, + "params": p, + "counts": [T, R, K, A, M], + "precision": precision, + "recall": recall, + "scores": scores, } toc = time.time() - print('DONE (t={:0.2f}s).'.format( toc-tic)) - + print("DONE (t={:0.2f}s).".format(toc - tic)) - def ComputeMetrics(self, - include_metrics_per_category: bool = False, - all_metrics_per_category: bool = False): # pragma: no cover + def ComputeMetrics( + self, include_metrics_per_category: bool = False, all_metrics_per_category: bool = False + ): # pragma: no cover """Compute detection metrics. Args: @@ -393,7 +383,7 @@ def ComputeMetrics(self, 'Recall/AR@100 (medium)': average recall for medium objects with 100 detections; 'Recall/AR@100 (large)': average recall for large objects with 100 - detections; + detections; and (2) per_category_ap is a dictionary holding category specific results with keys of the form: 'Precision mAP ByCategory/category' (without the supercategory part if no supercategories exist). @@ -409,57 +399,63 @@ def ComputeMetrics(self, self.accumulate() self.summarize() - summary_metrics = OrderedDict([ - ('Precision/mAP', self.stats[0]), - ('Precision/mAP@.50IOU', self.stats[1]), - ('Precision/mAP@.75IOU', self.stats[2]), - ('Precision/mAP (small)', self.stats[3]), - ('Precision/mAP (medium)', self.stats[4]), - ('Precision/mAP (large)', self.stats[5]), - ('Recall/AR@1', self.stats[6]), ('Recall/AR@10', self.stats[7]), - ('Recall/AR@100', self.stats[8]), - ('Recall/AR@100 (small)', self.stats[9]), - ('Recall/AR@100 (medium)', self.stats[10]), - ('Recall/AR@100 (large)', self.stats[11]) - ]) + summary_metrics = OrderedDict( + [ + ("Precision/mAP", self.stats[0]), + ("Precision/mAP@.50IOU", self.stats[1]), + ("Precision/mAP@.75IOU", self.stats[2]), + ("Precision/mAP (small)", self.stats[3]), + ("Precision/mAP (medium)", self.stats[4]), + ("Precision/mAP (large)", self.stats[5]), + ("Recall/AR@1", self.stats[6]), + ("Recall/AR@10", self.stats[7]), + ("Recall/AR@100", self.stats[8]), + ("Recall/AR@100 (small)", self.stats[9]), + ("Recall/AR@100 (medium)", self.stats[10]), + ("Recall/AR@100 (large)", self.stats[11]), + ] + ) if not include_metrics_per_category: return summary_metrics, {} - if not hasattr(self, 'category_stats'): - raise ValueError('Category stats do not exist') + if not hasattr(self, "category_stats"): + raise ValueError("Category stats do not exist") per_category_ap = OrderedDict([]) if self.GetAgnosticMode(): return summary_metrics, per_category_ap for category_index, category_id in enumerate(self.GetCategoryIdList()): - category = self.GetCategory(category_id)['name'] + category = self.GetCategory(category_id)["name"] # Kept for backward compatilbility # pylint: disable=no-member - per_category_ap['PerformanceByCategory/mAP/{}'.format( - category)] = self.category_stats[0][category_index] + per_category_ap["PerformanceByCategory/mAP/{}".format(category)] = self.category_stats[0][category_index] if all_metrics_per_category: - per_category_ap['Precision mAP ByCategory/{}'.format( - category)] = self.category_stats[0][category_index] - per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( - category)] = 
self.category_stats[1][category_index] - per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( - category)] = self.category_stats[2][category_index] - per_category_ap['Precision mAP (small) ByCategory/{}'.format( - category)] = self.category_stats[3][category_index] - per_category_ap['Precision mAP (medium) ByCategory/{}'.format( - category)] = self.category_stats[4][category_index] - per_category_ap['Precision mAP (large) ByCategory/{}'.format( - category)] = self.category_stats[5][category_index] - per_category_ap['Recall AR@1 ByCategory/{}'.format( - category)] = self.category_stats[6][category_index] - per_category_ap['Recall AR@10 ByCategory/{}'.format( - category)] = self.category_stats[7][category_index] - per_category_ap['Recall AR@100 ByCategory/{}'.format( - category)] = self.category_stats[8][category_index] - per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( - category)] = self.category_stats[9][category_index] - per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( - category)] = self.category_stats[10][category_index] - per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( - category)] = self.category_stats[11][category_index] + per_category_ap["Precision mAP ByCategory/{}".format(category)] = self.category_stats[0][category_index] + per_category_ap["Precision mAP@.50IOU ByCategory/{}".format(category)] = self.category_stats[1][ + category_index + ] + per_category_ap["Precision mAP@.75IOU ByCategory/{}".format(category)] = self.category_stats[2][ + category_index + ] + per_category_ap["Precision mAP (small) ByCategory/{}".format(category)] = self.category_stats[3][ + category_index + ] + per_category_ap["Precision mAP (medium) ByCategory/{}".format(category)] = self.category_stats[4][ + category_index + ] + per_category_ap["Precision mAP (large) ByCategory/{}".format(category)] = self.category_stats[5][ + category_index + ] + per_category_ap["Recall AR@1 ByCategory/{}".format(category)] = self.category_stats[6][category_index] + per_category_ap["Recall AR@10 ByCategory/{}".format(category)] = self.category_stats[7][category_index] + per_category_ap["Recall AR@100 ByCategory/{}".format(category)] = self.category_stats[8][category_index] + per_category_ap["Recall AR@100 (small) ByCategory/{}".format(category)] = self.category_stats[9][ + category_index + ] + per_category_ap["Recall AR@100 (medium) ByCategory/{}".format(category)] = self.category_stats[10][ + category_index + ] + per_category_ap["Recall AR@100 (large) ByCategory/{}".format(category)] = self.category_stats[11][ + category_index + ] return summary_metrics, per_category_ap @@ -477,12 +473,7 @@ def _ConvertBoxToCOCOFormat(box): Returns: A list of floats, in COCO format, representing [xmin, ymin, width, height] """ - return [ - float(box[1]), - float(box[0]), - float(box[3] - box[1]), - float(box[2] - box[0]) - ] + return [float(box[1]), float(box[0]), float(box[3] - box[1]), float(box[2] - box[0])] def _RleCompress(masks): @@ -498,13 +489,15 @@ def _RleCompress(masks): return mask.encode(np.asfortranarray(masks)) -def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], - next_annotation_id: int, - category_id_set: Set[str], - groundtruth_boxes: np.array, - groundtruth_classes: np.array, - groundtruth_masks: Union[np.array, None] = None, - groundtruth_is_crowd: Union[np.array, None] = None) -> list: +def ExportSingleImageGroundtruthToCoco( + image_id: Union[int, str], + next_annotation_id: int, + category_id_set: Set[str], + groundtruth_boxes: np.array, + 
groundtruth_classes: np.array, + groundtruth_masks: Union[np.array, None] = None, + groundtruth_is_crowd: Union[np.array, None] = None, +) -> list: """Export groundtruth of a single image to COCO format. This function converts groundtruth detection annotations represented as numpy @@ -540,54 +533,51 @@ def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], have the correct shapes or (3) if image_ids are not integers """ if len(groundtruth_classes.shape) != 1: - raise ValueError('groundtruth_classes is ' 'expected to be of rank 1.') + raise ValueError("groundtruth_classes is " "expected to be of rank 1.") if len(groundtruth_boxes.shape) != 2: - raise ValueError('groundtruth_boxes is expected to be of ' 'rank 2.') + raise ValueError("groundtruth_boxes is expected to be of " "rank 2.") if groundtruth_boxes.shape[1] != 4: - raise ValueError('groundtruth_boxes should have ' 'shape[1] == 4.') + raise ValueError("groundtruth_boxes should have " "shape[1] == 4.") num_boxes = groundtruth_classes.shape[0] if num_boxes != groundtruth_boxes.shape[0]: raise ValueError( - 'Corresponding entries in groundtruth_classes, ' - 'and groundtruth_boxes should have ' - 'compatible shapes (i.e., agree on the 0th dimension).' - 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % - (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], - image_id)) + "Corresponding entries in groundtruth_classes, " + "and groundtruth_boxes should have " + "compatible shapes (i.e., agree on the 0th dimension)." + "Classes shape: %d. Boxes shape: %d. Image ID: %s" + % (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], image_id) + ) has_is_crowd = groundtruth_is_crowd is not None if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: - raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') + raise ValueError("groundtruth_is_crowd is expected to be of rank 1.") groundtruth_list = [] for i in range(num_boxes): if groundtruth_classes[i] in category_id_set: iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 export_dict = { - 'id': - next_annotation_id + i, - 'image_id': - image_id, - 'category_id': - int(groundtruth_classes[i]), - 'bbox': - list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), - 'area': - float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * - (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), - 'iscrowd': - iscrowd + "id": next_annotation_id + i, + "image_id": image_id, + "category_id": int(groundtruth_classes[i]), + "bbox": list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), + "area": float( + (groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) + * (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1]) + ), + "iscrowd": iscrowd, } if groundtruth_masks is not None: - export_dict['segmentation'] = _RleCompress( - groundtruth_masks[i]) + export_dict["segmentation"] = _RleCompress(groundtruth_masks[i]) groundtruth_list.append(export_dict) return groundtruth_list -def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], - category_id_set: Set[int], - detection_boxes: np.array, - detection_scores: np.array, - detection_classes: np.array) -> list: +def ExportSingleImageDetectionBoxesToCoco( + image_id: Union[int, str], + category_id_set: Set[int], + detection_boxes: np.array, + detection_scores: np.array, + detection_classes: np.array, +) -> list: """Export detections of a single image to COCO format. 
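Both exporters rely on _ConvertBoxToCOCOFormat above to switch from [ymin, xmin, ymax, xmax] order to COCO's [xmin, ymin, width, height]; a standalone sketch of that conversion (the box values are made up):

box = [10.0, 20.0, 110.0, 220.0]  # [ymin, xmin, ymax, xmax]
coco_box = [float(box[1]), float(box[0]), float(box[3] - box[1]), float(box[2] - box[0])]
print(coco_box)  # [20.0, 10.0, 200.0, 100.0] -> [xmin, ymin, width, height]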
This function converts detections represented as numpy arrays to dictionaries @@ -617,46 +607,41 @@ def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], lists do not have the correct shapes or (3) if image_ids are not integers. """ if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: - raise ValueError( - 'All entries in detection_classes and detection_scores' - 'expected to be of rank 1.') + raise ValueError("All entries in detection_classes and detection_scores" "expected to be of rank 1.") if len(detection_boxes.shape) != 2: - raise ValueError('All entries in detection_boxes expected to be of ' - 'rank 2.') + raise ValueError("All entries in detection_boxes expected to be of " "rank 2.") if detection_boxes.shape[1] != 4: - raise ValueError('All entries in detection_boxes should have ' - 'shape[1] == 4.') + raise ValueError("All entries in detection_boxes should have " "shape[1] == 4.") num_boxes = detection_classes.shape[0] if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: raise ValueError( - 'Corresponding entries in detection_classes, ' - 'detection_scores and detection_boxes should have ' - 'compatible shapes (i.e., agree on the 0th dimension). ' - 'Classes shape: %d. Boxes shape: %d. ' - 'Scores shape: %d' % - (detection_classes.shape[0], detection_boxes.shape[0], - detection_scores.shape[0])) + "Corresponding entries in detection_classes, " + "detection_scores and detection_boxes should have " + "compatible shapes (i.e., agree on the 0th dimension). " + "Classes shape: %d. Boxes shape: %d. " + "Scores shape: %d" % (detection_classes.shape[0], detection_boxes.shape[0], detection_scores.shape[0]) + ) detections_list = [] for i in range(num_boxes): if detection_classes[i] in category_id_set: - detections_list.append({ - 'image_id': - image_id, - 'category_id': - int(detection_classes[i]), - 'bbox': - list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), - 'score': - float(detection_scores[i]) - }) + detections_list.append( + { + "image_id": image_id, + "category_id": int(detection_classes[i]), + "bbox": list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), + "score": float(detection_scores[i]), + } + ) return detections_list -def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], - category_id_set: Set[int], - detection_masks: np.array, - detection_scores: np.array, - detection_classes: np.array) -> list: +def ExportSingleImageDetectionMasksToCoco( + image_id: Union[str, int], + category_id_set: Set[int], + detection_masks: np.array, + detection_scores: np.array, + detection_classes: np.array, +) -> list: """Export detection masks of a single image to COCO format. This function converts detections represented as numpy arrays to dictionaries @@ -685,29 +670,25 @@ def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], lists do not have the correct shapes or (3) if image_ids are not integers. """ if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: - raise ValueError( - 'All entries in detection_classes and detection_scores' - 'expected to be of rank 1.') + raise ValueError("All entries in detection_classes and detection_scores" "expected to be of rank 1.") num_boxes = detection_classes.shape[0] if not num_boxes == len(detection_masks) == detection_scores.shape[0]: - raise ValueError('Corresponding entries in detection_classes, ' - 'detection_scores and detection_masks should have ' - 'compatible lengths and shapes ' - 'Classes length: %d. Masks length: %d. 
' - 'Scores length: %d' % - (detection_classes.shape[0], len(detection_masks), - detection_scores.shape[0])) + raise ValueError( + "Corresponding entries in detection_classes, " + "detection_scores and detection_masks should have " + "compatible lengths and shapes " + "Classes length: %d. Masks length: %d. " + "Scores length: %d" % (detection_classes.shape[0], len(detection_masks), detection_scores.shape[0]) + ) detections_list = [] for i in range(num_boxes): if detection_classes[i] in category_id_set: - detections_list.append({ - 'image_id': - image_id, - 'category_id': - int(detection_classes[i]), - 'segmentation': - _RleCompress(detection_masks[i]), - 'score': - float(detection_scores[i]) - }) + detections_list.append( + { + "image_id": image_id, + "category_id": int(detection_classes[i]), + "segmentation": _RleCompress(detection_masks[i]), + "score": float(detection_scores[i]), + } + ) return detections_list diff --git a/neural_compressor/metric/evaluate_squad.py b/neural_compressor/metric/evaluate_squad.py index 20fedd74538..1065c63fde1 100644 --- a/neural_compressor/metric/evaluate_squad.py +++ b/neural_compressor/metric/evaluate_squad.py @@ -14,25 +14,26 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Official evaluation script for v1.1 of the SQuAD dataset. From https://github.com/allenai/bi-att-flow/blob/master/squad/evaluate-v1.1.py """ from __future__ import print_function + import sys from collections import Counter + from .f1 import normalize_answer def f1_score(prediction, ground_truth): """Calculate the F1 score of the prediction and the ground_truth. - + Args: prediction: The predicted result. ground_truth: The ground truth. - + Returns: The F1 score of prediction. Float point number. """ @@ -53,12 +54,12 @@ def metric_max_over_ground_truths(metric_fn, prediction, ground_truths): For each answer in ground_truths, evaluate the metric of prediction with this answer, and return the max metric. - + Args: metric_fn: The function to calculate the metric. prediction: The prediction result. ground_truths: A list of correct answers. - + Returns: The max metric. Float point number. """ @@ -73,48 +74,44 @@ def exact_match_score(prediction, ground_truth): """Compute the exact match score between prediction and ground truth. Args: - prediction: The result of predictions to be evaluated. + prediction: The result of predictions to be evaluated. ground_truth: The ground truth. Returns: The exact match score. """ - return (normalize_answer(prediction) == normalize_answer(ground_truth)) + return normalize_answer(prediction) == normalize_answer(ground_truth) def evaluate(dataset, predictions): """Evaluate the average F1 score and the exact match score for Question-Answering results. Args: - dataset: The dataset to evaluate the prediction. A list instance of articles. - An article contains a list of paragraphs, a paragraph contains a list of - question-and-answers (qas), and a question-and-answer cantains an id, a question, + dataset: The dataset to evaluate the prediction. A list instance of articles. + An article contains a list of paragraphs, a paragraph contains a list of + question-and-answers (qas), and a question-and-answer cantains an id, a question, and a list of correct answers. For example: predictions: The result of predictions to be evaluated. A dict mapping the id of - a question to the predicted answer of the question. 
- + a question to the predicted answer of the question. + Returns: The F1 score and the exact match score. - """ f1 = exact_match = total = 0 for article in dataset: - for paragraph in article['paragraphs']: - for qa in paragraph['qas']: + for paragraph in article["paragraphs"]: + for qa in paragraph["qas"]: total += 1 - if qa['id'] not in predictions: - message = 'Unanswered question ' + qa['id'] + \ - ' will receive score 0.' + if qa["id"] not in predictions: + message = "Unanswered question " + qa["id"] + " will receive score 0." print(message, file=sys.stderr) continue - ground_truths = list(map(lambda x: x['text'], qa['answers'])) - prediction = predictions[qa['id']] - exact_match += metric_max_over_ground_truths( - exact_match_score, prediction, ground_truths) - f1 += metric_max_over_ground_truths( - f1_score, prediction, ground_truths) + ground_truths = list(map(lambda x: x["text"], qa["answers"])) + prediction = predictions[qa["id"]] + exact_match += metric_max_over_ground_truths(exact_match_score, prediction, ground_truths) + f1 += metric_max_over_ground_truths(f1_score, prediction, ground_truths) exact_match = 100.0 * exact_match / total f1 = 100.0 * f1 / total - return {'exact_match': exact_match, 'f1': f1} \ No newline at end of file + return {"exact_match": exact_match, "f1": f1} diff --git a/neural_compressor/metric/f1.py b/neural_compressor/metric/f1.py index d6b0811ae3c..32c4bbfbe8d 100644 --- a/neural_compressor/metric/f1.py +++ b/neural_compressor/metric/f1.py @@ -14,22 +14,23 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Official evaluation script for v1.1 of the SQuAD dataset. From https://github.com/allenai/bi-att-flow/blob/master/squad/evaluate-v1.1.py """ -from collections import Counter, abc -import string import re +import string +from collections import Counter, abc from typing import Any, Callable, Dict, List, TypeVar + from neural_compressor.utils import logger + def normalize_answer(text: str) -> str: """Normalize the answer text. - Lower text, remove punctuation, articles and extra whitespace, + Lower text, remove punctuation, articles and extra whitespace, and replace other whitespace (newline, tab, etc.) to space. Args: @@ -38,16 +39,16 @@ def normalize_answer(text: str) -> str: Returns: The normalized text. """ - + def _remove_articles(text): - return re.sub(r'\b(a|an|the)\b', ' ', text) + return re.sub(r"\b(a|an|the)\b", " ", text) def _white_space_fix(text): - return ' '.join(text.split()) + return " ".join(text.split()) def _remove_punc(text): exclude = set(string.punctuation) - return ''.join(ch for ch in text if ch not in exclude) + return "".join(ch for ch in text if ch not in exclude) def _lower(text): return text.lower() @@ -65,8 +66,9 @@ def f1_score(prediction: abc.Sequence, ground_truth: abc.Sequence): Returns: The F1 score of prediction. Float point number. 
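normalize_answer above lowercases the text, strips punctuation and articles, and collapses whitespace before any SQuAD comparison; a standalone sketch of the same pipeline (the helper name is reused here for illustration only):

import re
import string

def normalize_answer(text):
    text = text.lower()
    text = "".join(ch for ch in text if ch not in set(string.punctuation))
    text = re.sub(r"\b(a|an|the)\b", " ", text)
    return " ".join(text.split())

print(normalize_answer("The Denver Broncos!"))  # "denver broncos"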
""" - assert isinstance(prediction, abc.Sequence) and isinstance(ground_truth, abc.Sequence),\ - 'prediction and ground_truth should be Sequence' + assert isinstance(prediction, abc.Sequence) and isinstance( + ground_truth, abc.Sequence + ), "prediction and ground_truth should be Sequence" common = Counter(prediction) & Counter(ground_truth) num_same = sum(common.values()) if num_same == 0: @@ -76,9 +78,13 @@ def f1_score(prediction: abc.Sequence, ground_truth: abc.Sequence): f1 = (2 * precision * recall) / (precision + recall) return f1 -T = TypeVar('T') -def metric_max_over_ground_truths(metric_fn: Callable[[T, T], float], - prediction: str, ground_truths: List[str]) -> float: + +T = TypeVar("T") + + +def metric_max_over_ground_truths( + metric_fn: Callable[[T, T], float], prediction: str, ground_truths: List[str] +) -> float: """Calculate the max metric for each ground truth. For each answer in ground_truths, evaluate the metric of prediction with @@ -100,45 +106,44 @@ def metric_max_over_ground_truths(metric_fn: Callable[[T, T], float], scores_for_ground_truths.append(score) return max(scores_for_ground_truths) + def evaluate(predictions: Dict[str, str], dataset: List[Dict[str, Any]]) -> float: """Evaluate the average F1 score of Question-Answering results. - The F1 score is the harmonic mean of the precision and recall. It can be computed - with the equation: F1 = 2 * (precision * recall) / (precision + recall). - For all question-and-answers in dataset, it evaluates the f1-score + The F1 score is the harmonic mean of the precision and recall. It can be computed + with the equation: F1 = 2 * (precision * recall) / (precision + recall). + For all question-and-answers in dataset, it evaluates the f1-score Args: predictions: The result of predictions to be evaluated. A dict mapping the id of - a question to the predicted answer of the question. - dataset: The dataset to evaluate the prediction. A list instance of articles. - An article contains a list of paragraphs, a paragraph contains a list of - question-and-answers (qas), and a question-and-answer cantains an id, a question, + a question to the predicted answer of the question. + dataset: The dataset to evaluate the prediction. A list instance of articles. + An article contains a list of paragraphs, a paragraph contains a list of + question-and-answers (qas), and a question-and-answer cantains an id, a question, and a list of correct answers. For example: - + [{'paragraphs': [{'qas':[{'answers': [{'answer_start': 177, 'text': 'Denver Broncos'}, ...], 'question': 'Which NFL team represented the AFC at Super Bowl 50?', 'id': '56be4db0acb8001400a502ec'}]}]}] - + Returns: - The F1 score of this prediction. Float point number in forms of a percentage. + The F1 score of this prediction. Float point number in forms of a percentage. """ f1 = total = 0 for article in dataset: - for paragraph in article['paragraphs']: - for qa in paragraph['qas']: + for paragraph in article["paragraphs"]: + for qa in paragraph["qas"]: total += 1 - if qa['id'] not in predictions: - message = 'Unanswered question ' + qa['id'] + \ - ' will receive score 0.' + if qa["id"] not in predictions: + message = "Unanswered question " + qa["id"] + " will receive score 0." 
logger.warning(message) continue - ground_truths = list(map(lambda x: x['text'], qa['answers'])) - prediction = predictions[qa['id']] + ground_truths = list(map(lambda x: x["text"], qa["answers"])) + prediction = predictions[qa["id"]] - f1 += metric_max_over_ground_truths( - f1_score, prediction, ground_truths) + f1 += metric_max_over_ground_truths(f1_score, prediction, ground_truths) f1 = 100.0 * f1 / total return f1 diff --git a/neural_compressor/metric/metric.py b/neural_compressor/metric/metric.py index c0cf6180ca0..e57fdb17e2f 100644 --- a/neural_compressor/metric/metric.py +++ b/neural_compressor/metric/metric.py @@ -14,22 +14,23 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Neural Compressor metrics.""" - -import numpy as np from abc import abstractmethod from ctypes import Union -from neural_compressor.utils.utility import LazyImport, singleton -from neural_compressor.utils import logger + +import numpy as np from sklearn.metrics import accuracy_score -torch = LazyImport('torch') -tf = LazyImport('tensorflow') -mx = LazyImport('mxnet') -transformers = LazyImport('transformers') +from neural_compressor.utils import logger +from neural_compressor.utils.utility import LazyImport, singleton + +torch = LazyImport("torch") +tf = LazyImport("tensorflow") +mx = LazyImport("mxnet") +transformers = LazyImport("transformers") + class Metric(object): """A wrapper of the information needed to construct a Metric. @@ -44,7 +45,7 @@ class Metric(object): name (str, optional): Name for metric. Defaults to 'user_metric'. """ - def __init__(self, name='user_metric', metric_cls=None, **kwargs): + def __init__(self, name="user_metric", metric_cls=None, **kwargs): """Initialize a Metric with needed information.""" self.metric_cls = metric_cls self.name = name @@ -90,7 +91,8 @@ class MXNetMetrics(object): def __init__(self) -> None: """Initialize the metrics collection.""" from neural_compressor.adaptor.mxnet_utils.util import check_mx_version - if check_mx_version('2.0.0'): + + if check_mx_version("2.0.0"): import mxnet.gluon.metric as mx_metrics else: import mxnet.metric as mx_metrics @@ -131,17 +133,19 @@ def __init__(self) -> None: self.metrics.update(ONNXRT_IT_METRICS) -framework_metrics = {"tensorflow": TensorflowMetrics, - "tensorflow_itex": TensorflowMetrics, - "keras": TensorflowMetrics, - "mxnet": MXNetMetrics, - "pytorch": PyTorchMetrics, - "pytorch_ipex": PyTorchMetrics, - "pytorch_fx": PyTorchMetrics, - "onnxrt_qlinearops": ONNXRTQLMetrics, - "onnxrt_integerops": ONNXRTITMetrics, - "onnxrt_qdq": ONNXRTQLMetrics, - "onnxruntime": ONNXRTQLMetrics} +framework_metrics = { + "tensorflow": TensorflowMetrics, + "tensorflow_itex": TensorflowMetrics, + "keras": TensorflowMetrics, + "mxnet": MXNetMetrics, + "pytorch": PyTorchMetrics, + "pytorch_ipex": PyTorchMetrics, + "pytorch_fx": PyTorchMetrics, + "onnxrt_qlinearops": ONNXRTQLMetrics, + "onnxrt_integerops": ONNXRTITMetrics, + "onnxrt_qdq": ONNXRTQLMetrics, + "onnxruntime": ONNXRTQLMetrics, +} # user/model specific metrics will be registered here TENSORFLOW_METRICS = {} @@ -152,18 +156,19 @@ def __init__(self) -> None: ONNXRT_QL_METRICS = {} ONNXRT_IT_METRICS = {} -registry_metrics = {"tensorflow": TENSORFLOW_METRICS, - "tensorflow_itex": TENSORFLOW_ITEX_METRICS, - "keras": KERAS_METRICS, - "mxnet": MXNET_METRICS, - "pytorch": PYTORCH_METRICS, - "pytorch_ipex": PYTORCH_METRICS, - "pytorch_fx": PYTORCH_METRICS, - 
"onnxrt_qlinearops": ONNXRT_QL_METRICS, - "onnxrt_qdq": ONNXRT_QL_METRICS, - "onnxrt_integerops": ONNXRT_IT_METRICS, - "onnxruntime": ONNXRT_QL_METRICS, - } +registry_metrics = { + "tensorflow": TENSORFLOW_METRICS, + "tensorflow_itex": TENSORFLOW_ITEX_METRICS, + "keras": KERAS_METRICS, + "mxnet": MXNET_METRICS, + "pytorch": PYTORCH_METRICS, + "pytorch_ipex": PYTORCH_METRICS, + "pytorch_fx": PYTORCH_METRICS, + "onnxrt_qlinearops": ONNXRT_QL_METRICS, + "onnxrt_qdq": ONNXRT_QL_METRICS, + "onnxrt_integerops": ONNXRT_IT_METRICS, + "onnxruntime": ONNXRT_QL_METRICS, +} class METRICS(object): @@ -179,11 +184,19 @@ def __init__(self, framework: str): Args: framework: The framwork name. """ - assert framework in ("tensorflow", "tensorflow_itex", "keras", - "pytorch", "pytorch_ipex", "pytorch_fx", "onnxrt_qdq", - "onnxrt_qlinearops", "onnxrt_integerops", "mxnet", - "onnxruntime"), \ - "framework support tensorflow pytorch mxnet onnxrt" + assert framework in ( + "tensorflow", + "tensorflow_itex", + "keras", + "pytorch", + "pytorch_ipex", + "pytorch_fx", + "onnxrt_qdq", + "onnxrt_qlinearops", + "onnxrt_integerops", + "mxnet", + "onnxruntime", + ), "framework support tensorflow pytorch mxnet onnxrt" self.metrics = framework_metrics[framework]().metrics def __getitem__(self, metric_type: str): @@ -195,8 +208,7 @@ def __getitem__(self, metric_type: str): Returns: The metric with the specified type. """ - assert metric_type in self.metrics.keys(), "only support metrics in {}".\ - format(self.metrics.keys()) + assert metric_type in self.metrics.keys(), "only support metrics in {}".format(self.metrics.keys()) return self.metrics[metric_type] @@ -207,9 +219,10 @@ def register(self, name, metric_cls) -> None: name: The name of metric. metric_cls: The metric class. """ - assert name not in self.metrics.keys(), 'registered metric name already exists.' + assert name not in self.metrics.keys(), "registered metric name already exists." self.metrics.update({name: metric_cls}) + def metric_registry(metric_type: str, framework: str): """Decorate for registering all Metric subclasses. 
@@ -225,7 +238,7 @@ def metric_registry(metric_type: str, framework: str): """ def decorator_metric(cls): - for single_framework in [fwk.strip() for fwk in framework.split(',')]: + for single_framework in [fwk.strip() for fwk in framework.split(",")]: assert single_framework in [ "tensorflow", "tensorflow_itex", @@ -238,12 +251,13 @@ def decorator_metric(cls): "pytorch", "pytorch_ipex", "pytorch_fx", - ], "The framework support tensorflow mxnet pytorch onnxrt" + ], "The framework support tensorflow mxnet pytorch onnxrt" if metric_type in registry_metrics[single_framework].keys(): - raise ValueError('Cannot have two metrics with the same name') + raise ValueError("Cannot have two metrics with the same name") registry_metrics[single_framework][metric_type] = cls return cls + return decorator_metric @@ -385,6 +399,7 @@ def result(self): acc_name, acc = self._metric.get() return acc + class WrapONNXRTMetric(BaseMetric): """The wrapper of Metric class for ONNXRT.""" @@ -413,6 +428,7 @@ def result(self): acc_name, acc = self._metric.get() return acc + def _topk_shape_validate(preds, labels): # preds shape can be Nxclass_num or class_num(N=1 by default) # it's more suitable for 'Accuracy' with preds shape Nx1(or 1) output from argmax @@ -455,16 +471,17 @@ def _topk_shape_validate(preds, labels): class_num = preds.shape[1] label_N = labels.shape[0] - assert label_N == N, 'labels batch size should same with preds' + assert label_N == N, "labels batch size should same with preds" labels = labels.reshape([N, -1]) # one-hot labels will have 2 dimension not equal 1 if labels.shape[1] != 1: labels = labels.argsort()[..., -1:] return preds, labels + def _shape_validate(preds, labels): - assert type(preds) in [int, list, np.ndarray], 'preds must be in int or list, ndarray' - assert type(labels) in [int, list, np.ndarray], 'labels must be in int or list, ndarray' + assert type(preds) in [int, list, np.ndarray], "preds must be in int or list, ndarray" + assert type(labels) in [int, list, np.ndarray], "labels must be in int or list, ndarray" if isinstance(preds, int): preds = [np.array([preds])] elif isinstance(preds[0], int): @@ -477,18 +494,19 @@ def _shape_validate(preds, labels): labels = [np.array(labels)] else: labels = [np.array(label) for label in labels] - for (pred, label) in zip(preds, labels): - assert pred.shape == label.shape, \ - 'Shape mismatch, label shape {} vs pred shape {}'.format(label.shape, pred.shape) + for pred, label in zip(preds, labels): + assert pred.shape == label.shape, "Shape mismatch, label shape {} vs pred shape {}".format( + label.shape, pred.shape + ) return preds, labels -@metric_registry('F1', 'tensorflow, tensorflow_itex, pytorch, mxnet, onnxrt_qlinearops, onnxrt_integerops') +@metric_registry("F1", "tensorflow, tensorflow_itex, pytorch, mxnet, onnxrt_qlinearops, onnxrt_integerops") class F1(BaseMetric): """F1 score of a binary classification problem. - The F1 score is the harmonic mean of the precision and recall. - It can be computed with the equation: + The F1 score is the harmonic mean of the precision and recall. + It can be computed with the equation: F1 = 2 * (precision * recall) / (precision + recall) """ @@ -504,7 +522,8 @@ def update(self, preds, labels): labels: The labels corresponding to the predictions. 
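_topk_shape_validate above reduces one-hot labels to class indices with argsort before comparing them against predictions; a standalone sketch with made-up labels:

import numpy as np

labels = np.array([[0, 0, 1], [1, 0, 0]])  # one-hot, shape (N, class_num)
labels = labels.argsort()[..., -1:]        # keep the index of the largest entry per row
print(labels.ravel())                      # [2 0]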
""" from .f1 import f1_score - if getattr(self, '_hvd', None) is not None: + + if getattr(self, "_hvd", None) is not None: gathered_preds_list = self._hvd.allgather_object(preds) gathered_labels_list = self._hvd.allgather_object(labels) temp_preds_list, temp_labels_list = [], [] @@ -524,6 +543,7 @@ def result(self): """Compute the F1 score.""" return np.array(self._score_list).mean() + def _accuracy_shape_check(preds, labels): """Check and conver the shape of predictions and labels. @@ -541,13 +561,15 @@ def _accuracy_shape_check(preds, labels): if isinstance(labels, int): labels = [labels] labels = np.array(labels) - if len(labels.shape) != len(preds.shape) and len(labels.shape)+1 != len(preds.shape): + if len(labels.shape) != len(preds.shape) and len(labels.shape) + 1 != len(preds.shape): raise ValueError( - 'labels must have shape of (batch_size, ..) and preds must have' - 'shape of (batch_size, num_classes, ...) or (batch_size, ..),' - 'but given {} and {}.'.format(labels.shape, preds.shape)) + "labels must have shape of (batch_size, ..) and preds must have" + "shape of (batch_size, num_classes, ...) or (batch_size, ..)," + "but given {} and {}.".format(labels.shape, preds.shape) + ) return preds, labels + def _accuracy_type_check(preds, labels): """Determine the type of prediction. @@ -558,21 +580,21 @@ def _accuracy_type_check(preds, labels): Returns: update_type: The type of predictions. """ - if len(preds.shape) == len(labels.shape)+1: + if len(preds.shape) == len(labels.shape) + 1: num_classes = preds.shape[1] if num_classes == 1: - update_type = 'binary' + update_type = "binary" else: - update_type = 'multiclass' + update_type = "multiclass" elif len(preds.shape) == len(labels.shape): - if len(preds.shape) == 1 or preds.shape[1] ==1: - update_type = 'binary' + if len(preds.shape) == 1 or preds.shape[1] == 1: + update_type = "binary" else: - update_type = 'multilabel' + update_type = "multilabel" return update_type -@metric_registry('Accuracy', 'tensorflow, tensorflow_itex, pytorch, onnxrt_qlinearops, onnxrt_integerops') +@metric_registry("Accuracy", "tensorflow, tensorflow_itex, pytorch, onnxrt_qlinearops, onnxrt_integerops") class Accuracy(BaseMetric): """The Accuracy for the classification tasks. @@ -601,25 +623,25 @@ def update(self, preds, labels, sample_weight=None): """ preds, labels = _accuracy_shape_check(preds, labels) update_type = _accuracy_type_check(preds, labels) - if update_type == 'binary': + if update_type == "binary": self.pred_list.extend(preds) self.label_list.extend(labels) self.sample += labels.shape[0] - elif update_type == 'multiclass': - self.pred_list.extend(np.argmax(preds, axis=1).astype('int32')) + elif update_type == "multiclass": + self.pred_list.extend(np.argmax(preds, axis=1).astype("int32")) self.label_list.extend(labels) self.sample += labels.shape[0] - elif update_type == 'multilabel': - #(N, C, ...) -> (N*..., C) + elif update_type == "multilabel": + # (N, C, ...) 
-> (N*..., C) num_label = preds.shape[1] last_dim = len(preds.shape) - if last_dim-1 != 1: + if last_dim - 1 != 1: trans_list = [0] trans_list.extend(list(range(2, len(preds.shape)))) trans_list.extend([1]) preds = preds.transpose(trans_list).reshape(-1, num_label) labels = labels.transpose(trans_list).reshape(-1, num_label) - self.sample += preds.shape[0]*preds.shape[1] + self.sample += preds.shape[0] * preds.shape[1] self.pred_list.append(preds) self.label_list.append(labels) @@ -631,25 +653,27 @@ def reset(self): def result(self): """Compute the accuracy.""" - correct_num = np.sum( - np.array(self.pred_list) == np.array(self.label_list)) - if getattr(self, '_hvd', None) is not None: + correct_num = np.sum(np.array(self.pred_list) == np.array(self.label_list)) + if getattr(self, "_hvd", None) is not None: allghter_correct_num = sum(self._hvd.allgather_object(correct_num)) allgather_sample = sum(self._hvd.allgather_object(self.sample)) return allghter_correct_num / allgather_sample return correct_num / self.sample -class PyTorchLoss(): +class PyTorchLoss: """A dummy PyTorch Metric. A dummy metric that computes the average of predictions and prints it directly. """ def __init__(self): - """Initialize the number of examples, sum of prediction. and device.""" + """Initialize the number of examples, sum of prediction. + + and device. + """ self._num_examples = 0 - self._device = torch.device('cpu') + self._device = torch.device("cpu") self._sum = torch.tensor(0.0, device=self._device) def reset(self): @@ -678,12 +702,14 @@ def compute(self): The dummy loss. """ if self._num_examples == 0: - raise ValueError("Loss must have at least one example \ - before it can be computed.") + raise ValueError( + "Loss must have at least one example \ + before it can be computed." + ) return self._sum.item() / self._num_examples - - -@metric_registry('Loss', 'tensorflow, tensorflow_itex, pytorch, onnxrt_qlinearops, onnxrt_integerops') + + +@metric_registry("Loss", "tensorflow, tensorflow_itex, pytorch, onnxrt_qlinearops, onnxrt_integerops") class Loss(BaseMetric): """A dummy Metric. @@ -722,32 +748,32 @@ def result(self): Returns: The dummy loss. """ - if getattr(self, '_hvd', None) is not None: + if getattr(self, "_hvd", None) is not None: allgather_sum = sum(self._hvd.allgather_object(self.sum)) allgather_sample = sum(self._hvd.allgather_object(self.sample)) return allgather_sum / allgather_sample return self.sum / self.sample -@metric_registry('MAE', 'tensorflow, tensorflow_itex, pytorch, onnxrt_qlinearops, onnxrt_integerops') +@metric_registry("MAE", "tensorflow, tensorflow_itex, pytorch, onnxrt_qlinearops, onnxrt_integerops") class MAE(BaseMetric): """Computes Mean Absolute Error (MAE) loss. - - Mean Absolute Error (MAE) is the mean of the magnitude of + + Mean Absolute Error (MAE) is the mean of the magnitude of difference between the predicted and actual numeric values. - + Attributes: pred_list: List of prediction to score. label_list: List of references corresponding to the prediction result. - compare_label (bool): Whether to compare label. False if there are no + compare_label (bool): Whether to compare label. False if there are no labels and will use FP32 preds as labels. """ - + def __init__(self, compare_label=True): """Initialize the list of prediction and labels. Args: - compare_label: Whether to compare label. False if there are no + compare_label: Whether to compare label. False if there are no labels and will use FP32 preds as labels. 
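For the multilabel branch of Accuracy.update above, predictions of shape (N, C, ...) are flattened to (N*..., C) by moving the class axis last; a standalone sketch with a dummy array:

import numpy as np

preds = np.zeros((2, 3, 4))                                # (N, C, extra)
num_label = preds.shape[1]
trans_list = [0] + list(range(2, len(preds.shape))) + [1]  # move C to the last axis
flat = preds.transpose(trans_list).reshape(-1, num_label)
print(flat.shape)                                          # (8, 3)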
""" self.label_list = [] @@ -777,23 +803,22 @@ def result(self): Returns: The MAE score. """ - aes = [abs(a-b) for (a,b) in zip(self.label_list, self.pred_list)] + aes = [abs(a - b) for (a, b) in zip(self.label_list, self.pred_list)] aes_sum = sum([np.sum(ae) for ae in aes]) aes_size = sum([ae.size for ae in aes]) assert aes_size, "predictions shouldn't be none" - if getattr(self, '_hvd', None) is not None: + if getattr(self, "_hvd", None) is not None: aes_sum = sum(self._hvd.allgather_object(aes_sum)) - aes_size = sum(self._hvd.allgather_object(aes_size)) + aes_size = sum(self._hvd.allgather_object(aes_size)) return aes_sum / aes_size -@metric_registry('RMSE', 'tensorflow, tensorflow_itex, pytorch, mxnet, onnxrt_qlinearops, onnxrt_integerops') +@metric_registry("RMSE", "tensorflow, tensorflow_itex, pytorch, mxnet, onnxrt_qlinearops, onnxrt_integerops") class RMSE(BaseMetric): """Computes Root Mean Squared Error (RMSE) loss. Attributes: mse: The instance of MSE Metric. - """ def __init__(self, compare_label=True): @@ -825,13 +850,12 @@ def result(self): Returns: The RMSE score. """ - if getattr(self, '_hvd', None) is not None: + if getattr(self, "_hvd", None) is not None: self.mse._hvd = self._hvd return np.sqrt(self.mse.result()) - -@metric_registry('MSE', 'tensorflow, tensorflow_itex, pytorch, onnxrt_qlinearops, onnxrt_integerops') +@metric_registry("MSE", "tensorflow, tensorflow_itex, pytorch, onnxrt_qlinearops, onnxrt_integerops") class MSE(BaseMetric): """Computes Mean Squared Error (MSE) loss. @@ -850,7 +874,7 @@ def __init__(self, compare_label=True): """Initialize the list of prediction and labels. Args: - compare_label: Whether to compare label. False if there are no + compare_label: Whether to compare label. False if there are no labels and will use FP32 preds as labels. """ self.label_list = [] @@ -880,17 +904,17 @@ def result(self): Returns: The MSE score. """ - squares = [(a-b)**2.0 for (a,b) in zip(self.label_list, self.pred_list)] + squares = [(a - b) ** 2.0 for (a, b) in zip(self.label_list, self.pred_list)] squares_sum = sum([np.sum(square) for square in squares]) squares_size = sum([square.size for square in squares]) assert squares_size, "predictions should't be None" - if getattr(self, '_hvd', None) is not None: + if getattr(self, "_hvd", None) is not None: squares_sum = sum(self._hvd.allgather_object(squares_sum)) - squares_size = sum(self._hvd.allgather_object(squares_size)) + squares_size = sum(self._hvd.allgather_object(squares_size)) return squares_sum / squares_size -@metric_registry('topk', 'tensorflow, tensorflow_itex') +@metric_registry("topk", "tensorflow, tensorflow_itex") class TensorflowTopK(BaseMetric): """Compute Top-k Accuracy classification score for Tensorflow model. @@ -922,25 +946,25 @@ def update(self, preds, labels, sample_weight=None): sample_weight: The sample weight. 
""" # extract the contents from tf.Tensor - if not isinstance(labels, int) and len(labels) > 0 \ - and isinstance(labels[0], tf.Tensor): + if not isinstance(labels, int) and len(labels) > 0 and isinstance(labels[0], tf.Tensor): temp_labels = [] for label_tensor in labels: label_contents = label_tensor.numpy() temp_labels.append(label_contents) - labels = temp_labels - + labels = temp_labels + preds, labels = _topk_shape_validate(preds, labels) labels = labels.reshape([len(labels)]) with tf.Graph().as_default() as acc_graph: - topk = tf.nn.in_top_k(predictions=tf.constant(preds, dtype=tf.float32), - targets=tf.constant(labels, dtype=tf.int32), k=self.k) - fp32_topk = tf.cast(topk, tf.float32) - correct_tensor = tf.reduce_sum(input_tensor=fp32_topk) + topk = tf.nn.in_top_k( + predictions=tf.constant(preds, dtype=tf.float32), targets=tf.constant(labels, dtype=tf.int32), k=self.k + ) + fp32_topk = tf.cast(topk, tf.float32) + correct_tensor = tf.reduce_sum(input_tensor=fp32_topk) - with tf.compat.v1.Session() as acc_sess: - correct = acc_sess.run(correct_tensor) + with tf.compat.v1.Session() as acc_sess: + correct = acc_sess.run(correct_tensor) self.num_sample += len(labels) self.num_correct += correct @@ -959,14 +983,14 @@ def result(self): if self.num_sample == 0: logger.warning("Sample num during evaluation is 0.") return 0 - elif getattr(self, '_hvd', None) is not None: + elif getattr(self, "_hvd", None) is not None: allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct)) allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample)) - return allgather_num_correct / allgather_num_sample + return allgather_num_correct / allgather_num_sample return self.num_correct / self.num_sample -@metric_registry('topk', 'pytorch, mxnet, onnxrt_qlinearops, onnxrt_integerops') +@metric_registry("topk", "pytorch, mxnet, onnxrt_qlinearops, onnxrt_integerops") class GeneralTopK(BaseMetric): """Compute Top-k Accuracy classification score. @@ -998,7 +1022,7 @@ def update(self, preds, labels, sample_weight=None): sample_weight: The sample weight. 
""" preds, labels = _topk_shape_validate(preds, labels) - preds = preds.argsort()[..., -self.k:] + preds = preds.argsort()[..., -self.k :] if self.k == 1: correct = accuracy_score(preds, labels, normalize=False) self.num_correct += correct @@ -1007,7 +1031,7 @@ def update(self, preds, labels, sample_weight=None): for p, l in zip(preds, labels): # get top-k labels with np.argpartition # p = np.argpartition(p, -self.k)[-self.k:] - l = l.astype('int32') + l = l.astype("int32") if l in p: self.num_correct += 1 @@ -1027,23 +1051,25 @@ def result(self): if self.num_sample == 0: logger.warning("Sample num during evaluation is 0.") return 0 - elif getattr(self, '_hvd', None) is not None: + elif getattr(self, "_hvd", None) is not None: allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct)) allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample)) return allgather_num_correct / allgather_num_sample return self.num_correct / self.num_sample -@metric_registry('COCOmAPv2', 'tensorflow, tensorflow_itex, onnxrt_qlinearops, onnxrt_integerops') +@metric_registry("COCOmAPv2", "tensorflow, tensorflow_itex, onnxrt_qlinearops, onnxrt_integerops") class COCOmAPv2(BaseMetric): """Compute mean average precision of the detection task.""" - def __init__(self, - anno_path=None, - iou_thrs='0.5:0.05:0.95', - map_points=101, - map_key='DetectionBoxes_Precision/mAP', - output_index_mapping={'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}): + def __init__( + self, + anno_path=None, + iou_thrs="0.5:0.05:0.95", + map_points=101, + map_key="DetectionBoxes_Precision/mAP", + output_index_mapping={"num_detections": -1, "boxes": 0, "scores": 1, "classes": 2}, + ): """Initialize the metric. Args: @@ -1051,22 +1077,25 @@ def __init__(self, iou_thrs: Minimal value for intersection over union that allows to make decision that prediction bounding box is true positive. You can specify one float value between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. - map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for 11-point interpolated AP, 0 for area under PR curve. - map_key: The key that mapping to pycocotools COCOeval. + map_key: The key that mapping to pycocotools COCOeval. Defaults to 'DetectionBoxes_Precision/mAP'. - output_index_mapping: The output index mapping. + output_index_mapping: The output index mapping. Defaults to {'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}. """ self.output_index_mapping = output_index_mapping from .coco_label_map import category_map + if anno_path: import os + import yaml - assert os.path.exists(anno_path), 'Annotation path does not exists!' - with open(anno_path, 'r') as f: + + assert os.path.exists(anno_path), "Annotation path does not exists!" 
+ with open(anno_path, "r") as f: label_map = yaml.safe_load(f.read()) - self.category_map_reverse = {k: v for k,v in label_map.items()} + self.category_map_reverse = {k: v for k, v in label_map.items()} else: # label: index self.category_map_reverse = {v: k for k, v in category_map.items()} @@ -1075,8 +1104,7 @@ def __init__(self, self.detection_list = [] self.annotation_id = 1 self.category_map = category_map - self.category_id_set = set( - [cat for cat in self.category_map]) #index + self.category_id_set = set([cat for cat in self.category_map]) # index self.iou_thrs = iou_thrs self.map_points = map_points self.map_key = map_key @@ -1089,69 +1117,65 @@ def update(self, predicts, labels, sample_weight=None): labels: The labels corresponding to the predictions. sample_weight: The sample weight. Defaults to None. """ - from .coco_tools import ExportSingleImageGroundtruthToCoco,\ - ExportSingleImageDetectionBoxesToCoco + from .coco_tools import ExportSingleImageDetectionBoxesToCoco, ExportSingleImageGroundtruthToCoco + detections = [] - if 'num_detections' in self.output_index_mapping and \ - self.output_index_mapping['num_detections'] > -1: + if "num_detections" in self.output_index_mapping and self.output_index_mapping["num_detections"] > -1: for item in zip(*predicts): detection = {} - num = int(item[self.output_index_mapping['num_detections']]) - detection['boxes'] = np.asarray( - item[self.output_index_mapping['boxes']])[0:num] - detection['scores'] = np.asarray( - item[self.output_index_mapping['scores']])[0:num] - detection['classes'] = np.asarray( - item[self.output_index_mapping['classes']])[0:num] + num = int(item[self.output_index_mapping["num_detections"]]) + detection["boxes"] = np.asarray(item[self.output_index_mapping["boxes"]])[0:num] + detection["scores"] = np.asarray(item[self.output_index_mapping["scores"]])[0:num] + detection["classes"] = np.asarray(item[self.output_index_mapping["classes"]])[0:num] detections.append(detection) else: for item in zip(*predicts): detection = {} - detection['boxes'] = np.asarray(item[self.output_index_mapping['boxes']]) - detection['scores'] = np.asarray(item[self.output_index_mapping['scores']]) - detection['classes'] = np.asarray(item[self.output_index_mapping['classes']]) + detection["boxes"] = np.asarray(item[self.output_index_mapping["boxes"]]) + detection["scores"] = np.asarray(item[self.output_index_mapping["scores"]]) + detection["classes"] = np.asarray(item[self.output_index_mapping["classes"]]) detections.append(detection) - bboxes, str_labels,int_labels, image_ids = labels + bboxes, str_labels, int_labels, image_ids = labels labels = [] if len(int_labels[0]) == 0: for str_label in str_labels: - str_label = [ - x if type(x) == 'str' else x.decode('utf-8') - for x in str_label - ] + str_label = [x if type(x) == "str" else x.decode("utf-8") for x in str_label] labels.append([self.category_map_reverse[x] for x in str_label]) elif len(str_labels[0]) == 0: for int_label in int_labels: labels.append([x for x in int_label]) for idx, image_id in enumerate(image_ids): - image_id = image_id if type( - image_id) == 'str' else image_id.decode('utf-8') + image_id = image_id if type(image_id) == "str" else image_id.decode("utf-8") if image_id in self.image_ids: continue self.image_ids.append(image_id) ground_truth = {} - ground_truth['boxes'] = np.asarray(bboxes[idx]) - ground_truth['classes'] = np.asarray(labels[idx]) + ground_truth["boxes"] = np.asarray(bboxes[idx]) + ground_truth["classes"] = np.asarray(labels[idx]) 
self.ground_truth_list.extend( ExportSingleImageGroundtruthToCoco( image_id=image_id, next_annotation_id=self.annotation_id, category_id_set=self.category_id_set, - groundtruth_boxes=ground_truth['boxes'], - groundtruth_classes=ground_truth['classes'])) - self.annotation_id += ground_truth['boxes'].shape[0] + groundtruth_boxes=ground_truth["boxes"], + groundtruth_classes=ground_truth["classes"], + ) + ) + self.annotation_id += ground_truth["boxes"].shape[0] self.detection_list.extend( ExportSingleImageDetectionBoxesToCoco( image_id=image_id, category_id_set=self.category_id_set, - detection_boxes=detections[idx]['boxes'], - detection_scores=detections[idx]['scores'], - detection_classes=detections[idx]['classes'])) + detection_boxes=detections[idx]["boxes"], + detection_scores=detections[idx]["scores"], + detection_classes=detections[idx]["classes"], + ) + ) def reset(self): """Reset the prediction and labels.""" @@ -1166,69 +1190,63 @@ def result(self): Returns: The mean average precision score. """ - from .coco_tools import COCOWrapper, COCOEvalWrapper + from .coco_tools import COCOEvalWrapper, COCOWrapper + if len(self.ground_truth_list) == 0: logger.warning("Sample num during evaluation is 0.") return 0 else: groundtruth_dict = { - 'annotations': - self.ground_truth_list, - 'images': [{ - 'id': image_id - } for image_id in self.image_ids], - 'categories': [{ - 'id': k, - 'name': v - } for k, v in self.category_map.items()] + "annotations": self.ground_truth_list, + "images": [{"id": image_id} for image_id in self.image_ids], + "categories": [{"id": k, "name": v} for k, v in self.category_map.items()], } coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict) - coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations( - self.detection_list) - box_evaluator = COCOEvalWrapper(coco_wrapped_groundtruth, - coco_wrapped_detections, - agnostic_mode=False, - iou_thrs = self.iou_thrs, - map_points = self.map_points) + coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations(self.detection_list) + box_evaluator = COCOEvalWrapper( + coco_wrapped_groundtruth, + coco_wrapped_detections, + agnostic_mode=False, + iou_thrs=self.iou_thrs, + map_points=self.map_points, + ) box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( - include_metrics_per_category=False, all_metrics_per_category=False) + include_metrics_per_category=False, all_metrics_per_category=False + ) box_metrics.update(box_per_category_ap) - box_metrics = { - 'DetectionBoxes_' + key: value - for key, value in iter(box_metrics.items()) - } + box_metrics = {"DetectionBoxes_" + key: value for key, value in iter(box_metrics.items())} return box_metrics[self.map_key] -@metric_registry('mAP', 'tensorflow, tensorflow_itex, onnxrt_qlinearops, onnxrt_integerops') + +@metric_registry("mAP", "tensorflow, tensorflow_itex, onnxrt_qlinearops, onnxrt_integerops") class TensorflowMAP(BaseMetric): """Computes mean average precision.""" - - def __init__(self, - anno_path=None, - iou_thrs=0.5, - map_points=0, - map_key='DetectionBoxes_Precision/mAP'): + + def __init__(self, anno_path=None, iou_thrs=0.5, map_points=0, map_key="DetectionBoxes_Precision/mAP"): """Initialize the metric. - + Args: anno_path: The path of annotation file. iou_thrs: Minimal value for intersection over union that allows to make decision that prediction bounding box is true positive. You can specify one float value between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. - map_points: The way to calculate mAP. 
101 for 101-point interpolated AP, 11 for + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for 11-point interpolated AP, 0 for area under PR curve. - map_key: The key that mapping to pycocotools COCOeval. + map_key: The key that mapping to pycocotools COCOeval. Defaults to 'DetectionBoxes_Precision/mAP'. """ from .coco_label_map import category_map + if anno_path: import os + import yaml - assert os.path.exists(anno_path), 'Annotation path does not exists!' - with open(anno_path, 'r') as f: + + assert os.path.exists(anno_path), "Annotation path does not exists!" + with open(anno_path, "r") as f: label_map = yaml.safe_load(f.read()) - self.category_map_reverse = {k: v for k,v in label_map.items()} + self.category_map_reverse = {k: v for k, v in label_map.items()} else: # label: index self.category_map_reverse = {v: k for k, v in category_map.items()} @@ -1237,13 +1255,11 @@ def __init__(self, self.detection_list = [] self.annotation_id = 1 self.category_map = category_map - self.category_id_set = set( - [cat for cat in self.category_map]) #index + self.category_id_set = set([cat for cat in self.category_map]) # index self.iou_thrs = iou_thrs self.map_points = map_points self.map_key = map_key - def update(self, predicts, labels, sample_weight=None): """Add the predictions and labels. @@ -1252,70 +1268,70 @@ def update(self, predicts, labels, sample_weight=None): labels: The labels corresponding to the predictions. sample_weight: The sample weight. """ - if getattr(self, '_hvd', None) is not None: + if getattr(self, "_hvd", None) is not None: raise NotImplementedError("Metric TensorflowMAP currently do not support distribued inference.") - from .coco_tools import ExportSingleImageGroundtruthToCoco,\ - ExportSingleImageDetectionBoxesToCoco + from .coco_tools import ExportSingleImageDetectionBoxesToCoco, ExportSingleImageGroundtruthToCoco + detections = [] if len(predicts) == 3: for bbox, score, cls in zip(*predicts): detection = {} - detection['boxes'] = np.asarray(bbox) - detection['scores'] = np.asarray(score) - detection['classes'] = np.asarray(cls) + detection["boxes"] = np.asarray(bbox) + detection["scores"] = np.asarray(score) + detection["classes"] = np.asarray(cls) detections.append(detection) elif len(predicts) == 4: for num, bbox, score, cls in zip(*predicts): detection = {} num = int(num) - detection['boxes'] = np.asarray(bbox)[0:num] - detection['scores'] = np.asarray(score)[0:num] - detection['classes'] = np.asarray(cls)[0:num] + detection["boxes"] = np.asarray(bbox)[0:num] + detection["scores"] = np.asarray(score)[0:num] + detection["classes"] = np.asarray(cls)[0:num] detections.append(detection) else: raise ValueError("Unsupported prediction format!") - bboxes, str_labels,int_labels, image_ids = labels + bboxes, str_labels, int_labels, image_ids = labels labels = [] if len(int_labels[0]) == 0: for str_label in str_labels: - str_label = [ - x if type(x) == 'str' else x.decode('utf-8') - for x in str_label - ] + str_label = [x if type(x) == "str" else x.decode("utf-8") for x in str_label] labels.append([self.category_map_reverse[x] for x in str_label]) elif len(str_labels[0]) == 0: for int_label in int_labels: labels.append([x for x in int_label]) for idx, image_id in enumerate(image_ids): - image_id = image_id if type( - image_id) == 'str' else image_id.decode('utf-8') + image_id = image_id if type(image_id) == "str" else image_id.decode("utf-8") if image_id in self.image_ids: continue self.image_ids.append(image_id) ground_truth = {} - 
ground_truth['boxes'] = np.asarray(bboxes[idx]) - ground_truth['classes'] = np.asarray(labels[idx]) + ground_truth["boxes"] = np.asarray(bboxes[idx]) + ground_truth["classes"] = np.asarray(labels[idx]) self.ground_truth_list.extend( ExportSingleImageGroundtruthToCoco( image_id=image_id, next_annotation_id=self.annotation_id, category_id_set=self.category_id_set, - groundtruth_boxes=ground_truth['boxes'], - groundtruth_classes=ground_truth['classes'])) - self.annotation_id += ground_truth['boxes'].shape[0] + groundtruth_boxes=ground_truth["boxes"], + groundtruth_classes=ground_truth["classes"], + ) + ) + self.annotation_id += ground_truth["boxes"].shape[0] self.detection_list.extend( ExportSingleImageDetectionBoxesToCoco( image_id=image_id, category_id_set=self.category_id_set, - detection_boxes=detections[idx]['boxes'], - detection_scores=detections[idx]['scores'], - detection_classes=detections[idx]['classes'])) + detection_boxes=detections[idx]["boxes"], + detection_scores=detections[idx]["scores"], + detection_classes=detections[idx]["classes"], + ) + ) def reset(self): """Reset the prediction and labels.""" @@ -1330,49 +1346,40 @@ def result(self): Returns: The mean average precision score. """ - from .coco_tools import COCOWrapper, COCOEvalWrapper + from .coco_tools import COCOEvalWrapper, COCOWrapper + if len(self.ground_truth_list) == 0: logger.warning("Sample num during evaluation is 0.") return 0 else: groundtruth_dict = { - 'annotations': - self.ground_truth_list, - 'images': [{ - 'id': image_id - } for image_id in self.image_ids], - 'categories': [{ - 'id': k, - 'name': v - } for k, v in self.category_map.items()] + "annotations": self.ground_truth_list, + "images": [{"id": image_id} for image_id in self.image_ids], + "categories": [{"id": k, "name": v} for k, v in self.category_map.items()], } coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict) - coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations( - self.detection_list) - box_evaluator = COCOEvalWrapper(coco_wrapped_groundtruth, - coco_wrapped_detections, - agnostic_mode=False, - iou_thrs = self.iou_thrs, - map_points = self.map_points) + coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations(self.detection_list) + box_evaluator = COCOEvalWrapper( + coco_wrapped_groundtruth, + coco_wrapped_detections, + agnostic_mode=False, + iou_thrs=self.iou_thrs, + map_points=self.map_points, + ) box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( - include_metrics_per_category=False, all_metrics_per_category=False) + include_metrics_per_category=False, all_metrics_per_category=False + ) box_metrics.update(box_per_category_ap) - box_metrics = { - 'DetectionBoxes_' + key: value - for key, value in iter(box_metrics.items()) - } + box_metrics = {"DetectionBoxes_" + key: value for key, value in iter(box_metrics.items())} return box_metrics[self.map_key] -@metric_registry('COCOmAP', 'tensorflow, tensorflow_itex, onnxrt_qlinearops, onnxrt_integerops') + +@metric_registry("COCOmAP", "tensorflow, tensorflow_itex, onnxrt_qlinearops, onnxrt_integerops") class TensorflowCOCOMAP(TensorflowMAP): """Computes mean average precision using algorithm in COCO.""" - - def __init__(self, - anno_path=None, - iou_thrs=None, - map_points=None, - map_key='DetectionBoxes_Precision/mAP'): + + def __init__(self, anno_path=None, iou_thrs=None, map_points=None, map_key="DetectionBoxes_Precision/mAP"): """Initialize the iou threshold and max points. 
Args: @@ -1380,24 +1387,21 @@ def __init__(self, iou_thrs: Minimal value for intersection over union that allows to make decision that prediction bounding box is true positive. You can specify one float value between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. - map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for 11-point interpolated AP, 0 for area under PR curve. - map_key: The key that mapping to pycocotools COCOeval. + map_key: The key that mapping to pycocotools COCOeval. Defaults to 'DetectionBoxes_Precision/mAP'. """ super(TensorflowCOCOMAP, self).__init__(anno_path, iou_thrs, map_points, map_key) - self.iou_thrs = '0.5:0.05:0.95' + self.iou_thrs = "0.5:0.05:0.95" self.map_points = 101 -@metric_registry('VOCmAP', 'tensorflow, tensorflow_itex, onnxrt_qlinearops, onnxrt_integerops') + +@metric_registry("VOCmAP", "tensorflow, tensorflow_itex, onnxrt_qlinearops, onnxrt_integerops") class TensorflowVOCMAP(TensorflowMAP): """Computes mean average precision using algorithm in VOC.""" - - def __init__(self, - anno_path=None, - iou_thrs=None, - map_points=None, - map_key='DetectionBoxes_Precision/mAP'): + + def __init__(self, anno_path=None, iou_thrs=None, map_points=None, map_key="DetectionBoxes_Precision/mAP"): """Initialize the iou threshold and max points. Args: @@ -1405,9 +1409,9 @@ def __init__(self, iou_thrs: Minimal value for intersection over union that allows to make decision that prediction bounding box is true positive. You can specify one float value between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. - map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for 11-point interpolated AP, 0 for area under PR curve. - map_key: The key that mapping to pycocotools COCOeval. + map_key: The key that mapping to pycocotools COCOeval. Defaults to 'DetectionBoxes_Precision/mAP'. """ super(TensorflowVOCMAP, self).__init__(anno_path, iou_thrs, map_points, map_key) @@ -1415,14 +1419,14 @@ def __init__(self, self.map_points = 0 -@metric_registry('SquadF1', 'tensorflow, tensorflow_itex') +@metric_registry("SquadF1", "tensorflow, tensorflow_itex") class SquadF1(BaseMetric): """Evaluate for v1.1 of the SQuAD dataset.""" - + def __init__(self): """Initialize the score list.""" - self._score_list = [] # squad metric only work when all data preds collected - + self._score_list = [] # squad metric only work when all data preds collected + def update(self, preds, labels, sample_weight=None): """Add the predictions and labels. @@ -1433,7 +1437,8 @@ def update(self, preds, labels, sample_weight=None): """ if preds: from .evaluate_squad import evaluate - if getattr(self, '_hvd', None) is not None: + + if getattr(self, "_hvd", None) is not None: gathered_preds_list = self._hvd.allgather_object(preds) gathered_labels_list = self._hvd.allgather_object(labels) temp_preds_list, temp_labels_list = [], [] @@ -1444,21 +1449,22 @@ def update(self, preds, labels, sample_weight=None): labels = temp_labels_list result = evaluate(labels, preds) self._score_list.append(result["f1"]) - + def reset(self): - """Reset the score list.""" - self._score_list = [] - + """Reset the score list.""" + self._score_list = [] + def result(self): """Compute F1 score.""" if len(self._score_list) == 0: - return 0. 
+ return 0.0 return np.array(self._score_list).mean() - -@metric_registry('mIOU', 'tensorflow, tensorflow_itex') + + +@metric_registry("mIOU", "tensorflow, tensorflow_itex") class mIOU(BaseMetric): """Compute the mean IOU(Intersection over Union) score.""" - + def __init__(self, num_classes=21): """Initialize the number of classes. @@ -1479,19 +1485,18 @@ def update(self, preds, labels): labels = labels.flatten() p_dtype = preds.dtype l_dtype = labels.dtype - if getattr(self, '_hvd', None) is not None: + if getattr(self, "_hvd", None) is not None: preds = self._hvd.allgather_object(preds) labels = self._hvd.allgather_object(labels) - preds_list, labels_list = np.array([], dtype = p_dtype), np.array([], dtype = l_dtype) + preds_list, labels_list = np.array([], dtype=p_dtype), np.array([], dtype=l_dtype) for i in range(self._hvd.size()): preds_list = np.append(preds_list, preds[i]) labels_list = np.append(labels_list, labels[i]) preds, labels = preds_list, labels_list mask = (labels >= 0) & (labels < self.num_classes) self.hist += np.bincount( - self.num_classes * labels[mask].astype(int) + - preds[mask], minlength=self.num_classes ** 2).reshape(self.num_classes, - self.num_classes) + self.num_classes * labels[mask].astype(int) + preds[mask], minlength=self.num_classes**2 + ).reshape(self.num_classes, self.num_classes) def reset(self): """Reset the hist.""" @@ -1503,24 +1508,23 @@ def result(self): Returns: The mean IOU score. """ - iu = np.diag(self.hist) / (self.hist.sum(axis=1) + self.hist.sum(axis=0) - - np.diag(self.hist)) + iu = np.diag(self.hist) / (self.hist.sum(axis=1) + self.hist.sum(axis=0) - np.diag(self.hist)) mean_iu = np.nanmean(iu) return mean_iu -@metric_registry('GLUE', 'onnxrt_qlinearops, onnxrt_integerops') + +@metric_registry("GLUE", "onnxrt_qlinearops, onnxrt_integerops") class ONNXRTGLUE(BaseMetric): """Compute the GLUE score.""" - - def __init__(self, task='mrpc'): + + def __init__(self, task="mrpc"): """Initialize the metric. Args: task:The name of the task (Choices: mrpc, qqp, qnli, rte, sts-b, cola, mnli, wnli.). """ - assert task in ['mrpc', 'qqp', 'qnli', 'rte', 'sts-b', 'cola', \ - 'mnli', 'wnli', 'sst-2'], 'Unsupported task type' + assert task in ["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", "mnli", "wnli", "sst-2"], "Unsupported task type" self.pred_list = None self.label_list = None self.task = task @@ -1533,7 +1537,7 @@ def __init__(self, task='mrpc'): "qnli": "acc", "rte": "acc", "wnli": "acc", - "sst-2": "acc" + "sst-2": "acc", } def update(self, preds, labels): @@ -1543,7 +1547,7 @@ def update(self, preds, labels): preds: The predictions. labels: The labels corresponding to the predictions. """ - if getattr(self, '_hvd', None) is not None: + if getattr(self, "_hvd", None) is not None: raise NotImplementedError("Metric ONNXRTGLUE currently do not support distribued inference.") if isinstance(preds, list) and len(preds) == 1: preds = preds[0] @@ -1569,21 +1573,21 @@ def result(self): processed_preds = np.argmax(self.pred_list, axis=1) elif output_mode == "regression": processed_preds = np.squeeze(self.pred_list) - result = transformers.glue_compute_metrics(\ - self.task, processed_preds, self.label_list) + result = transformers.glue_compute_metrics(self.task, processed_preds, self.label_list) return result[self.return_key[self.task]] -@metric_registry('ROC', 'pytorch') + +@metric_registry("ROC", "pytorch") class ROC(BaseMetric): """Computes ROC score.""" - - def __init__(self, task='dlrm'): + + def __init__(self, task="dlrm"): """Initialize the metric. 
Args: task:The name of the task (Choices: dlrm, dien, wide_deep.). """ - assert task in ['dlrm', 'dien', 'wide_deep'], 'Unsupported task type' + assert task in ["dlrm", "dien", "wide_deep"], "Unsupported task type" self.pred_list = None self.label_list = None self.task = task @@ -1619,6 +1623,7 @@ def reset(self): def result(self): """Compute the ROC score.""" import sklearn.metrics + scores = np.squeeze(self.pred_list) targets = np.squeeze(self.label_list) roc_auc = sklearn.metrics.roc_auc_score(targets, scores) @@ -1627,7 +1632,7 @@ def result(self): def register_customer_metric(user_metric, framework): - """register customer metric class or a dict of built-in metric configures. + """Register customer metric class or a dict of built-in metric configures. 1. neural_compressor have many built-in metrics, user can pass a metric configure dict to tell neural compressor what metric will be use. @@ -1654,7 +1659,6 @@ def register_customer_metric(user_metric, framework): The object of Metric or a dict of built-in metric configurations. framework: framework, such as: tensorflow, pytorch...... - """ if isinstance(user_metric, dict): metric_cfg = user_metric @@ -1670,11 +1674,10 @@ def register_customer_metric(user_metric, framework): metric_cls = user_metric.metric_cls metric_cfg = {name: {**user_metric.kwargs}} else: - for i in ['reset', 'update', 'result']: - assert hasattr(user_metric, i), 'Please realise {} function' \ - 'in user defined metric'.format(i) + for i in ["reset", "update", "result"]: + assert hasattr(user_metric, i), "Please realise {} function" "in user defined metric".format(i) metric_cls = type(user_metric).__name__ - name = 'user_' + metric_cls + name = "user_" + metric_cls metric_cfg = {name: id(user_metric)} metrics = METRICS(framework) metrics.register(name, metric_cls) diff --git a/neural_compressor/mix_precision.py b/neural_compressor/mix_precision.py index 2444466f691..1036118741d 100644 --- a/neural_compressor/mix_precision.py +++ b/neural_compressor/mix_precision.py @@ -16,27 +16,24 @@ # limitations under the License. """Mix Precision for Neural Compressor.""" import os -import sys import pickle -import numpy as np import random +import sys + +import numpy as np from neural_compressor.data import check_dataloader from neural_compressor.metric import register_customer_metric + from .config import _Config, options from .model import Model from .strategy import STRATEGIES from .utils import alias_param, logger -from .utils.utility import time_limit, CpuInfo +from .utils.utility import CpuInfo, time_limit -@alias_param("conf", param_alias='config') -def fit(model, - conf, - eval_func=None, - eval_dataloader=None, - eval_metric=None, - **kwargs): +@alias_param("conf", param_alias="config") +def fit(model, conf, eval_func=None, eval_dataloader=None, eval_metric=None, **kwargs): """Fit low precision model generation across multiple framework backends. Args: @@ -90,32 +87,39 @@ def fit(model, check_dataloader(eval_dataloader) if conf.precisions in conf.excluded_precisions: - logger.warning("Target precision is in excluded_precisions, " - "please modify precision or excluded_precisions to make it understandable.") + logger.warning( + "Target precision is in excluded_precisions, " + "please modify precision or excluded_precisions to make it understandable." 
+ ) sys.exit(0) wrapped_model = Model(model, conf=conf) precisions = list(set(conf.precisions) - set(conf.excluded_precisions)) - if ('bf16' in precisions or 'fp16' in precisions) and conf.framework == "onnxruntime": # pragma: no cover - if 'fp16' in precisions and not (conf.device == "gpu" and conf.backend == "onnxrt_cuda_ep"): - logger.warning("Mix precision exits due to fp16 for onnx models" \ - "needs 'gpu' device and 'onnxrt_cuda_ep' backend.") + if ("bf16" in precisions or "fp16" in precisions) and conf.framework == "onnxruntime": # pragma: no cover + if "fp16" in precisions and not (conf.device == "gpu" and conf.backend == "onnxrt_cuda_ep"): + logger.warning( + "Mix precision exits due to fp16 for onnx models" "needs 'gpu' device and 'onnxrt_cuda_ep' backend." + ) sys.exit(0) - elif 'bf16' in precisions and (not (conf.backend == "onnxrt_cuda_ep" and conf.device == "gpu") and \ - not (conf.backend == "onnxrt_dnnl_ep" and conf.device == "cpu")): - logger.warning("Mix precision exits due to bf16 for onnx models needs " \ - "'gpu' device and 'onnxrt_cuda_ep' backend, or 'cpu' device and 'onnxrt_dnnl_ep' backend.") + elif "bf16" in precisions and ( + not (conf.backend == "onnxrt_cuda_ep" and conf.device == "gpu") + and not (conf.backend == "onnxrt_dnnl_ep" and conf.device == "cpu") + ): + logger.warning( + "Mix precision exits due to bf16 for onnx models needs " + "'gpu' device and 'onnxrt_cuda_ep' backend, or 'cpu' device and 'onnxrt_dnnl_ep' backend." + ) sys.exit(0) - elif 'bf16' in precisions and not CpuInfo().bf16 and conf.framework != "onnxruntime": # pragma: no cover - if os.getenv('FORCE_BF16') == '1': - logger.warning("Mix precision will generate bf16 graph although " - "the hardware doesn't support bf16 instruction.") + elif "bf16" in precisions and not CpuInfo().bf16 and conf.framework != "onnxruntime": # pragma: no cover + if os.getenv("FORCE_BF16") == "1": + logger.warning( + "Mix precision will generate bf16 graph although " "the hardware doesn't support bf16 instruction." + ) else: - logger.warning("Mix precision exits due to the hardware " - "doesn't support bf16 instruction.") + logger.warning("Mix precision exits due to the hardware " "doesn't support bf16 instruction.") sys.exit(0) - elif 'fp16' in precisions and conf.framework != "onnxruntime": + elif "fp16" in precisions and conf.framework != "onnxruntime": logger.warning("Currently mix precision only supports fp16 for onnx models.") sys.exit(0) @@ -124,12 +128,7 @@ def fit(model, else: metric = None - config = _Config(mixed_precision=conf, - quantization=None, - benchmark=None, - pruning=None, - distillation=None, - nas=None) + config = _Config(mixed_precision=conf, quantization=None, benchmark=None, pruning=None, distillation=None, nas=None) seed = options.random_seed random.seed(seed) np.random.seed(seed) @@ -137,22 +136,23 @@ def fit(model, _resume = None # check if interrupted tuning procedure exists. if yes, it will resume the # whole auto tune process. 
- resume_file = os.path.abspath(os.path.expanduser( - options.resume_from)) if options.workspace and options.resume_from else None + resume_file = ( + os.path.abspath(os.path.expanduser(options.resume_from)) if options.workspace and options.resume_from else None + ) if resume_file: - assert os.path.exists(resume_file), \ - "The specified resume file {} doesn't exist!".format(resume_file) - with open(resume_file, 'rb') as f: + assert os.path.exists(resume_file), "The specified resume file {} doesn't exist!".format(resume_file) + with open(resume_file, "rb") as f: _resume = pickle.load(f).__dict__ - strategy = STRATEGIES['automixedprecision']( + strategy = STRATEGIES["automixedprecision"]( model=wrapped_model, conf=config, eval_func=eval_func, eval_dataloader=eval_dataloader, eval_metric=metric, resume=_resume, - q_hooks=None) + q_hooks=None, + ) try: with time_limit(conf.tuning_criterion.timeout): @@ -162,16 +162,18 @@ def fit(model, except Exception as e: logger.error("Unexpected exception {} happened during tuning.".format(repr(e))) import traceback + traceback.print_exc() finally: if strategy.best_qmodel: logger.info( - "Specified timeout or max trials is reached! " - "Found a quantized model which meet accuracy goal. Exit.") + "Specified timeout or max trials is reached! " "Found a quantized model which meet accuracy goal. Exit." + ) strategy.deploy_config() else: logger.error( "Specified timeout or max trials is reached! " - "Not found any quantized model which meet accuracy goal. Exit.") + "Not found any quantized model which meet accuracy goal. Exit." + ) return strategy.best_qmodel diff --git a/neural_compressor/model/__init__.py b/neural_compressor/model/__init__.py index 7dfe4843432..b98704108e9 100644 --- a/neural_compressor/model/__init__.py +++ b/neural_compressor/model/__init__.py @@ -14,12 +14,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Built-in model for multiple framework backends.""" from .model import MODELS, Model from .base_model import BaseModel __all__ = ["MODELS", "Model", "BaseModel"] - - diff --git a/neural_compressor/model/base_model.py b/neural_compressor/model/base_model.py index a29874a0ba2..b42b3a4e481 100644 --- a/neural_compressor/model/base_model.py +++ b/neural_compressor/model/base_model.py @@ -14,11 +14,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Base model for multiple framework backends.""" from abc import abstractmethod + class BaseModel: """Base class of all neural_compressor.model, will play graph role.""" @@ -26,10 +26,10 @@ def __init__(self, model, **kwargs): """Initialize a BaseModel. Args: - model (object): raw model format. For Tensorflow model, could be path to frozen pb file, + model (object): raw model format. For Tensorflow model, could be path to frozen pb file, path to ckpt or savedmodel folder, loaded estimator/graph_def/graph/keras model object. - For PyTorch model, it's torch.nn.model instance. For MXNet model, it's mxnet.symbol.Symbol - or gluon.HybirdBlock instance. For ONNX model, it's path to onnx model or loaded ModelProto + For PyTorch model, it's torch.nn.model instance. For MXNet model, it's mxnet.symbol.Symbol + or gluon.HybirdBlock instance. For ONNX model, it's path to onnx model or loaded ModelProto model object. 
""" self.component = None diff --git a/neural_compressor/model/keras_model.py b/neural_compressor/model/keras_model.py index f320f610026..01c981c9355 100644 --- a/neural_compressor/model/keras_model.py +++ b/neural_compressor/model/keras_model.py @@ -14,14 +14,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Class for Keras model.""" import os from abc import abstractmethod + from neural_compressor.model.base_model import BaseModel from neural_compressor.utils.utility import LazyImport, compute_sparsity -tf = LazyImport('tensorflow') + +tf = LazyImport("tensorflow") + class KerasModel(BaseModel): """Build Keras model.""" @@ -58,7 +60,7 @@ def model(self): @property def graph_info(self): """Return graph info.""" - #(TODO) get the graph info + # (TODO) get the graph info return None @abstractmethod @@ -77,7 +79,7 @@ def _export( @abstractmethod def framework(self): """Return framework.""" - return 'keras' + return "keras" def get_all_weight_names(self): """Get weight names of model. @@ -98,11 +100,12 @@ def report_sparsity(self): df (DataFrame): DataFrame of sparsity of each weight. total_sparsity (float): total sparsity of model. """ + import numpy as np import pandas as pd import tensorflow as tf - import numpy as np - df = pd.DataFrame(columns=['Name', 'Shape', 'NNZ (dense)', 'NNZ (sparse)', "Sparsity(%)"]) - pd.set_option('display.precision', 2) + + df = pd.DataFrame(columns=["Name", "Shape", "NNZ (dense)", "NNZ (sparse)", "Sparsity(%)"]) + pd.set_option("display.precision", 2) param_dims = [2, 4] params_size = 0 sparse_params_size = 0 @@ -113,27 +116,27 @@ def report_sparsity(self): # as its "type" weights = layer.get_weights()[0] if weights.ndim in param_dims: - param_size, sparse_param_size, dense_param_size = compute_sparsity( - weights) + param_size, sparse_param_size, dense_param_size = compute_sparsity(weights) density = dense_param_size / param_size params_size += param_size sparse_params_size += sparse_param_size - df.loc[len(df.index)] = ([ + df.loc[len(df.index)] = [ index, list(weights.shape), dense_param_size, sparse_param_size, (1 - density) * 100, - ]) + ] total_sparsity = sparse_params_size / params_size * 100 - df.loc[len(df.index)] = ([ - 'Total sparsity:', + df.loc[len(df.index)] = [ + "Total sparsity:", "-", params_size, sparse_params_size, - total_sparsity,]) + total_sparsity, + ] return df, total_sparsity diff --git a/neural_compressor/model/model.py b/neural_compressor/model/model.py index a2fc2b0d224..032f930d4b9 100644 --- a/neural_compressor/model/model.py +++ b/neural_compressor/model/model.py @@ -14,54 +14,56 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- """Model for multiple framework backends.""" import copy -import os import importlib +import os import sys + from neural_compressor.config import options -from neural_compressor.utils.utility import LazyImport -from neural_compressor.utils import logger from neural_compressor.model.base_model import BaseModel -from neural_compressor.model.onnx_model import ONNXModel -from neural_compressor.model.mxnet_model import MXNetModel from neural_compressor.model.keras_model import KerasModel +from neural_compressor.model.mxnet_model import MXNetModel +from neural_compressor.model.onnx_model import ONNXModel from neural_compressor.model.tensorflow_model import ( - TensorflowBaseModel, - TensorflowModel, - TensorflowQATModel, - get_model_type - ) + TensorflowBaseModel, + TensorflowModel, + TensorflowQATModel, + get_model_type, +) +from neural_compressor.utils import logger +from neural_compressor.utils.utility import LazyImport TORCH = False -if importlib.util.find_spec('torch'): +if importlib.util.find_spec("torch"): TORCH = True from neural_compressor.model.torch_model import * -torch = LazyImport('torch') -tf = LazyImport('tensorflow') -mx = LazyImport('mxnet') -onnx = LazyImport('onnx') +torch = LazyImport("torch") +tf = LazyImport("tensorflow") +mx = LazyImport("mxnet") +onnx = LazyImport("onnx") ort = LazyImport("onnxruntime") -yaml = LazyImport('yaml') -json = LazyImport('json') -np = LazyImport('numpy') - -MODELS = {'tensorflow': TensorflowModel, - 'tensorflow_itex': TensorflowModel, - 'keras': KerasModel, - 'tensorflow_qat': TensorflowQATModel, - 'mxnet': MXNetModel, - 'pytorch': PyTorchModel if TORCH else None, - 'pytorch_ipex': IPEXModel if TORCH else None, - 'pytorch_fx': PyTorchFXModel if TORCH else None, - 'onnxruntime': ONNXModel, - 'onnxrt_qlinearops': ONNXModel, - 'onnxrt_qdq': ONNXModel, - 'onnxrt_integerops': ONNXModel - } +yaml = LazyImport("yaml") +json = LazyImport("json") +np = LazyImport("numpy") + +MODELS = { + "tensorflow": TensorflowModel, + "tensorflow_itex": TensorflowModel, + "keras": KerasModel, + "tensorflow_qat": TensorflowQATModel, + "mxnet": MXNetModel, + "pytorch": PyTorchModel if TORCH else None, + "pytorch_ipex": IPEXModel if TORCH else None, + "pytorch_fx": PyTorchFXModel if TORCH else None, + "onnxruntime": ONNXModel, + "onnxrt_qlinearops": ONNXModel, + "onnxrt_qdq": ONNXModel, + "onnxrt_integerops": ONNXModel, +} + def get_model_fwk_name(model): """Detect the input model belongs to which framework. @@ -70,38 +72,44 @@ def get_model_fwk_name(model): model (string): framework name that supported by Neural Compressor, if there's no available fwk info, then return 'NA'. 
""" + def _is_onnxruntime(model): from importlib.util import find_spec + try: so = ort.SessionOptions() - if sys.version_info < (3,11) and \ - find_spec('onnxruntime_extensions'): # pragma: no cover + if sys.version_info < (3, 11) and find_spec("onnxruntime_extensions"): # pragma: no cover from onnxruntime_extensions import get_library_path + so.register_custom_ops_library(get_library_path()) if isinstance(model, str): ort.InferenceSession(model, so, providers=ort.get_available_providers()) else: ort.InferenceSession(model.SerializeToString(), so, providers=ort.get_available_providers()) except Exception as e: # pragma: no cover - if 'Message onnx.ModelProto exceeds maximum protobuf size of 2GB' in str(e): - logger.warning('Please use model path instead of onnx model object to quantize') + if "Message onnx.ModelProto exceeds maximum protobuf size of 2GB" in str(e): + logger.warning("Please use model path instead of onnx model object to quantize") else: - logger.warning("If you use an onnx model with custom_ops to do quantiztaion, " - "please ensure onnxruntime-extensions is installed") + logger.warning( + "If you use an onnx model with custom_ops to do quantiztaion, " + "please ensure onnxruntime-extensions is installed" + ) else: - return 'onnxruntime' - return 'NA' + return "onnxruntime" + return "NA" def _is_pytorch(model): try: - if isinstance(model, torch.nn.Module) or isinstance( - model, torch.fx.GraphModule) or isinstance( - model, torch.jit._script.RecursiveScriptModule): - return 'pytorch' + if ( + isinstance(model, torch.nn.Module) + or isinstance(model, torch.fx.GraphModule) + or isinstance(model, torch.jit._script.RecursiveScriptModule) + ): + return "pytorch" else: - return 'NA' + return "NA" except: - return 'NA' + return "NA" def _is_tensorflow(model): try: @@ -111,42 +119,46 @@ def _is_tensorflow(model): except: os.environ.pop("CUDA_DEVICE_ORDER") os.environ.pop("CUDA_VISIBLE_DEVICES") - return 'NA' + return "NA" else: - return 'tensorflow' + return "tensorflow" def _is_mxnet(model): try: - is_mxnet = isinstance(model, mx.gluon.HybridBlock) or \ - (hasattr(model, '__len__') and len(model) > 1 and \ - isinstance(model[0], mx.symbol.Symbol)) + is_mxnet = isinstance(model, mx.gluon.HybridBlock) or ( + hasattr(model, "__len__") and len(model) > 1 and isinstance(model[0], mx.symbol.Symbol) + ) except: - return 'NA' + return "NA" else: - return 'mxnet' if is_mxnet else 'NA' + return "mxnet" if is_mxnet else "NA" if isinstance(model, str): absmodel = os.path.abspath(os.path.expanduser(model)) - assert os.path.exists(absmodel) or os.path.exists(absmodel+'.pb'), \ - 'invalid input path, the file does not exist!' + assert os.path.exists(absmodel) or os.path.exists( + absmodel + ".pb" + ), "invalid input path, the file does not exist!" - #check if the input model is a neural_compressor model + # check if the input model is a neural_compressor model for name, nc_model in MODELS.items(): if nc_model and isinstance(model, nc_model): - return 'pytorch' if name == 'pytorch_ipex' or name == 'pytorch_fx' else name + return "pytorch" if name == "pytorch_ipex" or name == "pytorch_fx" else name if isinstance(model, TensorflowBaseModel): - return 'tensorflow' + return "tensorflow" checker = [_is_tensorflow, _is_pytorch, _is_onnxruntime, _is_mxnet] for handler in checker: fwk_name = handler(model) - if fwk_name != 'NA': + if fwk_name != "NA": break - assert fwk_name != 'NA', 'Framework is not detected correctly from model format. 
This could be \ -caused by unsupported model or inappropriate framework installation.' + assert ( + fwk_name != "NA" + ), "Framework is not detected correctly from model format. This could be \ +caused by unsupported model or inappropriate framework installation." return fwk_name + class Model(object): """A wrapper to construct a Neural Compressor Model.""" @@ -154,10 +166,10 @@ def __new__(cls, root, **kwargs): """Create a new instance object of Model. Args: - root (object): raw model format. For Tensorflow model, could be path to frozen pb file, + root (object): raw model format. For Tensorflow model, could be path to frozen pb file, path to ckpt or savedmodel folder, loaded estimator/graph_def/graph/keras model object. - For PyTorch model, it's torch.nn.model instance. For MXNet model, it's mxnet.symbol.Symbol - or gluon.HybirdBlock instance. For ONNX model, it's path to onnx model or loaded ModelProto + For PyTorch model, it's torch.nn.model instance. For MXNet model, it's mxnet.symbol.Symbol + or gluon.HybirdBlock instance. For ONNX model, it's path to onnx model or loaded ModelProto model object. Returns: @@ -167,22 +179,18 @@ def __new__(cls, root, **kwargs): if isinstance(root, BaseModel): if conf != "NA" and conf.framework is None: conf.framework = list(MODELS.keys())[list(MODELS.values()).index(type(root))] - if hasattr(conf, 'backend') and conf.backend == "ipex": - assert conf.framework == "pytorch_ipex",\ - "Please wrap the model with correct Model class!" - if hasattr(conf, 'backend') and conf.backend == "itex": - if get_model_type(root.model) == 'keras': - assert conf.framework == "keras",\ - "Please wrap the model with KerasModel class!" + if hasattr(conf, "backend") and conf.backend == "ipex": + assert conf.framework == "pytorch_ipex", "Please wrap the model with correct Model class!" + if hasattr(conf, "backend") and conf.backend == "itex": + if get_model_type(root.model) == "keras": + assert conf.framework == "keras", "Please wrap the model with KerasModel class!" else: - assert conf.framework == "tensorflow", \ - "Please wrap the model with TensorflowModel class!" + assert conf.framework == "tensorflow", "Please wrap the model with TensorflowModel class!" conf.framework = "tensorflow_itex" if getattr(conf, "approach", None) == "quant_aware_training": - assert conf.framework == "tensorflow_qat", \ - "Please wrap the model with TensorflowQATModel class!" + assert conf.framework == "tensorflow_qat", "Please wrap the model with TensorflowQATModel class!" 
else: - if 'tensorflow' in conf.framework: + if "tensorflow" in conf.framework: if getattr(root, "name", None) is None: root.name = conf.model_name if getattr(root, "output_tensor_names", None) is None: @@ -197,41 +205,41 @@ def __new__(cls, root, **kwargs): if conf == "NA": if framework == "pytorch": framework = "pytorch_fx" - if 'tensorflow' in framework: + if "tensorflow" in framework: if kwargs.get("approach", None) == "quant_aware_training": - return MODELS['tensorflow_qat'](root, **kwargs) - if 'modelType' in kwargs: - model_type = kwargs['modelType'] + return MODELS["tensorflow_qat"](root, **kwargs) + if "modelType" in kwargs: + model_type = kwargs["modelType"] else: model_type = get_model_type(root) if model_type == "keras" and kwargs.get("backend", None) == "itex": - return MODELS['keras'](root, **kwargs) - elif model_type == 'AutoTrackable': # pragma: no cover + return MODELS["keras"](root, **kwargs) + elif model_type == "AutoTrackable": # pragma: no cover return MODELS[framework]("keras", root, **kwargs) else: return MODELS[framework](model_type, root, **kwargs) return MODELS[framework](root, **kwargs) else: conf.framework = framework - if hasattr(conf, 'backend') and conf.backend == "default": + if hasattr(conf, "backend") and conf.backend == "default": if framework == "pytorch": conf.framework = "pytorch_fx" - elif hasattr(conf, 'backend') and conf.backend == "ipex": + elif hasattr(conf, "backend") and conf.backend == "ipex": conf.framework = "pytorch_ipex" - if 'tensorflow' in conf.framework: + if "tensorflow" in conf.framework: if getattr(conf, "approach", None) == "quant_aware_training": - model = MODELS['tensorflow_qat'](root, **kwargs) + model = MODELS["tensorflow_qat"](root, **kwargs) else: - if 'modelType' in kwargs: - model_type = kwargs['modelType'] + if "modelType" in kwargs: + model_type = kwargs["modelType"] else: model_type = get_model_type(root) - if hasattr(conf, 'backend') and conf.backend == "itex": - if model_type == 'keras': + if hasattr(conf, "backend") and conf.backend == "itex": + if model_type == "keras": conf.framework = "keras" model = MODELS[conf.framework](root, **kwargs) - elif model_type == 'AutoTrackable': # pragma: no cover + elif model_type == "AutoTrackable": # pragma: no cover # Workaround using HF model with ITEX conf.framework = "tensorflow_itex" model = MODELS[conf.framework]("keras", root, **kwargs) @@ -239,11 +247,12 @@ def __new__(cls, root, **kwargs): conf.framework = "tensorflow_itex" model = MODELS[conf.framework](model_type, root, **kwargs) else: - model = MODELS['tensorflow']("keras" if model_type == "AutoTrackable" else model_type, - root, **kwargs) + model = MODELS["tensorflow"]( + "keras" if model_type == "AutoTrackable" else model_type, root, **kwargs + ) else: model = MODELS[conf.framework](root, **kwargs) - if 'tensorflow' in conf.framework and hasattr(conf, 'model_name'): + if "tensorflow" in conf.framework and hasattr(conf, "model_name"): model.name = conf.model_name model.output_tensor_names = conf.outputs model.input_tensor_names = conf.inputs diff --git a/neural_compressor/model/mxnet_model.py b/neural_compressor/model/mxnet_model.py index 481ac962282..4324ea7c1ab 100644 --- a/neural_compressor/model/mxnet_model.py +++ b/neural_compressor/model/mxnet_model.py @@ -18,28 +18,32 @@ """Class for MXNet model.""" import os -from neural_compressor.utils.utility import LazyImport + from neural_compressor.utils import logger +from neural_compressor.utils.utility import LazyImport + from .base_model import BaseModel -mx = 
LazyImport('mxnet') + +mx = LazyImport("mxnet") + class MXNetModel(BaseModel): """Build MXNet model.""" def __init__(self, model, **kwargs): """Initialize a MXNet model. - + Args: model (mxnet model): model path """ - #(TODO) MXNet does not support recover model from tuning history currently + # (TODO) MXNet does not support recover model from tuning history currently self.q_config = None self._model = model self.calib_cache = {} def framework(self): """Return framework.""" - return 'mxnet' + return "mxnet" @property def model(self): @@ -55,6 +59,7 @@ def save(self, root=None): """Save MXNet model.""" if root is None: from neural_compressor import config as cfg + root = cfg.default_workspace root = os.path.abspath(os.path.expanduser(root)) os.makedirs(os.path.dirname(root), exist_ok=True) @@ -65,7 +70,7 @@ def save(self, root=None): else: symnet, args, auxs = self._model symnet = symnet.as_nd_ndarray() - args = {k:v.as_nd_ndarray() for k, v in args.items()} - auxs = {k:v.as_nd_ndarray() for k, v in auxs.items()} + args = {k: v.as_nd_ndarray() for k, v in args.items()} + auxs = {k: v.as_nd_ndarray() for k, v in auxs.items()} mx.model.save_checkpoint(root, 0, symnet, args, auxs, remove_amp_cast=False) logger.info("Save quantized symbol model to {}.".format(root)) diff --git a/neural_compressor/model/nets_factory.py b/neural_compressor/model/nets_factory.py index fb7bd03e4ca..677f337bcf2 100644 --- a/neural_compressor/model/nets_factory.py +++ b/neural_compressor/model/nets_factory.py @@ -14,11 +14,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """TF-Slim nets factory.""" from ..utils.utility import singleton + @singleton class TFSlimNetsFactory(object): """TF-Slim nets factory.""" @@ -26,45 +26,124 @@ class TFSlimNetsFactory(object): def __init__(self): """Initialize a TFSlimNetsFactory.""" # tf_slim only support specific models by default - self.default_slim_models = ['alexnet_v2', 'overfeat', 'vgg_a', 'vgg_16', 'vgg_19', \ - 'inception_v1', 'inception_v2', 'inception_v3', \ - 'resnet_v1_50', 'resnet_v1_101', 'resnet_v1_152', 'resnet_v1_200', \ - 'resnet_v2_50', 'resnet_v2_101', 'resnet_v2_152', 'resnet_v2_200'] + self.default_slim_models = [ + "alexnet_v2", + "overfeat", + "vgg_a", + "vgg_16", + "vgg_19", + "inception_v1", + "inception_v2", + "inception_v3", + "resnet_v1_50", + "resnet_v1_101", + "resnet_v1_152", + "resnet_v1_200", + "resnet_v2_50", + "resnet_v2_101", + "resnet_v2_152", + "resnet_v2_200", + ] from tf_slim.nets import alexnet, inception, overfeat, resnet_v1, resnet_v2, vgg + self.networks_map = { - 'alexnet_v2': {'model': alexnet.alexnet_v2, 'input_shape': [None, 224, 224, 3], \ - 'num_classes': 1001, 'arg_scope': alexnet.alexnet_v2_arg_scope}, - 'overfeat': {'model': overfeat.overfeat, 'input_shape': [None, 224, 224, 3], \ - 'num_classes': 1001, 'arg_scope': overfeat.overfeat_arg_scope}, - 'vgg_a': {'model': vgg.vgg_a, 'input_shape': [None, 224, 224, 3], \ - 'num_classes': 1000, 'arg_scope': vgg.vgg_arg_scope}, - 'vgg_16': {'model': vgg.vgg_16, 'input_shape': [None, 224, 224, 3], \ - 'num_classes': 1000, 'arg_scope': vgg.vgg_arg_scope}, - 'vgg_19': {'model': vgg.vgg_19, 'input_shape': [None, 224, 224, 3], \ - 'num_classes': 1000, 'arg_scope': vgg.vgg_arg_scope}, - 'inception_v1': {'model': inception.inception_v1, 'input_shape': [None, 224, 224, 3], \ - 'num_classes': 1001, 'arg_scope': inception.inception_v1_arg_scope}, - 
'inception_v2': {'model': inception.inception_v2, 'input_shape': [None, 224, 224, 3], \ - 'num_classes': 1001, 'arg_scope': inception.inception_v2_arg_scope}, - 'inception_v3': {'model': inception.inception_v3, 'input_shape': [None, 299, 299, 3], \ - 'num_classes': 1001, 'arg_scope': inception.inception_v3_arg_scope}, - 'resnet_v1_50': {'model': resnet_v1.resnet_v1_50, 'input_shape': [None, 224, 224, 3], \ - 'num_classes': 1000, 'arg_scope': resnet_v1.resnet_arg_scope}, - 'resnet_v1_101': {'model': resnet_v1.resnet_v1_101, 'input_shape': [None, 224, 224, 3], \ - 'num_classes': 1000, 'arg_scope': resnet_v1.resnet_arg_scope}, - 'resnet_v1_152': {'model': resnet_v1.resnet_v1_152, 'input_shape': [None, 224, 224, 3], \ - 'num_classes': 1000, 'arg_scope': resnet_v1.resnet_arg_scope}, - 'resnet_v1_200': {'model': resnet_v1.resnet_v1_200, 'input_shape': [None, 224, 224, 3], \ - 'num_classes': 1000, 'arg_scope': resnet_v1.resnet_arg_scope}, - 'resnet_v2_50': {'model': resnet_v2.resnet_v2_50, 'input_shape': [None, 224, 224, 3], \ - 'num_classes': 1001, 'arg_scope': resnet_v2.resnet_arg_scope}, - 'resnet_v2_101': {'model': resnet_v2.resnet_v2_101, 'input_shape': [None, 224, 224, 3], \ - 'num_classes': 1001, 'arg_scope': resnet_v2.resnet_arg_scope}, - 'resnet_v2_152': {'model': resnet_v2.resnet_v2_152, 'input_shape': [None, 224, 224, 3], \ - 'num_classes': 1001, 'arg_scope': resnet_v2.resnet_arg_scope}, - 'resnet_v2_200': {'model': resnet_v2.resnet_v2_200, 'input_shape': [None, 224, 224, 3], \ - 'num_classes': 1001, 'arg_scope': resnet_v2.resnet_arg_scope} + "alexnet_v2": { + "model": alexnet.alexnet_v2, + "input_shape": [None, 224, 224, 3], + "num_classes": 1001, + "arg_scope": alexnet.alexnet_v2_arg_scope, + }, + "overfeat": { + "model": overfeat.overfeat, + "input_shape": [None, 224, 224, 3], + "num_classes": 1001, + "arg_scope": overfeat.overfeat_arg_scope, + }, + "vgg_a": { + "model": vgg.vgg_a, + "input_shape": [None, 224, 224, 3], + "num_classes": 1000, + "arg_scope": vgg.vgg_arg_scope, + }, + "vgg_16": { + "model": vgg.vgg_16, + "input_shape": [None, 224, 224, 3], + "num_classes": 1000, + "arg_scope": vgg.vgg_arg_scope, + }, + "vgg_19": { + "model": vgg.vgg_19, + "input_shape": [None, 224, 224, 3], + "num_classes": 1000, + "arg_scope": vgg.vgg_arg_scope, + }, + "inception_v1": { + "model": inception.inception_v1, + "input_shape": [None, 224, 224, 3], + "num_classes": 1001, + "arg_scope": inception.inception_v1_arg_scope, + }, + "inception_v2": { + "model": inception.inception_v2, + "input_shape": [None, 224, 224, 3], + "num_classes": 1001, + "arg_scope": inception.inception_v2_arg_scope, + }, + "inception_v3": { + "model": inception.inception_v3, + "input_shape": [None, 299, 299, 3], + "num_classes": 1001, + "arg_scope": inception.inception_v3_arg_scope, + }, + "resnet_v1_50": { + "model": resnet_v1.resnet_v1_50, + "input_shape": [None, 224, 224, 3], + "num_classes": 1000, + "arg_scope": resnet_v1.resnet_arg_scope, + }, + "resnet_v1_101": { + "model": resnet_v1.resnet_v1_101, + "input_shape": [None, 224, 224, 3], + "num_classes": 1000, + "arg_scope": resnet_v1.resnet_arg_scope, + }, + "resnet_v1_152": { + "model": resnet_v1.resnet_v1_152, + "input_shape": [None, 224, 224, 3], + "num_classes": 1000, + "arg_scope": resnet_v1.resnet_arg_scope, + }, + "resnet_v1_200": { + "model": resnet_v1.resnet_v1_200, + "input_shape": [None, 224, 224, 3], + "num_classes": 1000, + "arg_scope": resnet_v1.resnet_arg_scope, + }, + "resnet_v2_50": { + "model": resnet_v2.resnet_v2_50, + "input_shape": [None, 224, 
224, 3], + "num_classes": 1001, + "arg_scope": resnet_v2.resnet_arg_scope, + }, + "resnet_v2_101": { + "model": resnet_v2.resnet_v2_101, + "input_shape": [None, 224, 224, 3], + "num_classes": 1001, + "arg_scope": resnet_v2.resnet_arg_scope, + }, + "resnet_v2_152": { + "model": resnet_v2.resnet_v2_152, + "input_shape": [None, 224, 224, 3], + "num_classes": 1001, + "arg_scope": resnet_v2.resnet_arg_scope, + }, + "resnet_v2_200": { + "model": resnet_v2.resnet_v2_200, + "input_shape": [None, 224, 224, 3], + "num_classes": 1001, + "arg_scope": resnet_v2.resnet_arg_scope, + }, } def register(self, name, model_func, input_shape, arg_scope, **kwargs): @@ -76,8 +155,7 @@ def register(self, name, model_func, input_shape, arg_scope, **kwargs): input_shape (_type_): input tensor shape. arg_scope (_type_): slim arg scope that needed. """ - net_info = {'model': model_func, 'input_shape': input_shape, \ - 'arg_scope':arg_scope} + net_info = {"model": model_func, "input_shape": input_shape, "arg_scope": arg_scope} net = {name: {**net_info, **kwargs}} self.networks_map.update(net) self.default_slim_models.append(name) diff --git a/neural_compressor/model/onnx_model.py b/neural_compressor/model/onnx_model.py index 608816b2e4f..28a2c226406 100644 --- a/neural_compressor/model/onnx_model.py +++ b/neural_compressor/model/onnx_model.py @@ -14,21 +14,22 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Class for ONNX model.""" -import os import logging +import os from pathlib import Path -from neural_compressor.utils.utility import LazyImport + from neural_compressor.model.base_model import BaseModel +from neural_compressor.utils.utility import LazyImport -onnx = LazyImport('onnx') +onnx = LazyImport("onnx") ort = LazyImport("onnxruntime") ortq = LazyImport("neural_compressor.adaptor.ox_utils.util") logger = logging.getLogger("neural_compressor") + class ONNXModel(BaseModel): """Build ONNX model.""" @@ -44,15 +45,19 @@ def __init__(self, model, **kwargs): try: ort.InferenceSession(self._model.SerializeToString()) except Exception as e: # pragma: no cover - if 'maximum protobuf size of 2GB' in str(e) or 'string length exceeds max size' in str(e) or \ - 'protobuf parsing failed' in str(e): + if ( + "maximum protobuf size of 2GB" in str(e) + or "string length exceeds max size" in str(e) + or "protobuf parsing failed" in str(e) + ): self._is_large_model = True if self._model_path is None: - logger.warning('Please use model path instead of onnx model object to quantize') + logger.warning("Please use model path instead of onnx model object to quantize") self._config = None - if isinstance(model, str) and os.path.exists(Path(model).parent.joinpath('config.json').as_posix()): + if isinstance(model, str) and os.path.exists(Path(model).parent.joinpath("config.json").as_posix()): from transformers import PretrainedConfig + self._config = PretrainedConfig.from_pretrained(Path(model).parent.as_posix()) self.node_name_counter = {} @@ -81,7 +86,7 @@ def model_path(self, path): def framework(self): """Return framework.""" - return 'onnxruntime' + return "onnxruntime" @property def q_config(self): @@ -138,26 +143,28 @@ def _get_graph_info(self): def save(self, root): """Save ONNX model.""" - if os.path.split(root)[0] != '' and not os.path.exists(os.path.split(root)[0]): + if os.path.split(root)[0] != "" and not os.path.exists(os.path.split(root)[0]): raise ValueError('"root" directory does not 
exists.') - if self.is_large_model: # pragma: no cover - from onnx.external_data_helper import convert_model_to_external_data, \ - load_external_data_for_model + if self.is_large_model: # pragma: no cover + from onnx.external_data_helper import convert_model_to_external_data, load_external_data_for_model + load_external_data_for_model(self._model, os.path.split(self._model_path)[0]) - onnx.save_model(self._model, - root, - save_as_external_data=True, - all_tensors_to_one_file=True, - location="int8_weights.pb", - size_threshold=1024, - convert_attribute=False) + onnx.save_model( + self._model, + root, + save_as_external_data=True, + all_tensors_to_one_file=True, + location="int8_weights.pb", + size_threshold=1024, + convert_attribute=False, + ) else: onnx.save(self._model, root) - + if self._config is not None: - model_type = '' if not hasattr(self._config, 'model_type') else getattr(self._config, 'model_type') - setattr(self._config.__class__, 'model_type', model_type) - output_config_file = Path(root).parent.joinpath('config.json').as_posix() + model_type = "" if not hasattr(self._config, "model_type") else getattr(self._config, "model_type") + setattr(self._config.__class__, "model_type", model_type) + output_config_file = Path(root).parent.joinpath("config.json").as_posix() self._config.to_json_file(output_config_file, use_diff=False) def nodes(self): @@ -238,20 +245,26 @@ def set_initializer(self, tensor, array, raw=False): self.remove_initializer(old_tensor) dims = old_tensor.dims data_type = old_tensor.data_type - new_tensor = onnx.helper.make_tensor(tensor, data_type, dims, array.flatten().tolist()) if not raw \ - else onnx.helper.make_tensor(tensor, data_type, dims, array.tostring(), raw=raw) + new_tensor = ( + onnx.helper.make_tensor(tensor, data_type, dims, array.flatten().tolist()) + if not raw + else onnx.helper.make_tensor(tensor, data_type, dims, array.tostring(), raw=raw) + ) self.add_initializer(new_tensor) - + @property def input_name_to_nodes(self): """Return input names of nodes.""" return self._input_name_to_nodes - + def _get_input_name_to_nodes(self, nodes): """Get input names of nodes.""" for node in nodes: - attrs = [attr for attr in node.attribute if attr.type == onnx.AttributeProto.GRAPH \ - or attr.type == onnx.AttributeProto.GRAPHS] + attrs = [ + attr + for attr in node.attribute + if attr.type == onnx.AttributeProto.GRAPH or attr.type == onnx.AttributeProto.GRAPHS + ] if len(attrs) > 0: for attr in attrs: self._get_input_name_to_nodes(attr.g.node) @@ -270,8 +283,11 @@ def output_name_to_node(self): def _get_output_name_to_node(self, nodes): """Get output names of nodes.""" for node in nodes: - attrs = [attr for attr in node.attribute if attr.type == onnx.AttributeProto.GRAPH \ - or attr.type == onnx.AttributeProto.GRAPHS] + attrs = [ + attr + for attr in node.attribute + if attr.type == onnx.AttributeProto.GRAPH or attr.type == onnx.AttributeProto.GRAPHS + ] if len(attrs) > 0: for attr in attrs: self._get_output_name_to_node(attr.g.node) @@ -287,7 +303,7 @@ def get_siblings(self, node): if child.name != node.name: siblings.append(child) return siblings - + def get_children(self, node, input_name_to_nodes=None): """Get children nodes.""" if input_name_to_nodes is None: @@ -327,7 +343,7 @@ def get_parent(self, node, idx, output_name_to_node=None): def find_node_by_name(self, node_name, new_nodes_list, graph): """Find out node by name.""" - graph_nodes_list = list(graph.node) #deep copy + graph_nodes_list = list(graph.node) # deep copy 
graph_nodes_list.extend(new_nodes_list) node = ortq.find_by_name(node_name, graph_nodes_list) return node @@ -343,55 +359,62 @@ def find_nodes_by_initializer(self, graph, initializer): def get_scale_zero(self, tensor): """Help function to get scale and zero_point.""" - if not tensor.endswith('_quantized'): + if not tensor.endswith("_quantized"): logger.debug("Find {} in the quantized graph is not quantized.".format(tensor)) return None, None - + def _searcher(tensor_name): """Search scale and zero point tensor recursivly.""" node = self._input_name_to_nodes[tensor_name][0] parent = self._output_name_to_node[tensor_name] if tensor_name in self._output_name_to_node else None - direct_int8 = ['Reshape', 'Transpose', 'Squeeze', 'Unsqueeze', 'MaxPool', 'Pad', 'Split'] + direct_int8 = ["Reshape", "Transpose", "Squeeze", "Unsqueeze", "MaxPool", "Pad", "Split"] if parent is not None and parent.op_type in direct_int8: - fp32_tensor_name = \ - parent.input[0].replace('_quantized', '')\ - .replace('_QuantizeLinear', '').replace('_QuantizeInput', '') - elif node.op_type in ['Gather']: # pragma: no cover - fp32_tensor_name = \ - node.output[0].replace('_quantized', '')\ - .replace('_QuantizeLinear', '').replace('_QuantizeInput', '') + fp32_tensor_name = ( + parent.input[0] + .replace("_quantized", "") + .replace("_QuantizeLinear", "") + .replace("_QuantizeInput", "") + ) + elif node.op_type in ["Gather"]: # pragma: no cover + fp32_tensor_name = ( + node.output[0] + .replace("_quantized", "") + .replace("_QuantizeLinear", "") + .replace("_QuantizeInput", "") + ) else: - fp32_tensor_name = \ - tensor_name.replace('_quantized', '')\ - .replace('_QuantizeLinear', '').replace('_QuantizeInput', '') - scale = fp32_tensor_name + '_scale' + fp32_tensor_name = ( + tensor_name.replace("_quantized", "").replace("_QuantizeLinear", "").replace("_QuantizeInput", "") + ) + scale = fp32_tensor_name + "_scale" scale_tensor = self.get_initializer(scale) - zo = fp32_tensor_name + '_zero_point' + zo = fp32_tensor_name + "_zero_point" zo_tensor = self.get_initializer(zo) if scale_tensor is None or zo_tensor is None: if parent is not None: scale_tensor, zo_tensor = _searcher(parent.input[0]) return scale_tensor, zo_tensor - + node = self._input_name_to_nodes[tensor][0] - #TODO check if scale_tensor and zero_point is needed + # TODO check if scale_tensor and zero_point is needed # for bias of qlinearconv, scale and zero_point is not needed - if (node.op_type == 'QLinearConv' and tensor == node.input[-1]) or \ - (node.op_type == 'QGemm' and tensor == node.input[-3]): + if (node.op_type == "QLinearConv" and tensor == node.input[-1]) or ( + node.op_type == "QGemm" and tensor == node.input[-3] + ): return None, None else: scale_tensor, zo_tensor = _searcher(tensor) - assert scale_tensor, 'missing scale for tensor {}'.format(tensor) - assert zo_tensor, 'missing zero point for tensor {}'.format(tensor) + assert scale_tensor, "missing scale for tensor {}".format(tensor) + assert zo_tensor, "missing zero point for tensor {}".format(tensor) return scale_tensor, zo_tensor def save_model_to_file(self, output_path, use_external_data_format=False): """Save model to external data, which is needed for model size > 2GB.""" if use_external_data_format: - onnx.external_data_helper.convert_model_to_external_data(self._model, - all_tensors_to_one_file=True, - location=Path(output_path).name + ".data") + onnx.external_data_helper.convert_model_to_external_data( + self._model, all_tensors_to_one_file=True, location=Path(output_path).name + ".data" 
+ ) onnx.save_model(self._model, output_path) @staticmethod @@ -402,8 +425,7 @@ def replace_node_input(node, old_input_name, new_input_name): if node.input[j] == old_input_name: node.input[j] = new_input_name - def replace_input_of_all_nodes(self, old_input_name, new_input_name, - white_optype=[], black_optype=[]): + def replace_input_of_all_nodes(self, old_input_name, new_input_name, white_optype=[], black_optype=[]): """Replace inputs of all nodes.""" if len(white_optype) > 0: for node in self.model.graph.node: @@ -414,7 +436,6 @@ def replace_input_of_all_nodes(self, old_input_name, new_input_name, if node.op_type not in black_optype: ONNXModel.replace_node_input(node, old_input_name, new_input_name) - @staticmethod def replace_node_output(node, old_output_name, new_output_name): """Replace output of a node.""" @@ -423,8 +444,7 @@ def replace_node_output(node, old_output_name, new_output_name): if node.output[j] == old_output_name: node.output[j] = new_output_name - def replace_output_of_all_nodes(self, old_output_name, new_output_name, - white_optype=[], black_optype=[]): + def replace_output_of_all_nodes(self, old_output_name, new_output_name, white_optype=[], black_optype=[]): """Replace outputs of all nodes.""" if len(white_optype) > 0: for node in self.model.graph.node: @@ -440,28 +460,32 @@ def remove_unused_nodes(self): unused_nodes = [] nodes = self.nodes() for node in nodes: - if node.op_type == "Constant" and node.output[0] not in self._model.graph.output \ - and node.output[0] not in self._input_name_to_nodes: + if ( + node.op_type == "Constant" + and node.output[0] not in self._model.graph.output + and node.output[0] not in self._input_name_to_nodes + ): unused_nodes.append(node) - elif node.op_type == 'QuantizeLinear' and len(self.get_children(node)) == 1 and \ - self.get_children(node)[0].op_type == 'DequantizeLinear' and \ - node.input[0] not in self._output_name_to_node and \ - self.get_children(node)[0].output[0] not in self._input_name_to_nodes: + elif ( + node.op_type == "QuantizeLinear" + and len(self.get_children(node)) == 1 + and self.get_children(node)[0].op_type == "DequantizeLinear" + and node.input[0] not in self._output_name_to_node + and self.get_children(node)[0].output[0] not in self._input_name_to_nodes + ): unused_nodes.append(node) unused_nodes.extend(self.get_children(node)) else: # remove the node if it does not serve as the input or output of any other nodes unused = True for output in node.output: - if output in self._input_name_to_nodes or \ - output in self.output(): + if output in self._input_name_to_nodes or output in self.output(): unused = False break for input in node.input: if self.get_initializer(input) is not None: continue - elif input in self._output_name_to_node or \ - input in self.input(): + elif input in self._output_name_to_node or input in self.input(): unused = False break if unused: @@ -482,9 +506,10 @@ def remove_unused_nodes(self): def topological_sort(self, enable_subgraph=False): """Topological sort the model.""" + import copy from collections import deque from functools import reduce - import copy + if not enable_subgraph: input_name_to_nodes = {} output_name_to_node = {} @@ -498,7 +523,7 @@ def topological_sort(self, enable_subgraph=False): for output_name in node.output: if len(output_name.strip()) != 0: output_name_to_node[output_name] = node - else: # pragma: no cover + else: # pragma: no cover input_name_to_nodes = self._input_name_to_nodes output_name_to_node = self._output_name_to_node @@ -513,8 +538,7 @@ def 
topological_sort(self, enable_subgraph=False): while q: n = q.popleft() - if not all([output_name_to_node[i].name in all_nodes for \ - i in n.input if i in output_name_to_node]): + if not all([output_name_to_node[i].name in all_nodes for i in n.input if i in output_name_to_node]): if n not in wait: wait.append(n) continue @@ -522,20 +546,19 @@ def topological_sort(self, enable_subgraph=False): all_nodes[n.name] = n for out in n.output: if out in input_name_to_nodes: - q.extend([i for i in input_name_to_nodes[out] if \ - i.name not in all_nodes and i not in q]) + q.extend([i for i in input_name_to_nodes[out] if i.name not in all_nodes and i not in q]) if len(q) == 0 and len(wait) != 0: q = copy.deepcopy(wait) wait.clear() nodes = [i[1] for i in all_nodes.items()] - assert len(list(set([n.name for n in nodes]))) == \ - len(list(set([n.name for n in self.model.graph.node]))) - self.model.graph.ClearField('node') + assert len(list(set([n.name for n in nodes]))) == len(list(set([n.name for n in self.model.graph.node]))) + self.model.graph.ClearField("node") self.model.graph.node.extend(nodes) def get_nodes_chain(self, start, stop, result_chain=[]): """Get nodes chain with given start node and stop node.""" from collections import deque + from onnx import NodeProto # process start node list @@ -546,9 +569,8 @@ def get_nodes_chain(self, start, stop, result_chain=[]): elif isinstance(node, NodeProto): start_node.append(node.name) else: - assert False, "'get_nodes_chain' function only support list[string]" \ - "or list[NodeProto] params" - + assert False, "'get_nodes_chain' function only support list[string]" "or list[NodeProto] params" + # process stop node list stop_node = [] for node in stop: @@ -557,8 +579,7 @@ def get_nodes_chain(self, start, stop, result_chain=[]): elif isinstance(node, NodeProto): stop_node.append(node.name) else: - assert False, "'get_nodes_chain' function only support list[string]" \ - "or list[NodeProto] params" + assert False, "'get_nodes_chain' function only support list[string]" "or list[NodeProto] params" while start_node: node_name = start_node.popleft() @@ -574,7 +595,7 @@ def get_nodes_chain(self, start, stop, result_chain=[]): start_node.append(parent.name) return result_chain - + def find_qkv_in_attention(self, find_all=False): """Find qkv MatMul in Attention. 
@@ -587,45 +608,47 @@ def find_qkv_in_attention(self, find_all=False): qkv = [] for node in self._model.graph.node: start_node, qkv_nodes_list = None, None - if node.op_type == 'SkipLayerNormalization': + if node.op_type == "SkipLayerNormalization": start_node = node qkv_nodes_list = [ self.match_parent_path( start_node, ["MatMul", "Reshape", "Transpose", "Reshape", "MatMul"], - [None, 0, 0, 0, 0],) + [None, 0, 0, 0, 0], + ) ] - if node.op_type == 'Add': + if node.op_type == "Add": start_node = node qkv_nodes_list = [ # match base attention structure self.match_parent_path( start_node, ["Add", "MatMul", "Reshape", "Transpose", "MatMul"], - [0, None, 0, 0, 0],), + [0, None, 0, 0, 0], + ), self.match_parent_path( - start_node, - ["Add", "MatMul", "Reshape", "Transpose", "MatMul"], - [1, None, 0, 0, 0]), - + start_node, ["Add", "MatMul", "Reshape", "Transpose", "MatMul"], [1, None, 0, 0, 0] + ), # match gpt attention no past structure self.match_parent_path( start_node, ["Reshape", "Gemm", "Reshape", "Reshape", "Transpose", "MatMul"], - [ None, 0, 0, 0, 0, 0], + [None, 0, 0, 0, 0, 0], output_name_to_node=self.output_name_to_node, - return_indice=[]), - + return_indice=[], + ), # match bart attention structure self.match_parent_path( start_node, ["Add", "MatMul", "Reshape", "Transpose", "Reshape", "MatMul"], - [0, None, 0, 0, 0, 0]), + [0, None, 0, 0, 0, 0], + ), self.match_parent_path( start_node, ["Add", "MatMul", "Reshape", "Transpose", "Reshape", "MatMul"], - [1, None, 0, 0, 0, 0]), - ] + [1, None, 0, 0, 0, 0], + ), + ] if not start_node: continue @@ -653,11 +676,11 @@ def find_qkv_in_attention(self, find_all=False): def export(self, save_path, conf): """Export Qlinear to QDQ model.""" - from neural_compressor.experimental.export import onnx_qlinear_to_qdq from neural_compressor.config import ONNXQlinear2QDQConfig + from neural_compressor.experimental.export import onnx_qlinear_to_qdq + if isinstance(conf, ONNXQlinear2QDQConfig): - add_nodes, remove_nodes, inits = onnx_qlinear_to_qdq(self._model, - self._input_name_to_nodes) + add_nodes, remove_nodes, inits = onnx_qlinear_to_qdq(self._model, self._input_name_to_nodes) self.add_nodes(add_nodes) self.remove_nodes(remove_nodes) self.add_initializers(inits) @@ -666,8 +689,7 @@ def export(self, save_path, conf): self.topological_sort() self.save(save_path) else: - logger.warning("Unsupported config for export, " - "only ONNXQlinear2QDQConfig is supported!") + logger.warning("Unsupported config for export, " "only ONNXQlinear2QDQConfig is supported!") exit(0) def add_tensors_to_outputs(self, tensor_names): @@ -746,10 +768,7 @@ def match_parent( output_name_to_node = self._output_name_to_node if input_index is None: - parent, index = self.match_first_parent(node, - parent_op_type, - output_name_to_node, - exclude) + parent, index = self.match_first_parent(node, parent_op_type, output_name_to_node, exclude) if return_indice is not None: return_indice.append(index) return parent @@ -772,14 +791,26 @@ def get_absorb_pairs(self, target_optype): Returns: absorb_pairs (dict): a dict of absorb pairs {parent: list of absorbable children}. 
""" - absorbable_optypes = ["LayerNormalization", "BatchNormalization", "InstanceNormalization", "Conv", - "SimplifiedLayerNormalization", "MatMul", "Gemm", "Mul", "FusedConv"] + absorbable_optypes = [ + "LayerNormalization", + "BatchNormalization", + "InstanceNormalization", + "Conv", + "SimplifiedLayerNormalization", + "MatMul", + "Gemm", + "Mul", + "FusedConv", + ] absorb_pairs = {} for node in self.nodes(): if node.op_type in target_optype and self.get_initializer(node.input[1]) is not None: parent = self.get_parent(node, 0) - if parent is None or parent.op_type not in absorbable_optypes or \ - self.get_initializer(parent.input[1]) is None: + if ( + parent is None + or parent.op_type not in absorbable_optypes + or self.get_initializer(parent.input[1]) is None + ): continue absorb_pairs.setdefault(parent.name, []).append(node) return absorb_pairs @@ -797,10 +828,10 @@ def match_parent_path( Args: node (str): current node name. parent_op_types (str): constraint of parent node op_type of each input edge. - parent_input_index (list): constraint of input index of each input edge. + parent_input_index (list): constraint of input index of each input edge. None means no constraint. output_name_to_node (dict): dictionary with output name as key, and node as value. - return_indice (list): a list to append the input index when there is + return_indice (list): a list to append the input index when there is no constraint on input index of an edge. Returns: @@ -839,4 +870,4 @@ def is_smoothquant_model(self): for init in self.model.graph.initializer: if "_smooth_scale" in init.name: return True - return False \ No newline at end of file + return False diff --git a/neural_compressor/model/tensorflow_model.py b/neural_compressor/model/tensorflow_model.py index a71171d67be..ad52957d651 100644 --- a/neural_compressor/model/tensorflow_model.py +++ b/neural_compressor/model/tensorflow_model.py @@ -14,27 +14,33 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Class for Tensorflow model.""" import copy +import importlib +import json import os import shutil -import importlib -from abc import abstractmethod -import tempfile import sys -import json -from neural_compressor.utils.utility import LazyImport, compute_sparsity -from neural_compressor.utils.utility import version1_lt_version2, version1_gt_version2, version1_gte_version2 -from neural_compressor.utils import logger +import tempfile +from abc import abstractmethod + from neural_compressor import config as cfg from neural_compressor.model.base_model import BaseModel +from neural_compressor.utils import logger +from neural_compressor.utils.utility import ( + LazyImport, + compute_sparsity, + version1_gt_version2, + version1_gte_version2, + version1_lt_version2, +) + +tf = LazyImport("tensorflow") +np = LazyImport("numpy") -tf = LazyImport('tensorflow') -np = LazyImport('numpy') +tensor_to_node = lambda s: list(set([x.split(":")[0] for x in s])) -tensor_to_node = lambda s: list(set([x.split(':')[0] for x in s])) def get_model_type(model): """Get Tensorflow mode type. 
@@ -45,54 +51,55 @@ def get_model_type(model): Returns: string: model type """ - from neural_compressor.adaptor.tf_utils.util import is_saved_model_format, is_ckpt_format + from neural_compressor.adaptor.tf_utils.util import is_ckpt_format, is_saved_model_format + if isinstance(model, str): model = os.path.abspath(os.path.expanduser(model)) - if (model.endswith('.h5') and os.path.isfile(model)) or \ - is_saved_model_format(os.path.dirname(model)) or \ - (os.path.isdir(model) and is_saved_model_format(model)): - if version1_lt_version2(tf.version.VERSION, '2.10.0'): # pragma: no cover - logger.warn("keras model running on tensorflow 2.10.0 and" - " lower not support intel ITEX.") + if ( + (model.endswith(".h5") and os.path.isfile(model)) + or is_saved_model_format(os.path.dirname(model)) + or (os.path.isdir(model) and is_saved_model_format(model)) + ): + if version1_lt_version2(tf.version.VERSION, "2.10.0"): # pragma: no cover + logger.warn("keras model running on tensorflow 2.10.0 and" " lower not support intel ITEX.") try: model = tf.keras.models.load_model(model) - if isinstance(model, tf.keras.Model) and hasattr(model, 'to_json'): - return 'keras' - return 'saved_model' + if isinstance(model, tf.keras.Model) and hasattr(model, "to_json"): + return "keras" + return "saved_model" except: pass - if isinstance(model, tf.keras.Model) and hasattr(model, 'to_json'): - if json.loads(model.to_json())["class_name"] in ["Sequential","Functional"]: + if isinstance(model, tf.keras.Model) and hasattr(model, "to_json"): + if json.loads(model.to_json())["class_name"] in ["Sequential", "Functional"]: # Keras adaptor only support Sequential or Functional model - return 'keras' + return "keras" else: # otherwise, the backend will fallback to tensorflow_itex - return 'AutoTrackable' + return "AutoTrackable" if isinstance(model, tf.Graph): - return 'graph' + return "graph" elif isinstance(model, tf.compat.v1.GraphDef): - return 'graph_def' + return "graph_def" elif isinstance(model, tf.compat.v1.estimator.Estimator): - return 'estimator' + return "estimator" elif isinstance(model, str): model = os.path.abspath(os.path.expanduser(model)) - if (model.endswith('.pb') and os.path.isfile(model)): + if model.endswith(".pb") and os.path.isfile(model): if is_saved_model_format(os.path.dirname(model)): - return 'saved_model' + return "saved_model" else: - return 'frozen_pb' - elif model.endswith('.ckpt') and os.path.isfile(model): - return 'slim' + return "frozen_pb" + elif model.endswith(".ckpt") and os.path.isfile(model): + return "slim" elif os.path.isdir(model): if is_ckpt_format(model): - return 'checkpoint' + return "checkpoint" elif is_saved_model_format(model): - return 'saved_model' - elif os.path.isfile(model + '.pb'): - return 'frozen_pb' - - raise ValueError('model {} has not recognized model type....'.format(model)) + return "saved_model" + elif os.path.isfile(model + ".pb"): + return "frozen_pb" + raise ValueError("model {} has not recognized model type....".format(model)) def validate_graph_node(graph_def, node_names): @@ -107,14 +114,12 @@ def validate_graph_node(graph_def, node_names): all_node_name = [node.name for node in graph_def.node] for user_name in node_names: if user_name not in all_node_name: - logger.warn( - str("Node name {} specified in yaml doesn't exist in the model."). 
- format(user_name)) + logger.warn(str("Node name {} specified in yaml doesn't exist in the model.").format(user_name)) return False return True -def validate_and_inference_input_output(graph_def, \ - input_tensor_names, output_tensor_names): + +def validate_and_inference_input_output(graph_def, input_tensor_names, output_tensor_names): """Validate and inference the input and output tensor names of graph_def. Args: @@ -127,6 +132,7 @@ def validate_and_inference_input_output(graph_def, \ output_tensor_names (list of string): validated output_tensor_names. """ from neural_compressor.adaptor.tf_utils.util import get_input_output_node_names + temp_output_tensor_names = [] if validate_graph_node(graph_def, tensor_to_node(input_tensor_names)): input_tensor_names = input_tensor_names @@ -142,6 +148,7 @@ def validate_and_inference_input_output(graph_def, \ return input_tensor_names, output_tensor_names + def graph_session(model, input_tensor_names, output_tensor_names, **kwargs): """Helper to build session with tf.compat.v1.Graph. @@ -160,11 +167,13 @@ def graph_session(model, input_tensor_names, output_tensor_names, **kwargs): config.inter_op_parallelism_threads = 1 sess = tf.compat.v1.Session(graph=model, config=config) - input_tensor_names, output_tensor_names = validate_and_inference_input_output(\ - model.as_graph_def(), input_tensor_names, output_tensor_names) + input_tensor_names, output_tensor_names = validate_and_inference_input_output( + model.as_graph_def(), input_tensor_names, output_tensor_names + ) return sess, input_tensor_names, output_tensor_names + def graph_def_session(model, input_tensor_names, output_tensor_names, **kwargs): """Build session with tf.compat.v1.GraphDef. @@ -178,31 +187,34 @@ def graph_def_session(model, input_tensor_names, output_tensor_names, **kwargs): input_tensor_names (list of string): validated input_tensor_names output_tensor_names (list of string): validated output_tensor_names """ - device = kwargs.get('device') + device = kwargs.get("device") graph = tf.Graph() - if version1_lt_version2(tf.version.VERSION, '2.0.0'): # pragma: no cover + if version1_lt_version2(tf.version.VERSION, "2.0.0"): # pragma: no cover from tensorflow._api.v1.config import experimental + list_physical_devices = experimental.list_physical_devices else: list_physical_devices = tf.config.list_physical_devices try: with graph.as_default(): - tf.import_graph_def(model, name='') + tf.import_graph_def(model, name="") except: - input_tensor_names, output_tensor_names = validate_and_inference_input_output(\ - model, input_tensor_names, output_tensor_names) - from neural_compressor.adaptor.tf_utils.util import fix_ref_type_of_graph_def - from neural_compressor.adaptor.tf_utils.util import strip_unused_nodes + input_tensor_names, output_tensor_names = validate_and_inference_input_output( + model, input_tensor_names, output_tensor_names + ) + from neural_compressor.adaptor.tf_utils.util import fix_ref_type_of_graph_def, strip_unused_nodes + model = fix_ref_type_of_graph_def(model) input_node_names = tensor_to_node(input_tensor_names) output_node_names = tensor_to_node(output_tensor_names) model = strip_unused_nodes(model, input_node_names, output_node_names) with graph.as_default(): - tf.import_graph_def(model, name='') + tf.import_graph_def(model, name="") return graph_session(graph, input_tensor_names, output_tensor_names, **kwargs) + def frozen_pb_session(model, input_tensor_names, output_tensor_names, **kwargs): """Build session with frozen pb. 
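# --- Editor's note: illustrative sketch, not part of this patch. ----------
# graph_def_session()/frozen_pb_session(), reformatted in the hunks around
# this point, boil down to: read the serialized GraphDef, import it into a
# fresh tf.Graph, and open a single-threaded tf.compat.v1.Session on it.
# Minimal standalone sketch of that flow (my own names, placeholder path):
import tensorflow as tf

graph_def = tf.compat.v1.GraphDef()
with open("/path/to/frozen_graph.pb", "rb") as f:
    graph_def.ParseFromString(f.read())

graph = tf.Graph()
with graph.as_default():
    tf.import_graph_def(graph_def, name="")

config = tf.compat.v1.ConfigProto()
config.use_per_session_threads = 1
config.inter_op_parallelism_threads = 1
sess = tf.compat.v1.Session(graph=graph, config=config)
# ---------------------------------------------------------------------------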
@@ -217,20 +229,20 @@ def frozen_pb_session(model, input_tensor_names, output_tensor_names, **kwargs): output_tensor_names (list of string): validated output_tensor_names. """ graph_def = tf.compat.v1.GraphDef() - model = model if model.endswith('.pb') else model + '.pb' - with open(model, 'rb') as f: + model = model if model.endswith(".pb") else model + ".pb" + with open(model, "rb") as f: graph_def.ParseFromString(f.read()) - return graph_def_session(graph_def, input_tensor_names, \ - output_tensor_names, **kwargs) + return graph_def_session(graph_def, input_tensor_names, output_tensor_names, **kwargs) + def _contains_function_with_implements_attr(saved_model_proto): meta_graph = saved_model_proto.meta_graphs[0] for function in meta_graph.graph_def.library.function: - if function.attr.get("_implements", None) or function.attr.get( - "api_implements", None): + if function.attr.get("_implements", None) or function.attr.get("api_implements", None): return True return False + def load_saved_model(model, saved_model_tags, input_tensor_names, output_tensor_names): """Load graph_def from saved model with the default serving signature key. @@ -247,79 +259,73 @@ def load_saved_model(model, saved_model_tags, input_tensor_names, output_tensor_ config = tf.compat.v1.ConfigProto() config.use_per_session_threads = 1 config.inter_op_parallelism_threads = 1 - if not os.listdir(os.path.join(model,'variables')): + if not os.listdir(os.path.join(model, "variables")): sess = tf.compat.v1.Session(graph=tf.Graph(), config=config) loader = tf.compat.v1.saved_model.loader.load(sess, ["serve"], model) if len(input_tensor_names) == 0: - input_tensor_names = [i.name for _, i in \ - loader.signature_def['serving_default'].inputs.items()] + input_tensor_names = [i.name for _, i in loader.signature_def["serving_default"].inputs.items()] else: - assert validate_graph_node(\ - sess.graph.as_graph_def(), tensor_to_node(input_tensor_names)), \ - 'tensor names {} not in the graph'.format(input_tensor_names) + assert validate_graph_node( + sess.graph.as_graph_def(), tensor_to_node(input_tensor_names) + ), "tensor names {} not in the graph".format(input_tensor_names) if len(output_tensor_names) == 0: - output_tensor_names = [i.name for _, i in \ - loader.signature_def['serving_default'].outputs.items()] + output_tensor_names = [i.name for _, i in loader.signature_def["serving_default"].outputs.items()] else: - assert validate_graph_node(\ - sess.graph.as_graph_def(), tensor_to_node(output_tensor_names)), \ - 'tensor names {} not in the graph'.format(output_tensor_names) + assert validate_graph_node( + sess.graph.as_graph_def(), tensor_to_node(output_tensor_names) + ), "tensor names {} not in the graph".format(output_tensor_names) return sess.graph.as_graph_def(), input_tensor_names, output_tensor_names else: + from tensorflow.core.protobuf import config_pb2, meta_graph_pb2 from tensorflow.python.eager import context - from tensorflow.python.saved_model import load - from tensorflow.python.saved_model import tag_constants - from tensorflow.python.saved_model import signature_constants - from tensorflow.python.framework.convert_to_constants import \ - convert_variables_to_constants_v2 - from tensorflow.python.training import saver - from tensorflow.core.protobuf import config_pb2 + from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2 from tensorflow.python.grappler import tf_optimizer - from tensorflow.core.protobuf import meta_graph_pb2 + from tensorflow.python.saved_model import 
load, signature_constants, tag_constants + from tensorflow.python.training import saver + _saved_model = load.load(model, [tag_constants.SERVING]) func = _saved_model.signatures[signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] frozen_func = convert_variables_to_constants_v2(func) grappler_meta_graph_def = saver.export_meta_graph( - graph_def=frozen_func.graph.as_graph_def(), graph=frozen_func.graph) + graph_def=frozen_func.graph.as_graph_def(), graph=frozen_func.graph + ) if len(input_tensor_names) == 0: - input_tensor_names = [i.name.split(':')[0] for i in frozen_func.inputs] + input_tensor_names = [i.name.split(":")[0] for i in frozen_func.inputs] if len(output_tensor_names) == 0: - output_tensor_names = [i.name.split(':')[0] for i in frozen_func.outputs] + output_tensor_names = [i.name.split(":")[0] for i in frozen_func.outputs] # Add a collection 'train_op' so that Grappler knows the outputs. fetch_collection = meta_graph_pb2.CollectionDef() for array in frozen_func.inputs + frozen_func.outputs: fetch_collection.node_list.value.append(array.name) - grappler_meta_graph_def.collection_def["train_op"].CopyFrom( - fetch_collection) + grappler_meta_graph_def.collection_def["train_op"].CopyFrom(fetch_collection) grappler_session_config = config_pb2.ConfigProto() rewrite_options = grappler_session_config.graph_options.rewrite_options rewrite_options.min_graph_nodes = -1 - opt = tf_optimizer.OptimizeGraph(grappler_session_config, - grappler_meta_graph_def, graph_id=b"tf_graph") + opt = tf_optimizer.OptimizeGraph(grappler_session_config, grappler_meta_graph_def, graph_id=b"tf_graph") return opt, input_tensor_names, output_tensor_names -def _get_graph_from_saved_model_v2(saved_model_dir, - input_tensor_names, output_tensor_names): - from tensorflow.python.saved_model import tag_constants - from tensorflow.python.saved_model import signature_constants - saved_model_exported_names = [ - signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY - ] + +def _get_graph_from_saved_model_v2(saved_model_dir, input_tensor_names, output_tensor_names): + from tensorflow.python.saved_model import signature_constants, tag_constants + + saved_model_exported_names = [signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] saved_model_tags = set([tag_constants.SERVING]) - return load_saved_model(saved_model_dir, saved_model_tags, - input_tensor_names, output_tensor_names) + return load_saved_model(saved_model_dir, saved_model_tags, input_tensor_names, output_tensor_names) + def _get_graph_from_original_keras_v2(model, output_dir): - from tensorflow.python.eager import def_function - from tensorflow.lite.python.util import trace_model_call - from tensorflow.lite.python.util import model_input_signature - from tensorflow.python.framework import convert_to_constants - from tensorflow.python.framework import dtypes - from tensorflow.lite.python.util import run_graph_optimizations from tensorflow.lite.python.convert import OpsSet - from tensorflow.lite.python.util import get_grappler_config + from tensorflow.lite.python.util import ( + get_grappler_config, + model_input_signature, + run_graph_optimizations, + trace_model_call, + ) + from tensorflow.python.eager import def_function + from tensorflow.python.framework import convert_to_constants, dtypes + input_signature = None # If the model's call is not a `tf.function`, then we need to first get its # input signature from `model_input_signature` method. 
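# --- Editor's note: illustrative sketch, not part of this patch. ----------
# The load_saved_model() branch reformatted above freezes the default serving
# signature of a TF2 SavedModel into a constant GraphDef before handing it to
# Grappler.  The core of that flow, with the Grappler step omitted, looks
# roughly like this (placeholder path; sketch only, not the INC code):
from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2
from tensorflow.python.saved_model import load, signature_constants, tag_constants

saved = load.load("/path/to/saved_model_dir", [tag_constants.SERVING])
func = saved.signatures[signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
frozen_func = convert_variables_to_constants_v2(func)

graph_def = frozen_func.graph.as_graph_def()
input_names = [t.name.split(":")[0] for t in frozen_func.inputs]
output_names = [t.name.split(":")[0] for t in frozen_func.outputs]
# ---------------------------------------------------------------------------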
@@ -330,14 +336,11 @@ def _get_graph_from_original_keras_v2(model, output_dir): concrete_func = func.get_concrete_function() funcs = [concrete_func] - frozen_func, graph_def = ( - convert_to_constants.convert_variables_to_constants_v2_as_graph( - funcs[0], lower_control_flow=False)) + frozen_func, graph_def = convert_to_constants.convert_variables_to_constants_v2_as_graph( + funcs[0], lower_control_flow=False + ) - input_tensors = [ - tensor for tensor in frozen_func.inputs - if tensor.dtype != dtypes.resource - ] + input_tensors = [tensor for tensor in frozen_func.inputs if tensor.dtype != dtypes.resource] output_tensors = frozen_func.outputs # Grappler will also try to lower while loop into switch merge # representation which is undesired for Ophints, so we simply remove @@ -351,41 +354,37 @@ def _get_graph_from_original_keras_v2(model, output_dir): # input_tensors, # output_tensors, # config=grappler_config) - input_names = [tensor.name.split(':')[0] for tensor in input_tensors] - output_names = [tensor.name.split(':')[0] for tensor in output_tensors] + input_names = [tensor.name.split(":")[0] for tensor in input_tensors] + output_names = [tensor.name.split(":")[0] for tensor in output_tensors] return graph_def, input_names, output_names + def _check_keras_format(model, saved_model_dir): from tensorflow.python import saved_model - from tensorflow.python.saved_model.load import load from tensorflow.python.saved_model import save_options + from tensorflow.python.saved_model.load import load from tensorflow.python.saved_model.loader_impl import parse_saved_model_with_debug_info - version = 'saved_model_v2' + + version = "saved_model_v2" try: - saved_model.save( - model, - saved_model_dir, - options=save_options.SaveOptions(save_debug_info=True)) + saved_model.save(model, saved_model_dir, options=save_options.SaveOptions(save_debug_info=True)) except: - return 'trackable_object' + return "trackable_object" saved_model_proto, _ = parse_saved_model_with_debug_info(saved_model_dir) saved_model_version = saved_model_proto.saved_model_schema_version if saved_model_version == 0: - return 'saved_model_v1' + return "saved_model_v1" if saved_model_version not in [1, 2]: - raise ValueError("SavedModel file format({0}) is not supported".format( - saved_model_version)) + raise ValueError("SavedModel file format({0}) is not supported".format(saved_model_version)) return version + def _get_graph_from_saved_model_v1(model): - from tensorflow.python.framework import ops - from tensorflow.python.saved_model import constants + from tensorflow.lite.python.convert_saved_model import get_inputs_outputs, get_meta_graph_def, get_signature_def from tensorflow.python.client import session - from tensorflow.python.saved_model import tag_constants - from tensorflow.python.saved_model import signature_constants - from tensorflow.lite.python.convert_saved_model import get_meta_graph_def - from tensorflow.lite.python.convert_saved_model import get_signature_def - from tensorflow.lite.python.convert_saved_model import get_inputs_outputs + from tensorflow.python.framework import ops + from tensorflow.python.saved_model import constants, signature_constants, tag_constants + saved_model_tags = set([tag_constants.SERVING]) signature_key = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY @@ -397,29 +396,30 @@ def _get_graph_from_saved_model_v1(model): if constants.ASSETS_KEY in collection_def: raise ValueError("SavedModels with assets/ directory are not supported.") - from tensorflow.python.saved_model import loader from 
tensorflow.compat.v1 import graph_util as tf_graph_util + from tensorflow.python.saved_model import loader + graph = ops.Graph() import tensorflow as tf + with session.Session(graph=graph) as sess: loader.load(sess, meta_graph.meta_info_def.tags, model) sess.run(tf.compat.v1.global_variables_initializer()) sess.run(tf.compat.v1.tables_initializer()) - output_nodes = list(set([output.split(':')[0] for output in outputs])) + output_nodes = list(set([output.split(":")[0] for output in outputs])) node_ops = [node.op for node in graph.as_graph_def().node] - if 'MakeIterator' in node_ops: - output_nodes.append('MakeIterator') - table_ops = tf.compat.v1.get_collection( - tf.compat.v1.GraphKeys.TABLE_INITIALIZERS) + if "MakeIterator" in node_ops: + output_nodes.append("MakeIterator") + table_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TABLE_INITIALIZERS) # For table initialization for table_op in table_ops: output_nodes.append(table_op.name) if len(table_ops) > 0: - output_nodes.append('init_all_tables') - graph_def = tf_graph_util.convert_variables_to_constants( - sess, graph.as_graph_def(), output_nodes) + output_nodes.append("init_all_tables") + graph_def = tf_graph_util.convert_variables_to_constants(sess, graph.as_graph_def(), output_nodes) return graph_def, inputs, outputs + def keras_session(model, input_tensor_names, output_tensor_names, **kwargs): """Build session with keras model. @@ -434,30 +434,30 @@ def keras_session(model, input_tensor_names, output_tensor_names, **kwargs): output_tensor_names (list of string): validated output_tensor_names. """ temp_dir = tempfile.mkdtemp() - if tf.version.VERSION > '2.1.0': + if tf.version.VERSION > "2.1.0": if not isinstance(model, tf.keras.Model): model = tf.keras.models.load_model(model) keras_format = _check_keras_format(model, temp_dir) - if keras_format == 'saved_model_v2': + if keras_format == "saved_model_v2": try: graph_def, input_names, output_names = _get_graph_from_saved_model_v2( - temp_dir, input_tensor_names, output_tensor_names) - if '_FusedBatchNormEx' in [node.op for node in graph_def.node]: - keras_format = 'trackable_object' + temp_dir, input_tensor_names, output_tensor_names + ) + if "_FusedBatchNormEx" in [node.op for node in graph_def.node]: + keras_format = "trackable_object" except: - keras_format = 'trackable_object' - if keras_format == 'trackable_object': + keras_format = "trackable_object" + if keras_format == "trackable_object": try: - graph_def, input_names, output_names = _get_graph_from_original_keras_v2( - model, temp_dir) + graph_def, input_names, output_names = _get_graph_from_original_keras_v2(model, temp_dir) except: - keras_format = 'saved_model_v1' - if keras_format == 'saved_model_v1': + keras_format = "saved_model_v1" + if keras_format == "saved_model_v1": try: tf.keras.backend.set_learning_phase(0) graph_def, input_names, output_names = _get_graph_from_saved_model_v1(model) except: - raise ValueError('Not supported keras model type...') + raise ValueError("Not supported keras model type...") # tensorflow 1.x use v1 convert method else: @@ -467,7 +467,7 @@ def keras_session(model, input_tensor_names, output_tensor_names, **kwargs): return graph_def_session(graph_def, input_names, output_names, **kwargs) -def slim_session(model, input_tensor_names, output_tensor_names, **kwargs): # pragma: no cover +def slim_session(model, input_tensor_names, output_tensor_names, **kwargs): # pragma: no cover """Build session with slim model. 
Args: @@ -480,41 +480,43 @@ def slim_session(model, input_tensor_names, output_tensor_names, **kwargs): # pr input_tensor_names (list of string): validated input_tensor_names. output_tensor_names (list of string): validated output_tensor_names. """ - assert version1_lt_version2(tf.version.VERSION, '2.0.0'), 'slim model only used in tensorflow 1.x' + assert version1_lt_version2(tf.version.VERSION, "2.0.0"), "slim model only used in tensorflow 1.x" from .nets_factory import TFSlimNetsFactory + factory = TFSlimNetsFactory() - assert 'name' in kwargs, 'model name should be set in slim checkpoint....' - assert kwargs['name'] in factory.default_slim_models, \ - 'only support topology {}'.format(factory.default_slim_models) - net = copy.deepcopy(factory.networks_map[kwargs['name']]) - model_func = net.pop('model') - arg_scope = net.pop('arg_scope')() - inputs_shape = net.pop('input_shape') + assert "name" in kwargs, "model name should be set in slim checkpoint...." + assert kwargs["name"] in factory.default_slim_models, "only support topology {}".format(factory.default_slim_models) + net = copy.deepcopy(factory.networks_map[kwargs["name"]]) + model_func = net.pop("model") + arg_scope = net.pop("arg_scope")() + inputs_shape = net.pop("input_shape") kwargs = net import tf_slim as slim + with tf.Graph().as_default(): - images = tf.compat.v1.placeholder(name='input', dtype=tf.float32, \ - shape=inputs_shape) + images = tf.compat.v1.placeholder(name="input", dtype=tf.float32, shape=inputs_shape) with tf.compat.v1.Session() as sess: with slim.arg_scope(arg_scope) as scope: # pylint: disable=not-context-manager model_func(images, is_training=False, **kwargs) graph_def = sess.graph.as_graph_def() - output_tensor_names = output_tensor_names if len(output_tensor_names) > 0 \ - else [graph_def.node[-1].name] + output_tensor_names = output_tensor_names if len(output_tensor_names) > 0 else [graph_def.node[-1].name] from tensorflow.python.tools.freeze_graph import freeze_graph_with_def_protos + graph_def = freeze_graph_with_def_protos( input_graph_def=graph_def, input_saver_def=None, input_checkpoint=model, - output_node_names=','.join(output_tensor_names), - restore_op_name='save/restore_all', - filename_tensor_name='save/Const:0', - output_graph='', + output_node_names=",".join(output_tensor_names), + restore_op_name="save/restore_all", + filename_tensor_name="save/Const:0", + output_graph="", clear_devices=True, - initializer_nodes='') + initializer_nodes="", + ) + + return graph_def_session(graph_def, ["input"], output_tensor_names, **kwargs) - return graph_def_session(graph_def, ['input'], output_tensor_names, **kwargs) def checkpoint_session(model, input_tensor_names, output_tensor_names, **kwargs): """Build session with ckpt model. @@ -529,45 +531,48 @@ def checkpoint_session(model, input_tensor_names, output_tensor_names, **kwargs) input_tensor_names (list of string): validated input_tensor_names. output_tensor_names (list of string): validated output_tensor_names. """ - assert output_tensor_names is not None and len(output_tensor_names) > 0, \ - 'outputs should not be None of checkpoint....' + assert ( + output_tensor_names is not None and len(output_tensor_names) > 0 + ), "outputs should not be None of checkpoint...." 
- ckpt_prefix = [os.path.splitext(i)[0] for i in os.listdir(model) \ - if i.endswith(".meta")][0] + ckpt_prefix = [os.path.splitext(i)[0] for i in os.listdir(model) if i.endswith(".meta")][0] config = tf.compat.v1.ConfigProto() config.use_per_session_threads = 1 config.inter_op_parallelism_threads = 1 graph = tf.Graph() sess = tf.compat.v1.Session(graph=graph, config=config) - if version1_lt_version2(tf.version.VERSION, '2.0.0'): # pragma: no cover + if version1_lt_version2(tf.version.VERSION, "2.0.0"): # pragma: no cover from tensorflow._api.v1.config import experimental + list_physical_devices = experimental.list_physical_devices else: list_physical_devices = tf.config.list_physical_devices with graph.as_default(): - device = kwargs.get('device') + device = kwargs.get("device") if device == "cpu": cpus = list_physical_devices("CPU") - node_device = cpus[0].name.replace('physical_device:', '') + node_device = cpus[0].name.replace("physical_device:", "") with graph.device(node_device): - saver = tf.compat.v1.train.import_meta_graph(\ - os.path.join(model, ckpt_prefix + '.meta'), clear_devices=True) - else: # pragma: no cover - saver = tf.compat.v1.train.import_meta_graph(\ - os.path.join(model, ckpt_prefix + '.meta'), clear_devices=True) + saver = tf.compat.v1.train.import_meta_graph( + os.path.join(model, ckpt_prefix + ".meta"), clear_devices=True + ) + else: # pragma: no cover + saver = tf.compat.v1.train.import_meta_graph(os.path.join(model, ckpt_prefix + ".meta"), clear_devices=True) sess.run(tf.compat.v1.global_variables_initializer()) saver.restore(sess, os.path.join(model, ckpt_prefix)) from neural_compressor.adaptor.tf_utils.util import get_input_output_node_names + if validate_graph_node(sess.graph.as_graph_def(), tensor_to_node(input_tensor_names)): input_tensor_names = input_tensor_names else: input_tensor_names, _ = get_input_output_node_names(sess.graph.as_graph_def()) return sess, input_tensor_names, output_tensor_names + def estimator_session(model, input_tensor_names, output_tensor_names, **kwargs): """Build session with estimator model. @@ -582,17 +587,17 @@ def estimator_session(model, input_tensor_names, output_tensor_names, **kwargs): input_tensor_names (list of string): validated input_tensor_names. output_tensor_names (list of string): validated output_tensor_names. """ - assert 'input_fn' in kwargs, 'input func should be supplied for estimator session....' + assert "input_fn" in kwargs, "input func should be supplied for estimator session...." 
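# --- Editor's note: illustrative sketch, not part of this patch. ----------
# checkpoint_session(), reformatted just above, rebuilds a TF1 session from a
# checkpoint directory by importing the .meta graph and restoring variables.
# Stripped-down sketch of that flow (placeholder path, my own names; device
# pinning and input-name validation are omitted):
import os
import tensorflow as tf

ckpt_dir = "/path/to/checkpoint_dir"
ckpt_prefix = [os.path.splitext(f)[0] for f in os.listdir(ckpt_dir) if f.endswith(".meta")][0]

graph = tf.Graph()
sess = tf.compat.v1.Session(graph=graph)
with graph.as_default():
    saver = tf.compat.v1.train.import_meta_graph(os.path.join(ckpt_dir, ckpt_prefix + ".meta"), clear_devices=True)
    sess.run(tf.compat.v1.global_variables_initializer())
    saver.restore(sess, os.path.join(ckpt_dir, ckpt_prefix))
# ---------------------------------------------------------------------------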
with tf.Graph().as_default() as g: - features, input_hooks = model._get_features_from_input_fn( - kwargs['input_fn'], tf.estimator.ModeKeys.PREDICT) - estimator_spec = model._call_model_fn(features, None, - tf.estimator.ModeKeys.PREDICT, model.config) + features, input_hooks = model._get_features_from_input_fn(kwargs["input_fn"], tf.estimator.ModeKeys.PREDICT) + estimator_spec = model._call_model_fn(features, None, tf.estimator.ModeKeys.PREDICT, model.config) if len(output_tensor_names) == 0: - outputs = [tensor.name for tensor in estimator_spec.predictions.values()] if\ - isinstance(estimator_spec.predictions, dict) else \ - [estimator_spec.predictions.name] + outputs = ( + [tensor.name for tensor in estimator_spec.predictions.values()] + if isinstance(estimator_spec.predictions, dict) + else [estimator_spec.predictions.name] + ) else: outputs = output_tensor_names @@ -604,15 +609,15 @@ def estimator_session(model, input_tensor_names, output_tensor_names, **kwargs): # dictionary # When a model uses Iterator, we need to have 'MakeIterator' (default # name used by TF) in the output_node_names as well. - output_nodes = list(set([output.split(':')[0] for output in outputs])) - if 'MakeIterator' in [node.op for node in g.as_graph_def().node]: - output_nodes.append('MakeIterator') + output_nodes = list(set([output.split(":")[0] for output in outputs])) + if "MakeIterator" in [node.op for node in g.as_graph_def().node]: + output_nodes.append("MakeIterator") - graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(sess, - g.as_graph_def(), output_nodes) + graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(sess, g.as_graph_def(), output_nodes) return graph_def_session(graph_def, input_tensor_names, outputs, **kwargs) + def saved_model_session(model, input_tensor_names, output_tensor_names, **kwargs): """Build session with saved model. @@ -628,21 +633,25 @@ def saved_model_session(model, input_tensor_names, output_tensor_names, **kwargs """ try: graph_def, input_names, output_names = _get_graph_from_saved_model_v2( - model, input_tensor_names, output_tensor_names) + model, input_tensor_names, output_tensor_names + ) except: graph_def, input_names, output_names = _get_graph_from_saved_model_v1(model) - assert graph_def is not None, 'Can not parse the saved model...' + assert graph_def is not None, "Can not parse the saved model..." return graph_def_session(graph_def, input_names, output_names, **kwargs) + # it's necessary that a session with input output tensors to run the model -SESSIONS = {'frozen_pb': frozen_pb_session, - 'graph_def': graph_def_session, - 'graph': graph_session, - 'saved_model': saved_model_session, - 'keras': keras_session, - 'checkpoint': checkpoint_session, - 'estimator': estimator_session, - 'slim': slim_session,} +SESSIONS = { + "frozen_pb": frozen_pb_session, + "graph_def": graph_def_session, + "graph": graph_session, + "saved_model": saved_model_session, + "keras": keras_session, + "checkpoint": checkpoint_session, + "estimator": estimator_session, + "slim": slim_session, +} class TensorflowBaseModel(BaseModel): @@ -655,16 +664,16 @@ def __init__(self, model, **kwargs): model (string or tensorflow model object): model path or model object. 
""" self._model = model - self._name = '' + self._name = "" self._weights = None self.kwargs = kwargs self._graph_info = {} self._input_tensor_names = [] self._output_tensor_names = [] - self._model_type = '' + self._model_type = "" self._sess = None self._iter_op = None - self._workspace_path = '' + self._workspace_path = "" self._q_config = None self._model_path = None if not isinstance(model, str) else model @@ -680,7 +689,7 @@ def model_path(self, path): def framework(self): """Return framework.""" - return 'tensorflow' + return "tensorflow" @property def name(self): @@ -690,7 +699,7 @@ def name(self): @name.setter def name(self, name): """Set name.""" - self.kwargs.update({'name': name}) + self.kwargs.update({"name": name}) self._name = name @property @@ -731,7 +740,7 @@ def model_type(self): @model_type.setter def model_type(self, model_type): """Set model type.""" - assert model_type in SESSIONS, 'model type not supported....' + assert model_type in SESSIONS, "model type not supported...." self._model_type = model_type @property @@ -769,24 +778,20 @@ def graph_def(self, graph_def): """Set graph defination.""" if self._sess is not None: self._sess.close() - output_sess = SESSIONS['graph_def'](graph_def,\ - self._input_tensor_names, \ - self._output_tensor_names, \ - **self.kwargs) + output_sess = SESSIONS["graph_def"]( + graph_def, self._input_tensor_names, self._output_tensor_names, **self.kwargs + ) self._sess = output_sess[0] self._input_tensor_names = output_sess[1] self._output_tensor_names = output_sess[2] - self.model_type = 'graph_def' + self.model_type = "graph_def" def _load_sess(self, model, **kwargs): if self.name: - kwargs.update({'name': self.name}) + kwargs.update({"name": self.name}) # assert self.model_type, 'model type not set....' 
- output_sess = SESSIONS[self.model_type](model, - self._input_tensor_names, \ - self._output_tensor_names, - **kwargs) + output_sess = SESSIONS[self.model_type](model, self._input_tensor_names, self._output_tensor_names, **kwargs) self._sess = output_sess[0] self._input_tensor_names = output_sess[1] self._output_tensor_names = output_sess[2] @@ -801,8 +806,8 @@ def iter_op(self): if self._sess is None: self._load_sess(self._model, **self.kwargs) op_list = [node.op for node in self._sess.graph.as_graph_def().node] - if 'MakeIterator' in op_list: - self._iter_op.append(self._sess.graph.get_operation_by_name('MakeIterator')) + if "MakeIterator" in op_list: + self._iter_op.append(self._sess.graph.get_operation_by_name("MakeIterator")) return self._iter_op @property @@ -819,9 +824,9 @@ def input_tensor_names(self, tensor_names): logger.warn("Input tensor names is empty.") return if self._sess is not None: - assert validate_graph_node(\ - self.graph_def, tensor_to_node(tensor_names)), \ - 'tensor names {} not in graph'.format(tensor_names) + assert validate_graph_node( + self.graph_def, tensor_to_node(tensor_names) + ), "tensor names {} not in graph".format(tensor_names) self._input_tensor_names = tensor_names @property @@ -838,9 +843,9 @@ def output_tensor_names(self, tensor_names): logger.warn("Output tensor names should not be empty.") return if self._sess is not None: - assert validate_graph_node(\ - self.graph_def, tensor_to_node(tensor_names)), \ - 'tensor names {} not in graph'.format(tensor_names) + assert validate_graph_node( + self.graph_def, tensor_to_node(tensor_names) + ), "tensor names {} not in graph".format(tensor_names) self._output_tensor_names = tensor_names # input/output node names and input/output tensor @@ -863,25 +868,25 @@ def output_node_names(self): def input_tensor(self): """Return input tensor.""" from neural_compressor.adaptor.tf_utils.util import get_tensor_by_name - return [get_tensor_by_name(\ - self.graph, x) for x in self.input_tensor_names] + + return [get_tensor_by_name(self.graph, x) for x in self.input_tensor_names] @property def output_tensor(self): """Return output tensor.""" from neural_compressor.adaptor.tf_utils.util import get_tensor_by_name - return [get_tensor_by_name(\ - self.graph, x) for x in self.output_tensor_names] + + return [get_tensor_by_name(self.graph, x) for x in self.output_tensor_names] def save(self, root=None): """Save Tensorflow model.""" if not root: - root = cfg.default_workspace + '/save.pb' + root = cfg.default_workspace + "/save.pb" root = os.path.abspath(os.path.expanduser(root)) # if not have suffix, default append .pb os.makedirs(os.path.dirname(root), exist_ok=True) - pb_file = root if os.path.split(root)[-1].endswith('.pb') else root + '.pb' - f = tf.io.gfile.GFile(pb_file, 'wb') + pb_file = root if os.path.split(root)[-1].endswith(".pb") else root + ".pb" + f = tf.io.gfile.GFile(pb_file, "wb") f.write(self.graph_def.SerializeToString()) logger.info("Save quantized model to {}.".format(pb_file)) @@ -890,32 +895,35 @@ def export(self, save_path, conf): from neural_compressor.config import TF2ONNXConfig if isinstance(conf, TF2ONNXConfig): - if conf.quant_format == 'QDQ' and conf.opset_version < 13: # pragma: no cover + if conf.quant_format == "QDQ" and conf.opset_version < 13: # pragma: no cover conf.opset_version = 13 - logger.warning("QDQ format requires opset_version >= 13, " + - "we reset opset_version={} here".format(conf.opset_version)) + logger.warning( + "QDQ format requires opset_version >= 13, " + + "we reset 
opset_version={} here".format(conf.opset_version) + ) from neural_compressor.experimental.export import tf_to_fp32_onnx, tf_to_int8_onnx + inputs_as_nchw = conf.kwargs.get("inputs_as_nchw", None) - if conf.dtype == 'int8': + if conf.dtype == "int8": tf_to_int8_onnx( self.graph_def, save_path, opset_version=conf.opset_version, input_names=conf.input_names if conf.input_names else self.input_tensor_names, output_names=conf.output_names if conf.output_names else self.output_tensor_names, - inputs_as_nchw=inputs_as_nchw + inputs_as_nchw=inputs_as_nchw, ) - elif conf.dtype == 'fp32': + elif conf.dtype == "fp32": tf_to_fp32_onnx( self.graph_def, save_path, opset_version=conf.opset_version, input_names=conf.input_names if conf.input_names else self.input_tensor_names, output_names=conf.output_names if conf.output_names else self.output_tensor_names, - inputs_as_nchw=inputs_as_nchw + inputs_as_nchw=inputs_as_nchw, ) - else: # pragma: no cover + else: # pragma: no cover assert False, "Not allowed dtype: {}, pleas use 'fp32' or 'int8'.".format(conf.dtype) else: logger.warning("Unsupported config for export, only TF2ONNXConfig is supported!") @@ -932,6 +940,7 @@ def get_all_weight_names(self): list: weight names list. """ import tensorflow as tf + names = [] for index, layer in enumerate(tf.keras.models.load_model(self._model).layers): if len(layer.weights): @@ -953,8 +962,9 @@ def get_weight(self, tensor_name): @property def model(self): """Return model itself.""" - import time import shutil + import time + root = os.path.abspath(os.path.expanduser(cfg.default_workspace)) root += str(time.time()) if os.path.exists(root): @@ -975,11 +985,12 @@ def report_sparsity(self): df (DataFrame): DataFrame of sparsity of each weight. total_sparsity (float): total sparsity of model. """ + import numpy as np import pandas as pd import tensorflow as tf - import numpy as np - df = pd.DataFrame(columns=['Name', 'Shape', 'NNZ (dense)', 'NNZ (sparse)', "Sparsity(%)"]) - pd.set_option('display.precision', 2) + + df = pd.DataFrame(columns=["Name", "Shape", "NNZ (dense)", "NNZ (sparse)", "Sparsity(%)"]) + pd.set_option("display.precision", 2) param_dims = [2, 4] params_size = 0 sparse_params_size = 0 @@ -990,27 +1001,27 @@ def report_sparsity(self): # as its "type" weights = layer.get_weights()[0] if weights.ndim in param_dims: - param_size, sparse_param_size, dense_param_size = compute_sparsity( - weights) + param_size, sparse_param_size, dense_param_size = compute_sparsity(weights) density = dense_param_size / param_size params_size += param_size sparse_params_size += sparse_param_size - df.loc[len(df.index)] = ([ + df.loc[len(df.index)] = [ index, list(weights.shape), dense_param_size, sparse_param_size, (1 - density) * 100, - ]) + ] total_sparsity = sparse_params_size / params_size * 100 - df.loc[len(df.index)] = ([ - 'Total sparsity:', + df.loc[len(df.index)] = [ + "Total sparsity:", "-", params_size, sparse_params_size, - total_sparsity,]) + total_sparsity, + ] return df, total_sparsity @@ -1022,7 +1033,7 @@ def build_saved_model(self, root=None): Returns: root (str): path to saved model. - builder (tf.compat.v1.saved_model.builder.SavedModelBuilder): builds + builder (tf.compat.v1.saved_model.builder.SavedModelBuilder): builds the SavedModel protocol buffer and saves variables and assets. 
""" if not root: @@ -1030,28 +1041,30 @@ def build_saved_model(self, root=None): root = os.path.abspath(os.path.expanduser(root)) if os.path.exists(root): import shutil + shutil.rmtree(root) os.makedirs(root, exist_ok=True) - from tensorflow.python.saved_model import signature_constants - from tensorflow.python.saved_model import tag_constants + from tensorflow.python.saved_model import signature_constants, tag_constants + from neural_compressor.adaptor.tf_utils.util import get_tensor_by_name + builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(root) sigs = {} with tf.compat.v1.Session(graph=tf.Graph()) as sess: - #(TODO) not directly use self._sess.graph, use self.graph + # (TODO) not directly use self._sess.graph, use self.graph tf.import_graph_def(self.graph.as_graph_def(), name="") g = tf.compat.v1.get_default_graph() inp = [get_tensor_by_name(g, x) for x in self._input_tensor_names] out = [get_tensor_by_name(g, x) for x in self._output_tensor_names] - sigs[signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] = \ - tf.compat.v1.saved_model.signature_def_utils.predict_signature_def( + sigs[ + signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY + ] = tf.compat.v1.saved_model.signature_def_utils.predict_signature_def( {k: v for k, v in zip(self._input_tensor_names, inp)}, - {k: v for k, v in zip(self._output_tensor_names, out)}) - builder.add_meta_graph_and_variables(sess, - [tag_constants.SERVING], - signature_def_map=sigs) + {k: v for k, v in zip(self._output_tensor_names, out)}, + ) + builder.add_meta_graph_and_variables(sess, [tag_constants.SERVING], signature_def_map=sigs) return root, builder def save(self, root=None): @@ -1060,25 +1073,27 @@ def save(self, root=None): builder.save() logger.info("Save quantized model to {}.".format(root)) + class TensorflowQATModel(TensorflowSavedModelModel): """Build Tensorflow QAT model.""" - def __init__(self, model='', **kwargs): + def __init__(self, model="", **kwargs): """Initialize a Tensorflow QAT model. Args: model (string or tf.keras.Model object): model path or model object. """ - assert isinstance(model, tf.keras.Model) or isinstance(model, str), \ - "The TensorflowQATModel should be initialized either by a string or a tf.keras.Model." + assert isinstance(model, tf.keras.Model) or isinstance( + model, str + ), "The TensorflowQATModel should be initialized either by a string or a tf.keras.Model." 
super(TensorflowQATModel, self).__init__(model) self.keras_model = None - self.model_type = 'keras' + self.model_type = "keras" @property def model(self): """Return model itself.""" - if self.keras_model == None: + if self.keras_model is None: if isinstance(self._model, tf.keras.Model): self.keras_model = self._model else: @@ -1095,59 +1110,61 @@ def model(self, q_model): def frozen_graph_def(self): """Get frozen graph_def.""" graph_def = tf.compat.v1.graph_util.convert_variables_to_constants( - self.sess, self.sess.graph_def, self.output_node_names) + self.sess, self.sess.graph_def, self.output_node_names + ) return graph_def def save(self, root=None): """Save Tensorflow QAT model.""" if not root: - root = cfg.default_workspace + '/saved_model' + root = cfg.default_workspace + "/saved_model" root = os.path.abspath(os.path.expanduser(root)) os.makedirs(os.path.dirname(root), exist_ok=True) - if root.endswith('.pb'): - saved_format = 'pb file' + if root.endswith(".pb"): + saved_format = "pb file" graph_def = self.frozen_graph_def - f=tf.io.gfile.GFile(root,'wb') - f.write(graph_def.SerializeToString()) + f = tf.io.gfile.GFile(root, "wb") + f.write(graph_def.SerializeToString()) else: q_aware_model = self.keras_model q_aware_model.save(root) - saved_format = 'saved_model' - if root.endswith('.h5'): - saved_format = 'h5 file' + saved_format = "saved_model" + if root.endswith(".h5"): + saved_format = "h5 file" logger.info("Save quantized model to {}.".format(saved_format)) return root + class TensorflowCheckpointModel(TensorflowBaseModel): """Build Tensorflow checkpoint model.""" @property def graph_def(self): """Return graph defination.""" - if self.model_type == 'graph_def': + if self.model_type == "graph_def": return self.sess.graph.as_graph_def() - from neural_compressor.adaptor.tf_utils.util import _parse_ckpt_bn_input from tensorflow.compat.v1 import graph_util + + from neural_compressor.adaptor.tf_utils.util import _parse_ckpt_bn_input + graph_def = self.sess.graph.as_graph_def() graph_def = _parse_ckpt_bn_input(graph_def) return graph_util.convert_variables_to_constants( - sess=self._sess, - input_graph_def=graph_def, - output_node_names=self.output_node_names) + sess=self._sess, input_graph_def=graph_def, output_node_names=self.output_node_names + ) @graph_def.setter def graph_def(self, graph_def): """Set graph defination.""" if self._sess is not None: self._sess.close() - output_sess = SESSIONS['graph_def'](graph_def, - self._input_tensor_names, \ - self._output_tensor_names, \ - **self.kwargs) + output_sess = SESSIONS["graph_def"]( + graph_def, self._input_tensor_names, self._output_tensor_names, **self.kwargs + ) self._sess = output_sess[0] self._input_tensor_names = output_sess[1] self._output_tensor_names = output_sess[2] - self.model_type = 'graph_def' + self.model_type = "graph_def" @property def model(self): @@ -1155,15 +1172,17 @@ def model(self): return self -TENSORFLOW_MODELS = {'frozen_pb': TensorflowBaseModel, - 'graph_def': TensorflowBaseModel, - 'graph': TensorflowBaseModel, - 'checkpoint': TensorflowCheckpointModel, - 'estimator': TensorflowBaseModel, - 'slim': TensorflowBaseModel, - 'saved_model': TensorflowSavedModelModel, - 'keras': TensorflowSavedModelModel - } +TENSORFLOW_MODELS = { + "frozen_pb": TensorflowBaseModel, + "graph_def": TensorflowBaseModel, + "graph": TensorflowBaseModel, + "checkpoint": TensorflowCheckpointModel, + "estimator": TensorflowBaseModel, + "slim": TensorflowBaseModel, + "saved_model": TensorflowSavedModelModel, + "keras": 
TensorflowSavedModelModel, +} + class TensorflowModel(object): """A wrapper to construct a Tensorflow Model.""" diff --git a/neural_compressor/model/torch_model.py b/neural_compressor/model/torch_model.py index 40d75051dac..a5b2da03133 100644 --- a/neural_compressor/model/torch_model.py +++ b/neural_compressor/model/torch_model.py @@ -14,26 +14,26 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Class for PyTorch model.""" import copy -import os import inspect +import os import sys from collections import OrderedDict, UserDict -from neural_compressor.utils.utility import LazyImport, compute_sparsity -from neural_compressor.utils import logger + from neural_compressor import config as cfg from neural_compressor.model.base_model import BaseModel +from neural_compressor.utils import logger +from neural_compressor.utils.utility import LazyImport, compute_sparsity -torch = LazyImport('torch') -yaml = LazyImport('yaml') -json = LazyImport('json') -np = LazyImport('numpy') -onnx = LazyImport('onnx') -ort = LazyImport('onnxruntime') -ortq = LazyImport('onnxruntime.quantization') +torch = LazyImport("torch") +yaml = LazyImport("yaml") +json = LazyImport("json") +np = LazyImport("numpy") +onnx = LazyImport("onnx") +ort = LazyImport("onnxruntime") +ortq = LazyImport("onnxruntime.quantization") class PyTorchBaseModel(torch.nn.Module, BaseModel): @@ -49,9 +49,9 @@ def __init__(self, model, **kwargs): self._model = model assert isinstance(model, torch.nn.Module), "model should be pytorch nn.Module." self.handles = [] - self.tune_cfg= None + self.tune_cfg = None self.q_config = None - self._workspace_path = '' + self._workspace_path = "" self.is_quantized = False self.fp32_model = model self.kwargs = kwargs if kwargs else None @@ -60,28 +60,29 @@ def __repr__(self): """Describe a PyTorchBaseModel as a string.""" # rewirte this func to avoid printing fp32_model from torch.nn.modules.module import _addindent + # We treat the extra repr like the sub-module, one item per line extra_lines = [] extra_repr = self.extra_repr() # empty string will be split into list [''] if extra_repr: - extra_lines = extra_repr.split('\n') + extra_lines = extra_repr.split("\n") child_lines = [] for key, module in self._modules.items(): - if key == 'fp32_model': + if key == "fp32_model": continue mod_str = repr(module) mod_str = _addindent(mod_str, 2) - child_lines.append('(' + key + '): ' + mod_str) + child_lines.append("(" + key + "): " + mod_str) lines = extra_lines + child_lines - main_str = self._get_name() + '(' + main_str = self._get_name() + "(" if lines: # simple one-liner info, which most builtin Modules will use if len(extra_lines) == 1 and not child_lines: main_str += extra_lines[0] else: - main_str += '\n ' + '\n '.join(lines) + '\n' - main_str += ')' + main_str += "\n " + "\n ".join(lines) + "\n" + main_str += ")" return main_str def forward(self, *args, **kwargs): @@ -110,8 +111,7 @@ def fp32_model(self, fp32_model): def register_forward_pre_hook(self): """Register forward pre hook.""" - self.handles.append( - self._model.register_forward_pre_hook(self.generate_forward_pre_hook())) + self.handles.append(self._model.register_forward_pre_hook(self.generate_forward_pre_hook())) def remove_hooks(self): """Remove hooks.""" @@ -121,21 +121,23 @@ def remove_hooks(self): def generate_forward_pre_hook(self): """Generate forward pre hook.""" # skip input argument 'self' in forward - 
self.input_args = OrderedDict().fromkeys( - inspect.getfullargspec(self._model.forward).args[1:], None) + self.input_args = OrderedDict().fromkeys(inspect.getfullargspec(self._model.forward).args[1:], None) + # a wrapper is needed to insert self into the actual hook def actual_forward_pre_hook(module, input): args, _, _, values = inspect.getargvalues(inspect.stack()[1].frame) # intersection update kw arguments - self.input_args.update(values['kwargs']) + self.input_args.update(values["kwargs"]) # update arguments if "input" in values: - for (single_input, single_arg) in \ - zip(values['input'], list(self.input_args.keys())[:len(values['input'])]): + for single_input, single_arg in zip( + values["input"], list(self.input_args.keys())[: len(values["input"])] + ): self.input_args[single_arg] = single_input elif "args" in values: - for (single_input, single_arg) in \ - zip(values['args'], list(self.input_args.keys())[:len(values['args'])]): + for single_input, single_arg in zip( + values["args"], list(self.input_args.keys())[: len(values["args"])] + ): self.input_args[single_arg] = single_input else: assert False, "there is no input field was found!" @@ -144,7 +146,7 @@ def actual_forward_pre_hook(module, input): def framework(self): """Return framework.""" - return 'pytorch' + return "pytorch" def get_all_weight_names(self): """Get weight names.""" @@ -170,9 +172,9 @@ def update_weights(self, tensor_name, new_tensor): # TODO: copy tensor option to new tensor is better device = next(self._model.parameters()).device new_tensor = torch.tensor(new_tensor).float().to(device) - module_index = '.'.join(tensor_name.split('.')[:-1]) + module_index = ".".join(tensor_name.split(".")[:-1]) module = dict(self._model.named_modules())[module_index] - getattr(module, tensor_name.split('.')[-1]).data = new_tensor.data + getattr(module, tensor_name.split(".")[-1]).data = new_tensor.data def update_gradient(self, grad_name, new_grad): """Update grad value. @@ -183,7 +185,7 @@ def update_gradient(self, grad_name, new_grad): """ device = next(self._model.parameters()).device new_grad = torch.tensor(new_grad).float().to(device) - params = [p for n,p in self._model.named_parameters() if n == grad_name] + params = [p for n, p in self._model.named_parameters() if n == grad_name] assert len(params) == 1, "lpot can only update grad of one tensor at one time" param = params[0] param.grad.copy_(new_grad) @@ -198,7 +200,7 @@ def prune_weights_(self, tensor_name, mask): state_dict = self._model.state_dict() for name in state_dict: if name == tensor_name: - state_dict[name].masked_fill_(mask.to(state_dict[name].device), 0.) + state_dict[name].masked_fill_(mask.to(state_dict[name].device), 0.0) def get_inputs(self, input_name=None): """Get inputs of model. 
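# --- Editor's note: illustrative sketch, not part of this patch. ----------
# generate_forward_pre_hook(), reformatted above, registers a hook that
# records the arguments of the wrapped module's forward() the next time it
# runs.  A much-simplified standalone version of the same trick (my own
# names; it only captures positional inputs):
import torch

captured = {}

def capture_inputs(module, inputs):
    # called by PyTorch right before module.forward(*inputs)
    captured["inputs"] = inputs

model = torch.nn.Linear(4, 2)
handle = model.register_forward_pre_hook(capture_inputs)
model(torch.randn(1, 4))   # hook fires here and fills `captured`
handle.remove()            # mirrors PyTorchBaseModel.remove_hooks()
# ---------------------------------------------------------------------------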
@@ -223,14 +225,13 @@ def get_gradient(self, input_tensor): if isinstance(input_tensor, str): for name, tensor in self._model.named_parameters(): if name == input_tensor: - assert tensor.grad is not None, 'Please call backward() before get_gradient' + assert tensor.grad is not None, "Please call backward() before get_gradient" return np.array(tensor.grad.cpu()) elif isinstance(input_tensor, torch.Tensor): - assert input_tensor.grad is not None, 'Please call backward() before get_gradient' + assert input_tensor.grad is not None, "Please call backward() before get_gradient" return np.array(input_tensor.grad.cpu()) - else: # pragma: no cover - logger.error("Expect str or torch.Tensor in get_gradient, " \ - "but get {}.".format(type(input_tensor))) + else: # pragma: no cover + logger.error("Expect str or torch.Tensor in get_gradient, " "but get {}.".format(type(input_tensor))) def report_sparsity(self): """Get sparsity of the model. @@ -243,8 +244,9 @@ def report_sparsity(self): logger.info("INC IPEX don't support compute sparsity for model in TorchScript format now.") return [0.0] import pandas as pd - df = pd.DataFrame(columns=['Name', 'Shape', 'NNZ (dense)', 'NNZ (sparse)', "Sparsity(%)"]) - pd.set_option('display.precision', 2) + + df = pd.DataFrame(columns=["Name", "Shape", "NNZ (dense)", "NNZ (sparse)", "Sparsity(%)"]) + pd.set_option("display.precision", 2) # TODO: need to specify modules(Conv2d, Linear, etc.) instead of dims param_dims = [2, 4] params_size = 0 @@ -252,35 +254,39 @@ def report_sparsity(self): model_params = dict(self._model.state_dict()) for name, param in model_params.items(): # '_packed_params._packed_params' and dtype is specific for quantized module - if '_packed_params._packed_params' in name and isinstance(param, tuple): + if "_packed_params._packed_params" in name and isinstance(param, tuple): param = param[0] - if hasattr(param, 'dtype') and param.dtype in [torch.qint8, torch.quint8]: + if hasattr(param, "dtype") and param.dtype in [torch.qint8, torch.quint8]: param = param.dequantize() - if hasattr(param, 'dim') and param.dim() in param_dims \ - and any(type in name for type in ['weight', 'bias', '_packed_params']): - param_size, sparse_param_size, dense_param_size = compute_sparsity( - param.detach().cpu().numpy()) + if ( + hasattr(param, "dim") + and param.dim() in param_dims + and any(type in name for type in ["weight", "bias", "_packed_params"]) + ): + param_size, sparse_param_size, dense_param_size = compute_sparsity(param.detach().cpu().numpy()) density = dense_param_size / param_size params_size += param_size sparse_params_size += sparse_param_size - df.loc[len(df.index)] = ([ + df.loc[len(df.index)] = [ name, list(param.shape), dense_param_size, sparse_param_size, (1 - density) * 100, - ]) + ] total_sparsity = sparse_params_size / params_size * 100 - df.loc[len(df.index)] = ([ - 'Total sparsity:', + df.loc[len(df.index)] = [ + "Total sparsity:", "-", params_size, sparse_params_size, - total_sparsity,]) + total_sparsity, + ] return df, total_sparsity + class PyTorchModel(PyTorchBaseModel): """Build PyTorchModel object.""" @@ -297,11 +303,10 @@ def workspace_path(self): def workspace_path(self, path): """Set workspace path.""" from neural_compressor.utils.pytorch import load + workspace_path = path - weights_file = os.path.join(os.path.abspath(os.path.expanduser(workspace_path)), - 'best_model.pt') - assert os.path.exists( - weights_file), "weight file %s didn't exist" % weights_file + weights_file = 
os.path.join(os.path.abspath(os.path.expanduser(workspace_path)), "best_model.pt") + assert os.path.exists(weights_file), "weight file %s didn't exist" % weights_file self._model = load(weights_file, self._model) def save(self, root=None): @@ -313,44 +318,47 @@ def save(self, root=None): try: stat_dict = self._model.state_dict() if self.q_config: - if self.q_config['approach'] == 'post_training_weight_only': + if self.q_config["approach"] == "post_training_weight_only": from ..adaptor.torch_utils.util import collect_weight_info + weight_config_path = os.path.join(root, "qconfig.json") weight_config = collect_weight_info(self.model, self.q_config) - with open(weight_config_path, 'w') as f: - json.dump(weight_config, f, indent = 4) - if hasattr(self, 'gptq_config') and self.gptq_config: + with open(weight_config_path, "w") as f: + json.dump(weight_config, f, indent=4) + if hasattr(self, "gptq_config") and self.gptq_config: gptq_config_path = os.path.join(root, "gptq_config.json") - with open(gptq_config_path, 'w') as f: - json.dump(self.gptq_config, f, indent = 4) + with open(gptq_config_path, "w") as f: + json.dump(self.gptq_config, f, indent=4) else: - stat_dict['best_configure'] = self.q_config + stat_dict["best_configure"] = self.q_config torch.save(stat_dict, os.path.join(root, "best_model.pt")) logger.info("Save config file and weights of quantized model to {}.".format(root)) - except IOError as e: # pragma: no cover + except IOError as e: # pragma: no cover logger.error("Fail to save configure file and weights due to {}.".format(e)) def quantized_state_dict(self): """Load quantized state dict.""" try: stat_dict = self._model.state_dict() - stat_dict['best_configure'] = self.q_config - except IOError as e: # pragma: no cover + stat_dict["best_configure"] = self.q_config + except IOError as e: # pragma: no cover logger.error("Fail to dump configure and weights due to {}.".format(e)) return stat_dict def load_quantized_state_dict(self, stat_dict): """Load quantized state with given dict.""" from ..utils.pytorch import load - self.q_config = stat_dict['best_configure'] + + self.q_config = stat_dict["best_configure"] self._model = load(stat_dict, self._model) @property def graph_info(self): """Return graph info.""" from ..adaptor.pytorch import get_ops_recursively + op_map = {} - get_ops_recursively(self._model, '', op_map) + get_ops_recursively(self._model, "", op_map) return op_map def export( @@ -360,17 +368,19 @@ def export( ): """Export PyTorch model to ONNX model.""" from packaging.version import Version + from ..adaptor.pytorch import get_torch_version + version = get_torch_version() - if version.release < Version("1.12.0").release: # pragma: no cover - assert False, "PyTorch to ONNX export function requires a minimum torch version of {}, " \ + if version.release < Version("1.12.0").release: # pragma: no cover + assert False, ( + "PyTorch to ONNX export function requires a minimum torch version of {}, " "but the torch version found is {}".format(Version("1.12.0"), version) + ) - from neural_compressor.experimental.export import ( - torch_to_fp32_onnx, - torch_to_int8_onnx) + from neural_compressor.experimental.export import torch_to_fp32_onnx, torch_to_int8_onnx - if conf.dtype == 'int8': + if conf.dtype == "int8": torch_to_int8_onnx( self.fp32_model, self.model, @@ -382,8 +392,9 @@ def export( input_names=conf.input_names, output_names=conf.output_names, quant_format=conf.quant_format, - verbose=True,) - elif conf.dtype == 'fp32': + verbose=True, + ) + elif conf.dtype == "fp32": 
torch_to_fp32_onnx( self.model, save_path, @@ -393,87 +404,99 @@ def export( input_names=conf.input_names, output_names=conf.output_names, do_constant_folding=True, - verbose=True,) - else: # pragma: no cover + verbose=True, + ) + else: # pragma: no cover assert False, "Not allowed dtype: {}, pleas use 'fp32' or 'int8'.".format(conf.dtype) - def export_compressed_model(self, qweight_config_path=None, sym_full_range=False, - compression_dtype=torch.int32, compression_dim=1, - scale_dtype=torch.float32, gptq_config_path=None, - device='cpu'): + def export_compressed_model( + self, + qweight_config_path=None, + sym_full_range=False, + compression_dtype=torch.int32, + compression_dim=1, + scale_dtype=torch.float32, + gptq_config_path=None, + device="cpu", + ): """Convert Linear to WeightOnlyLinear for low memory inference. Args: qweight_config_path (str, optional): Path of qconfig.json. Defaults to None. sym_full_range (bool, optional): Whether to leverage the full compression range under symmetric quantization. Defaults to False. - compression_dtype (torch.Tensor, optional): The target dtype after comoression. + compression_dtype (torch.Tensor, optional): The target dtype after comoression. Defaults to torch.int32. - compression_dim (int, optional): Select from [0, 1], 0 is output channel, + compression_dim (int, optional): Select from [0, 1], 0 is output channel, 1 is input channel. Defaults to 1. - scale_dtype (torch.Tensor, optional): Use float32 or float16. + scale_dtype (torch.Tensor, optional): Use float32 or float16. Defaults to torch.float32. gptq_config_path (str, optional): Path of gptq_config.json. Defaults to None. device (str, optional): choose device for compression. Defaults to cpu. """ - from ..adaptor.torch_utils.util import fetch_module, set_module - from ..adaptor.torch_utils.weight_only import rtn_quantize, quant_weight_w_scale - from ..adaptor.torch_utils.util import collect_weight_info from ..adaptor.torch_utils.model_wrapper import WeightOnlyLinear + from ..adaptor.torch_utils.util import collect_weight_info, fetch_module, set_module + from ..adaptor.torch_utils.weight_only import quant_weight_w_scale, rtn_quantize + if qweight_config_path is not None: - with open(qweight_config_path, 'r') as f: + with open(qweight_config_path, "r") as f: weight_config = json.load(f) else: weight_config = collect_weight_info(self.model, self.q_config) if gptq_config_path is not None: - with open(gptq_config_path, 'r') as f: + with open(gptq_config_path, "r") as f: gptq_config = json.load(f) else: - gptq_config = self.gptq_config if hasattr(self, 'gptq_config') else {} + gptq_config = self.gptq_config if hasattr(self, "gptq_config") else {} if gptq_config: for k, v in weight_config.items(): logger.debug(f"Compressing {k} on device {device}") - if v['dtype'] == 'fp32': + if v["dtype"] == "fp32": continue else: - dtype = v['dtype'] - num_bits = v['bits'] - group_size = v['group_size'] - scheme = v['scheme'] + dtype = v["dtype"] + num_bits = v["bits"] + group_size = v["group_size"] + scheme = v["scheme"] m = fetch_module(self.model, k) if k not in gptq_config: new_module = rtn_quantize( - m, num_bits, group_size, scheme, - data_type=dtype, - return_int=True, + m, + num_bits, + group_size, + scheme, + data_type=dtype, + return_int=True, sym_full_range=sym_full_range, - compression_dtype=compression_dtype, - compression_dim=compression_dim, - scale_dtype=scale_dtype, - device=device + compression_dtype=compression_dtype, + compression_dim=compression_dim, + scale_dtype=scale_dtype, + 
device=device, ) set_module(self.model, k, new_module) continue gptq_conf = gptq_config[k] - if 'perm' in gptq_conf: - gptq_perm = torch.tensor(gptq_conf['perm']) + if "perm" in gptq_conf: + gptq_perm = torch.tensor(gptq_conf["perm"]) fp32_weight = m.weight.data[:, gptq_perm] else: fp32_weight = m.weight.data gptq_perm = None - gptq_scale = torch.tensor(gptq_conf['scale']) - gptq_zp = None if scheme == 'sym' else torch.tensor(gptq_conf['zero']) - int_weight = quant_weight_w_scale( - fp32_weight, gptq_scale, gptq_zp, group_size - ) + gptq_scale = torch.tensor(gptq_conf["scale"]) + gptq_zp = None if scheme == "sym" else torch.tensor(gptq_conf["zero"]) + int_weight = quant_weight_w_scale(fp32_weight, gptq_scale, gptq_zp, group_size) new_module = WeightOnlyLinear( - m.in_features, m.out_features, num_bits, group_size, + m.in_features, + m.out_features, + num_bits, + group_size, dtype=dtype, - zp=gptq_zp is not None, bias=m.bias is not None, + zp=gptq_zp is not None, + bias=m.bias is not None, gptq_perm=gptq_perm is not None, - compression_dtype=compression_dtype, - compression_dim=compression_dim, - scale_dtype=scale_dtype, + compression_dtype=compression_dtype, + compression_dim=compression_dim, + scale_dtype=scale_dtype, device=device, ) new_module.pack(int_weight, gptq_scale, gptq_zp, m.bias, gptq_perm) @@ -481,23 +504,26 @@ def export_compressed_model(self, qweight_config_path=None, sym_full_range=False else: for k, v in weight_config.items(): logger.debug(f"Compressing {k} on device {device}") - if v['dtype'] == 'fp32': + if v["dtype"] == "fp32": continue else: - dtype = v['dtype'] - num_bits = v['bits'] - group_size = v['group_size'] - scheme = v['scheme'] + dtype = v["dtype"] + num_bits = v["bits"] + group_size = v["group_size"] + scheme = v["scheme"] mod = fetch_module(self.model, k) mod = rtn_quantize( - mod, num_bits, group_size, scheme, - data_type=dtype, - return_int=True, + mod, + num_bits, + group_size, + scheme, + data_type=dtype, + return_int=True, sym_full_range=sym_full_range, - compression_dtype=compression_dtype, - compression_dim=compression_dim, - scale_dtype=scale_dtype, - device=device + compression_dtype=compression_dtype, + compression_dim=compression_dim, + scale_dtype=scale_dtype, + device=device, ) set_module(self.model, k, mod) return self.model @@ -511,7 +537,7 @@ def __init__(self, model, **kwargs): super(PyTorchFXModel, self).__init__(model, **kwargs) -class IPEXModel(PyTorchBaseModel): # pragma: no cover +class IPEXModel(PyTorchBaseModel): # pragma: no cover """Build IPEXModel object.""" def __init__(self, model, **kwargs): @@ -532,12 +558,10 @@ def workspace_path(self): def workspace_path(self, path): """Set workspace path.""" self._workspace_path = path - tune_cfg_file = os.path.join(os.path.abspath(os.path.expanduser(path)), - 'best_configure.json') - assert os.path.exists( - tune_cfg_file), "tune configure file %s didn't exist" % tune_cfg_file + tune_cfg_file = os.path.join(os.path.abspath(os.path.expanduser(path)), "best_configure.json") + assert os.path.exists(tune_cfg_file), "tune configure file %s didn't exist" % tune_cfg_file - with open(tune_cfg_file, 'r') as f: + with open(tune_cfg_file, "r") as f: self.tune_cfg = json.load(f) def save(self, root=None): @@ -547,8 +571,8 @@ def save(self, root=None): root = os.path.abspath(os.path.expanduser(root)) os.makedirs(root, exist_ok=True) try: - with open(os.path.join(root, "best_configure.json"), 'w') as f: - json.dump(self.tune_cfg, f, indent = 4) + with open(os.path.join(root, "best_configure.json"), "w") 
as f: + json.dump(self.tune_cfg, f, indent=4) logger.info("Save config file of quantized model to {}.".format(root)) except IOError as e: logger.error("Fail to save configure file and weights due to {}.".format(e)) diff --git a/neural_compressor/profiling/parser/factory.py b/neural_compressor/profiling/parser/factory.py index db0f5158bbb..c84057b1413 100644 --- a/neural_compressor/profiling/parser/factory.py +++ b/neural_compressor/profiling/parser/factory.py @@ -16,13 +16,14 @@ from typing import Optional from neural_compressor.model import BaseModel +from neural_compressor.profiling.parser.onnx_parser.factory import OnnxrtParserFactory from neural_compressor.profiling.parser.parser import ProfilingParser from neural_compressor.profiling.parser.tensorflow_parser.factory import TensorFlowParserFactory -from neural_compressor.profiling.parser.onnx_parser.factory import OnnxrtParserFactory class ParserFactory: """Parser factory.""" + @staticmethod def get_parser( model: BaseModel, @@ -44,6 +45,5 @@ def get_parser( parser = framework_parser.get(model.framework(), None) if parser is None: - raise Exception( - f"Profiling Parser for '{model.framework()}' framework is not supported.") + raise Exception(f"Profiling Parser for '{model.framework()}' framework is not supported.") return parser(logs) diff --git a/neural_compressor/profiling/parser/onnx_parser/factory.py b/neural_compressor/profiling/parser/onnx_parser/factory.py index f5eab941801..4a296c42adf 100644 --- a/neural_compressor/profiling/parser/onnx_parser/factory.py +++ b/neural_compressor/profiling/parser/onnx_parser/factory.py @@ -16,12 +16,12 @@ from typing import List, Optional -from neural_compressor.profiling.parser.onnx_parser.parser import \ - OnnxProfilingParser +from neural_compressor.profiling.parser.onnx_parser.parser import OnnxProfilingParser class OnnxrtParserFactory: """Parser factory.""" + @staticmethod def get_parser( logs: List, diff --git a/neural_compressor/profiling/parser/onnx_parser/parser.py b/neural_compressor/profiling/parser/onnx_parser/parser.py index 7d9002884ed..6cb2d719971 100644 --- a/neural_compressor/profiling/parser/onnx_parser/parser.py +++ b/neural_compressor/profiling/parser/onnx_parser/parser.py @@ -23,6 +23,7 @@ class OnnxProfilingParser(ProfilingParser): """Parser class is responsible for parsing profiling log files.""" + def process(self) -> List[dict]: """Process profiling logs. @@ -51,7 +52,7 @@ def process(self) -> List[dict]: dur = int(node.get("dur") or 0) # Get the duration of the node # Skip nodes with missing or invalid values - if (category is None or node.get("name") is None or op_name is None): + if category is None or node.get("name") is None or op_name is None: continue # Update the summarized data for this operation @@ -62,8 +63,9 @@ def process(self) -> List[dict]: elif category == "kernel": summarized[op_name]["accelerator_execution_time"] += dur summarized[op_name]["op_defined"] += 1 - summarized[op_name]["op_run"] += 0 if not ( - dur or node.get("args", {}).get("thread_scheduling_stats")) else 1 + summarized[op_name]["op_run"] += ( + 0 if not (dur or node.get("args", {}).get("thread_scheduling_stats")) else 1 + ) for node in summarized.values(): self.add_result(ProfilingResult(**node)) diff --git a/neural_compressor/profiling/parser/parser.py b/neural_compressor/profiling/parser/parser.py index 2d769e0c721..984b37fa00c 100644 --- a/neural_compressor/profiling/parser/parser.py +++ b/neural_compressor/profiling/parser/parser.py @@ -14,14 +14,15 @@ # limitations under the License. 
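The per-op aggregation that OnnxProfilingParser.process() performs on an ONNX Runtime trace can be pictured with a few hypothetical events; the category handling below is deliberately simplified, and each resulting dict simply mirrors the keyword arguments of ProfilingResult:

from collections import defaultdict

# Hypothetical, trimmed ONNX Runtime trace entries (not taken from a real run).
trace_events = [
    {"cat": "Node", "name": "conv1_fence", "args": {"op_name": "Conv"}, "dur": 120},
    {"cat": "kernel", "name": "conv1_kernel", "args": {"op_name": "Conv"}, "dur": 95},
    {"cat": "Node", "name": "relu1_fence", "args": {"op_name": "Relu"}, "dur": 15},
]

summarized = defaultdict(
    lambda: {
        "node_name": None,
        "total_execution_time": 0,
        "accelerator_execution_time": 0,
        "cpu_execution_time": 0,
        "op_run": 0,
        "op_defined": 0,
    }
)

for node in trace_events:
    op_name = node.get("args", {}).get("op_name")
    dur = int(node.get("dur") or 0)
    # Skip nodes with missing or invalid values, as process() does.
    if node.get("cat") is None or node.get("name") is None or op_name is None:
        continue
    entry = summarized[op_name]
    entry["node_name"] = op_name
    entry["total_execution_time"] += dur
    if node["cat"] == "kernel":
        entry["accelerator_execution_time"] += dur
    else:
        entry["cpu_execution_time"] += dur  # simplified; the real split depends on the category
    entry["op_defined"] += 1
    entry["op_run"] += 1 if dur else 0

for entry in summarized.values():
    print(entry)  # each dict matches the keyword arguments of ProfilingResult(**entry)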
"""Parsers for log files.""" -from neural_compressor.profiling.parser.result import ProfilingResult from abc import ABC, abstractmethod - from typing import List +from neural_compressor.profiling.parser.result import ProfilingResult + class ProfilingParser(ABC): """Parser class is responsible for parsing profiling log files.""" + def __init__(self, logs: list) -> None: """Initialize parser. @@ -39,7 +40,6 @@ def results(self) -> List[ProfilingResult]: """Get profiling results. Returns: list of ProfilingResult entries - """ return self._results diff --git a/neural_compressor/profiling/parser/result.py b/neural_compressor/profiling/parser/result.py index 7aa2cf6bb74..d52c3aa9c65 100644 --- a/neural_compressor/profiling/parser/result.py +++ b/neural_compressor/profiling/parser/result.py @@ -20,13 +20,13 @@ class ProfilingResult: """Profiling result class.""" def __init__( - self, - node_name: str, - total_execution_time: int, - accelerator_execution_time: int, - cpu_execution_time: int, - op_run: int, - op_defined: int, + self, + node_name: str, + total_execution_time: int, + accelerator_execution_time: int, + cpu_execution_time: int, + op_run: int, + op_defined: int, ) -> None: """Create profiling result instance. diff --git a/neural_compressor/profiling/parser/tensorflow_parser/factory.py b/neural_compressor/profiling/parser/tensorflow_parser/factory.py index e8135ff2736..90d1a8f04e7 100644 --- a/neural_compressor/profiling/parser/tensorflow_parser/factory.py +++ b/neural_compressor/profiling/parser/tensorflow_parser/factory.py @@ -16,12 +16,12 @@ from typing import List, Optional -from neural_compressor.profiling.parser.tensorflow_parser.parser import \ - TensorFlowProfilingParser +from neural_compressor.profiling.parser.tensorflow_parser.parser import TensorFlowProfilingParser class TensorFlowParserFactory: """Parser factory.""" + @staticmethod def get_parser( logs: List, diff --git a/neural_compressor/profiling/profiler/factory.py b/neural_compressor/profiling/profiler/factory.py index ff1324f5a04..2273e0375e0 100644 --- a/neural_compressor/profiling/profiler/factory.py +++ b/neural_compressor/profiling/profiler/factory.py @@ -17,12 +17,10 @@ from neural_compressor.experimental.data.dataloaders.base_dataloader import BaseDataLoader from neural_compressor.model import BaseModel +from neural_compressor.profiling.profiler.onnxrt_profiler.factory import ProfilerFactory as OnnxrtProfilerFactory from neural_compressor.profiling.profiler.profiler import Profiler from neural_compressor.profiling.profiler.tensorflow_profiler.factory import ( - ProfilerFactory as TensorflowProfilerFactory -) -from neural_compressor.profiling.profiler.onnxrt_profiler.factory import ( - ProfilerFactory as OnnxrtProfilerFactory + ProfilerFactory as TensorflowProfilerFactory, ) @@ -31,9 +29,9 @@ class ProfilerFactory: @staticmethod def get_profiler( - model: BaseModel, - dataloader: BaseDataLoader, - log_file: Optional[str] = None, + model: BaseModel, + dataloader: BaseDataLoader, + log_file: Optional[str] = None, ) -> Optional[Profiler]: """Get profiling for specified framework. 
diff --git a/neural_compressor/profiling/profiler/onnxrt_profiler/factory.py b/neural_compressor/profiling/profiler/onnxrt_profiler/factory.py index 9a1b1a6e2ed..0317c557f6c 100644 --- a/neural_compressor/profiling/profiler/onnxrt_profiler/factory.py +++ b/neural_compressor/profiling/profiler/onnxrt_profiler/factory.py @@ -16,8 +16,7 @@ from typing import Optional -from neural_compressor.experimental.data.dataloaders.onnxrt_dataloader import \ - ONNXRTDataLoader +from neural_compressor.experimental.data.dataloaders.onnxrt_dataloader import ONNXRTDataLoader from neural_compressor.model.onnx_model import ONNXModel from neural_compressor.profiling.profiler.onnxrt_profiler.profiler import Profiler @@ -27,11 +26,11 @@ class ProfilerFactory: @staticmethod def get_profiler( - model: ONNXModel, - dataloader: ONNXRTDataLoader, - log_file: Optional[str] = None, - *args, - **kwargs, + model: ONNXModel, + dataloader: ONNXRTDataLoader, + log_file: Optional[str] = None, + *args, + **kwargs, ) -> Profiler: """Get profiling for specified framework. diff --git a/neural_compressor/profiling/profiler/onnxrt_profiler/profiler.py b/neural_compressor/profiling/profiler/onnxrt_profiler/profiler.py index 34fa5c1d7b8..3ec6c866d95 100644 --- a/neural_compressor/profiling/profiler/onnxrt_profiler/profiler.py +++ b/neural_compressor/profiling/profiler/onnxrt_profiler/profiler.py @@ -18,11 +18,9 @@ from pathlib import Path from typing import Optional -from neural_compressor.experimental.data.dataloaders.onnxrt_dataloader import \ - ONNXRTDataLoader +from neural_compressor.experimental.data.dataloaders.onnxrt_dataloader import ONNXRTDataLoader from neural_compressor.model.onnx_model import ONNXModel -from neural_compressor.profiling.profiler.onnxrt_profiler.utils import \ - create_onnx_config +from neural_compressor.profiling.profiler.onnxrt_profiler.utils import create_onnx_config from neural_compressor.profiling.profiler.profiler import Profiler as Parent diff --git a/neural_compressor/profiling/profiler/tensorflow_profiler/factory.py b/neural_compressor/profiling/profiler/tensorflow_profiler/factory.py index 9efb425f83e..888e66eb53c 100644 --- a/neural_compressor/profiling/profiler/tensorflow_profiler/factory.py +++ b/neural_compressor/profiling/profiler/tensorflow_profiler/factory.py @@ -16,13 +16,10 @@ from typing import Optional -from neural_compressor.experimental.data.dataloaders.tensorflow_dataloader import \ - TensorflowDataLoader +from neural_compressor.experimental.data.dataloaders.tensorflow_dataloader import TensorflowDataLoader from neural_compressor.model.tensorflow_model import TensorflowBaseModel from neural_compressor.profiling.profiler.profiler import Profiler -from neural_compressor.profiling.profiler.tensorflow_profiler.profiler import ( - Profiler as FrozenPbProfiler -) +from neural_compressor.profiling.profiler.tensorflow_profiler.profiler import Profiler as FrozenPbProfiler class ProfilerFactory: @@ -30,11 +27,11 @@ class ProfilerFactory: @staticmethod def get_profiler( - model: TensorflowBaseModel, - dataloader: TensorflowDataLoader, - log_file: Optional[str] = None, - *args, - **kwargs, + model: TensorflowBaseModel, + dataloader: TensorflowDataLoader, + log_file: Optional[str] = None, + *args, + **kwargs, ) -> Profiler: """Get profiling for specified framework. 
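For context, the JSON trace that the ONNX parser consumes can be produced with plain onnxruntime session profiling; whether the INC onnxrt Profiler configures the session exactly this way is not shown in this patch, and the model path below is a placeholder:

import json

import onnxruntime as ort

options = ort.SessionOptions()
options.enable_profiling = True  # ONNX Runtime writes a JSON trace on end_profiling()
session = ort.InferenceSession("model.onnx", options)  # placeholder model path

# ... run a few inferences with session.run(None, feeds) here ...

trace_path = session.end_profiling()
with open(trace_path) as f:
    events = json.load(f)  # a list of events carrying "cat", "name", "dur" and "args"
print(len(events), "profiling events written to", trace_path)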
diff --git a/neural_compressor/profiling/profiler/tensorflow_profiler/profiler.py b/neural_compressor/profiling/profiler/tensorflow_profiler/profiler.py index 8fe00992372..06802d05b89 100644 --- a/neural_compressor/profiling/profiler/tensorflow_profiler/profiler.py +++ b/neural_compressor/profiling/profiler/tensorflow_profiler/profiler.py @@ -17,9 +17,7 @@ from pathlib import Path from typing import Optional -from neural_compressor.experimental.data.dataloaders.tensorflow_dataloader import ( - TensorflowDataLoader, -) +from neural_compressor.experimental.data.dataloaders.tensorflow_dataloader import TensorflowDataLoader from neural_compressor.model.tensorflow_model import TensorflowBaseModel from neural_compressor.profiling.profiler.profiler import Profiler as Parent @@ -28,10 +26,10 @@ class Profiler(Parent): """Tensorflow profiler class.""" def __init__( - self, - model: TensorflowBaseModel, - dataloader: TensorflowDataLoader, - log_file: Optional[str] = None, + self, + model: TensorflowBaseModel, + dataloader: TensorflowDataLoader, + log_file: Optional[str] = None, ) -> None: """Initialize profiler for specified model. @@ -53,12 +51,12 @@ def __init__( profiling_log_file.parent.mkdir(parents=True, exist_ok=True) def profile_model( - self, - intra_num_of_threads: int = 1, - inter_num_of_threads: int = 1, - num_warmup: int = 10, + self, + intra_num_of_threads: int = 1, + inter_num_of_threads: int = 1, + num_warmup: int = 10, ) -> None: - """"Execute model profiling. + """ "Execute model profiling. Args: intra_num_of_threads: number of threads used within an individual op for parallelism @@ -71,6 +69,7 @@ def profile_model( """ import tensorflow.compat.v1 as tf_v1 from tensorflow.python.profiler import model_analyzer, option_builder + tf_v1.enable_eager_execution() run_options = tf_v1.RunOptions(trace_level=tf_v1.RunOptions.FULL_TRACE) @@ -95,7 +94,7 @@ def profile_model( else: feed_dict = {input_tensor[0]: inputs} # get raw tensor using index [0] else: - assert len(input_tensor) == len(inputs), 'inputs len must equal with input_tensor' + assert len(input_tensor) == len(inputs), "inputs len must equal with input_tensor" feed_dict = {} if isinstance(inputs, dict) or isinstance(inputs, OrderedDict) or isinstance(inputs, UserDict): for name in inputs: diff --git a/neural_compressor/profiling/profiler/tensorflow_profiler/utils.py b/neural_compressor/profiling/profiler/tensorflow_profiler/utils.py index 2c936bac8ad..0e96799cc4f 100644 --- a/neural_compressor/profiling/profiler/tensorflow_profiler/utils.py +++ b/neural_compressor/profiling/profiler/tensorflow_profiler/utils.py @@ -24,7 +24,6 @@ def delete_assign(graph_def: Any) -> Any: graph_def: TensorFlow GraphDef Returns: - """ for node in graph_def.node: if node.op == "RefSwitch": diff --git a/neural_compressor/quantization.py b/neural_compressor/quantization.py index 233b555e206..8b758c58909 100644 --- a/neural_compressor/quantization.py +++ b/neural_compressor/quantization.py @@ -20,25 +20,31 @@ import random import numpy as np + from .config import _Config, options from .data import check_dataloader from .metric import register_customer_metric from .model import Model from .strategy import STRATEGIES from .utils import logger -from .utils.neural_insights_utils import register_neural_insights_workload, \ - update_neural_insights_workload, update_neural_insights_workload_accuracy_data -from .utils.utility import time_limit, dump_class_attrs - - -def fit(model, - conf, - calib_dataloader=None, - calib_func=None, - eval_func=None, - 
eval_dataloader=None, - eval_metric=None, - **kwargs): +from .utils.neural_insights_utils import ( + register_neural_insights_workload, + update_neural_insights_workload, + update_neural_insights_workload_accuracy_data, +) +from .utils.utility import dump_class_attrs, time_limit + + +def fit( + model, + conf, + calib_dataloader=None, + calib_func=None, + eval_func=None, + eval_dataloader=None, + eval_metric=None, + **kwargs, +): """Quantize the model with a given configure. Args: @@ -95,7 +101,7 @@ def eval_func(model): eval_metric (dict or obj): Set metric class or a dict of built-in metric configures, and neural_compressor will initialize this class when evaluation. - 1. neural_compressor have many built-in metrics, + 1. neural_compressor have many built-in metrics, user can pass a metric configure dict to tell neural compressor what metric will be use. You also can set multi-metrics to evaluate the performance of a specific model. Single metric: @@ -164,8 +170,9 @@ def eval_func(model): strategy_name = "conservative" if strategy_name == "mse_v2": - if not (conf.framework.startswith("tensorflow")\ - or conf.framework in ['pytorch_fx', 'onnxruntime']): # pragma: no cover + if not ( + conf.framework.startswith("tensorflow") or conf.framework in ["pytorch_fx", "onnxruntime"] + ): # pragma: no cover strategy_name = "basic" logger.warning(f"MSE_v2 does not support {conf.framework} now, use basic instead.") logger.warning("Only tensorflow, pytorch_fx is supported by MSE_v2 currently.") @@ -175,15 +182,15 @@ def eval_func(model): _resume = None # check if interrupted tuning procedure exists. if yes, it will resume the # whole auto tune process. - resume_file = os.path.abspath(os.path.expanduser(options.resume_from)) \ - if options.workspace and options.resume_from else None + resume_file = ( + os.path.abspath(os.path.expanduser(options.resume_from)) if options.workspace and options.resume_from else None + ) if resume_file: - assert os.path.exists(resume_file), \ - "The specified resume file {} doesn't exist!".format(resume_file) - with open(resume_file, 'rb') as f: + assert os.path.exists(resume_file), "The specified resume file {} doesn't exist!".format(resume_file) + with open(resume_file, "rb") as f: _resume = pickle.load(f).__dict__ - if eval_func is None and eval_dataloader is None: # pragma: no cover + if eval_func is None and eval_dataloader is None: # pragma: no cover logger.info("Quantize model without tuning!") strategy = STRATEGIES[strategy_name]( @@ -195,7 +202,7 @@ def eval_func(model): eval_dataloader=eval_dataloader, eval_metric=metric, resume=_resume, - q_hooks=None + q_hooks=None, ) try: @@ -227,16 +234,18 @@ def eval_func(model): if ni_workload_id: update_neural_insights_workload(ni_workload_id, "failure") import traceback + traceback.print_exc() finally: if strategy.best_qmodel: logger.info( - "Specified timeout or max trials is reached! " - "Found a quantized model which meet accuracy goal. Exit.") + "Specified timeout or max trials is reached! " "Found a quantized model which meet accuracy goal. Exit." + ) strategy.deploy_config() else: logger.error( "Specified timeout or max trials is reached! " - "Not found any quantized model which meet accuracy goal. Exit.") + "Not found any quantized model which meet accuracy goal. Exit." 
+ ) return strategy.best_qmodel diff --git a/neural_compressor/strategy/__init__.py b/neural_compressor/strategy/__init__.py index a25db4e71b9..e0ea8facd38 100644 --- a/neural_compressor/strategy/__init__.py +++ b/neural_compressor/strategy/__init__.py @@ -14,7 +14,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Intel Neural Compressor Strategy.""" from .strategy import STRATEGIES @@ -24,7 +23,7 @@ modules = glob.glob(join(dirname(__file__), "*.py")) for f in modules: - if isfile(f) and not f.startswith('__') and not f.endswith('__init__.py'): + if isfile(f) and not f.startswith("__") and not f.endswith("__init__.py"): __import__(basename(f)[:-3], globals(), locals(), level=1) __all__ = ["STRATEGIES"] diff --git a/neural_compressor/strategy/auto.py b/neural_compressor/strategy/auto.py index 8954c2b73e6..17511030de9 100644 --- a/neural_compressor/strategy/auto.py +++ b/neural_compressor/strategy/auto.py @@ -16,8 +16,10 @@ # limitations under the License. """The auto tuning strategy.""" from copy import deepcopy -from .strategy import strategy_registry, TuneStrategy, STRATEGIES + from ..utils import logger +from .strategy import STRATEGIES, TuneStrategy, strategy_registry + @strategy_registry class AutoTuneStrategy(TuneStrategy): @@ -27,16 +29,18 @@ class AutoTuneStrategy(TuneStrategy): and the tuning process ends once the condition meets the exit policy. """ - def __init__(self, - model, - conf, - q_dataloader=None, - q_func=None, - eval_func=None, - eval_dataloader=None, - eval_metric=None, - resume=None, - q_hooks=None): + def __init__( + self, + model, + conf, + q_dataloader=None, + q_func=None, + eval_func=None, + eval_dataloader=None, + eval_metric=None, + resume=None, + q_hooks=None, + ): """Init an auto tuning strategy. Args: @@ -53,24 +57,29 @@ def __init__(self, q_hooks: The dict of training hooks, supported keys are: on_epoch_begin, on_epoch_end, on_step_begin, on_step_end. Their values are functions to be executed in adaptor layer.. Defaults to None. """ - super().__init__(model=model, - conf=conf, - q_dataloader=q_dataloader, - q_func=q_func, - eval_func=eval_func, - eval_dataloader=eval_dataloader, - eval_metric=eval_metric, - resume=resume, - q_hooks=q_hooks) - logger.info(f"*** Initialize auto tuning") - self.strategies_sequence = ['conservative', 'basic'] - + super().__init__( + model=model, + conf=conf, + q_dataloader=q_dataloader, + q_func=q_func, + eval_func=eval_func, + eval_dataloader=eval_dataloader, + eval_metric=eval_metric, + resume=resume, + q_hooks=q_hooks, + ) + logger.info("*** Initialize auto tuning") + self.strategies_sequence = ["conservative", "basic"] + def _transfer_alpha(self, pre_strategy): - sq_alpha = pre_strategy.cur_best_tuning_cfg.get("recipe_cfgs", {}).get(\ - "smooth_quant_args", {}).get("alpha", None) + sq_alpha = ( + pre_strategy.cur_best_tuning_cfg.get("recipe_cfgs", {}).get("smooth_quant_args", {}).get("alpha", None) + ) if sq_alpha and self.conf.quantization.recipes: - logger.warning(f"[Strategy] Override the user config's smooth quant alpha into best alpha"\ - f"({sq_alpha: .4f}) found in pre-strategy.") + logger.warning( + f"[Strategy] Override the user config's smooth quant alpha into best alpha" + f"({sq_alpha: .4f}) found in pre-strategy." 
+ ) self.conf.quantization.recipes.setdefault("smooth_quant_args", {})["alpha"] = sq_alpha def sequential_traverse(self): @@ -81,8 +90,8 @@ def sequential_traverse(self): # transfer the best alpha of sq to the next strategy self._transfer_alpha(pre_strategy) strategy = STRATEGIES[strategy_name]( - model = self.model, - conf = self.conf, + model=self.model, + conf=self.conf, q_dataloader=self.calib_dataloader, q_func=self.q_func, eval_func=self.eval_func, @@ -90,8 +99,8 @@ def sequential_traverse(self): eval_metric=self.eval_metric, resume=self._resume, q_hooks=self.q_hooks, - pre_strategy = pre_strategy - ) + pre_strategy=pre_strategy, + ) pre_strategy = strategy strategy.traverse() @@ -106,9 +115,9 @@ def next_tune_cfg(self): tune_config (dict): A dict containing the tuning configuration for quantization. """ tuning_space = self.tuning_space - calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options + calib_sampling_size_lst = tuning_space.root_item.get_option_by_name("calib_sampling_size").options _, _, op_tuning_cfg = self.initial_tuning_cfg() - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size_lst[0] + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size_lst[0] if not self.cur_best_tuning_cfg: self.cur_best_tuning_cfg = deepcopy(op_tuning_cfg) # try to tune sq alpha @@ -116,7 +125,7 @@ def next_tune_cfg(self): for tune_cfg in self.tuning_sq_alpha(tuning_space, deepcopy(self.cur_best_tuning_cfg), self.config.recipes): yield tune_cfg - logger.info(f"Quantize the model with default config.") + logger.info("Quantize the model with default config.") yield op_tuning_cfg def traverse(self): @@ -127,8 +136,10 @@ def traverse(self): logger.info("[Strategy] Found the model meets accuracy requirements, ending the tuning process.") return elif self.config.tuning_criterion.max_trials == 1: - logger.info("[Strategy] Not found the model meets accuracy requirements,\ - but the max trial is 1, ending the tuning process.") + logger.info( + "[Strategy] Not found the model meets accuracy requirements,\ + but the max trial is 1, ending the tuning process." + ) else: # Start to try different strategies sequentially - self.sequential_traverse() \ No newline at end of file + self.sequential_traverse() diff --git a/neural_compressor/strategy/auto_mixed_precision.py b/neural_compressor/strategy/auto_mixed_precision.py index 108127632de..52044526305 100644 --- a/neural_compressor/strategy/auto_mixed_precision.py +++ b/neural_compressor/strategy/auto_mixed_precision.py @@ -14,17 +14,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
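The sequential traversal above reduces to a simple handoff pattern, sketched here without any framework dependencies; MiniStrategy is a hypothetical stand-in for STRATEGIES[name], and the alpha value is invented for illustration:

class MiniStrategy:
    def __init__(self, name, pre_strategy=None):
        self.name = name
        # Carry forward state from the previous strategy (e.g. the best smooth-quant alpha).
        self.best_alpha = None if pre_strategy is None else pre_strategy.best_alpha

    def traverse(self):
        # A real strategy would tune here; pretend "conservative" found an alpha.
        if self.name == "conservative" and self.best_alpha is None:
            self.best_alpha = 0.5
        print(f"{self.name}: best smooth-quant alpha = {self.best_alpha}")


pre_strategy = None
for name in ["conservative", "basic"]:  # mirrors self.strategies_sequence
    strategy = MiniStrategy(name, pre_strategy=pre_strategy)
    strategy.traverse()
    pre_strategy = strategy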
- """The auto-mixed precision strategy.""" import copy from collections import OrderedDict, defaultdict from itertools import groupby -from .strategy import strategy_registry, TuneStrategy + +from neural_compressor.adaptor.torch_utils.mixed_precision import ipex_mixed_precision + from ..utils import logger +from .strategy import TuneStrategy, strategy_registry from .utils.tuning_sampler import FallbackTuningSampler from .utils.tuning_structs import OpTuningConfig -from neural_compressor.adaptor.torch_utils.mixed_precision import ipex_mixed_precision @strategy_registry @@ -41,15 +42,15 @@ def _initialize_config(self, conf): Tuning config """ config = conf.mixed_precision - config.approach = getattr(config, 'approach', None) - config.recipes = getattr(config, 'recipes', {}) - config.calibration_sampling_size = getattr(config, 'calibration_sampling_size', [0]) - config.op_type_dict = getattr(config, 'op_type_dict', None) - config.op_name_dict = getattr(config, 'op_name_dict', None) - config.quant_format = getattr(config, 'quant_format', "") - config.domain = getattr(config, 'domain', None) - config.reduce_range = getattr(config, 'reduce_range', None) - config.example_inputs = getattr(config, 'example_inputs', None) + config.approach = getattr(config, "approach", None) + config.recipes = getattr(config, "recipes", {}) + config.calibration_sampling_size = getattr(config, "calibration_sampling_size", [0]) + config.op_type_dict = getattr(config, "op_type_dict", None) + config.op_name_dict = getattr(config, "op_name_dict", None) + config.quant_format = getattr(config, "quant_format", "") + config.domain = getattr(config, "domain", None) + config.reduce_range = getattr(config, "reduce_range", None) + config.example_inputs = getattr(config, "example_inputs", None) config.quant_level = getattr(config, "quant_level", "auto") return config @@ -69,31 +70,33 @@ def next_tune_cfg(self): # filter quantization dtype # TODO align with the old mixed-precison target_dtypes = self.config.precisions - target_dtypes = list(set(target_dtypes) - set(['fp32'])) + target_dtypes = list(set(target_dtypes) - set(["fp32"])) tuning_space = self.tuning_space initial_op_tuning_cfg = {} for item in tuning_space.root_item.options: - if item.item_type == 'op': + if item.item_type == "op": op_name, op_type = item.name - initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space) + initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, "fp32", tuning_space) if not target_dtypes: - target_dtypes = ['bf16'] + target_dtypes = ["bf16"] # step1. 
target_dtype AMAP, collect the ops that support target_dtype lower_precision_items_name = [] op_tuning_cfg = {} for idx, target_dtype in enumerate(target_dtypes): lower_precision_items = tuning_space.query_items_by_quant_mode(target_dtype) - if len(lower_precision_items) == 0 and \ - not (idx == len(target_dtypes) - 1 and len(lower_precision_items_name) == 0): + if len(lower_precision_items) == 0 and not ( + idx == len(target_dtypes) - 1 and len(lower_precision_items_name) == 0 + ): continue lower_precision_items_name = [item.name for item in lower_precision_items] op_tuning_cfg = deepcopy(initial_op_tuning_cfg) for op_name_type in lower_precision_items_name: - op_tuning_cfg[op_name_type] = \ - OpTuningConfig(op_name_type[0], op_name_type[1], target_dtype, tuning_space) + op_tuning_cfg[op_name_type] = OpTuningConfig( + op_name_type[0], op_name_type[1], target_dtype, tuning_space + ) calib_sampling_size = 1 - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size yield op_tuning_cfg # step 2, fallback op into fp32 @@ -109,18 +112,24 @@ def next_tune_cfg(self): logger.info("[Strategy] start fallback op into fp32.") initial_op_tuning_cfg = deepcopy(op_tuning_cfg) if self.config.quant_level in ["auto", 0]: - logger.info(f"[Strategy] fallback op into fp32 in op type wise, \ - as quant level is {self.config.quant_level}") - for op_tuning_cfg in self.fallback_in_op_type_wise(tuning_space, fallback_items_name_lst,\ - deepcopy(initial_op_tuning_cfg), target_dtype): + logger.info( + f"[Strategy] fallback op into fp32 in op type wise, \ + as quant level is {self.config.quant_level}" + ) + for op_tuning_cfg in self.fallback_in_op_type_wise( + tuning_space, fallback_items_name_lst, deepcopy(initial_op_tuning_cfg), target_dtype + ): yield op_tuning_cfg # if quant level is auto or 1, do op instance fallback if self.config.quant_level in ["auto", 1]: - logger.info(f"[Strategy] fallback op into fp32 in op wise, \ - as quant level is {self.config.quant_level}") - for op_tuning_cfg in self.fallback_in_op_wise(tuning_space, fallback_items_name_lst,\ - deepcopy(initial_op_tuning_cfg), target_dtype): + logger.info( + f"[Strategy] fallback op into fp32 in op wise, \ + as quant level is {self.config.quant_level}" + ) + for op_tuning_cfg in self.fallback_in_op_wise( + tuning_space, fallback_items_name_lst, deepcopy(initial_op_tuning_cfg), target_dtype + ): yield op_tuning_cfg def fallback_in_op_type_wise(self, tuning_space, fallback_items_name_lst, initial_op_tuning_cfg, target_dtype): @@ -142,12 +151,16 @@ def fallback_in_op_type_wise(self, tuning_space, fallback_items_name_lst, initia ops_dtypes = OrderedDict() for op_type, op_lst in op_type_groups: ops_dtypes[tuple(op_lst)] = target_dtype - fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=ops_dtypes, accumulate=False) + fallback_sampler = FallbackTuningSampler( + tuning_space, + tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=ops_dtypes, + accumulate=False, + ) op_fallback_acc_impact = OrderedDict() for op_index, op_tuning_cfg in enumerate(fallback_sampler): - op_tuning_cfg['calib_sampling_size'] = -1 + op_tuning_cfg["calib_sampling_size"] = -1 yield op_tuning_cfg acc, _ = self.last_tune_result op_fallback_acc_impact[fallback_items_name_lst[op_index]] = acc @@ -165,36 +178,45 @@ def fallback_in_op_wise(self, tuning_space, fallback_items_name_lst, initial_op_ tuning config """ 
op_dtypes = OrderedDict(zip(fallback_items_name_lst, [target_dtype] * len(fallback_items_name_lst))) - fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=False) + fallback_sampler = FallbackTuningSampler( + tuning_space, + tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, + accumulate=False, + ) op_fallback_acc_impact = OrderedDict() for op_index, op_tuning_cfg in enumerate(fallback_sampler): - op_tuning_cfg['calib_sampling_size'] = -1 + op_tuning_cfg["calib_sampling_size"] = -1 yield op_tuning_cfg acc, _ = self.last_tune_result op_fallback_acc_impact[fallback_items_name_lst[op_index]] = acc # do accumulated fallback according to the order in the previous stage if len(op_fallback_acc_impact) > 0: - ordered_ops = sorted(op_fallback_acc_impact.keys(), key=lambda key: op_fallback_acc_impact[key], \ - reverse=self.higher_is_better) + ordered_ops = sorted( + op_fallback_acc_impact.keys(), + key=lambda key: op_fallback_acc_impact[key], + reverse=self.higher_is_better, + ) op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(fallback_items_name_lst))) logger.info("Start to accumulate fallback to {target_dtype}.") initial_op_tuning_cfg = copy.deepcopy(op_tuning_cfg) - fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=True) + fallback_sampler = FallbackTuningSampler( + tuning_space, + tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, + accumulate=True, + ) for op_tuning_cfg in fallback_sampler: - op_tuning_cfg['calib_sampling_size'] = -1 + op_tuning_cfg["calib_sampling_size"] = -1 yield op_tuning_cfg - def traverse(self): """Traverse the tuning space according to auto-mixed precision strategy.""" if self.config.backend == "ipex": - self.best_qmodel = ipex_mixed_precision( - self.model, self.config.example_inputs) + self.best_qmodel = ipex_mixed_precision(self.model, self.config.example_inputs) if self.eval_dataloader or self.eval_func: self._evaluate(self.best_qmodel) return @@ -206,15 +228,14 @@ def traverse(self): self.trials_count += 1 tuning_history = self._find_tuning_history(tune_cfg) if tuning_history and self.trials_count < self.config.tuning_criterion.max_trials: - self.last_tune_result = tuning_history['last_tune_result'] - self.best_tune_result = tuning_history['best_tune_result'] + self.last_tune_result = tuning_history["last_tune_result"] + self.best_tune_result = tuning_history["best_tune_result"] logger.warn("Find evaluated tuning config, skip.") continue logger.debug("Dump current mixed precision configuration:") logger.debug(tune_cfg) - self.last_qmodel = self.adaptor.quantize( - tune_cfg, self.model, self.calib_dataloader, self.q_func) + self.last_qmodel = self.adaptor.quantize(tune_cfg, self.model, self.calib_dataloader, self.q_func) assert self.last_qmodel # Return the last quantized model as a result. if performance only. if self._not_tuning: @@ -239,5 +260,3 @@ def traverse(self): if need_stop: break - - diff --git a/neural_compressor/strategy/basic.py b/neural_compressor/strategy/basic.py index f4522603325..78f023b0139 100644 --- a/neural_compressor/strategy/basic.py +++ b/neural_compressor/strategy/basic.py @@ -16,20 +16,20 @@ # limitations under the License. 
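The two-pass fallback above (measure each op's accuracy impact individually, then accumulate the fallback in order of impact) can be sketched in plain Python; evaluate() and the op names are hypothetical stand-ins for the real tuning loop:

def evaluate(fallen_back_ops):
    # Pretend "matmul_1" hurts accuracy the least when kept at the target precision.
    penalty = {"conv_0": 0.03, "matmul_1": 0.001, "conv_2": 0.02}
    return 0.80 - sum(penalty[op] for op in fallen_back_ops)


ops = ["conv_0", "matmul_1", "conv_2"]

# Pass 1: fall back each op on its own and record the resulting accuracy.
impact = {op: evaluate([op]) for op in ops}

# Pass 2: accumulate the fallback starting from the least harmful op
# (highest accuracy first, mirroring reverse=self.higher_is_better).
ordered = sorted(impact, key=impact.get, reverse=True)
accumulated = []
for op in ordered:
    accumulated.append(op)
    print(f"fallback {accumulated} -> accuracy {evaluate(accumulated):.3f}")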
"""The basic tuning strategy.""" -from copy import deepcopy from collections import OrderedDict -from .strategy import strategy_registry, TuneStrategy -from ..utils import logger +from copy import deepcopy +from ..utils import logger +from .strategy import TuneStrategy, strategy_registry +from .utils.constant import LOWER_BIT_LIST, PRECISION_LIST, TUNING_ITEMS_LST from .utils.tuning_sampler import ( - OpTypeWiseTuningSampler, - FallbackTuningSampler, BlockFallbackTuningSampler, + FallbackTuningSampler, LowerBitsSampler, - ) - + OpTypeWiseTuningSampler, +) from .utils.tuning_structs import OpTuningConfig -from .utils.constant import TUNING_ITEMS_LST, PRECISION_LIST, LOWER_BIT_LIST + @strategy_registry class BasicTuneStrategy(TuneStrategy): @@ -51,8 +51,9 @@ def distributed_next_tune_cfg_lst(self, comm): tuning_config_list (list): A list containing dicts of the tuning configuration for quantization. """ from copy import deepcopy + tuning_space = self.tuning_space - calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options + calib_sampling_size_lst = tuning_space.root_item.get_option_by_name("calib_sampling_size").options rank = comm.Get_rank() for calib_sampling_size in calib_sampling_size_lst: # Initialize the tuning config for each op according to the quantization approach @@ -60,8 +61,8 @@ def distributed_next_tune_cfg_lst(self, comm): # Optype-wise tuning tuning items: the algorithm/scheme/granularity of activation(weight) early_stop_tuning = False stage1_cnt = 0 - quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else [] - quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] + quant_ops = quant_mode_wise_items["static"] if "static" in quant_mode_wise_items else [] + quant_ops += quant_mode_wise_items["dynamic"] if "dynamic" in quant_mode_wise_items else [] stage1_max = 1e9 # TODO set a more appropriate value if not self.cur_best_tuning_cfg: self.cur_best_tuning_cfg = deepcopy(initial_op_tuning_cfg) @@ -69,14 +70,16 @@ def distributed_next_tune_cfg_lst(self, comm): # try to tune sq alpha op_tuning_cfg_lst_stage_sq = [] if self._should_tuning_sq_alpha(self.config.recipes): - for tune_cfg in self.tuning_sq_alpha(tuning_space, \ - deepcopy(self.cur_best_tuning_cfg), self.config.recipes): + for tune_cfg in self.tuning_sq_alpha( + tuning_space, deepcopy(self.cur_best_tuning_cfg), self.config.recipes + ): op_tuning_cfg_lst_stage_sq.append(tune_cfg) yield op_tuning_cfg_lst_stage_sq # op type-wise tuning - op_type_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], - op_item_dtype_dict, initial_op_tuning_cfg) + op_type_wise_tuning_sampler = OpTypeWiseTuningSampler( + tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg + ) # stage 1: yield op_tune_cfg_lst op_tuning_cfg_lst_stage_1 = [] for op_tuning_cfg in op_type_wise_tuning_sampler: @@ -84,7 +87,7 @@ def distributed_next_tune_cfg_lst(self, comm): if early_stop_tuning and stage1_cnt > stage1_max: logger.info("Early stopping the stage 1.") break - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size op_tuning_cfg_lst_stage_1.append(deepcopy(op_tuning_cfg)) logger.info("yield op_tuning_cfg_lst_stage_1 with length {}".format(len(op_tuning_cfg_lst_stage_1))) yield op_tuning_cfg_lst_stage_1 @@ -96,13 +99,15 @@ def distributed_next_tune_cfg_lst(self, comm): else: self.cur_best_tuning_cfg = comm.bcast(cur_best_tuning_cfg, root=0) - # stage 2: yield 
new_op_tuning_cfg_lst (length of stage 1) # Fallback the ops supported both static and dynamic from static to dynamic # Tuning items: None - if self.config.approach == 'post_training_auto_quant': - static_dynamic_items = [item for item in tuning_space.query_items_by_quant_mode('static') if - item in tuning_space.query_items_by_quant_mode('dynamic')] + if self.config.approach == "post_training_auto_quant": + static_dynamic_items = [ + item + for item in tuning_space.query_items_by_quant_mode("static") + if item in tuning_space.query_items_by_quant_mode("dynamic") + ] if static_dynamic_items: logger.info("Fallback all ops that support both dynamic and static to dynamic.") else: @@ -111,8 +116,9 @@ def distributed_next_tune_cfg_lst(self, comm): new_op_tuning_cfg = deepcopy(self.cur_best_tuning_cfg) for item in static_dynamic_items: new_op_tuning_cfg[item.name] = self._initial_dynamic_cfg_based_on_static_cfg( - new_op_tuning_cfg[item.name]) - new_op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + new_op_tuning_cfg[item.name] + ) + new_op_tuning_cfg["calib_sampling_size"] = calib_sampling_size op_tuning_cfg_lst_stage_2 = [deepcopy(new_op_tuning_cfg)] logger.info("yield op_tuning_cfg_lst_stage_2 with length {}".format(len(op_tuning_cfg_lst_stage_2))) yield op_tuning_cfg_lst_stage_2 @@ -135,29 +141,33 @@ def distributed_next_tune_cfg_lst(self, comm): for target_dtype in PRECISION_LIST: target_type_lst = set(tuning_space.query_items_by_quant_mode(target_dtype)) fallback_items_lst = [item for item in quant_ops if item in target_type_lst] - + # Fallback block by block - for op_tuning_cfg in self.fallback_by_block(fallback_items_lst, best_op_tuning_cfg_stage1, - target_dtype, - tuning_space, - calib_sampling_size): + for op_tuning_cfg in self.fallback_by_block( + fallback_items_lst, best_op_tuning_cfg_stage1, target_dtype, tuning_space, calib_sampling_size + ): op_tuning_cfg_lst_stage_block.append(deepcopy(op_tuning_cfg)) - logger.info("yield op_tuning_cfg_lst_stage_block with length {}"\ - .format(len(op_tuning_cfg_lst_stage_block))) + logger.info( + "yield op_tuning_cfg_lst_stage_block with length {}".format(len(op_tuning_cfg_lst_stage_block)) + ) yield op_tuning_cfg_lst_stage_block if fallback_items_lst: logger.info(f"Start to fallback op to {target_dtype} one by one.") self._fallback_started() - fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up + fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up op_dtypes = OrderedDict(zip(fallback_items_name_lst, [target_dtype] * len(fallback_items_name_lst))) initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) - fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=False) + fallback_sampler = FallbackTuningSampler( + tuning_space, + tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, + accumulate=False, + ) op_fallback_acc_impact = OrderedDict() for op_index, op_tuning_cfg in enumerate(fallback_sampler): - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size # yield op_tuning_cfg op_tuning_cfg_lst_stage_3.append(deepcopy(op_tuning_cfg)) logger.info("yield op_tuning_cfg_lst_stage_3 with length {}".format(len(op_tuning_cfg_lst_stage_3))) @@ -178,24 +188,31 @@ def distributed_next_tune_cfg_lst(self, comm): # Fallback OPs accumulated according to the 
order in the previous stage if len(op_fallback_acc_impact) > 0: - ordered_ops = sorted(op_fallback_acc_impact.keys(), - key=lambda key: op_fallback_acc_impact[key], - reverse=self.higher_is_better) + ordered_ops = sorted( + op_fallback_acc_impact.keys(), + key=lambda key: op_fallback_acc_impact[key], + reverse=self.higher_is_better, + ) op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(fallback_items_name_lst))) logger.info(f"Start to accumulate fallback to {target_dtype}.") initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) - fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=True) + fallback_sampler = FallbackTuningSampler( + tuning_space, + tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, + accumulate=True, + ) for op_tuning_cfg in fallback_sampler: - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size # yield op_tuning_cfg op_tuning_cfg_lst_stage_4.append(deepcopy(op_tuning_cfg)) logger.info("yield op_tuning_cfg_lst_stage_4 with length {}".format(len(op_tuning_cfg_lst_stage_4))) yield op_tuning_cfg_lst_stage_4 - def fallback_by_block(self, fallback_items_lst, best_op_tuning_cfg_stage1, target_dtype, tuning_space,\ - calib_sampling_size): + def fallback_by_block( + self, fallback_items_lst, best_op_tuning_cfg_stage1, target_dtype, tuning_space, calib_sampling_size + ): """Fallback ops by block. Step 1. block by block @@ -211,7 +228,8 @@ def fallback_by_block(self, fallback_items_lst, best_op_tuning_cfg_stage1, targe dict: op_tuning_cfg fall-backed by block """ from copy import deepcopy - op_block_lst = self.capability.get('block_wise', []) + + op_block_lst = self.capability.get("block_wise", []) if op_block_lst: # Fallback block by block fallback_items_name_lst = [item.name for item in fallback_items_lst] @@ -227,14 +245,16 @@ def fallback_by_block(self, fallback_items_lst, best_op_tuning_cfg_stage1, targe # Fallback by accumulating blocks if op_block_fallback_lst: logger.info(f"Start to fallback op to {target_dtype} by blocks") - block_fallback_sampler = BlockFallbackTuningSampler(tuning_space=tuning_space, - tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_block_lst=op_block_fallback_lst, - accumulate=True, - target_dtype=target_dtype) + block_fallback_sampler = BlockFallbackTuningSampler( + tuning_space=tuning_space, + tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_block_lst=op_block_fallback_lst, + accumulate=True, + target_dtype=target_dtype, + ) for op_block_index, op_tuning_cfg in enumerate(block_fallback_sampler): - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size yield op_tuning_cfg def quant_to_lower_bits(self, initial_op_tuning_cfg, calib_sampling_size): @@ -251,14 +271,18 @@ def quant_to_lower_bits(self, initial_op_tuning_cfg, calib_sampling_size): logger.info(f"Start to quantize ops into {quant_bit}") ops = self.tuning_space.collect_op_by_quant_bits(quant_bit) op_item_dtype_dict = {op.name: quant_bit for op in ops} - lower_bits_sampler = LowerBitsSampler(deepcopy(self.tuning_space), [], - initial_op_tuning_cfg, op_item_dtype_dict, - accumulate=False, skip_first=True) + lower_bits_sampler = LowerBitsSampler( + deepcopy(self.tuning_space), + [], + initial_op_tuning_cfg, + op_item_dtype_dict, + accumulate=False, + 
skip_first=True, + ) for tune_cfg in lower_bits_sampler: - tune_cfg['calib_sampling_size'] = calib_sampling_size + tune_cfg["calib_sampling_size"] = calib_sampling_size yield tune_cfg - def next_tune_cfg(self): """Generate and yield the next tuning config with below order. @@ -275,32 +299,35 @@ def next_tune_cfg(self): tune_config (dict): A dict containing the tuning configuration for quantization. """ from copy import deepcopy + tuning_space = self.tuning_space - calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options + calib_sampling_size_lst = tuning_space.root_item.get_option_by_name("calib_sampling_size").options for calib_sampling_size in calib_sampling_size_lst: # Initialize the tuning config for each op according to the quantization approach. op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() - initial_op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + initial_op_tuning_cfg["calib_sampling_size"] = calib_sampling_size # Optype-wise tuning tuning items: the algorithm/scheme/granularity of activation(weight) early_stop_tuning = False stage1_cnt = 0 - quant_ops = quant_mode_wise_items.get('static', []) - quant_ops += quant_mode_wise_items.get('dynamic', []) + quant_ops = quant_mode_wise_items.get("static", []) + quant_ops += quant_mode_wise_items.get("dynamic", []) stage1_max = 1e9 # TODO set a more appropriate value if not self.cur_best_tuning_cfg: self.cur_best_tuning_cfg = deepcopy(initial_op_tuning_cfg) # try to tune sq alpha if self._should_tuning_sq_alpha(self.config.recipes): - for tune_cfg in self.tuning_sq_alpha(tuning_space, \ - deepcopy(self.cur_best_tuning_cfg), self.config.recipes): + for tune_cfg in self.tuning_sq_alpha( + tuning_space, deepcopy(self.cur_best_tuning_cfg), self.config.recipes + ): yield tune_cfg # op type-wise tuning - op_type_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [],\ - op_item_dtype_dict, initial_op_tuning_cfg) - + op_type_wise_tuning_sampler = OpTypeWiseTuningSampler( + tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg + ) + for index, op_tuning_cfg in enumerate(op_type_wise_tuning_sampler): - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size # try to quantizing ops into lower bits, such as int4, # if accuracy meets the requirements after first trial and max_trials > 1 if index == 1 and self.objectives.accuracy_meet_req(deepcopy(self.last_tune_result)): @@ -326,9 +353,12 @@ def next_tune_cfg(self): # Fallback the ops supported both static and dynamic from static to dynamic # Tuning items: None - if self.config.approach == 'post_training_auto_quant': - static_dynamic_items = [item for item in tuning_space.query_items_by_quant_mode('static') if - item in tuning_space.query_items_by_quant_mode('dynamic')] + if self.config.approach == "post_training_auto_quant": + static_dynamic_items = [ + item + for item in tuning_space.query_items_by_quant_mode("static") + if item in tuning_space.query_items_by_quant_mode("dynamic") + ] if static_dynamic_items: logger.info("Fallback all ops that support both dynamic and static to dynamic.") else: @@ -337,8 +367,9 @@ def next_tune_cfg(self): new_op_tuning_cfg = deepcopy(self.cur_best_tuning_cfg) for item in static_dynamic_items: new_op_tuning_cfg[item.name] = self._initial_dynamic_cfg_based_on_static_cfg( - new_op_tuning_cfg[item.name]) - new_op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + 
new_op_tuning_cfg[item.name] + ) + new_op_tuning_cfg["calib_sampling_size"] = calib_sampling_size yield new_op_tuning_cfg logger.info("Apply recipe one by one.") @@ -352,62 +383,74 @@ def next_tune_cfg(self): fallback_items_lst = [item for item in quant_ops if item in target_type_lst] # Fallback block by block - for op_tuning_cfg in self.fallback_by_block(fallback_items_lst, best_op_tuning_cfg_stage1, - target_dtype, - tuning_space, - calib_sampling_size): + for op_tuning_cfg in self.fallback_by_block( + fallback_items_lst, best_op_tuning_cfg_stage1, target_dtype, tuning_space, calib_sampling_size + ): yield op_tuning_cfg if fallback_items_lst: logger.info(f"Start to fallback op to {target_dtype} one by one.") self._fallback_started() - fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up + fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up op_dtypes = OrderedDict(zip(fallback_items_name_lst, [target_dtype] * len(fallback_items_name_lst))) initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) - fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=False) + fallback_sampler = FallbackTuningSampler( + tuning_space, + tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, + accumulate=False, + ) op_fallback_acc_impact = OrderedDict() for op_index, op_tuning_cfg in enumerate(fallback_sampler): - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size yield op_tuning_cfg acc, _ = self.last_tune_result op_fallback_acc_impact[fallback_items_name_lst[op_index]] = acc - # Fallback OPs accumulated according to the order in the previous stage if len(op_fallback_acc_impact) > 0: - ordered_ops = sorted(op_fallback_acc_impact.keys(), - key=lambda key: op_fallback_acc_impact[key], - reverse=self.higher_is_better) + ordered_ops = sorted( + op_fallback_acc_impact.keys(), + key=lambda key: op_fallback_acc_impact[key], + reverse=self.higher_is_better, + ) op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(fallback_items_name_lst))) logger.info(f"Start to accumulate fallback to {target_dtype}.") initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) - fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=True) + fallback_sampler = FallbackTuningSampler( + tuning_space, + tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, + accumulate=True, + ) for op_tuning_cfg in fallback_sampler: - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size yield op_tuning_cfg - logger.warning(f"[Strategy] All tuning options for the current strategy have been tried.\ - If the quantized model does not seem to work well, it might be worth considering other strategies.") + logger.warning( + "[Strategy] All tuning options for the current strategy have been tried.\ + If the quantized model does not seem to work well, it might be worth considering other strategies." 
+ ) - def _initial_dynamic_cfg_based_on_static_cfg(self, op_static_cfg:OpTuningConfig): + def _initial_dynamic_cfg_based_on_static_cfg(self, op_static_cfg: OpTuningConfig): op_state = op_static_cfg.get_state() op_name = op_static_cfg.op_name op_type = op_static_cfg.op_type op_name_type = (op_name, op_type) - op_quant_mode = 'dynamic' + op_quant_mode = "dynamic" tuning_space = self.tuning_space dynamic_state = {} - for att in ['weight', 'activation']: - if att not in op_state: continue + for att in ["weight", "activation"]: + if att not in op_state: + continue # Add dtype full_path = self.tuning_space.get_op_default_path_by_pattern(op_name_type, op_quant_mode) - dynamic_state[att + '_dtype'] = self.tuning_space.ops_data_type[op_name_type][full_path[att]] + dynamic_state[att + "_dtype"] = self.tuning_space.ops_data_type[op_name_type][full_path[att]] for method_name, method_val in op_state[att].items(): att_and_method_name = (att, method_name) - if att_and_method_name not in TUNING_ITEMS_LST: continue + if att_and_method_name not in TUNING_ITEMS_LST: + continue if tuning_space.query_item_option(op_name_type, full_path[att], att_and_method_name, method_val): dynamic_state[att_and_method_name] = method_val else: @@ -416,4 +459,3 @@ def _initial_dynamic_cfg_based_on_static_cfg(self, op_static_cfg:OpTuningConfig) tuning_item = quant_mode_item.get_option_by_name(att_and_method_name) dynamic_state[att_and_method_name] = tuning_item.options[0] if tuning_item else None return OpTuningConfig(op_name, op_type, op_quant_mode, tuning_space, kwargs=dynamic_state) - diff --git a/neural_compressor/strategy/bayesian.py b/neural_compressor/strategy/bayesian.py index 5749ecf0c15..5dad3e67eb7 100644 --- a/neural_compressor/strategy/bayesian.py +++ b/neural_compressor/strategy/bayesian.py @@ -14,19 +14,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """The Bayesian tuning strategy.""" import warnings +from copy import deepcopy + import numpy as np from scipy.optimize import minimize -from sklearn.gaussian_process.kernels import Matern from sklearn.gaussian_process import GaussianProcessRegressor +from sklearn.gaussian_process.kernels import Matern -from copy import deepcopy from ..config import options from ..utils import logger -from .strategy import strategy_registry, TuneStrategy +from .strategy import TuneStrategy, strategy_registry from .utils.tuning_sampler import OpWiseTuningSampler @@ -34,16 +34,18 @@ class BayesianTuneStrategy(TuneStrategy): """The Bayesian tuning strategy.""" - def __init__(self, - model, - conf, - q_dataloader=None, - q_func=None, - eval_func=None, - eval_dataloader=None, - eval_metric=None, - resume=None, - q_hooks=None): + def __init__( + self, + model, + conf, + q_dataloader=None, + q_func=None, + eval_func=None, + eval_dataloader=None, + eval_metric=None, + resume=None, + q_hooks=None, + ): """Init the Bayesian tuning strategy. Args: @@ -60,15 +62,17 @@ def __init__(self, q_hooks: The dict of training hooks, supported keys are: on_epoch_begin, on_epoch_end, on_step_begin, on_step_end. Their values are functions to be executed in adaptor layer.. Defaults to None.
""" - super().__init__(model=model, - conf=conf, - q_dataloader=q_dataloader, - q_func=q_func, - eval_func=eval_func, - eval_dataloader=eval_dataloader, - eval_metric=eval_metric, - resume=resume, - q_hooks=q_hooks) + super().__init__( + model=model, + conf=conf, + q_dataloader=q_dataloader, + q_func=q_func, + eval_func=eval_func, + eval_dataloader=eval_dataloader, + eval_metric=eval_metric, + resume=resume, + q_hooks=q_hooks, + ) self.bayes_opt = None def __getstate__(self): @@ -78,24 +82,24 @@ def __getstate__(self): dict: Saved dict for resuming """ for history in self.tuning_history: - if self._same_conf(history['cfg'], self.conf): - history['bayes_opt'] = self.bayes_opt + if self._same_conf(history["cfg"], self.conf): + history["bayes_opt"] = self.bayes_opt save_dict = super().__getstate__() return save_dict def _params_to_tune_configs(self, params): op_tuning_cfg = {} - calib_sampling_size_lst = self.tuning_space.root_item.get_option_by_name('calib_sampling_size').options + calib_sampling_size_lst = self.tuning_space.root_item.get_option_by_name("calib_sampling_size").options for op_name_type, configs in self.op_configs.items(): if len(configs) == 1: op_tuning_cfg[op_name_type] = configs[0] else: op_tuning_cfg[op_name_type] = configs[min(len(configs) - 1, int(params[op_name_type[0]]))] if len(calib_sampling_size_lst) > 1: - calib_sampling_size = calib_sampling_size_lst[min(len(configs) - 1, int(params['calib_sampling_size']))] + calib_sampling_size = calib_sampling_size_lst[min(len(configs) - 1, int(params["calib_sampling_size"]))] else: calib_sampling_size = calib_sampling_size_lst[0] - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size return op_tuning_cfg def next_tune_cfg(self): @@ -112,23 +116,21 @@ def next_tune_cfg(self): params = None pbounds = {} tuning_space = self.tuning_space - calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options + calib_sampling_size_lst = tuning_space.root_item.get_option_by_name("calib_sampling_size").options op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() - op_wise_pool = OpWiseTuningSampler(tuning_space, [], [], - op_item_dtype_dict, initial_op_tuning_cfg) + op_wise_pool = OpWiseTuningSampler(tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg) self.op_configs = op_wise_pool.get_opwise_candidate() for op_name_type, configs in self.op_configs.items(): if len(configs) > 1: pbounds[op_name_type[0]] = (0, len(configs)) if len(calib_sampling_size_lst) > 1: - pbounds['calib_sampling_size'] = (0, len(calib_sampling_size_lst)) + pbounds["calib_sampling_size"] = (0, len(calib_sampling_size_lst)) if len(pbounds) == 0: yield self._params_to_tune_configs(params) return if self.bayes_opt is None: - self.bayes_opt = BayesianOptimization( - pbounds=pbounds, random_seed=options.random_seed) + self.bayes_opt = BayesianOptimization(pbounds=pbounds, random_seed=options.random_seed) while True: params = self.bayes_opt.gen_next_params() logger.debug("Dump current bayesian params:") @@ -140,6 +142,7 @@ def next_tune_cfg(self): logger.debug("Find registered params, skip it.") pass + # Util part # Bayesian opt acq function @@ -160,21 +163,18 @@ def acq_max(ac, gp, y_max, bounds, random_seed, n_warmup=10000, n_iter=10): x_max: The arg max of the acquisition function. 
""" # Warm up with random points - x_tries = np.random.uniform(bounds[:, 0], bounds[:, 1], - size=(n_warmup, bounds.shape[0])) + x_tries = np.random.uniform(bounds[:, 0], bounds[:, 1], size=(n_warmup, bounds.shape[0])) ys = ac(x_tries, gp=gp, y_max=y_max) x_max = x_tries[ys.argmax()] max_acq = ys.max() # Explore the parameter space more thoroughly - x_seeds = np.random.uniform(bounds[:, 0], bounds[:, 1], - size=(n_iter, bounds.shape[0])) + x_seeds = np.random.uniform(bounds[:, 0], bounds[:, 1], size=(n_iter, bounds.shape[0])) for x_try in x_seeds: # Find the minimum of minus the acquisition function - res = minimize(lambda x: -ac(x.reshape(1, -1), gp=gp, y_max=y_max), - x_try.flatten(), - bounds=bounds, - method="L-BFGS-B") + res = minimize( + lambda x: -ac(x.reshape(1, -1), gp=gp, y_max=y_max), x_try.flatten(), bounds=bounds, method="L-BFGS-B" + ) # See if success if not res.success: @@ -196,6 +196,7 @@ def _hashable(x): """Ensure that an point is hashable by a python dict.""" return tuple(map(float, x)) + # Target space part class TargetSpace(object): """Holds the param-space coordinates (X) and target values (Y). @@ -216,10 +217,7 @@ def __init__(self, pbounds, random_seed=9527): names = list(pbounds.keys()) self._keys = deepcopy(names) # Create an array with parameters bounds - self._bounds = np.array( - [pbounds[name] for name in names], - dtype=np.float32 - ) + self._bounds = np.array([pbounds[name] for name in names], dtype=np.float32) # preallocated memory for X and Y points self._params = np.empty(shape=(0, self.dim)) @@ -281,8 +279,8 @@ def params_to_array(self, params): assert set(params) == set(self.keys) except AssertionError: raise ValueError( - "Parameters' keys ({}) do ".format(list(params.keys())) + - "not match the expected set of keys ({}).".format(self.keys) + "Parameters' keys ({}) do ".format(list(params.keys())) + + "not match the expected set of keys ({}).".format(self.keys) ) return np.asarray([params[key] for key in self.keys]) @@ -299,8 +297,8 @@ def array_to_params(self, x): assert len(x) == len(self.keys) except AssertionError: raise ValueError( - "Size of array ({}) is different than the ".format(len(x)) + - "expected number of parameters ({}).".format(len(self.keys)) + "Size of array ({}) is different than the ".format(len(x)) + + "expected number of parameters ({}).".format(len(self.keys)) ) return dict(zip(self.keys, x)) @@ -315,8 +313,8 @@ def _as_array(self, x): assert x.size == self.dim except AssertionError: raise ValueError( - "Size of array ({}) is different than the ".format(len(x)) + - "expected number of parameters ({}).".format(len(self.keys)) + "Size of array ({}) is different than the ".format(len(x)) + + "expected number of parameters ({}).".format(len(self.keys)) ) return x @@ -334,7 +332,7 @@ def register(self, params, target): """ x = self._as_array(params) if x in self: - raise KeyError('Params point {} is not unique'.format(x)) + raise KeyError("Params point {} is not unique".format(x)) # Insert data into unique dictionary self._cache[_hashable(x.ravel())] = target @@ -364,19 +362,13 @@ def random_sample(self): # TODO: support integer, category, and basic scipy.optimize constraints data = np.empty((1, self.dim)) for col, (lower, upper) in enumerate(self._bounds): - data.T[col] = np.random.uniform( # pylint: disable=unsupported-assignment-operation - lower, upper, size=1) + data.T[col] = np.random.uniform(lower, upper, size=1) # pylint: disable=unsupported-assignment-operation return data.ravel() def max(self): """Get maximum target value 
found and corresponding parameters.""" try: - res = { - 'target': self.target.max(), - 'params': dict( - zip(self.keys, self.params[self.target.argmax()]) - ) - } + res = {"target": self.target.max(), "params": dict(zip(self.keys, self.params[self.target.argmax()]))} except ValueError: res = {} return res @@ -385,13 +377,11 @@ def res(self): """Get all target values found and corresponding parameters.""" params = [dict(zip(self.keys, p)) for p in self.params] - return [ - {"target": target, "params": param} - for target, param in zip(self.target, params) - ] + return [{"target": target, "params": param} for target, param in zip(self.target, params)] + # Tuning part -class BayesianOptimization(): +class BayesianOptimization: """The class for bayesian optimization. This class takes the parameters bounds in order to find which values for @@ -461,7 +451,7 @@ def suggest(self): gp=self._gp, y_max=self._space.target.max(), bounds=self._space.bounds, - random_seed=self._random_seed + random_seed=self._random_seed, ) return self._space.array_to_params(suggestion) diff --git a/neural_compressor/strategy/conservative.py b/neural_compressor/strategy/conservative.py index 84218bf5c17..9a58b8874b7 100644 --- a/neural_compressor/strategy/conservative.py +++ b/neural_compressor/strategy/conservative.py @@ -17,18 +17,19 @@ """The conservative tuning strategy for quantization level 0.""" import copy import os -import numpy as np - -from collections import deque from collections import OrderedDict as COrderedDict +from collections import deque from copy import deepcopy -from typing import Dict, List, Tuple, OrderedDict +from typing import Dict, List, OrderedDict, Tuple -from .strategy import strategy_registry, TuneStrategy -from .utils.tuning_space import TuningItem +import numpy as np + +from ..algorithm import AlgorithmScheduler from ..utils import logger from ..utils.utility import Statistics -from ..algorithm import AlgorithmScheduler +from .strategy import TuneStrategy, strategy_registry +from .utils.tuning_space import TuningItem + @strategy_registry class ConservativeTuneStrategy(TuneStrategy): @@ -39,16 +40,18 @@ class ConservativeTuneStrategy(TuneStrategy): and then quantize the OPs to lower precision OP type wisely and OP wisely. """ - def __init__(self, - model, - conf, - q_dataloader=None, - q_func=None, - eval_func=None, - eval_dataloader=None, - eval_metric=None, - resume=None, - q_hooks=None): + def __init__( + self, + model, + conf, + q_dataloader=None, + q_func=None, + eval_func=None, + eval_dataloader=None, + eval_metric=None, + resume=None, + q_hooks=None, + ): """Init conservative tuning strategy. Args: @@ -65,28 +68,30 @@ def __init__(self, q_hooks: The dict of training hooks, supported keys are: on_epoch_begin, on_epoch_end, on_step_begin, on_step_end. Their values are functions to be executed in adaptor layer.. Defaults to None.
""" - super().__init__(model=model, - conf=conf, - q_dataloader=q_dataloader, - q_func=q_func, - eval_func=eval_func, - eval_dataloader=eval_dataloader, - eval_metric=eval_metric, - resume=resume, - q_hooks=q_hooks) - logger.info(f"*** Initialize conservative tuning") + super().__init__( + model=model, + conf=conf, + q_dataloader=q_dataloader, + q_func=q_func, + eval_func=eval_func, + eval_dataloader=eval_dataloader, + eval_metric=eval_metric, + resume=resume, + q_hooks=q_hooks, + ) + logger.info("*** Initialize conservative tuning") self.acc_meet_flag = False - self.quant_op_type_lst = ['conv', 'matmul', 'bmm', 'linear'] + self.quant_op_type_lst = ["conv", "matmul", "bmm", "linear"] extend_op_type_lst = self._get_extend_op_type_lst() self.quant_op_type_lst += extend_op_type_lst res_lst = [None] * len(self.quant_op_type_lst) - self.quant_status = {k : v for k, v in zip(self.quant_op_type_lst, res_lst)} - + self.quant_status = {k: v for k, v in zip(self.quant_op_type_lst, res_lst)} + def _get_extend_op_type_lst(self): extend_lst = [] # add 'add' to op type list when sq is on - if self.config.recipes.get('smooth_quant', False): - extend_lst.append('add') + if self.config.recipes.get("smooth_quant", False): + extend_lst.append("add") return extend_lst def next_tune_cfg(self): @@ -103,14 +108,14 @@ def next_tune_cfg(self): tune_config (dict): It's a dict containing the tuning configuration to run. """ tuning_space = self.tuning_space - calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options + calib_sampling_size_lst = tuning_space.root_item.get_option_by_name("calib_sampling_size").options calib_sampling_size = calib_sampling_size_lst[0] op_item_dtype_dict, quant_mode_wise_items, tune_cfg = self.initialize_tune_cfg() - tune_cfg['calib_sampling_size'] = calib_sampling_size + tune_cfg["calib_sampling_size"] = calib_sampling_size op_type_priority = self._get_op_type_priority() quant_items_pool = self._quant_items_pool(op_type_priority) self.re_quant = True - logger.info(f"*** Try to convert op into lower precision to improve performance.") + logger.info("*** Try to convert op into lower precision to improve performance.") for dtype, op_items in quant_items_pool.items(): logger.info(f"*** Start to convert op into {dtype}.") for op_type, items_lst in op_items.items(): @@ -127,20 +132,18 @@ def next_tune_cfg(self): tune_cfg = deepcopy(tmp_tune_cfg) else: # tmp_tune_cfg = deepcopy(tune_cfg) - self.quant_status[op_type] = 'fp32' + self.quant_status[op_type] = "fp32" logger.info(f"*** Convert all {op_type} ops to {dtype} but accuracy not meet the requirements") logger.info(f"***Current result {self.quant_status.items()}") - logger.info(f"*** Ending tuning process due to no quantifiable op left.") + logger.info("*** Ending tuning process due to no quantifiable op left.") self.re_quant = False def _get_op_type_priority(self): - optypewise_cap = self.capability['optypewise'] + optypewise_cap = self.capability["optypewise"] op_type_priority = list(optypewise_cap.keys()) return op_type_priority - def _sorted_item_by_op_type(self, - items_lst, - op_type_priority: List[str]) -> OrderedDict[str, List]: + def _sorted_item_by_op_type(self, items_lst, op_type_priority: List[str]) -> OrderedDict[str, List]: """Scoring the tuning items according to its op type. 
Args: @@ -163,8 +166,9 @@ def _sorted_item_by_op_type(self, if target_op_type not in sorted_items: sorted_items[target_op_type] = [] sorted_items[target_op_type].append((op_item, quant_mode)) - new_sorted_items = COrderedDict((op_type, sorted_items[op_type]) for op_type \ - in self.quant_op_type_lst if op_type in sorted_items) + new_sorted_items = COrderedDict( + (op_type, sorted_items[op_type]) for op_type in self.quant_op_type_lst if op_type in sorted_items + ) return new_sorted_items def initialize_tune_cfg(self): @@ -176,12 +180,11 @@ def initialize_tune_cfg(self): op_item_dtype_dict (OrderedDict): key is (op_name, op_type); value is quantization mode. quant_mode_wise_items (OrderedDict): key is quant_mode/precision; value is item list. initial_op_tuning_cfg (OrderedDict): key is (op_name, op_type); value is the initialized tuning config. - """ from .utils.constant import auto_query_order_o0 as query_order from .utils.tuning_space import initial_tuning_cfg_with_quant_mode - quant_mode_wise_items = OrderedDict() # mode, op_item_lst + quant_mode_wise_items = OrderedDict() # mode, op_item_lst pre_items = set() # Collect op items supported the specified mode. for quant_mode in query_order: @@ -200,13 +203,14 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): initial_op_tuning_cfg = {} for op_name_type, quant_mode in op_item_dtype_dict.items(): - initial_op_tuning_cfg[op_name_type] = initial_tuning_cfg_with_quant_mode(op_name_type, - quant_mode, - self.tuning_space) + initial_op_tuning_cfg[op_name_type] = initial_tuning_cfg_with_quant_mode( + op_name_type, quant_mode, self.tuning_space + ) return op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg - def _quant_items_pool(self, op_type_priority: List[str]) -> OrderedDict[ - str, OrderedDict[str, List[Tuple[TuningItem, str]]]]: + def _quant_items_pool( + self, op_type_priority: List[str] + ) -> OrderedDict[str, OrderedDict[str, List[Tuple[TuningItem, str]]]]: """Create the op queue to be quantized. Args: @@ -228,10 +232,10 @@ def _quant_items_pool(self, op_type_priority: List[str]) -> OrderedDict[ quant_ops_name_set = set() # collect and sorted all ops that support int8 for quant_mode, items_lst in quant_mode_wise_items.items(): - if "static" in quant_mode or 'dynamic' in quant_mode: + if "static" in quant_mode or "dynamic" in quant_mode: _quant_mode = "static" if "static" in quant_mode else "dynamic" op_item_pairs += [(item, _quant_mode) for item in items_lst if item.name not in quant_ops_name_set] quant_ops_name_set = quant_ops_name_set.union([item.name for item in items_lst]) op_item_pairs = self._sorted_item_by_op_type(op_item_pairs, op_type_priority) - quant_items_pool['int8'] = op_item_pairs + quant_items_pool["int8"] = op_item_pairs return quant_items_pool diff --git a/neural_compressor/strategy/exhaustive.py b/neural_compressor/strategy/exhaustive.py index b9fad5d963e..a94b2a4b8fe 100644 --- a/neural_compressor/strategy/exhaustive.py +++ b/neural_compressor/strategy/exhaustive.py @@ -15,10 +15,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
"""The exhaustive tuning strategy.""" -from .strategy import strategy_registry, TuneStrategy - +from .strategy import TuneStrategy, strategy_registry from .utils.tuning_sampler import OpWiseTuningSampler + @strategy_registry class ExhaustiveTuneStrategy(TuneStrategy): """The exhaustive tuning strategy.""" @@ -35,11 +35,12 @@ def next_tune_cfg(self): tune_config (dict): A dict containing the tuning configuration for quantization. """ tuning_space = self.tuning_space - calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options + calib_sampling_size_lst = tuning_space.root_item.get_option_by_name("calib_sampling_size").options for calib_sampling_size in calib_sampling_size_lst: op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() - op_wise_tuning_sampler = OpWiseTuningSampler(tuning_space, [], [], - op_item_dtype_dict, initial_op_tuning_cfg) + op_wise_tuning_sampler = OpWiseTuningSampler( + tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg + ) for op_tuning_cfg in op_wise_tuning_sampler: - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size yield op_tuning_cfg diff --git a/neural_compressor/strategy/hawq_v2.py b/neural_compressor/strategy/hawq_v2.py index 5e6cb1e939c..0958bd8f05a 100644 --- a/neural_compressor/strategy/hawq_v2.py +++ b/neural_compressor/strategy/hawq_v2.py @@ -18,12 +18,12 @@ from collections import OrderedDict from copy import deepcopy -from .strategy import strategy_registry, TuneStrategy - -from .utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler, ModelWiseTuningSampler -from .utils.tuning_structs import OpTuningConfig -from .utils.constant import TUNING_ITEMS_LST from ..utils import logger +from .strategy import TuneStrategy, strategy_registry +from .utils.constant import TUNING_ITEMS_LST +from .utils.tuning_sampler import FallbackTuningSampler, ModelWiseTuningSampler, OpTypeWiseTuningSampler +from .utils.tuning_structs import OpTuningConfig + @strategy_registry class HAWQ_V2TuneStrategy(TuneStrategy): @@ -32,7 +32,6 @@ class HAWQ_V2TuneStrategy(TuneStrategy): HAWQ_V2 implements the "Hawq-v2: Hessian aware trace-weighted quantization of neural networks". We made a small change to it by using the hessian trace to score the op impact and then fallback the OPs according to the scoring result. - """ def next_tune_cfg(self): @@ -42,44 +41,47 @@ def next_tune_cfg(self): tune_config (dict): A dict containing the tuning configuration for quantization. 
""" tuning_space = self.tuning_space - calib_size = tuning_space.root_item.get_option_by_name('calib_sampling_size').options[0] + calib_size = tuning_space.root_item.get_option_by_name("calib_sampling_size").options[0] # Initialize the tuning config for each op according to the quantization approach op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() # Optype-wise tuning tuning items: the algorithm/scheme/granularity of activation(weight) early_stop_tuning = True stage1_cnt = 0 - quant_ops = quant_mode_wise_items.get('static', []) - quant_ops += quant_mode_wise_items.get('dynamic', []) + quant_ops = quant_mode_wise_items.get("static", []) + quant_ops += quant_mode_wise_items.get("dynamic", []) stage1_max = 1 # TODO set a more appropriate value - op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], - op_item_dtype_dict, initial_op_tuning_cfg) + op_wise_tuning_sampler = OpTypeWiseTuningSampler( + tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg + ) for op_tuning_cfg in op_wise_tuning_sampler: stage1_cnt += 1 if early_stop_tuning and stage1_cnt > stage1_max: logger.info("Early stopping the stage 1.") break - op_tuning_cfg['calib_sampling_size'] = calib_size + op_tuning_cfg["calib_sampling_size"] = calib_size yield op_tuning_cfg # Start compute the hessian trace - logger.info(f"************** Start compute the hessian trace *****************") + logger.info("************** Start compute the hessian trace *****************") target_dtype = "fp32" hawq_v2_criterion = None strategy_kwargs = self.config.tuning_criterion.strategy_kwargs if strategy_kwargs: - hawq_v2_criterion = strategy_kwargs.get('hawq_v2_loss', None) + hawq_v2_criterion = strategy_kwargs.get("hawq_v2_loss", None) # assert hawq_v2_criterion is not None, "HAWQ-V2 strategy needs model loss function to compute the gradient, \ # Please assign it by strategy_kwargs({'hawq_v2_loss': hawq_v2_loss})." 
- op_to_traces = self.adaptor.calculate_hessian_trace(fp32_model=self.model, - dataloader=self.calib_dataloader, - q_model=self.last_qmodel, - criterion=hawq_v2_criterion, - enable_act=False) + op_to_traces = self.adaptor.calculate_hessian_trace( + fp32_model=self.model, + dataloader=self.calib_dataloader, + q_model=self.last_qmodel, + criterion=hawq_v2_criterion, + enable_act=False, + ) sorted_op_to_traces = dict(sorted(op_to_traces.items(), key=lambda item: item[1], reverse=True)) - logger.info(f"************** Hessian Trace *****************") + logger.info("************** Hessian Trace *****************") for op_name, trace in sorted_op_to_traces.items(): logger.info(f"*** op: {op_name}, hessian trace : {trace}") - logger.info(f"************************************************") + logger.info("************************************************") # WA for op mapping ordered_ops_tmp = {} for op_info in list(initial_op_tuning_cfg.keys()): @@ -87,13 +89,17 @@ def next_tune_cfg(self): for op_trace_name in op_to_traces.keys(): if isinstance(op_trace_name, str) and op_trace_name.startswith(op_name): if op_name in ordered_ops_tmp: - logger.info((f"*** Already assigned the hessian trace to {op_name}", - f"update it with the value of {op_trace_name}")) + logger.info( + ( + f"*** Already assigned the hessian trace to {op_name}", + f"update it with the value of {op_trace_name}", + ) + ) ordered_ops_tmp[op_name] = op_to_traces[op_trace_name] - ordered_ops_tmp = sorted(ordered_ops_tmp.keys(), - key=lambda key: ordered_ops_tmp[key], - reverse=self.higher_is_better) + ordered_ops_tmp = sorted( + ordered_ops_tmp.keys(), key=lambda key: ordered_ops_tmp[key], reverse=self.higher_is_better + ) # WA for add op type op_info_map = {} for op_info in list(initial_op_tuning_cfg.keys()): @@ -103,11 +109,14 @@ def next_tune_cfg(self): logger.info(f"Start to accumulate fallback to {target_dtype}.") initial_op_tuning_cfg = deepcopy(op_tuning_cfg) - fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=True, - skip_first=False) + fallback_sampler = FallbackTuningSampler( + tuning_space, + tuning_order_lst=[], + initial_op_tuning_cfg=op_tuning_cfg, + op_dtypes=op_dtypes, + accumulate=True, + skip_first=False, + ) for op_tuning_cfg in fallback_sampler: - op_tuning_cfg['calib_sampling_size'] = calib_size + op_tuning_cfg["calib_sampling_size"] = calib_size yield op_tuning_cfg - diff --git a/neural_compressor/strategy/mse.py b/neural_compressor/strategy/mse.py index 50892f2dcaa..67ee9877c42 100644 --- a/neural_compressor/strategy/mse.py +++ b/neural_compressor/strategy/mse.py @@ -15,17 +15,19 @@ # See the License for the specific language governing permissions and # limitations under the License. """MSE tuning strategy.""" -from copy import deepcopy -import numpy as np from collections import OrderedDict -from typing import Dict, Any, List -from .strategy import strategy_registry, TuneStrategy -from ..utils import logger +from copy import deepcopy from time import time +from typing import Any, Dict, List + +import numpy as np -from .utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler +from ..utils import logger +from .strategy import TuneStrategy, strategy_registry +from .utils.tuning_sampler import FallbackTuningSampler, OpTypeWiseTuningSampler from .utils.tuning_structs import OpTuningConfig + @strategy_registry class MSETuneStrategy(TuneStrategy): """The tuning strategy using MSE policy in tuning space. 
@@ -35,16 +37,18 @@ class MSETuneStrategy(TuneStrategy): those OPs according to the MSE value, and performs the op-wise fallback in this order. """ - def __init__(self, - model, - conf, - q_dataloader=None, - q_func=None, - eval_func=None, - eval_dataloader=None, - eval_metric=None, - resume=None, - q_hooks=None): + def __init__( + self, + model, + conf, + q_dataloader=None, + q_func=None, + eval_func=None, + eval_dataloader=None, + eval_metric=None, + resume=None, + q_hooks=None, + ): """Init MSE tuning strategy. Args: @@ -61,19 +65,20 @@ def __init__(self, q_hooks: The dict of training hooks, supported keys are: on_epoch_begin, on_epoch_end, on_step_begin, on_step_end. Their values are functions to be executed in adaptor layer.. Defaults to None. """ - super().__init__(model=model, - conf=conf, - q_dataloader=q_dataloader, - q_func=q_func, - eval_func=eval_func, - eval_dataloader=eval_dataloader, - eval_metric=eval_metric, - resume=resume, - q_hooks=q_hooks) - logger.info(f"*** Initialize MSE tuning") + super().__init__( + model=model, + conf=conf, + q_dataloader=q_dataloader, + q_func=q_func, + eval_func=eval_func, + eval_dataloader=eval_dataloader, + eval_metric=eval_metric, + resume=resume, + q_hooks=q_hooks, + ) + logger.info("*** Initialize MSE tuning") self.ordered_ops = None - def __getstate__(self): """Magic method for pickle saving. @@ -81,8 +86,8 @@ def __getstate__(self): save_dict: Saved dict for resuming """ for history in self.tuning_history: - if self._same_conf(history['cfg'], self.conf): - history['ordered_ops'] = self.ordered_ops + if self._same_conf(history["cfg"], self.conf): + history["ordered_ops"] = self.ordered_ops save_dict = super().__getstate__() return save_dict @@ -98,13 +103,12 @@ def _mse_metric_gap(self, fp32_tensor, dequantize_tensor): dequantize_max = np.max(dequantize_tensor) dequantize_min = np.min(dequantize_tensor) fp32_tensor = (fp32_tensor - fp32_min) / (fp32_max - fp32_min) - dequantize_tensor = (dequantize_tensor - dequantize_min) / \ - (dequantize_max - dequantize_min) + dequantize_tensor = (dequantize_tensor - dequantize_min) / (dequantize_max - dequantize_min) diff_tensor = fp32_tensor - dequantize_tensor - euclidean_dist = np.sum(diff_tensor ** 2) + euclidean_dist = np.sum(diff_tensor**2) return euclidean_dist / fp32_tensor.size - def mse_impact_lst(self, op_list: List, fp32_model, best_qmodel): + def mse_impact_lst(self, op_list: List, fp32_model, best_qmodel): """Calculate and generate the MSE impact list. Args: @@ -116,32 +120,45 @@ def mse_impact_lst(self, op_list: List, fp32_model, best_qmodel): ordered_op_name_types (List[Tuple(str, str)]): The sorted list of ops by its MSE impaction, in the same format of 'op_list'. 
""" - op_name_lst = [element[0] for element in op_list ] + op_name_lst = [element[0] for element in op_list] op_mapping = {} - for (op_name, op_type) in list(op_list): + for op_name, op_type in list(op_list): op_mapping[op_name] = (op_name, op_type) current_best_tune_cfg = self._tune_cfg_converter(self.cur_best_tuning_cfg) - fp32_dump_content = self.adaptor.inspect_tensor(fp32_model, - self.calib_dataloader, op_name_lst, [1], inspect_type='activation', - save_to_disk=True, save_path="./nc_workspace/", - quantization_cfg=current_best_tune_cfg) - fp32_tensor_dict = fp32_dump_content['activation'][0] + fp32_dump_content = self.adaptor.inspect_tensor( + fp32_model, + self.calib_dataloader, + op_name_lst, + [1], + inspect_type="activation", + save_to_disk=True, + save_path="./nc_workspace/", + quantization_cfg=current_best_tune_cfg, + ) + fp32_tensor_dict = fp32_dump_content["activation"][0] best_qmodel = self.adaptor.quantize(current_best_tune_cfg, self.model, self.calib_dataloader, self.q_func) - quant_dump_content = self.adaptor.inspect_tensor(best_qmodel, - self.calib_dataloader, op_name_lst, [1], inspect_type='activation', - save_to_disk=True, save_path="./nc_workspace/", - quantization_cfg=current_best_tune_cfg) - dequantize_tensor_dict = quant_dump_content['activation'][0] + quant_dump_content = self.adaptor.inspect_tensor( + best_qmodel, + self.calib_dataloader, + op_name_lst, + [1], + inspect_type="activation", + save_to_disk=True, + save_path="./nc_workspace/", + quantization_cfg=current_best_tune_cfg, + ) + dequantize_tensor_dict = quant_dump_content["activation"][0] ops_mse = { op: self._mse_metric_gap( - list(fp32_tensor_dict[op].values())[0], - list(dequantize_tensor_dict[op].values())[0]) for op in fp32_tensor_dict} + list(fp32_tensor_dict[op].values())[0], list(dequantize_tensor_dict[op].values())[0] + ) + for op in fp32_tensor_dict + } ordered_op_names = sorted(ops_mse.keys(), key=lambda key: ops_mse[key], reverse=self.higher_is_better) ordered_op_name_types = [op_mapping[name] for name in ordered_op_names] return ordered_op_name_types - def next_tune_cfg(self): """Generate and yield the next tuning config. @@ -149,28 +166,32 @@ def next_tune_cfg(self): tune_config (dict): A dict containing the tuning configuration for quantization. 
""" tuning_space = self.tuning_space - calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options + calib_sampling_size_lst = tuning_space.root_item.get_option_by_name("calib_sampling_size").options for calib_sampling_size in calib_sampling_size_lst: op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() # Optype-wise tuning early_stop_tuning = True stage1_cnt = 0 - int8_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else [] - int8_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] + int8_ops = quant_mode_wise_items["static"] if "static" in quant_mode_wise_items else [] + int8_ops += quant_mode_wise_items["dynamic"] if "dynamic" in quant_mode_wise_items else [] stage1_max = min(5, len(int8_ops)) # TODO set a more appropriate value - op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], - op_item_dtype_dict, initial_op_tuning_cfg) + op_wise_tuning_sampler = OpTypeWiseTuningSampler( + tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg + ) for op_tuning_cfg in op_wise_tuning_sampler: stage1_cnt += 1 if early_stop_tuning and stage1_cnt > stage1_max: logger.info("Early stopping the stage 1.") break - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size yield op_tuning_cfg # Fallback the ops supported both static and dynamic from static to dynamic - static_dynamic_items = [item for item in tuning_space.query_items_by_quant_mode('static') if - item in tuning_space.query_items_by_quant_mode('dynamic')] + static_dynamic_items = [ + item + for item in tuning_space.query_items_by_quant_mode("static") + if item in tuning_space.query_items_by_quant_mode("dynamic") + ] if static_dynamic_items: logger.info("Fallback all ops that support both dynamic and static to dynamic.") else: @@ -178,21 +199,22 @@ def next_tune_cfg(self): def dynamic_op_tuning_cfg_from_static(op_tuning_cfg: OpTuningConfig): new_op_tuning_cfg = deepcopy(op_tuning_cfg) - new_op_tuning_cfg.op_quant_mode = 'dynamic' + new_op_tuning_cfg.op_quant_mode = "dynamic" return new_op_tuning_cfg new_op_tuning_cfg = deepcopy(self.cur_best_tuning_cfg) for item in static_dynamic_items: new_op_tuning_cfg[item.name] = dynamic_op_tuning_cfg_from_static(new_op_tuning_cfg[item.name]) - new_op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + new_op_tuning_cfg["calib_sampling_size"] = calib_sampling_size yield new_op_tuning_cfg best_op_tuning_cfg_stage1 = deepcopy(self.cur_best_tuning_cfg) # Fallback to float point datatypes ('bf16' or 'fp32') - for target_dtype in ['bf16', 'fp32']: - fallback_items_lst = [item for item in int8_ops if - item in tuning_space.query_items_by_quant_mode(target_dtype)] + for target_dtype in ["bf16", "fp32"]: + fallback_items_lst = [ + item for item in int8_ops if item in tuning_space.query_items_by_quant_mode(target_dtype) + ] if fallback_items_lst: logger.info(f"Start to fallback op to {target_dtype} one by one.") # Replace it with sorted items list @@ -202,27 +224,37 @@ def dynamic_op_tuning_cfg_from_static(op_tuning_cfg: OpTuningConfig): self.ordered_ops = [op_name for (op_name, op_type) in ordered_op_name_types] op_dtypes = OrderedDict(zip(ordered_op_name_types, [target_dtype] * len(fallback_items_name_lst))) initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) - fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - 
initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=False) + fallback_sampler = FallbackTuningSampler( + tuning_space, + tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, + accumulate=False, + ) op_fallback_acc_impact = OrderedDict() for op_index, op_tuning_cfg in enumerate(fallback_sampler): - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size yield op_tuning_cfg acc, _ = self.last_tune_result op_fallback_acc_impact[fallback_items_name_lst[op_index]] = acc # Do accumulated fallback according to the order in the previous stage if len(op_fallback_acc_impact) > 0: - ordered_ops = sorted(op_fallback_acc_impact.keys(), - key=lambda key: op_fallback_acc_impact[key], - reverse=self.higher_is_better) + ordered_ops = sorted( + op_fallback_acc_impact.keys(), + key=lambda key: op_fallback_acc_impact[key], + reverse=self.higher_is_better, + ) op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(fallback_items_name_lst))) logger.info(f"Start to accumulate fallback to {target_dtype}.") initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) - fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=True) + fallback_sampler = FallbackTuningSampler( + tuning_space, + tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, + accumulate=True, + ) for op_tuning_cfg in fallback_sampler: - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size yield op_tuning_cfg diff --git a/neural_compressor/strategy/mse_v2.py b/neural_compressor/strategy/mse_v2.py index bc6ba4e386a..b146be975ad 100644 --- a/neural_compressor/strategy/mse_v2.py +++ b/neural_compressor/strategy/mse_v2.py @@ -16,16 +16,19 @@ # limitations under the License. """The MSE_V2 tuning strategy.""" import copy -import numpy as np from collections import OrderedDict -from typing import Dict, Any, List -from .strategy import strategy_registry, TuneStrategy -from ..utils import logger from time import time +from typing import Any, Dict, List +import numpy as np + +from ..utils import logger +from .strategy import TuneStrategy, strategy_registry +from .utils.constant import PRECISION_LIST from .utils.tuning_sampler import OpTypeWiseTuningSampler from .utils.tuning_structs import OpTuningConfig -from .utils.constant import PRECISION_LIST + + @strategy_registry class MSE_V2TuneStrategy(TuneStrategy): """The `mse_v2` tuning strategy. @@ -37,7 +40,7 @@ class MSE_V2TuneStrategy(TuneStrategy): def _tuning_record_msg(self, records): records_str_lst = [[str(e) for e in record] for record in records] - record_msg = '\n'.join(','.join(record) for record in records_str_lst) + record_msg = "\n".join(",".join(record) for record in records_str_lst) return record_msg def next_tune_cfg(self): @@ -55,30 +58,35 @@ def next_tune_cfg(self): tune_config (dict): A dict containing the tuning configuration for quantization. 
""" from copy import deepcopy + tuning_space = self.tuning_space initial_op_tuning_cfg = {} - calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options + calib_sampling_size_lst = tuning_space.root_item.get_option_by_name("calib_sampling_size").options for calib_sampling_size in calib_sampling_size_lst: op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() - quant_ops = quant_mode_wise_items.get('static', []) - quant_ops += quant_mode_wise_items.get('dynamic', []) + quant_ops = quant_mode_wise_items.get("static", []) + quant_ops += quant_mode_wise_items.get("dynamic", []) # Optype-wise tuning early_stop_tuning = True stage1_cnt = 0 stage1_max = 2 # TODO set a more appropriate value - op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], - op_item_dtype_dict, initial_op_tuning_cfg) + op_wise_tuning_sampler = OpTypeWiseTuningSampler( + tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg + ) for op_tuning_cfg in op_wise_tuning_sampler: stage1_cnt += 1 if early_stop_tuning and stage1_cnt > stage1_max: logger.info("Early stopping the stage 1.") break - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size yield op_tuning_cfg # Fallback the ops supported both static and dynamic from static to dynamic - static_dynamic_items = [item for item in tuning_space.query_items_by_quant_mode('static') if - item in tuning_space.query_items_by_quant_mode('dynamic')] + static_dynamic_items = [ + item + for item in tuning_space.query_items_by_quant_mode("static") + if item in tuning_space.query_items_by_quant_mode("dynamic") + ] if static_dynamic_items: logger.info("Fallback all ops that support both dynamic and static to dynamic.") else: @@ -86,13 +94,13 @@ def next_tune_cfg(self): def dynamic_op_tuning_cfg_from_static(op_tuning_cfg: OpTuningConfig): new_op_tuning_cfg = deepcopy(op_tuning_cfg) - new_op_tuning_cfg.op_quant_mode = 'dynamic' + new_op_tuning_cfg.op_quant_mode = "dynamic" return new_op_tuning_cfg new_op_tuning_cfg = deepcopy(self.cur_best_tuning_cfg) for item in static_dynamic_items: new_op_tuning_cfg[item.name] = dynamic_op_tuning_cfg_from_static(new_op_tuning_cfg[item.name]) - new_op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + new_op_tuning_cfg["calib_sampling_size"] = calib_sampling_size yield new_op_tuning_cfg # Fallback one by one by op sensitivity(mse) @@ -107,12 +115,13 @@ def dynamic_op_tuning_cfg_from_static(op_tuning_cfg: OpTuningConfig): self.output_op_names = self.adaptor.get_output_op_names(self.last_qmodel) confidence_batches = 2 strategy_kwargs = self.config.tuning_criterion.strategy_kwargs - if strategy_kwargs and strategy_kwargs.get('confidence_batches', None): - confidence_batches = strategy_kwargs.get('confidence_batches', None) + if strategy_kwargs and strategy_kwargs.get("confidence_batches", None): + confidence_batches = strategy_kwargs.get("confidence_batches", None) tune_cfg_backup = deepcopy(tune_cfg) - quant_ops_in_tune_cfg = self._collect_ops_by_quant_mode(tune_cfg, 'dynamic') + \ - self._collect_ops_by_quant_mode(tune_cfg, 'static') + quant_ops_in_tune_cfg = self._collect_ops_by_quant_mode( + tune_cfg, "dynamic" + ) + self._collect_ops_by_quant_mode(tune_cfg, "static") op_quant_cfgs = {op_info: tune_cfg_backup[op_info] for op_info in quant_ops_in_tune_cfg} fallback_records = [] self.re_quant = True @@ -128,24 +137,27 @@ def dynamic_op_tuning_cfg_from_static(op_tuning_cfg: 
OpTuningConfig): while not self.objectives.compare(self.last_tune_result, self.baseline): # Record the time of calculating the sensitivity start = time() - ops_lst = self.adaptor.calculate_op_sensitivity(self.model, - self.calib_dataloader, - deepcopy(self._tune_cfg_converter(tune_cfg)), - self.output_op_names, - confidence_batches, - fallback=True) + ops_lst = self.adaptor.calculate_op_sensitivity( + self.model, + self.calib_dataloader, + deepcopy(self._tune_cfg_converter(tune_cfg)), + self.output_op_names, + confidence_batches, + fallback=True, + ) if not ops_lst: - logger.debug(f" Try to fallback to next data type.") + logger.debug(" Try to fallback to next data type.") break logger.debug(f"*** The op sensitivity analysis took {time() - start:.2f}s.") select_op_info = ops_lst[0] logger.debug(f"*** ops_lst({len(ops_lst)}): {ops_lst} ") - logger.info(f"*** The op {select_op_info} have the highest sensitivity in the current state, \ - fallback it to {target_dtype}.") - tune_cfg[select_op_info] = OpTuningConfig(select_op_info[0], - select_op_info[1], - target_dtype, - self.tuning_space) + logger.info( + f"*** The op {select_op_info} have the highest sensitivity in the current state, \ + fallback it to {target_dtype}." + ) + tune_cfg[select_op_info] = OpTuningConfig( + select_op_info[0], select_op_info[1], target_dtype, self.tuning_space + ) # Record the fallback history if not fallback_records: fallback_records = [[select_op_info]] @@ -154,28 +166,30 @@ def dynamic_op_tuning_cfg_from_static(op_tuning_cfg: OpTuningConfig): logger.debug(f"*** The fallback ops record: \n{self._tuning_record_msg(fallback_records)}") yield tune_cfg - logger.info(f"*** The accuracy meeting the accuracy requirements, stop fallback ops.") + logger.info("*** The accuracy meeting the accuracy requirements, stop fallback ops.") while self.objectives.compare(self.last_tune_result, self.baseline): if len(fallback_records) == 0 or len(fallback_records[-1]) <= 1: - logger.info(f"*** Stop re-quant due to no int8 op or only 1 int8 op left.") + logger.info("*** Stop re-quant due to no int8 op or only 1 int8 op left.") break - logger.info(f"*** Start to re-quant the fallback op in the previous stage.") + logger.info("*** Start to re-quant the fallback op in the previous stage.") # Track the current fallback ops tmp_fallback_ops = fallback_records[-1] if fallback_records else [] start = time() - ops_lst = self.adaptor.calculate_op_sensitivity(self.model, - self.calib_dataloader, - deepcopy(self._tune_cfg_converter(tune_cfg)), - self.output_op_names, - confidence_batches, - fallback=False, - requantize_cfgs=requantize_cfg['op']) + ops_lst = self.adaptor.calculate_op_sensitivity( + self.model, + self.calib_dataloader, + deepcopy(self._tune_cfg_converter(tune_cfg)), + self.output_op_names, + confidence_batches, + fallback=False, + requantize_cfgs=requantize_cfg["op"], + ) logger.debug(f"*** The op sensitivity analysis took {time() - start:.2f}s.") if not ops_lst: logger.warning("No op to be requantized") break for select_op_info in ops_lst: - #assert select_op_info in tmp_fallback_ops, f"{select_op_info} not in fallback list." + # assert select_op_info in tmp_fallback_ops, f"{select_op_info} not in fallback list." 
if select_op_info not in tmp_fallback_ops: logger.debug(f"{select_op_info} not in fallback list.") continue @@ -183,8 +197,10 @@ def dynamic_op_tuning_cfg_from_static(op_tuning_cfg: OpTuningConfig): new_fallback_ops = deepcopy(tmp_fallback_ops) new_fallback_ops.remove(select_op_info) if new_fallback_ops not in fallback_records: - logger.info(f"*** The op {select_op_info} have the lowest sensitivity in the current state, \ - re-quantize it.") + logger.info( + f"*** The op {select_op_info} have the lowest sensitivity in the current state, \ + re-quantize it." + ) tune_cfg[select_op_info] = op_quant_cfgs[select_op_info] fallback_records.append(new_fallback_ops) logger.debug(f"*** The fallback ops record: \n{self._tuning_record_msg(fallback_records)}") @@ -194,4 +210,4 @@ def dynamic_op_tuning_cfg_from_static(op_tuning_cfg: OpTuningConfig): logger.debug(f"*** Skip re-quant {select_op_info}, due the config has been evaluated.") continue self.re_quant = False - logger.info(f"*** The accuracy not meeting the accuracy requirements, stop re-quantize ops.") \ No newline at end of file + logger.info("*** The accuracy not meeting the accuracy requirements, stop re-quantize ops.") diff --git a/neural_compressor/strategy/random.py b/neural_compressor/strategy/random.py index e876050cc67..168e330ed91 100644 --- a/neural_compressor/strategy/random.py +++ b/neural_compressor/strategy/random.py @@ -15,13 +15,15 @@ # See the License for the specific language governing permissions and # limitations under the License. """The random tuning strategy.""" -import numpy as np -from .strategy import strategy_registry, TuneStrategy from collections import OrderedDict -from .utils.tuning_sampler import OpWiseTuningSampler, FallbackTuningSampler -from .utils.tuning_structs import OpTuningConfig +import numpy as np + from ..utils import logger +from .strategy import TuneStrategy, strategy_registry +from .utils.tuning_sampler import FallbackTuningSampler, OpWiseTuningSampler +from .utils.tuning_structs import OpTuningConfig + @strategy_registry class RandomTuneStrategy(TuneStrategy): @@ -39,17 +41,16 @@ def next_tune_cfg(self): """ tuning_space = self.tuning_space op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() - op_wise_tuning_sampler = OpWiseTuningSampler(tuning_space, [], [], - op_item_dtype_dict, initial_op_tuning_cfg) + op_wise_tuning_sampler = OpWiseTuningSampler(tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg) op_tuning_cfg_lst = list(op_wise_tuning_sampler) op_tuning_cfg_cnt = len(op_tuning_cfg_lst) - calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options + calib_sampling_size_lst = tuning_space.root_item.get_option_by_name("calib_sampling_size").options calib_sampling_size_cnt = len(calib_sampling_size_lst) while True: calib_index = np.random.choice(calib_sampling_size_cnt) calib_sampling_size = calib_sampling_size_lst[calib_index] op_tuning_cfg_index = np.random.choice(op_tuning_cfg_cnt) op_tuning_cfg = op_tuning_cfg_lst[op_tuning_cfg_index] - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + op_tuning_cfg["calib_sampling_size"] = calib_sampling_size yield op_tuning_cfg return diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index 97d73285603..46f2caf87a1 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -33,28 +33,33 @@ import yaml from neural_compressor.adaptor.tensorflow import TensorFlowAdaptor -from 
.utils.constant import FALLBACK_RECIPES_SET -from .utils.tuning_space import TuningSpace -from .utils.tuning_structs import OpTuningConfig + from ..adaptor import FRAMEWORKS -from ..algorithm import AlgorithmScheduler, ALGORITHMS +from ..algorithm import ALGORITHMS, AlgorithmScheduler from ..config import MixedPrecisionConfig, options from ..objective import MultiObjective from ..utils import logger from ..utils.create_obj_from_config import create_eval_func -from ..utils.utility import Statistics, fault_tolerant_file, GLOBAL_STATE, MODE, LazyImport, \ - DotDict, print_table, get_weights_details, dump_table, print_op_list, equal_dicts +from ..utils.utility import ( + GLOBAL_STATE, + MODE, + DotDict, + LazyImport, + Statistics, + dump_table, + equal_dicts, + fault_tolerant_file, + get_weights_details, + print_op_list, + print_table, +) from ..utils.weights_details import WeightsDetails from ..version import __version__ - -from ..algorithm import AlgorithmScheduler, ALGORITHMS - +from .utils.constant import FALLBACK_RECIPES_SET +from .utils.tuning_sampler import tuning_sampler_dict from .utils.tuning_space import TuningSpace from .utils.tuning_structs import OpTuningConfig -from .utils.constant import FALLBACK_RECIPES_SET from .utils.utility import build_slave_faker_model, quant_options -from .utils.tuning_sampler import tuning_sampler_dict - STRATEGIES = {} @@ -69,13 +74,14 @@ def strategy_registry(cls): cls: The class of register. """ assert cls.__name__.endswith( - 'TuneStrategy' - ), "The name of subclass of TuneStrategy should end with \'TuneStrategy\' substring." - if cls.__name__[:-len('TuneStrategy')].lower() in STRATEGIES: # pragma: no cover - raise ValueError('Cannot have two strategies with the same name') - STRATEGIES[cls.__name__[:-len('TuneStrategy')].lower()] = cls + "TuneStrategy" + ), "The name of subclass of TuneStrategy should end with 'TuneStrategy' substring." + if cls.__name__[: -len("TuneStrategy")].lower() in STRATEGIES: # pragma: no cover + raise ValueError("Cannot have two strategies with the same name") + STRATEGIES[cls.__name__[: -len("TuneStrategy")].lower()] = cls return cls + class TuneStrategyMeta(type): """Tuning strategy metaclass.""" @@ -106,20 +112,23 @@ def __call__(cls, *args, pre_strategy=None, **kwargs): new_strategy.diagnosis_done = pre_strategy.diagnosis_done return new_strategy + @strategy_registry class TuneStrategy(metaclass=TuneStrategyMeta): """Basic class for tuning strategy.""" - def __init__(self, - model, - conf, - q_dataloader=None, - q_func=None, - eval_func=None, - eval_dataloader=None, - eval_metric=None, - resume=None, - q_hooks=None): + def __init__( + self, + model, + conf, + q_dataloader=None, + q_func=None, + eval_func=None, + eval_dataloader=None, + eval_metric=None, + resume=None, + q_hooks=None, + ): """Init the TuneStrategy. 
Args: @@ -140,8 +149,8 @@ def __init__(self, self.conf = conf self.config = self._initialize_config(conf) self._set_quant_type(self.config) - self.history_path = self._create_path(options.workspace, './history.snapshot') - self.deploy_path = self._create_path(options.workspace, 'deploy.yaml') + self.history_path = self._create_path(options.workspace, "./history.snapshot") + self.deploy_path = self._create_path(options.workspace, "deploy.yaml") self.calib_dataloader = q_dataloader self.eval_func = eval_func self.eval_dataloader = eval_dataloader @@ -206,7 +215,8 @@ def __init__(self, self.calib_sampling_size_lst, self.calib_iter = self._get_calib_iter() # A algo scheduler for algos that were applied before tuning, such as sq. self._pre_tuning_algo_scheduler = None - if self._resume is not None: self.setup_resume(resume) + if self._resume is not None: + self.setup_resume(resume) @property def adaptor(self): @@ -305,27 +315,26 @@ def algo_scheduler(self, value): value: The new value for the algo_scheduler. """ self._algo_scheduler = value - + @property def pre_tuning_algo_scheduler(self): """Gets the pre-tuning algo scheduler.""" return self._pre_tuning_algo_scheduler - + @pre_tuning_algo_scheduler.setter def pre_tuning_algo_scheduler(self, algo_scheduler): """Sets the pre-tuning algo scheduler. - + Args: algo_scheduler: the pre-tuning algo scheduler """ self._pre_tuning_algo_scheduler = algo_scheduler - - + def _set_quant_type(self, config): - if config.approach == 'post_training_weight_only': + if config.approach == "post_training_weight_only": quant_options.quant_type = 3 # TODO for future usage(other quantization type) - + def _initial_pre_tuning_algo_scheduler(self): algo_scheduler = AlgorithmScheduler(None) # reuse the calibration iteration @@ -347,9 +356,8 @@ def _setup_pre_tuning_algo_scheduler(self): # set param for pre_tuning_algo_scheduler self.set_param_for_pre_tuning_algos(self._pre_tuning_algo_scheduler, self.config, self.model) # execute the pre_tuning_algo_scheduler - self.model = self._pre_tuning_algo_scheduler('pre_quantization') - - + self.model = self._pre_tuning_algo_scheduler("pre_quantization") + def _initialize_algo_scheduler(self): algo_scheduler = AlgorithmScheduler(self.config.recipes) # reuse the calibration iteration @@ -357,7 +365,7 @@ def _initialize_algo_scheduler(self): algo_scheduler.origin_model = self.model algo_scheduler.adaptor = self.adaptor return algo_scheduler - + def _initial_adaptor(self): framework, framework_specific_info = self._set_framework_info(self.calib_dataloader, self.q_func) self.adaptor = self.adaptor or FRAMEWORKS[framework](framework_specific_info) @@ -390,23 +398,31 @@ def _check_tuning_status(self): # got eval dataloader + eval metric => eval func if self.eval_dataloader and self.eval_metric: self._not_tuning = False - logger.info("Create evaluation function according to evaluation dataloader and metric\ - and Execute the tuning process.") + logger.info( + "Create evaluation function according to evaluation dataloader and metric\ + and Execute the tuning process." + ) return else: # got eval dataloader but not eval metric - if self.eval_dataloader: # pragma: no cover - assert self.eval_metric, "Detected evaluation dataloader but no evaluation metric, " \ - "Please provide both to perform tuning process or neither for the default quantization." 
+ if self.eval_dataloader: # pragma: no cover + assert self.eval_metric, ( + "Detected evaluation dataloader but no evaluation metric, " + "Please provide both to perform tuning process or neither for the default quantization." + ) # got eval metric but not eval dataloader - if self.eval_metric: # pragma: no cover - assert self.eval_dataloader, "Detected evaluation metric but no evaluation dataloader, "\ + if self.eval_metric: # pragma: no cover + assert self.eval_dataloader, ( + "Detected evaluation metric but no evaluation dataloader, " "Please provide both to perform tuning process or neither for the default quantization." + ) # not tuning if self._not_tuning: - logger.info("Quantize the model with default configuration without evaluating the model.\ + logger.info( + "Quantize the model with default configuration without evaluating the model.\ To perform the tuning process, please either provide an eval_func or provide an\ - eval_dataloader an eval_metric.") + eval_dataloader an eval_metric." + ) def _initialize_config(self, conf): """Init the tuning config based on user conf. @@ -418,7 +434,7 @@ def _initialize_config(self, conf): Tuning config """ config = conf.quantization - config.diagnosis = getattr(config, 'diagnosis', None) + config.diagnosis = getattr(config, "diagnosis", None) return config @abstractmethod @@ -443,16 +459,22 @@ def traverse(self): # try to tune on multiple nodes based on the rank size. try: from mpi4py import MPI + if MPI.COMM_WORLD.Get_size() > 2: logger.info("Use distributed tuning on {} nodes".format(MPI.COMM_WORLD.Get_size())) elif MPI.COMM_WORLD.Get_size() == 2: - logger.info("Use distributed tuning on {} nodes, will be fallback to normal tuning."\ - .format(MPI.COMM_WORLD.Get_size())) - MPI_INSTALLED=True + logger.info( + "Use distributed tuning on {} nodes, will be fallback to normal tuning.".format( + MPI.COMM_WORLD.Get_size() + ) + ) + MPI_INSTALLED = True except (ImportError, AttributeError) as e: - logger.warning("[Strategy] Please install `mpi4py` correctly if using distributed tuning;" + \ - " otherwise, ignore this warning.") - MPI_INSTALLED=False + logger.warning( + "[Strategy] Please install `mpi4py` correctly if using distributed tuning;" + + " otherwise, ignore this warning." 
+ ) + MPI_INSTALLED = False if MPI_INSTALLED: if MPI.COMM_WORLD.Get_size() > 2: return self.distributed_traverse() @@ -465,25 +487,26 @@ def traverse(self): self.trials_count += 1 tune_cfg = self._tune_cfg_converter(op_tuning_cfg) tuning_history = self._find_tuning_history(tune_cfg) - if tuning_history and self.trials_count < self.config.tuning_criterion.max_trials: # pragma: no cover - self.last_tune_result = tuning_history['last_tune_result'] - self.best_tune_result = tuning_history['best_tune_result'] + if tuning_history and self.trials_count < self.config.tuning_criterion.max_trials: # pragma: no cover + self.last_tune_result = tuning_history["last_tune_result"] + self.best_tune_result = tuning_history["best_tune_result"] logger.warn("Find evaluated tuning config, skip.") continue self._remove_redundant_qmodel() self.tuning_times += 1 # set the parameter for pre quantization algos and run self.set_param_for_pre_quantization_algos(self.algo_scheduler, tune_cfg, self.model) - self.model = self.algo_scheduler('pre_quantization') # pylint: disable=E1102 + self.model = self.algo_scheduler("pre_quantization") # pylint: disable=E1102 logger.debug("Dump current tuning configuration:") logger.debug(tune_cfg) # quantize q_model = self.adaptor.quantize(copy.deepcopy(tune_cfg), self.model, self.calib_dataloader, self.q_func) assert self.adaptor.pre_optimized_model # set the parameter for post quantization algos and run - self.set_param_for_post_quantization_algos(self.algo_scheduler, tune_cfg,\ - self.adaptor.pre_optimized_model, q_model) - self.last_qmodel = self.algo_scheduler('post_quantization') # pylint: disable=E1102 + self.set_param_for_post_quantization_algos( + self.algo_scheduler, tune_cfg, self.adaptor.pre_optimized_model, q_model + ) + self.last_qmodel = self.algo_scheduler("post_quantization") # pylint: disable=E1102 self.last_tune_cfg = copy.deepcopy(tune_cfg) # start diagnose, if needed if self._need_do_diagnosis(): @@ -503,9 +526,7 @@ def traverse(self): # record the tuning history saved_tune_cfg = copy.deepcopy(tune_cfg) saved_last_tune_result = copy.deepcopy(self.last_tune_result) - self._add_tuning_history(saved_tune_cfg, - saved_last_tune_result, - q_config=q_model.q_config) + self._add_tuning_history(saved_tune_cfg, saved_last_tune_result, q_config=q_model.q_config) self.tune_result_record.append(copy.deepcopy(self.last_tune_result)) self.tune_cfg = tune_cfg now_time = time() @@ -527,18 +548,21 @@ def traverse(self): # recover the best quantized model from tuning config self._recover_best_qmodel_from_tuning_cfg() if self._need_do_diagnosis(): - logger.debug(f'*** Start to do diagnosis (inspect tensor).') + logger.debug("*** Start to do diagnosis (inspect tensor).") self._diagnosis(tune_cfg) - if self.use_multi_objective and len(self.tune_result_record) > 1 and \ - self.best_tune_result is not None: # pragma: no cover - best_trail, best_result = self.objectives.best_result(self.tune_result_record, - copy.deepcopy(self.baseline)) + if ( + self.use_multi_objective and len(self.tune_result_record) > 1 and self.best_tune_result is not None + ): # pragma: no cover + best_trail, best_result = self.objectives.best_result( + self.tune_result_record, copy.deepcopy(self.baseline) + ) if best_result != self.best_tune_result: from neural_compressor.utils.utility import recover - self.best_qmodel = recover(self.model.model, - os.path.join(options.workspace, 'history.snapshot'), - best_trail) - logger.debug(f"*** Update the best qmodel by recovering from history.") + + self.best_qmodel = 
recover( + self.model.model, os.path.join(options.workspace, "history.snapshot"), best_trail + ) + logger.debug("*** Update the best qmodel by recovering from history.") self.best_tune_result = best_result self._dump_tuning_process_statistics() break @@ -546,15 +570,16 @@ def traverse(self): def _initialize_recipe(self): """Divide the recipe into two categories tuning/not tuning.""" - from .utils.utility import get_adaptor_name from ..utils.constant import RECIPES as fwk_recipes from ..utils.constant import RECIPES_PRIORITY as fwk_recipes_priority + from .utils.utility import get_adaptor_name + # get all recipes supported by adaptor. adaptor_name = get_adaptor_name(self.adaptor) - adaptor_recipes = fwk_recipes['common'] + adaptor_recipes = fwk_recipes["common"] # TODO WA due to smooth quant only supported by ort/pt currently. - if not adaptor_name not in ['onnx', 'pytorch', 'tensorflow']: - adaptor_recipes.pop('smooth_quant', None) + if not adaptor_name not in ["onnx", "pytorch", "tensorflow"]: + adaptor_recipes.pop("smooth_quant", None) for adaptor_name_key, adaptor_recipes_val in fwk_recipes.items(): if adaptor_name_key.startswith(adaptor_name): adaptor_recipes.update(adaptor_recipes_val) @@ -572,7 +597,8 @@ def _initialize_recipe(self): for recipe_name in fwk_recipes_priority: if recipe_name in adaptor_recipes and recipe_name not in self._not_tuning_recipes_values: # TODO skip tuning smooth_quant first - if recipe_name == 'smooth_quant': continue + if recipe_name == "smooth_quant": + continue self._tuning_recipes[recipe_name] = adaptor_recipes[recipe_name] self._tuning_recipes_default_values[recipe_name] = adaptor_recipes[recipe_name][0] logger.info(f"{len(self._not_tuning_recipes_values)} recipes specified by user.") @@ -615,14 +641,16 @@ def master_worker_handle(self, comm): size = comm.Get_size() for process_id in range(1, min(len(self.tune_cfg_lst) + 1, size)): tune_cfg_id = process_id - 1 - logger.info("[Rank {}]master sending tune cfg: {} to rank {}".format(comm.Get_rank(), \ - tune_cfg_id, process_id)) + logger.info( + "[Rank {}]master sending tune cfg: {} to rank {}".format(comm.Get_rank(), tune_cfg_id, process_id) + ) comm.send( - obj=tune_cfg_id, # just send the tune cfg id is enough - dest=process_id, # rank 0 send to rank 1, 2, ... - tag=tune_cfg_id # tag, the index of tune cfg 0,1,2,3 + obj=tune_cfg_id, # just send the tune cfg id is enough + dest=process_id, # rank 0 send to rank 1, 2, ... 
+ tag=tune_cfg_id, # tag, the index of tune cfg 0,1,2,3 ) import time as ttime + # WA for UT ttime.sleep(0.5) @@ -640,19 +668,16 @@ def master_worker_handle(self, comm): # stuck here to receive any result while True: - eval_res = comm.recv( - source=MPI.ANY_SOURCE, - tag=MPI.ANY_TAG, - status=status # get MPI status object - ) + eval_res = comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status) # get MPI status object self.num_acks += 1 # sender rank sender_rank = status.Get_source() # the task id that is finished tag = status.Get_tag() - logger.info("[Rank {}]master receiving eval result: {} from rank {}".format(comm.Get_rank(), \ - eval_res, sender_rank)) + logger.info( + "[Rank {}]master receiving eval result: {} from rank {}".format(comm.Get_rank(), eval_res, sender_rank) + ) # record eval_results for context coordination of stage 3 self.last_tune_result = eval_res @@ -666,7 +691,7 @@ def master_worker_handle(self, comm): self.already_ack_id_lst.add(tag) # if meet accuracy requirement, then update minimum id that met requirement - if(self.meet_acc_req(eval_res)): + if self.meet_acc_req(eval_res): logger.info("[Rank {}]master has one tuning cfg meet acc: {}".format(comm.Get_rank(), tag)) self.met_flag = True self.requirements_met_min_cfg_id = min(self.requirements_met_min_cfg_id, tag) @@ -675,21 +700,30 @@ def master_worker_handle(self, comm): # because a tune cfg (not acked yet) with lower id can have better acc for i in range(self.requirements_met_min_cfg_id): if i not in self.already_ack_id_lst: - logger.info("[Rank {}]master has one tuning cfg meet acc: {} but not collect all acks before"\ - .format(comm.Get_rank(), tag)) + logger.info( + "[Rank {}]master has one tuning cfg meet acc: {} but not collect all acks before".format( + comm.Get_rank(), tag + ) + ) # not completely collected yet! self.met_flag = False break if self.met_flag: # found the best tune cfg! 
- logger.info("[Rank {}]master has one tuning cfg meet acc: {} and also collect all acks before"\ - .format(comm.Get_rank(), tag)) + logger.info( + "[Rank {}]master has one tuning cfg meet acc: {} and also collect all acks before".format( + comm.Get_rank(), tag + ) + ) self.best_tune_cfg_id = self.requirements_met_min_cfg_id else: # get the current best acc but not meet requirements - logger.info("[Rank {}]master gets the current best acc: {} but not meet requirements"\ - .format(comm.Get_rank(), tag)) + logger.info( + "[Rank {}]master gets the current best acc: {} but not meet requirements".format( + comm.Get_rank(), tag + ) + ) self.cur_best_acc, self.cur_best_tuning_cfg = self.update_best_op_tuning_cfg(self.tune_cfg_lst[tag]) if self.best_tune_cfg_id is not None: @@ -705,13 +739,17 @@ def master_worker_handle(self, comm): # elif time.time() - self.overall_time_start > self.config.tuning_criterion.timeout: # self.max_time_flag = True elif cur_cfg_id < len(self.tune_cfg_lst): - logger.info("[Rank {}]master sends new tuning cfg {} to rank: {}".format(comm.Get_rank(), \ - cur_cfg_id, sender_rank)) + logger.info( + "[Rank {}]master sends new tuning cfg {} to rank: {}".format( + comm.Get_rank(), cur_cfg_id, sender_rank + ) + ) comm.send(obj=cur_cfg_id, dest=sender_rank, tag=cur_cfg_id) cur_cfg_id += 1 else: - logger.info("[Rank {}]All tune configs are sent, no more sending, just collecting..."\ - .format(comm.Get_rank())) + logger.info( + "[Rank {}]All tune configs are sent, no more sending, just collecting...".format(comm.Get_rank()) + ) # all collected (ack should collected == acks) if len(self.tune_cfg_lst) == self.num_acks: @@ -719,7 +757,7 @@ def master_worker_handle(self, comm): # return self.requirements_met_min_cfg_id if it has been updated if self.requirements_met_min_cfg_id == sys.maxsize: logger.info("[Rank {}]Not found any tune cfg that meet requirements".format(comm.Get_rank())) - self.cur_best_tuning_cfg = self.tune_cfg_lst[0] # TODO select cur_best_tuning_cfg + self.cur_best_tuning_cfg = self.tune_cfg_lst[0] # TODO select cur_best_tuning_cfg else: logger.info("[Rank {}]Find best tune cfg id".format(comm.Get_rank())) logger.info(self.requirements_met_min_cfg_id) @@ -733,16 +771,15 @@ def master_worker_handle(self, comm): for process_id in range(1, size): logger.info("[Rank {}]master sends END signal to rank: {}".format(comm.Get_rank(), process_id)) comm.send( - obj="MET" if self.met_flag else "NOT MET", # send whether met criterion in the current stage - dest=process_id, # rank 0 send to rank 1, 2, ... - tag=len(self.tune_cfg_lst) + obj="MET" if self.met_flag else "NOT MET", # send whether met criterion in the current stage + dest=process_id, # rank 0 send to rank 1, 2, ... + tag=len(self.tune_cfg_lst), ) if self.best_tune_cfg_id is not None: self.best_qmodel = self.adaptor.quantize( - copy.deepcopy(self.tune_cfg_lst[self.best_tune_cfg_id]), self.model, self.calib_dataloader, \ - self.q_func) - + copy.deepcopy(self.tune_cfg_lst[self.best_tune_cfg_id]), self.model, self.calib_dataloader, self.q_func + ) def slave_worker_handle(self, comm): """Slave worker handles the task processing. 
@@ -756,15 +793,14 @@ def slave_worker_handle(self, comm): MPI = LazyImport("mpi4py.MPI") status = MPI.Status() while True: - task = comm.recv( - source=MPI.ANY_SOURCE, - tag=MPI.ANY_TAG, - status=status # sender (master) - ) + task = comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status) # sender (master) cfg_idx = status.Get_tag() if status.Get_tag() >= len(self.tune_cfg_lst): - logger.info("[Rank {}]slave {} receiving END signal in the current stage".format(comm.Get_rank(),\ - comm.Get_rank())) + logger.info( + "[Rank {}]slave {} receiving END signal in the current stage".format( + comm.Get_rank(), comm.Get_rank() + ) + ) if task == "MET": logger.info("[Rank {}]met criterion in this stage!".format(comm.Get_rank())) self.met_flag = True @@ -773,14 +809,15 @@ def slave_worker_handle(self, comm): # set the parameter for pre quantization algos and run self.set_param_for_pre_quantization_algos(self.algo_scheduler, tune_cfg, self.model) - self.model = self.algo_scheduler('pre_quantization') # pylint: disable=E1102 + self.model = self.algo_scheduler("pre_quantization") # pylint: disable=E1102 # quantize q_model = self.adaptor.quantize(copy.deepcopy(tune_cfg), self.model, self.calib_dataloader, self.q_func) assert self.adaptor.pre_optimized_model # set the parameter for post quantization algos and run - self.set_param_for_post_quantization_algos(self.algo_scheduler, tune_cfg, self.adaptor.pre_optimized_model, - q_model) - self.last_qmodel = self.algo_scheduler('post_quantization') # pylint: disable=E1102 + self.set_param_for_post_quantization_algos( + self.algo_scheduler, tune_cfg, self.adaptor.pre_optimized_model, q_model + ) + self.last_qmodel = self.algo_scheduler("post_quantization") # pylint: disable=E1102 self.last_tune_cfg = copy.deepcopy(tune_cfg) # Remove the reference to model self.algo_scheduler.reset_exec_algorithms() @@ -790,11 +827,7 @@ def slave_worker_handle(self, comm): # send back the tuning statistics logger.debug("[Rank {}]Slave sends back the tuning statistics".format(comm.Get_rank())) logger.debug(self.last_tune_result) - comm.send( - obj=self.last_tune_result, - dest=0, # rank 0 send to rank 1, 2, ... - tag=cfg_idx - ) + comm.send(obj=self.last_tune_result, dest=0, tag=cfg_idx) # rank 0 send to rank 1, 2, ... def distributed_traverse(self): """Distributed traverse the tuning space. 
@@ -828,8 +861,11 @@ def distributed_traverse(self): self.master_worker_handle(comm) else: self.slave_worker_handle(comm) - logger.debug("# if self.met_flag or self.max_trial_flag or self.max_time_flag: {}" \ - .format(self.met_flag or self.max_trial_flag or self.max_time_flag)) + logger.debug( + "# if self.met_flag or self.max_trial_flag or self.max_time_flag: {}".format( + self.met_flag or self.max_trial_flag or self.max_time_flag + ) + ) if self.met_flag or self.max_trial_flag or self.max_time_flag: break @@ -840,20 +876,21 @@ def distributed_traverse(self): def _fallback_ops(self, tune_cfg, recipe_op_lst, tuning_space): """Fallback ops in recipe op list.""" for op_name_type in recipe_op_lst: - tune_cfg.update({op_name_type: OpTuningConfig(op_name_type[0], \ - op_name_type[1],'fp32', tuning_space)}) + tune_cfg.update({op_name_type: OpTuningConfig(op_name_type[0], op_name_type[1], "fp32", tuning_space)}) return tune_cfg def apply_all_tuning_recipes(self, tune_cfg): """Apply all tunable recipes with their value.""" - tune_cfg['recipe_cfgs'] = tune_cfg.get('recipe_cfgs', {}) + tune_cfg["recipe_cfgs"] = tune_cfg.get("recipe_cfgs", {}) for recipe_name, recipe_val_lst in self._tuning_recipes.items(): - tune_cfg['recipe_cfgs'][recipe_name] = recipe_val_lst[-1] - if recipe_name in FALLBACK_RECIPES_SET and 'recipes_ops' in self.capability and \ - len(self.capability['recipes_ops'].get(recipe_name, [])) > 0: + tune_cfg["recipe_cfgs"][recipe_name] = recipe_val_lst[-1] + if ( + recipe_name in FALLBACK_RECIPES_SET + and "recipes_ops" in self.capability + and len(self.capability["recipes_ops"].get(recipe_name, [])) > 0 + ): logger.info(f"Applied recipe {recipe_name}.") - tune_cfg = self._fallback_ops(tune_cfg, self.capability['recipes_ops'][recipe_name],\ - self.tuning_space) + tune_cfg = self._fallback_ops(tune_cfg, self.capability["recipes_ops"][recipe_name], self.tuning_space) return tune_cfg def apply_recipe_one_by_one(self, tune_cfg): @@ -863,21 +900,25 @@ def apply_recipe_one_by_one(self, tune_cfg): For recipes with multiple values. such as alpha of smooth quant, apply it one by one. """ for recipe_name, recipe_vals in self._tuning_recipes.items(): - if recipe_name in FALLBACK_RECIPES_SET and 'recipes_ops' in self.capability and \ - len(self.capability['recipes_ops'].get(recipe_name, [])) > 0: + if ( + recipe_name in FALLBACK_RECIPES_SET + and "recipes_ops" in self.capability + and len(self.capability["recipes_ops"].get(recipe_name, [])) > 0 + ): logger.info(f"Applied recipe {recipe_name} with value {recipe_vals[-1]}") - new_tune_cfg = self._fallback_ops(copy.deepcopy(tune_cfg), \ - self.capability['recipes_ops'][recipe_name], self.tuning_space) + new_tune_cfg = self._fallback_ops( + copy.deepcopy(tune_cfg), self.capability["recipes_ops"][recipe_name], self.tuning_space + ) yield new_tune_cfg - if recipe_name == "smooth_quant": # pragma: no cover - sq_args = {'smooth_quant': True} - if 'recipe_cfgs' not in new_tune_cfg: - new_tune_cfg['recipe_cfgs'] = sq_args + if recipe_name == "smooth_quant": # pragma: no cover + sq_args = {"smooth_quant": True} + if "recipe_cfgs" not in new_tune_cfg: + new_tune_cfg["recipe_cfgs"] = sq_args else: - new_tune_cfg['recipe_cfgs'].update(sq_args) - new_tune_cfg['recipe_cfgs'] = sq_args + new_tune_cfg["recipe_cfgs"].update(sq_args) + new_tune_cfg["recipe_cfgs"] = sq_args yield new_tune_cfg - + def set_param_for_pre_tuning_algos(self, algo_scheduler, config, fp32_model) -> None: """Set the parameter for pre-tuning algos, such as smooth quantization. 
@@ -890,25 +931,25 @@ def set_param_for_pre_tuning_algos(self, algo_scheduler, config, fp32_model) -> # TODO does the algo_scheduler need calib iteration? # algo_scheduler.calib_iter = tune_cfg['calib_iteration'] algo_scheduler.q_model = fp32_model - recipe_cfgs = getattr(config, 'recipes', None) + recipe_cfgs = getattr(config, "recipes", None) algo_scheduler.reset_exec_algorithms() - if recipe_cfgs and recipe_cfgs.get('smooth_quant', False): + if recipe_cfgs and recipe_cfgs.get("smooth_quant", False): # skip assign alpha to sq first. # set the alpha to 0.5 by default - smooth_quant_args = recipe_cfgs.get('smooth_quant_args', {'alpha': 0.5}) - sq_algo = ALGORITHMS()['smooth_quant'] + smooth_quant_args = recipe_cfgs.get("smooth_quant_args", {"alpha": 0.5}) + sq_algo = ALGORITHMS()["smooth_quant"] # if user pass a list, use the first value - user_alpha = smooth_quant_args.get('alpha', 0.5) + user_alpha = smooth_quant_args.get("alpha", 0.5) sq_algo.alpha = user_alpha[0] if isinstance(user_alpha, list) else user_alpha # TODO move all adaptor checks into algo implementation - if 'folding' not in smooth_quant_args: - smooth_quant_args['folding'] = True if self.framework in ['onnxruntime'] else False - logger.info("SmoothQuant args 'folding' is not set, it's {} now.".format(smooth_quant_args['folding'])) - if self.framework == 'pytorch_ipex': - smooth_quant_args['folding'] = None # will reset it to True if IPEX version < 2.1. - sq_algo.folding = smooth_quant_args['folding'] + if "folding" not in smooth_quant_args: + smooth_quant_args["folding"] = True if self.framework in ["onnxruntime"] else False + logger.info("SmoothQuant args 'folding' is not set, it's {} now.".format(smooth_quant_args["folding"])) + if self.framework == "pytorch_ipex": + smooth_quant_args["folding"] = None # will reset it to True if IPEX version < 2.1. + sq_algo.folding = smooth_quant_args["folding"] logger.debug(f"Set smooth quant with alpha {sq_algo.alpha} as the pre-tuning algo.") - algo_scheduler.append_algorithm('pre_quantization', sq_algo) + algo_scheduler.append_algorithm("pre_quantization", sq_algo) def set_param_for_pre_quantization_algos(self, algo_scheduler, tune_cfg, fp32_model) -> None: """Set the parameter for pre-quantization algos, such as smooth quantization. @@ -919,12 +960,11 @@ def set_param_for_pre_quantization_algos(self, algo_scheduler, tune_cfg, fp32_mo fp32_model: the fp32 model """ algo_scheduler.origin_model = fp32_model - algo_scheduler.calib_iter = tune_cfg['calib_iteration'] + algo_scheduler.calib_iter = tune_cfg["calib_iteration"] algo_scheduler.q_model = fp32_model # As the SQ has been moved to a pre-tuning algo scheduler, keep it for future use. return None - def set_param_for_post_quantization_algos(self, algo_scheduler, tune_cfg, pre_optimized_model, q_model) -> None: """Set the parameter for post-quantization algos, such as bias correction, weight correction. 
@@ -939,19 +979,19 @@ def set_param_for_post_quantization_algos(self, algo_scheduler, tune_cfg, pre_op algo_scheduler.q_model = q_model algo_scheduler.reset_exec_algorithms() - recipe_cfgs = tune_cfg.get('recipe_cfgs', None) + recipe_cfgs = tune_cfg.get("recipe_cfgs", None) # for fast_bias_correction - if recipe_cfgs and recipe_cfgs.get('fast_bias_correction', False): # pragma: no cover - fbc_algo = ALGORITHMS()['fast_bias_correction'] + if recipe_cfgs and recipe_cfgs.get("fast_bias_correction", False): # pragma: no cover + fbc_algo = ALGORITHMS()["fast_bias_correction"] fbc_algo.quantization_cfg = deepcopy(tune_cfg) - algo_scheduler.append_algorithm('post_quantization', fbc_algo) - logger.debug(f"Add fast bias correction as the post quantization algo.") + algo_scheduler.append_algorithm("post_quantization", fbc_algo) + logger.debug("Add fast bias correction as the post quantization algo.") # for weight correction - if recipe_cfgs and recipe_cfgs.get('weight_correction', False): # pragma: no cover - w_algo = ALGORITHMS()['weight_correction'] + if recipe_cfgs and recipe_cfgs.get("weight_correction", False): # pragma: no cover + w_algo = ALGORITHMS()["weight_correction"] w_algo.quantization_cfg = deepcopy(tune_cfg) - algo_scheduler.append_algorithm('post_quantization', w_algo) - logger.debug(f"Add weight correction as the post quantization algo.") + algo_scheduler.append_algorithm("post_quantization", w_algo) + logger.debug("Add weight correction as the post quantization algo.") def _remove_redundant_qmodel(self): """Remove the redundant quantized model to reduce memory use. @@ -985,25 +1025,27 @@ def _eval_baseline(self): def _recover_best_qmodel_from_tuning_cfg(self): """Recover the best quantized model from tuning config.""" if self.best_tuning_cfg and not self.best_qmodel: - logger.info(f"[Strategy] Recover the {self.best_tuning_cfg.get('trial_number', 'N/A')}-trial\ - as the tuning result.") - self.best_qmodel = self.adaptor.quantize(copy.deepcopy(self.best_tuning_cfg), self.model, - self.calib_dataloader, self.q_func) + logger.info( + f"[Strategy] Recover the {self.best_tuning_cfg.get('trial_number', 'N/A')}-trial\ + as the tuning result." 
+ ) + self.best_qmodel = self.adaptor.quantize( + copy.deepcopy(self.best_tuning_cfg), self.model, self.calib_dataloader, self.q_func + ) def _fallback_started(self): self.fallback_start_point = self.tuning_times def _update_optype_statistics(self): - self._optype_statistics = defaultdict(lambda:defaultdict(int)) + self._optype_statistics = defaultdict(lambda: defaultdict(int)) - for op_name_type, op_tune_cfg in self.tune_cfg['op'].items(): + for op_name_type, op_tune_cfg in self.tune_cfg["op"].items(): optype = op_name_type[1] - quant_mode = op_tune_cfg['activation']['quant_mode'] + quant_mode = op_tune_cfg["activation"]["quant_mode"] if isinstance(quant_mode, tuple) or isinstance(quant_mode, list): quant_mode = quant_mode[0] - dtype = 'INT8' if quant_mode in ('static', 'dynamic') \ - else quant_mode.upper() - self._optype_statistics[optype]['Total'] += 1 + dtype = "INT8" if quant_mode in ("static", "dynamic") else quant_mode.upper() + self._optype_statistics[optype]["Total"] += 1 self._optype_statistics[optype][dtype] += 1 return @@ -1016,7 +1058,7 @@ def _dump_tuning_process_statistics(self): logger.debug("Objective(s) met at Tune {}".format(self.metric_met_point)) fallback_stats = self._calculate_fallback_op_count() - if self.fallback_stats_baseline == None: + if self.fallback_stats_baseline is None: self.fallback_stats_baseline = fallback_stats logger.debug(f"Fallbacked ops count: {self.fallback_stats_baseline - fallback_stats}") @@ -1025,7 +1067,7 @@ def _dump_tuning_process_statistics(self): return - def _calculate_fallback_op_count(self, target_dtype='INT8'): + def _calculate_fallback_op_count(self, target_dtype="INT8"): fallback_stats = defaultdict(int) for optype in self._optype_statistics: @@ -1034,11 +1076,9 @@ def _calculate_fallback_op_count(self, target_dtype='INT8'): return fallback_stats[target_dtype] - - def _compare_optype_statistics(self, fields=None, optypes=None, - skip_fields=None, skip_optypes=None): - assert(fields == None or skip_fields == None) - assert(optypes == None or skip_optypes == None) + def _compare_optype_statistics(self, fields=None, optypes=None, skip_fields=None, skip_optypes=None): + assert fields is None or skip_fields is None + assert optypes is None or skip_optypes is None if not isinstance(self.adaptor, TensorFlowAdaptor): logger.debug("OpType statistics comparation is only available for TensorFlow adaptor.") return @@ -1046,22 +1086,22 @@ def _compare_optype_statistics(self, fields=None, optypes=None, adaptor_statistics = self.adaptor.optype_statistics def _field_skipped(field): - if fields != None: # pragma: no cover + if fields is not None: # pragma: no cover return field not in fields - elif skip_fields != None: + elif skip_fields is not None: return field in skip_fields def _optype_skipped(optype): - if optypes != None: # pragma: no cover + if optypes is not None: # pragma: no cover return optype not in optypes - elif skip_optypes != None: + elif skip_optypes is not None: return optype in skip_optypes - field_names = adaptor_statistics[0][1:] adaptor_data = { - line[0].lower() : {dtype : count for dtype, count in zip(field_names, line[1:])} - for line in adaptor_statistics[1]} + line[0].lower(): {dtype: count for dtype, count in zip(field_names, line[1:])} + for line in adaptor_statistics[1] + } strategy_data = self._optype_statistics # compare adaptor statistics to strategy statistics @@ -1069,17 +1109,20 @@ def _optype_skipped(optype): has_difference = False difference_count = 0 for optype in adaptor_data: - if optype not in strategy_data 
or _optype_skipped(optype): continue + if optype not in strategy_data or _optype_skipped(optype): + continue for field in field_names: - if _field_skipped(field): continue + if _field_skipped(field): + continue adaptor_count = adaptor_data[optype][field] strategy_count = strategy_data[optype][field] if adaptor_count != strategy_count: has_difference = True - if field == 'INT8': + if field == "INT8": difference_count += abs(strategy_count - adaptor_count) - logger.debug("\t{}: [adaptor: {} | tune_cfg: {}]".format( - (optype, field), adaptor_count, strategy_count)) + logger.debug( + "\t{}: [adaptor: {} | tune_cfg: {}]".format((optype, field), adaptor_count, strategy_count) + ) if not has_difference: logger.debug("\tNone") logger.debug(f"\tDifference(s) in total: {difference_count}") @@ -1102,14 +1145,15 @@ def tuning_sq_alpha(self, tuning_space, tuning_cfg, recipes): tuning config """ sq_alpha_list = recipes.get("smooth_quant_args", {}).get("alpha", []) - assert len(sq_alpha_list) > 0, "Only tune the smooth quant's alpha when user provide the alpha list,\ + assert ( + len(sq_alpha_list) > 0 + ), "Only tune the smooth quant's alpha when user provide the alpha list,\ but got alpha_list: {alpha_list}" logger.info("[STRATEGY] Start tuning smooth quant'alpha.") sq_sampler = tuning_sampler_dict.get_class("smooth_quant")(tuning_space, [], tuning_cfg, sq_alpha_list) for tune_cfg in sq_sampler: yield tune_cfg - def initial_tuning_cfg(self): """Init the tuning config. @@ -1120,21 +1164,21 @@ def initial_tuning_cfg(self): quant_mode_wise_items (OrderedDict): key is quant_mode/precision; value is item list. initial_op_tuning_cfg (OrderedDict): key is (op_name, op_type); value is the initialized tuning config. """ - from .utils.constant import auto_query_order, static_query_order, dynamic_query_order, \ - weight_only_query_order + from .utils.constant import auto_query_order, dynamic_query_order, static_query_order, weight_only_query_order from .utils.tuning_space import initial_tuning_cfg_with_quant_mode - if self.config.approach == 'post_training_auto_quant': + + if self.config.approach == "post_training_auto_quant": query_order = auto_query_order - elif self.config.approach == 'post_training_dynamic_quant': + elif self.config.approach == "post_training_dynamic_quant": query_order = dynamic_query_order - elif self.config.approach == 'post_training_static_quant': + elif self.config.approach == "post_training_static_quant": query_order = static_query_order - elif self.config.approach == 'post_training_weight_only': + elif self.config.approach == "post_training_weight_only": query_order = weight_only_query_order - elif self.config.approach == 'quant_aware_training': + elif self.config.approach == "quant_aware_training": query_order = auto_query_order - quant_mode_wise_items = OrderedDict() # mode, op_item_lst + quant_mode_wise_items = OrderedDict() # mode, op_item_lst pre_items = set() # Collect op items supported the specified mode. 
for quant_mode in query_order: @@ -1153,35 +1197,43 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): initial_op_tuning_cfg = {} for op_name_type, quant_mode in op_item_dtype_dict.items(): - initial_op_tuning_cfg[op_name_type] = initial_tuning_cfg_with_quant_mode(op_name_type, - quant_mode, - self.tuning_space) + initial_op_tuning_cfg[op_name_type] = initial_tuning_cfg_with_quant_mode( + op_name_type, quant_mode, self.tuning_space + ) return op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg def show_baseline_info(self): """Display the accuracy and duration of the the baseline model.""" if self.baseline: - self.tune_data['baseline'] = self.baseline[0] if \ - isinstance(self.baseline[0], list) else [self.baseline[0]] - for name, data in zip(self.metric_name, self.tune_data['baseline']): + self.tune_data["baseline"] = self.baseline[0] if isinstance(self.baseline[0], list) else [self.baseline[0]] + for name, data in zip(self.metric_name, self.tune_data["baseline"]): self.tune_data[name] = [data] if self.metric_weight: # baseline is weighted accuracy - self.tune_data['Weighted accuracy'] = \ - [np.mean(np.array(self.tune_data['baseline']) * self.metric_weight)] - self.tune_data['baseline'] = self.tune_data['Weighted accuracy'] - baseline_msg = '[Accuracy:' + \ - ''.join([' {:.4f}'.format(i) for i in self.tune_data['baseline']]) + \ - ''.join([', {}: {:.4f}'.format(x,y) for x,y in zip( \ - self.objectives.representation, self.baseline[1]) if x != 'Accuracy']) + ']' - else: # pragma: no cover + self.tune_data["Weighted accuracy"] = [ + np.mean(np.array(self.tune_data["baseline"]) * self.metric_weight) + ] + self.tune_data["baseline"] = self.tune_data["Weighted accuracy"] + baseline_msg = ( + "[Accuracy:" + + "".join([" {:.4f}".format(i) for i in self.tune_data["baseline"]]) + + "".join( + [ + ", {}: {:.4f}".format(x, y) + for x, y in zip(self.objectives.representation, self.baseline[1]) + if x != "Accuracy" + ] + ) + + "]" + ) + else: # pragma: no cover if self.metric_weight: - self.tune_data['Weighted accuracy'] = ['n/a'] - self.tune_data['baseline'] = ['n/a'] + self.tune_data["Weighted accuracy"] = ["n/a"] + self.tune_data["baseline"] = ["n/a"] - for name, data in zip(self.metric_name, self.tune_data['baseline']): - self.tune_data[name] = ['n/a'] - baseline_msg = 'n/a' + for name, data in zip(self.metric_name, self.tune_data["baseline"]): + self.tune_data[name] = ["n/a"] + baseline_msg = "n/a" logger.info("FP32 baseline is: {}".format(baseline_msg)) def initial_best_acc(self): @@ -1191,10 +1243,11 @@ def initial_best_acc(self): The initial value of best accuracy. """ if len(self.metric_name) == 1 or self.metric_weight is not None: - best_acc = float('-inf') if self.higher_is_better else float('inf') + best_acc = float("-inf") if self.higher_is_better else float("inf") else: - best_acc = [float('-inf') if higher_is_better else float('inf') for \ - higher_is_better in self.metric_criterion] + best_acc = [ + float("-inf") if higher_is_better else float("inf") for higher_is_better in self.metric_criterion + ] return best_acc def _tune_cfg_converter(self, op_tuning_cfg): @@ -1203,74 +1256,77 @@ def _tune_cfg_converter(self, op_tuning_cfg): Args: op_tuning_cfg (Dict): the op tuning config. 
""" - tune_cfg = {'op': OrderedDict()} + tune_cfg = {"op": OrderedDict()} for op_name_type, op_config in op_tuning_cfg.items(): if isinstance(op_config, OpTuningConfig): - tune_cfg['op'][op_name_type] = op_config.get_state() - op_cap_lst = self.capability['opwise'][op_name_type] + tune_cfg["op"][op_name_type] = op_config.get_state() + op_cap_lst = self.capability["opwise"][op_name_type] # Add pattern for diagnosis for op_cap in op_cap_lst: - if 'pattern' in op_cap: + if "pattern" in op_cap: op_pattern = {} - op_pattern['sequence'] = op_cap['pattern']['sequence'][0] if\ - 'sequence' in op_cap['pattern'] else None - op_pattern['precision'] = op_cap['pattern']['precision'][0] if\ - 'precision' in op_cap['pattern'] else None - tune_cfg['op'][op_name_type]['pattern'] = op_pattern + op_pattern["sequence"] = ( + op_cap["pattern"]["sequence"][0] if "sequence" in op_cap["pattern"] else None + ) + op_pattern["precision"] = ( + op_cap["pattern"]["precision"][0] if "precision" in op_cap["pattern"] else None + ) + tune_cfg["op"][op_name_type]["pattern"] = op_pattern else: tune_cfg[op_name_type] = op_config - tune_cfg['calib_sampling_size'] = op_tuning_cfg['calib_sampling_size'] + tune_cfg["calib_sampling_size"] = op_tuning_cfg["calib_sampling_size"] if self.calib_dataloader is not None: # For the accelerate's DataLoaderShard, use total_batch_size instead of batch_size - bs = getattr(self.calib_dataloader, 'batch_size') or getattr(self.calib_dataloader, 'total_batch_size') + bs = getattr(self.calib_dataloader, "batch_size") or getattr(self.calib_dataloader, "total_batch_size") assert bs > 0, f"Calibration dataloader's batch size should be greater than one but got {bs}" - tune_cfg['calib_iteration'] = math.ceil(int(tune_cfg['calib_sampling_size']) / bs) + tune_cfg["calib_iteration"] = math.ceil(int(tune_cfg["calib_sampling_size"]) / bs) else: - tune_cfg['calib_iteration'] = 1 - tune_cfg['approach'] = self.config.approach + tune_cfg["calib_iteration"] = 1 + tune_cfg["approach"] = self.config.approach # Add the recipe config - tune_cfg['recipe_cfgs'] = tune_cfg.get('recipe_cfgs', {}) + tune_cfg["recipe_cfgs"] = tune_cfg.get("recipe_cfgs", {}) # For not tuning recipe, tune cfg use it directly - tune_cfg['recipe_cfgs'].update(self._not_tuning_recipes_values) - tune_cfg['trial_number'] = deepcopy(self.trials_count) + tune_cfg["recipe_cfgs"].update(self._not_tuning_recipes_values) + tune_cfg["trial_number"] = deepcopy(self.trials_count) # The sq-related args comes from user config, current best tuning config # TODO simplify the logic for transforming the arguments # update the sq-related args from self.cur_best_tuning_cfg if self.cur_best_tuning_cfg: - for arg in ['smooth_quant', 'smooth_quant_args']: - if arg in tune_cfg['recipe_cfgs']: + for arg in ["smooth_quant", "smooth_quant_args"]: + if arg in tune_cfg["recipe_cfgs"]: continue - val = self.cur_best_tuning_cfg.get('recipe_cfgs', {}).get(arg, None) - if val: tune_cfg['recipe_cfgs'][arg] = val + val = self.cur_best_tuning_cfg.get("recipe_cfgs", {}).get(arg, None) + if val: + tune_cfg["recipe_cfgs"][arg] = val # TODO simplify the check logic # update the sq-related args from user config for k, v in self.config.recipes.get("smooth_quant_args", {}).items(): - if k not in tune_cfg['recipe_cfgs'].get('smooth_quant_args', {}): + if k not in tune_cfg["recipe_cfgs"].get("smooth_quant_args", {}): if k == "alpha": # for O0, pass the first value to alpha if isinstance(v, list) and len(v) >= 1: v = v[0] - tune_cfg['recipe_cfgs'].setdefault('smooth_quant_args', {})[k] 
= v - if 'layer_wise_quant_args' in self.config.recipes: - tune_cfg['recipe_cfgs']['layer_wise_quant_args'] = self.config.recipes['layer_wise_quant_args'] + tune_cfg["recipe_cfgs"].setdefault("smooth_quant_args", {})[k] = v + if "layer_wise_quant_args" in self.config.recipes: + tune_cfg["recipe_cfgs"]["layer_wise_quant_args"] = self.config.recipes["layer_wise_quant_args"] # For tuning recipe, use the default value if it not specified by recipe tuning sampler. for recipe_name, recipe_val in self._tuning_recipes_default_values.items(): - if recipe_name not in tune_cfg['recipe_cfgs']: - tune_cfg['recipe_cfgs'][recipe_name] = recipe_val + if recipe_name not in tune_cfg["recipe_cfgs"]: + tune_cfg["recipe_cfgs"][recipe_name] = recipe_val return tune_cfg - + def _get_calib_iter(self): calib_sampling_size_lst = self.config.calibration_sampling_size calib_sampling_size_lst = [int(calib_sampling_size) for calib_sampling_size in calib_sampling_size_lst] if self.calib_dataloader: # For the accelerate's DataLoaderShard, use total_batch_size instead of batch_size - bs = getattr(self.calib_dataloader, 'batch_size') or getattr(self.calib_dataloader, 'total_batch_size') + bs = getattr(self.calib_dataloader, "batch_size") or getattr(self.calib_dataloader, "total_batch_size") assert bs > 0, f"Calibration dataloader's batch size should be greater than one but got {bs}" calib_iter = [math.ceil(int(x) / bs) for x in calib_sampling_size_lst] else: calib_iter = 1 return calib_sampling_size_lst, calib_iter - + def build_tuning_space(self, config): """Create the tuning space. @@ -1280,10 +1336,7 @@ def build_tuning_space(self, config): config: The Conf class instance includes all user configurations. """ # create tuning space - adaptor_cap = { - 'calib': {'calib_sampling_size': self.calib_sampling_size_lst}, - 'op': self.capability['opwise'] - } + adaptor_cap = {"calib": {"calib_sampling_size": self.calib_sampling_size_lst}, "op": self.capability["opwise"]} tuning_space = TuningSpace(adaptor_cap, conf=config, framework=self.framework) return tuning_space @@ -1295,18 +1348,16 @@ def setup_resume(self, resume): """ self.__dict__.update(resume) for history in self.tuning_history: - if self._same_conf(history['cfg'], self.conf): # pragma: no cover - self.__dict__.update({k: v for k, v in history.items() \ - if k not in ['version', 'history']}) + if self._same_conf(history["cfg"], self.conf): # pragma: no cover + self.__dict__.update({k: v for k, v in history.items() if k not in ["version", "history"]}) logger.info("Start to resume tuning process.") # resume the best tuning model if needed try: - index = history['id'] - 1 - resume_tuning_cfg = history['history'][index]['tune_cfg'] - self.best_qmodel = self.adaptor.quantize(resume_tuning_cfg, - self.model, - self.calib_dataloader, - self.q_func) + index = history["id"] - 1 + resume_tuning_cfg = history["history"][index]["tune_cfg"] + self.best_qmodel = self.adaptor.quantize( + resume_tuning_cfg, self.model, self.calib_dataloader, self.q_func + ) except: logger.debug("Can not resume the best quantize model from history.") @@ -1314,89 +1365,96 @@ def setup_resume(self, resume): def check_q_func(self): """Check the training function for quantization aware training.""" - if self.config.approach == 'quant_aware_training': - assert self.q_func != None, "Please set train func for quantization aware training" + if self.config.approach == "quant_aware_training": + assert self.q_func is not None, "Please set train func for quantization aware training" def _create_path(self, 
custom_path, filename): - new_path = os.path.join(os.path.abspath(os.path.expanduser(custom_path)),filename) + new_path = os.path.join(os.path.abspath(os.path.expanduser(custom_path)), filename) path = Path(os.path.dirname(new_path)) path.mkdir(exist_ok=True, parents=True) return new_path def _set_framework_info(self, q_dataloader, q_func=None): - framework_specific_info = {'device': getattr(self.config, 'device', None), - 'approach': getattr(self.config, 'approach', None), - 'random_seed': options.random_seed, - 'performance_only': self._not_tuning} + framework_specific_info = { + "device": getattr(self.config, "device", None), + "approach": getattr(self.config, "approach", None), + "random_seed": options.random_seed, + "performance_only": self._not_tuning, + } framework = self.config.framework.lower() - framework_specific_info.update({'backend': self.config.backend}) - framework_specific_info.update({'format': getattr(self.config, 'quant_format', None)}) - framework_specific_info.update({'domain': getattr(self.config, 'domain', None)}) + framework_specific_info.update({"backend": self.config.backend}) + framework_specific_info.update({"format": getattr(self.config, "quant_format", None)}) + framework_specific_info.update({"domain": getattr(self.config, "domain", None)}) self.mixed_precision_mode = isinstance(self.config, MixedPrecisionConfig) - if 'tensorflow' in framework: + if "tensorflow" in framework: + framework_specific_info.update( + { + "inputs": self.config.inputs, + "outputs": self.config.outputs, + "workspace_path": options.workspace, + "recipes": self.config.recipes, + "use_bf16": self.config.use_bf16 if self.config.use_bf16 is not None else False, + } + ) + for item in ["scale_propagation_max_pooling", "scale_propagation_concat"]: + if framework_specific_info["recipes"] and item not in framework_specific_info["recipes"]: + framework_specific_info["recipes"].update({item: True}) + if self.config.backend == "itex": + framework = "tensorflow_itex" + if "keras" in framework: framework_specific_info.update( - {"inputs": self.config.inputs, - "outputs": self.config.outputs, - 'workspace_path': options.workspace, - 'recipes': self.config.recipes, - 'use_bf16': self.config.use_bf16 if self.config.use_bf16 is not None else False}) - for item in ['scale_propagation_max_pooling', 'scale_propagation_concat']: - if framework_specific_info['recipes'] and item not in framework_specific_info['recipes']: - framework_specific_info['recipes'].update({item: True}) - if self.config.backend == 'itex': - framework = 'tensorflow_itex' - if 'keras' in framework: - framework_specific_info.update({ - 'workspace_path': options.workspace, }) - if framework == 'mxnet': + { + "workspace_path": options.workspace, + } + ) + if framework == "mxnet": framework_specific_info.update({"q_dataloader": q_dataloader}) - if 'onnx' in framework.lower(): + if "onnx" in framework.lower(): if self.mixed_precision_mode: framework_specific_info.update({"approach": "post_training_dynamic_quant"}) framework_specific_info.update({"deploy_path": os.path.dirname(self.deploy_path)}) - framework_specific_info.update({'workspace_path': options.workspace}) - framework_specific_info.update({'recipes': self.config.recipes}) - framework_specific_info.update({'reduce_range': self.config.reduce_range}) - framework_specific_info.update({'recipes': self.config.recipes}) - if framework_specific_info['backend'] in ['onnxrt_trt_ep', 'onnxrt_cuda_ep'] and \ - 'gpu' not in framework_specific_info['device']: - logger.warning('Please set device 
to gpu during using backend {}.'.format(self.config.backend)) + framework_specific_info.update({"workspace_path": options.workspace}) + framework_specific_info.update({"recipes": self.config.recipes}) + framework_specific_info.update({"reduce_range": self.config.reduce_range}) + framework_specific_info.update({"recipes": self.config.recipes}) + if ( + framework_specific_info["backend"] in ["onnxrt_trt_ep", "onnxrt_cuda_ep"] + and "gpu" not in framework_specific_info["device"] + ): + logger.warning("Please set device to gpu during using backend {}.".format(self.config.backend)) sys.exit(0) - if framework.lower() == 'onnxrt_qdq' or \ - framework_specific_info['backend'] == 'onnxrt_trt_ep': - framework_specific_info.update({'format': 'QDQ'}) - framework = 'onnxrt_qdq' - if framework_specific_info['backend'] == 'onnxrt_cuda_ep' and self.config.device =='gpu': - framework_specific_info['use_fp16'] = True - framework_specific_info['use_bf16'] = True - if framework_specific_info['backend'] == 'onnxrt_dnnl_ep' and self.config.device == 'cpu': - framework_specific_info['use_bf16'] = True - if self.config.approach =='post_training_weight_only': - framework = 'onnxrt_weightonly' # use specific adaptor for weight_only approach - - if framework == 'pytorch_ipex' or framework == 'pytorch' or framework == 'pytorch_fx': - if self.config.backend == 'ipex': - framework = 'pytorch_ipex' - elif self.config.backend == 'default': - framework = 'pytorch_fx' + if framework.lower() == "onnxrt_qdq" or framework_specific_info["backend"] == "onnxrt_trt_ep": + framework_specific_info.update({"format": "QDQ"}) + framework = "onnxrt_qdq" + if framework_specific_info["backend"] == "onnxrt_cuda_ep" and self.config.device == "gpu": + framework_specific_info["use_fp16"] = True + framework_specific_info["use_bf16"] = True + if framework_specific_info["backend"] == "onnxrt_dnnl_ep" and self.config.device == "cpu": + framework_specific_info["use_bf16"] = True + if self.config.approach == "post_training_weight_only": + framework = "onnxrt_weightonly" # use specific adaptor for weight_only approach + + if framework == "pytorch_ipex" or framework == "pytorch" or framework == "pytorch_fx": + if self.config.backend == "ipex": + framework = "pytorch_ipex" + elif self.config.backend == "default": + framework = "pytorch_fx" if self.mixed_precision_mode: framework_specific_info.update({"approach": "post_training_dynamic_quant"}) - framework_specific_info.update({'recipes': self.config.recipes}) + framework_specific_info.update({"recipes": self.config.recipes}) framework_specific_info.update({"q_dataloader": q_dataloader}) - framework_specific_info.update({"use_bf16": self.config.use_bf16 \ - if self.config.use_bf16 is not None else True}) framework_specific_info.update( - {"workspace_path": os.path.dirname(self.deploy_path)}) - if self.config.op_name_dict is not None \ - and 'default_qconfig' in self.config.op_name_dict: - framework_specific_info.update( - {"default_qconfig": self.config.op_name_dict['default_qconfig']}) + {"use_bf16": self.config.use_bf16 if self.config.use_bf16 is not None else True} + ) + framework_specific_info.update({"workspace_path": os.path.dirname(self.deploy_path)}) + if self.config.op_name_dict is not None and "default_qconfig" in self.config.op_name_dict: + framework_specific_info.update({"default_qconfig": self.config.op_name_dict["default_qconfig"]}) framework_specific_info.update({"q_func": q_func}) framework_specific_info.update({"example_inputs": self.config.example_inputs}) - if self.config.approach 
=='post_training_weight_only': - framework = 'pytorchweightonly' # use specific adaptor for weight_only approach + if self.config.approach == "post_training_weight_only": + framework = "pytorchweightonly" # use specific adaptor for weight_only approach return framework, framework_specific_info def _set_objectives(self): @@ -1405,7 +1463,7 @@ def _use_multi_obj_check(obj): if isinstance(obj, list): return len(obj) > 1 elif isinstance(obj, dict): - return len(obj.get('objective', [])) > 1 + return len(obj.get("objective", [])) > 1 self.higher_is_better = bool(self.config.accuracy_criterion.higher_is_better) obj_higher_is_better = None @@ -1414,16 +1472,16 @@ def _use_multi_obj_check(obj): use_multi_objs = _use_multi_obj_check(obj) self.use_multi_objective = False if use_multi_objs: - obj_higher_is_better = obj.get('higher_is_better', None) - obj_weight = obj.get('weight', None) - obj_lst = obj.get('objective', []) + obj_higher_is_better = obj.get("higher_is_better", None) + obj_weight = obj.get("weight", None) + obj_lst = obj.get("objective", []) objectives = [i.lower() for i in obj_lst] self.use_multi_objective = True else: objectives = [val.lower() for val in obj] # set metric - self.metric_name = ['Accuracy'] + self.metric_name = ["Accuracy"] self.metric_criterion = [self.higher_is_better] self.metric_weight = None use_multi_metrics = False @@ -1431,9 +1489,9 @@ def _use_multi_obj_check(obj): # metric name # 'weight','higher_is_better', 'metric1', 'metric2', ... if len(self.eval_metric.keys()) >= 4: - self.metric_name = self.eval_metric.keys() - {'weight','higher_is_better'} + self.metric_name = self.eval_metric.keys() - {"weight", "higher_is_better"} use_multi_metrics = True - metric_higher_is_better = self.eval_metric.get('higher_is_better', None) + metric_higher_is_better = self.eval_metric.get("higher_is_better", None) # metric criterion if use_multi_metrics: if metric_higher_is_better is not None: @@ -1441,24 +1499,27 @@ def _use_multi_obj_check(obj): else: self.metric_criterion = [True] * len(self.metric_name) # metric weight - self.metric_weight = self.eval_metric.get('weight', None) + self.metric_weight = self.eval_metric.get("weight", None) - accuracy_criterion = {'relative': 0.01, 'higher_is_better': True} + accuracy_criterion = {"relative": 0.01, "higher_is_better": True} accuracy_criterion_conf = self.config.accuracy_criterion accuracy_criterion[accuracy_criterion_conf.criterion] = accuracy_criterion_conf.tolerable_loss - accuracy_criterion['higher_is_better'] = accuracy_criterion_conf.higher_is_better - objectives = MultiObjective(objectives=objectives, - accuracy_criterion=accuracy_criterion, - metric_criterion=self.metric_criterion, - metric_weight=self.metric_weight, - obj_criterion=obj_higher_is_better, - obj_weight=obj_weight) + accuracy_criterion["higher_is_better"] = accuracy_criterion_conf.higher_is_better + objectives = MultiObjective( + objectives=objectives, + accuracy_criterion=accuracy_criterion, + metric_criterion=self.metric_criterion, + metric_weight=self.metric_weight, + obj_criterion=obj_higher_is_better, + obj_weight=obj_weight, + ) return objectives def _same_conf(self, src_conf, dst_conf): """Check if the two configs are the same.""" from ..utils.utility import compare_objects - return compare_objects(src_conf, dst_conf, {'_options', '_tuning', '_accuracy', 'trial_number'}) + + return compare_objects(src_conf, dst_conf, {"_options", "_tuning", "_accuracy", "trial_number"}) def update_best_op_tuning_cfg(self, op_tuning_cfg): """Track and update the best 
tuning config with correspondence accuracy result. @@ -1473,21 +1534,27 @@ def update_best_op_tuning_cfg(self, op_tuning_cfg): if self.cur_best_tuning_cfg is None: self.cur_best_tuning_cfg = copy.deepcopy(op_tuning_cfg) logger.debug("[Strategy] Updated the current best tuning config as the last one is None.") - if not isinstance(acc, list) and ((self.higher_is_better and acc >= self.cur_best_acc) \ - or (not self.higher_is_better and acc <= self.cur_best_acc)): + if not isinstance(acc, list) and ( + (self.higher_is_better and acc >= self.cur_best_acc) + or (not self.higher_is_better and acc <= self.cur_best_acc) + ): self.cur_best_acc = acc self.cur_best_tuning_cfg = copy.deepcopy(op_tuning_cfg) logger.debug("[Strategy] Updated the current best tuning config as got better accuracy.") elif len(self.metric_name) > 1 and self.metric_weight is not None: acc = np.mean(np.array(acc) * self.metric_weight) - if (self.higher_is_better and acc >= self.cur_best_acc) or \ - (not self.higher_is_better and acc <= self.cur_best_acc): + if (self.higher_is_better and acc >= self.cur_best_acc) or ( + not self.higher_is_better and acc <= self.cur_best_acc + ): self.cur_best_acc = acc self.cur_best_tuning_cfg = copy.deepcopy(op_tuning_cfg) elif len(self.metric_name) > 1 and self.metric_weight is None: - if all([acc_i >= best_i if higher_is_better else acc_i <= best_i for \ - acc_i, best_i, higher_is_better in \ - zip(acc, self.cur_best_acc, self.metric_criterion)]): + if all( + [ + acc_i >= best_i if higher_is_better else acc_i <= best_i + for acc_i, best_i, higher_is_better in zip(acc, self.cur_best_acc, self.metric_criterion) + ] + ): self.cur_best_acc = acc self.cur_best_tuning_cfg = copy.deepcopy(op_tuning_cfg) logger.debug(f"Best acc is {self.cur_best_acc}.") @@ -1497,22 +1564,22 @@ def deploy_config(self): """Save the configuration locally for deployment.""" self.deploy_cfg = OrderedDict() model_cfg = dict() - model_cfg['inputs'] = self.config.inputs - model_cfg['outputs'] = self.config.outputs - model_cfg['backend'] = self.config.backend - model_cfg['quant_format'] = self.config.quant_format - model_cfg['domain'] = self.config.domain - model_cfg['backend'] = self.config.backend - self.deploy_cfg['model'] = model_cfg - self.deploy_cfg['device'] = self.config.device + model_cfg["inputs"] = self.config.inputs + model_cfg["outputs"] = self.config.outputs + model_cfg["backend"] = self.config.backend + model_cfg["quant_format"] = self.config.quant_format + model_cfg["domain"] = self.config.domain + model_cfg["backend"] = self.config.backend + self.deploy_cfg["model"] = model_cfg + self.deploy_cfg["device"] = self.config.device def setup_yaml(): - represent_dict_order = lambda self, \ - data: self.represent_mapping('tag:yaml.org,2002:map', data.items()) + represent_dict_order = lambda self, data: self.represent_mapping("tag:yaml.org,2002:map", data.items()) yaml.add_representer(OrderedDict, represent_dict_order) yaml.add_representer(DotDict, represent_dict_order) + setup_yaml() - with open(self.deploy_path, 'w+') as f: + with open(self.deploy_path, "w+") as f: yaml.dump(self.deploy_cfg, f) logger.info("Save deploy yaml to {}".format(self.deploy_path)) @@ -1539,9 +1606,7 @@ def _evaluate(self, model): # Pytorch can insert observer to model in this hook. 
# Tensorflow don't support this mode for now model = self.adaptor._pre_eval_hook(model) - val = self.objectives.evaluate( - self.eval_func, model if self.framework == "pytorch_ipex" else model.model - ) + val = self.objectives.evaluate(self.eval_func, model if self.framework == "pytorch_ipex" else model.model) if options.tensorboard: # post_eval_hook to deal the tensor self.adaptor._post_eval_hook(model, accuracy=val[0]) @@ -1551,47 +1616,56 @@ def _evaluate(self, model): postprocess_cfg = None metric_cfg = self.eval_metric iteration = -1 - eval_func = create_eval_func(self.framework, + eval_func = create_eval_func( + self.framework, self.eval_dataloader, self.adaptor, metric_cfg, postprocess_cfg, iteration, - tensorboard = options.tensorboard, - fp32_baseline = self.baseline == None) + tensorboard=options.tensorboard, + fp32_baseline=self.baseline is None, + ) - if getattr(self.eval_dataloader, 'distributed', False): - if 'tensorflow' in self.framework: + if getattr(self.eval_dataloader, "distributed", False): + if "tensorflow" in self.framework: import horovod.tensorflow as hvd - elif self.framework in ['pytorch_ipex','pytorch','pytorch_fx']: + elif self.framework in ["pytorch_ipex", "pytorch", "pytorch_fx"]: import horovod.torch as hvd else: - raise NotImplementedError("Currently only TensorFlow and PyTorch " - "support distributed inference in PTQ.") + raise NotImplementedError( + "Currently only TensorFlow and PyTorch " "support distributed inference in PTQ." + ) hvd.init() try: len_dataloader = len(self.eval_dataloader) except: - logger.info("The length of the distributed dataloader is unknown." - "When the iteration of evaluation dataloader in each " - "process is inconsistent, an error may occur.") + logger.info( + "The length of the distributed dataloader is unknown." + "When the iteration of evaluation dataloader in each " + "process is inconsistent, an error may occur." + ) else: list_len_dataloader = hvd.allgather_object(len_dataloader) if hvd.rank() == 0: - for i in range(len(list_len_dataloader)-1): - if list_len_dataloader[i] != list_len_dataloader[i+1]: - raise AttributeError("The evaluation dataloader's iteration is" - "different between processes, please reset " - "dataloader's batch_size.") + for i in range(len(list_len_dataloader) - 1): + if list_len_dataloader[i] != list_len_dataloader[i + 1]: + raise AttributeError( + "The evaluation dataloader's iteration is" + "different between processes, please reset " + "dataloader's batch_size." + ) val = self.objectives.evaluate(eval_func, model) if isinstance(val[0], list): - assert all([np.isscalar(i) for i in val[0]]), \ - "The eval_func should return a scalar or list of scalar, " \ - "but not {}!".format(str([type(i) for i in val[0]])) + assert all( + [np.isscalar(i) for i in val[0]] + ), "The eval_func should return a scalar or list of scalar, " "but not {}!".format( + str([type(i) for i in val[0]]) + ) else: - assert np.isscalar(val[0]), \ - "The eval_func should return a scalar or list of scalar, " \ - "but not {}!".format(str(type(val[0]))) + assert np.isscalar(val[0]), "The eval_func should return a scalar or list of scalar, " "but not {}!".format( + str(type(val[0])) + ) return val @@ -1601,7 +1675,7 @@ def __getstate__(self): Returns: dict: Saved dict for resuming """ - return {'tuning_history': self.tuning_history} + return {"tuning_history": self.tuning_history} def __setstate__(self, d): """Magic method for pickle loading. 
@@ -1620,8 +1694,7 @@ def stop(self, timeout, trials_count): bool: True if need stop, otherwise False """ need_stop = False - if self._not_tuning or \ - self.objectives.compare(self.best_tune_result, self.baseline): + if self._not_tuning or self.objectives.compare(self.best_tune_result, self.baseline): self.best_tune_result = self.last_tune_result self.best_qmodel = self.last_qmodel self.best_tuning_cfg = copy.deepcopy(self.last_tune_cfg) @@ -1630,18 +1703,19 @@ def stop(self, timeout, trials_count): self.metric_met_point = self.tuning_times # track the model with highest acc - if self.best_tune_result and self.last_tune_result: # (acc, [perf]) + if self.best_tune_result and self.last_tune_result: # (acc, [perf]) if self.re_quant and self.objectives.accuracy_meets(): self.best_tune_result = self.last_tune_result self.best_qmodel = self.last_qmodel self.best_tuning_cfg = copy.deepcopy(self.last_tune_cfg) logger.debug(f"*** Update the best qmodel with the result {self.best_tune_result}.") else: - logger.debug(f"*** Accuracy not meets the requirements, do not update the best qmodel.") + logger.debug("*** Accuracy not meets the requirements, do not update the best qmodel.") if self.last_tune_result: - last_tune = self.last_tune_result[0] if \ - isinstance(self.last_tune_result[0], list) else [self.last_tune_result[0]] + last_tune = ( + self.last_tune_result[0] if isinstance(self.last_tune_result[0], list) else [self.last_tune_result[0]] + ) for name, data in zip(self.metric_name, last_tune): if len(self.tune_data[name]) == 1: @@ -1652,31 +1726,39 @@ def stop(self, timeout, trials_count): if self.metric_weight and len(last_tune) > 1: weighted_acc = np.mean(np.array(last_tune) * self.metric_weight) - if len(self.tune_data['Weighted accuracy']) == 1: - self.tune_data['Weighted accuracy'].append(weighted_acc) + if len(self.tune_data["Weighted accuracy"]) == 1: + self.tune_data["Weighted accuracy"].append(weighted_acc) else: - self.tune_data['Weighted accuracy'][1] = weighted_acc + self.tune_data["Weighted accuracy"][1] = weighted_acc last_tune = [weighted_acc] - last_tune_msg = '[Accuracy (int8|fp32):' + \ - ''.join([' {:.4f}|{:.4f}'.format(last, base) for last, base in \ - zip(last_tune, self.tune_data['baseline'])]) + \ - ''.join([', {} (int8|fp32): {:.4f}|{:.4f}'.format( \ - x, y, z) for x, y, z in zip( \ - self.objectives.representation, self.last_tune_result[1], self.baseline[1]) \ - if x != 'Accuracy']) + ']' - else: # pragma: no cover - last_tune_msg = 'n/a' - for name in self.tune_data.keys() - {'baseline'}: + last_tune_msg = ( + "[Accuracy (int8|fp32):" + + "".join( + [" {:.4f}|{:.4f}".format(last, base) for last, base in zip(last_tune, self.tune_data["baseline"])] + ) + + "".join( + [ + ", {} (int8|fp32): {:.4f}|{:.4f}".format(x, y, z) + for x, y, z in zip(self.objectives.representation, self.last_tune_result[1], self.baseline[1]) + if x != "Accuracy" + ] + ) + + "]" + ) + else: # pragma: no cover + last_tune_msg = "n/a" + for name in self.tune_data.keys() - {"baseline"}: if len(self.tune_data[name]) == 1: - self.tune_data[name].append('n/a') + self.tune_data[name].append("n/a") else: - self.tune_data[name][1] = 'n/a' + self.tune_data[name][1] = "n/a" if self.best_tune_result: - best_tune = self.best_tune_result[0] if isinstance(self.best_tune_result[0], list) \ - else [self.best_tune_result[0]] + best_tune = ( + self.best_tune_result[0] if isinstance(self.best_tune_result[0], list) else [self.best_tune_result[0]] + ) for name, data in zip(self.metric_name, best_tune): if 
len(self.tune_data[name]) == 2: @@ -1687,60 +1769,83 @@ def stop(self, timeout, trials_count): if self.metric_weight and len(best_tune) > 1: weighted_acc = np.mean(np.array(best_tune) * self.metric_weight) - if len(self.tune_data['Weighted accuracy']) == 2: - self.tune_data['Weighted accuracy'].append(weighted_acc) - else: # pragma: no cover - self.tune_data['Weighted accuracy'][2] = weighted_acc + if len(self.tune_data["Weighted accuracy"]) == 2: + self.tune_data["Weighted accuracy"].append(weighted_acc) + else: # pragma: no cover + self.tune_data["Weighted accuracy"][2] = weighted_acc best_tune = [weighted_acc] - best_tune_msg = '[Accuracy:' + ''.join([' {:.4f}'.format(best) \ - for best in best_tune]) + ''.join([', {}: {:.4f}'.format(x,y) \ - for x,y in zip(self.objectives.representation, \ - self.best_tune_result[1]) if x != 'Accuracy']) + ']' + best_tune_msg = ( + "[Accuracy:" + + "".join([" {:.4f}".format(best) for best in best_tune]) + + "".join( + [ + ", {}: {:.4f}".format(x, y) + for x, y in zip(self.objectives.representation, self.best_tune_result[1]) + if x != "Accuracy" + ] + ) + + "]" + ) else: - best_tune_msg = 'n/a' - for name in self.tune_data.keys() - {'baseline'}: + best_tune_msg = "n/a" + for name in self.tune_data.keys() - {"baseline"}: if len(self.tune_data[name]) == 2: - self.tune_data[name].append('n/a') + self.tune_data[name].append("n/a") else: - self.tune_data[name][2] = 'n/a' - - logger.info("Tune {} result is: {}, Best tune result is: {}".format(self.trials_count, - last_tune_msg, - best_tune_msg)) - output_data = [[info_type, - '{:.4f} '.format(self.tune_data[info_type][0]) if \ - not isinstance(self.tune_data[info_type][0], str) else self.tune_data[info_type][0], - '{:.4f} '.format(self.tune_data[info_type][1]) if \ - not isinstance(self.tune_data[info_type][1], str) else self.tune_data[info_type][1], - '{:.4f} '.format(self.tune_data[info_type][2]) if \ - not isinstance(self.tune_data[info_type][2], str) else self.tune_data[info_type][2]] \ - for info_type in self.tune_data.keys() if info_type != 'baseline'] - - output_data.extend([[obj, - '{:.4f} '.format(self.baseline[1][i]) if self.baseline else 'n/a', - '{:.4f} '.format(self.last_tune_result[1][i]) if self.last_tune_result else 'n/a', - '{:.4f} '.format(self.best_tune_result[1][i]) if self.best_tune_result else 'n/a'] \ - for i, obj in enumerate(self.objectives.representation)]) + self.tune_data[name][2] = "n/a" + + logger.info( + "Tune {} result is: {}, Best tune result is: {}".format(self.trials_count, last_tune_msg, best_tune_msg) + ) + output_data = [ + [ + info_type, + "{:.4f} ".format(self.tune_data[info_type][0]) + if not isinstance(self.tune_data[info_type][0], str) + else self.tune_data[info_type][0], + "{:.4f} ".format(self.tune_data[info_type][1]) + if not isinstance(self.tune_data[info_type][1], str) + else self.tune_data[info_type][1], + "{:.4f} ".format(self.tune_data[info_type][2]) + if not isinstance(self.tune_data[info_type][2], str) + else self.tune_data[info_type][2], + ] + for info_type in self.tune_data.keys() + if info_type != "baseline" + ] + + output_data.extend( + [ + [ + obj, + "{:.4f} ".format(self.baseline[1][i]) if self.baseline else "n/a", + "{:.4f} ".format(self.last_tune_result[1][i]) if self.last_tune_result else "n/a", + "{:.4f} ".format(self.best_tune_result[1][i]) if self.best_tune_result else "n/a", + ] + for i, obj in enumerate(self.objectives.representation) + ] + ) self.tuning_result_data = output_data - Statistics(output_data, - header='Tune Result Statistics', - 
field_names=['Info Type', 'Baseline', 'Tune {} result'.format(self.trials_count), \ - 'Best tune result']).print_stat() + Statistics( + output_data, + header="Tune Result Statistics", + field_names=["Info Type", "Baseline", "Tune {} result".format(self.trials_count), "Best tune result"], + ).print_stat() # exit policy # 1. not_tuning(performance_only): only quantize the model without tuning or evaluation. # 2. timeout = 0, exit the tuning process once it is found model meets the accuracy requirement. # 3. max_trials, the number of the actually trials is less or equal to the max_trials # There are two ways to use max_trials to dominate the exit policy. - # 1) timeout = 0, the tuning process exit when the actual_trails_count >= max_trials or + # 1) timeout = 0, the tuning process exit when the actual_trails_count >= max_trials or # a quantized model meets the accuracy requirements # 2) timeout = inf, the tuning process exit until the trials_count >= max_trials # Some use case: # 1) Ending tuning process after a quantized model meets the accuracy requirements # max_trials = inf, timeout = 0 (by default) # the default max_trials is 100 - # value of timeout. max_trials control the exit policy + # value of timeout. max_trials control the exit policy # 2) Even after finding a model that meets the accuracy goal, we may want to continue the # tuning process for better performance or other objectives. # timeout = 100000, max_trials = 10 # Specifics a fairly large timeout, use max_trials @@ -1778,9 +1883,9 @@ def _find_tuning_history(self, tune_cfg): for tuning_history in self.tuning_history: # only check if a tune_cfg is evaluated under same config, excluding # some fields in tuning section of config, such as tensorboard, snapshot, resume. - if self._same_conf(tuning_history['cfg'], self.conf): - for history in tuning_history['history']: - if history and equal_dicts(history['tune_cfg'], tune_cfg, ignore_keys=['trial_number']): + if self._same_conf(tuning_history["cfg"], self.conf): + for history in tuning_history["history"]: + if history and equal_dicts(history["tune_cfg"], tune_cfg, ignore_keys=["trial_number"]): return tuning_history return None @@ -1794,9 +1899,9 @@ def _find_history(self, tune_cfg): for tuning_history in self.tuning_history: # only check if a tune_cfg is evaluated under same config, excluding # some fields in tuning section of config, such as tensorboard, snapshot, resume. - if self._same_conf(tuning_history['cfg'], self.conf): - for history in tuning_history['history']: - if history and history['tune_cfg'] == tune_cfg: + if self._same_conf(tuning_history["cfg"], self.conf): + for history in tuning_history["history"]: + if history and history["tune_cfg"] == tune_cfg: return history return None @@ -1809,7 +1914,7 @@ def _find_self_tuning_history(self): for tuning_history in self.tuning_history: # only check if a tune_cfg is evaluated under same config, excluding # some fields in tuning section of config, such as tensorboard, snapshot, resume. - if self._same_conf(tuning_history['cfg'], self.conf): + if self._same_conf(tuning_history["cfg"], self.conf): return tuning_history return None @@ -1843,28 +1948,28 @@ def _add_tuning_history(self, tune_cfg=None, tune_result=None, **kwargs): Note this record is added under same config. 
""" found = False - d = {'tune_cfg': tune_cfg, 'tune_result': tune_result} + d = {"tune_cfg": tune_cfg, "tune_result": tune_result} for tuning_history in self.tuning_history: - if self._same_conf(tuning_history['cfg'], self.conf): + if self._same_conf(tuning_history["cfg"], self.conf): d.update(kwargs) - tuning_history['history'].append(d) - tuning_history['last_tune_result'] = self.last_tune_result - tuning_history['best_tune_result'] = self.best_tune_result - tuning_history['cfg'] = self.conf + tuning_history["history"].append(d) + tuning_history["last_tune_result"] = self.last_tune_result + tuning_history["best_tune_result"] = self.best_tune_result + tuning_history["cfg"] = self.conf found = True break if not found: tuning_history = {} - tuning_history['version'] = __version__ - tuning_history['cfg'] = self.conf - tuning_history['baseline'] = self.baseline - tuning_history['last_tune_result'] = self.last_tune_result - tuning_history['best_tune_result'] = self.best_tune_result - tuning_history['history'] = [] + tuning_history["version"] = __version__ + tuning_history["cfg"] = self.conf + tuning_history["baseline"] = self.baseline + tuning_history["last_tune_result"] = self.last_tune_result + tuning_history["best_tune_result"] = self.best_tune_result + tuning_history["history"] = [] if tune_cfg and tune_result: d.update(kwargs) - tuning_history['history'].append(d) + tuning_history["history"].append(d) self.tuning_history.append(tuning_history) self._save() @@ -1879,19 +1984,20 @@ def _collect_ops_by_quant_mode(self, tune_cfg, quant_mode): def _need_do_diagnosis(self): """Check if need to do diagnosis or not.""" # if user specifies to do it and does not do it. - if getattr(self.config, 'diagnosis', None) is True and not self.diagnosis_done: + if getattr(self.config, "diagnosis", None) is True and not self.diagnosis_done: return True return False def _diagnosis(self, tune_cfg): """Dump diagnosis information.""" import logging + logger = logging.getLogger("neural_compressor") logger.debug("[Strategy] Start to do diagnosis (inspect tensor).") iteration_list = [1] - inspect_type = 'all' + inspect_type = "all" save_to_disk = True - save_path = os.path.join(options.workspace, 'inspect_saved') + save_path = os.path.join(options.workspace, "inspect_saved") inspect_node_lst, updated_cfg = self.adaptor.diagnosis_helper( self.model, self.last_qmodel, @@ -1905,25 +2011,29 @@ def _diagnosis(self, tune_cfg): op_list = list(set(op_list).intersection(inspect_node_lst)) # step1. 
inspect tensor - logger.debug(f'[Strategy] Start to inspect tensor :{op_list} in fp32 model.') - self.adaptor.inspect_tensor(self.model, - dataloader=self.calib_dataloader, - op_list=op_list, - iteration_list=iteration_list, - inspect_type=inspect_type, - save_to_disk=save_to_disk, - save_path=os.path.join(save_path, 'fp32'), - quantization_cfg=updated_cfg) - - logger.debug(f'[Strategy] Start to inspect tensor :{op_list} in quantized model.') - self.adaptor.inspect_tensor(self.last_qmodel, - dataloader=self.calib_dataloader, - op_list=op_list, - iteration_list=iteration_list, - inspect_type=inspect_type, - save_to_disk=save_to_disk, - save_path=os.path.join(save_path, 'quan'), - quantization_cfg=updated_cfg) + logger.debug(f"[Strategy] Start to inspect tensor :{op_list} in fp32 model.") + self.adaptor.inspect_tensor( + self.model, + dataloader=self.calib_dataloader, + op_list=op_list, + iteration_list=iteration_list, + inspect_type=inspect_type, + save_to_disk=save_to_disk, + save_path=os.path.join(save_path, "fp32"), + quantization_cfg=updated_cfg, + ) + + logger.debug(f"[Strategy] Start to inspect tensor :{op_list} in quantized model.") + self.adaptor.inspect_tensor( + self.last_qmodel, + dataloader=self.calib_dataloader, + op_list=op_list, + iteration_list=iteration_list, + inspect_type=inspect_type, + save_to_disk=save_to_disk, + save_path=os.path.join(save_path, "quan"), + quantization_cfg=updated_cfg, + ) print_op_list(workload_location=options.workspace) weights_details = get_weights_details(workload_location=options.workspace) diff --git a/neural_compressor/strategy/utils/constant.py b/neural_compressor/strategy/utils/constant.py index 1515f114e5b..842635c4781 100644 --- a/neural_compressor/strategy/utils/constant.py +++ b/neural_compressor/strategy/utils/constant.py @@ -14,27 +14,45 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- """Strategy constant.""" -PRECISION_LIST = ['bf16', 'fp16' , 'fp32'] -QUANT_MODE_SET = {'static', 'dynamic'} -LOWER_BIT_LIST = ['int4'] +PRECISION_LIST = ["bf16", "fp16", "fp32"] +QUANT_MODE_SET = {"static", "dynamic"} +LOWER_BIT_LIST = ["int4"] -TUNING_ITEMS_LST = [('activation','scheme'), ('activation','algorithm'), ('activation','granularity'), - ('weight','scheme'), ('weight','algorithm'), ('weight','granularity'), 'sampling_size'] -WEIGHT_ONLY_TUNING_ITEMS_LST = [('activation','scheme'), ('activation','algorithm'), ('activation','granularity'), - ('weight','scheme'), ('weight','algorithm'), ('weight','granularity'), - ('weight','bits'), ('weight','group_size'), ('weight','dtype'), 'sampling_size'] +TUNING_ITEMS_LST = [ + ("activation", "scheme"), + ("activation", "algorithm"), + ("activation", "granularity"), + ("weight", "scheme"), + ("weight", "algorithm"), + ("weight", "granularity"), + "sampling_size", +] +WEIGHT_ONLY_TUNING_ITEMS_LST = [ + ("activation", "scheme"), + ("activation", "algorithm"), + ("activation", "granularity"), + ("weight", "scheme"), + ("weight", "algorithm"), + ("weight", "granularity"), + ("weight", "bits"), + ("weight", "group_size"), + ("weight", "dtype"), + "sampling_size", +] -PRECISION_SET_V2_0 = {'fp32', 'bf16', 'fp16'} +PRECISION_SET_V2_0 = {"fp32", "bf16", "fp16"} -auto_query_order = ['static', 'dynamic', 'bf16', 'fp16', 'fp32'] -static_query_order = ['static', 'bf16', 'fp16', 'fp32'] -dynamic_query_order = ['dynamic', 'bf16', 'fp16', 'fp32'] -auto_query_order_o0 = ['bf16', 'fp16', 'fp32', 'static', 'dynamic'] -weight_only_query_order = ['weight_only', 'fp32'] +auto_query_order = ["static", "dynamic", "bf16", "fp16", "fp32"] +static_query_order = ["static", "bf16", "fp16", "fp32"] +dynamic_query_order = ["dynamic", "bf16", "fp16", "fp32"] +auto_query_order_o0 = ["bf16", "fp16", "fp32", "static", "dynamic"] +weight_only_query_order = ["weight_only", "fp32"] -FALLBACK_RECIPES_SET = {'first_conv_or_matmul_quantization', 'last_conv_or_matmul_quantization', \ - 'pre_post_process_quantization'} \ No newline at end of file +FALLBACK_RECIPES_SET = { + "first_conv_or_matmul_quantization", + "last_conv_or_matmul_quantization", + "pre_post_process_quantization", +} diff --git a/neural_compressor/strategy/utils/tuning_sampler.py b/neural_compressor/strategy/utils/tuning_sampler.py index 79574a0ba97..719305dd792 100644 --- a/neural_compressor/strategy/utils/tuning_sampler.py +++ b/neural_compressor/strategy/utils/tuning_sampler.py @@ -17,22 +17,30 @@ """Tuning sampler.""" -from itertools import product import copy -from collections import deque, OrderedDict, defaultdict -from typing import List, Dict, Any, Union, Tuple +from collections import OrderedDict, defaultdict, deque +from itertools import product +from typing import Any, Dict, List, Tuple, Union + +from ...utils import logger from .tuning_space import TuningSpace, pattern_to_internal, pattern_to_path, quant_mode_from_pattern from .tuning_structs import OpTuningConfig -from ...utils import logger from .utility import ClassRegister -TUNING_ITEM_PRIORITY = [('activation','scheme'), ('activation','algorithm'),('activation','granularity'), - ('activation','compute_dtype'), ('weight','scheme'), ('weight','algorithm'), \ - ('weight','granularity')] +TUNING_ITEM_PRIORITY = [ + ("activation", "scheme"), + ("activation", "algorithm"), + ("activation", "granularity"), + ("activation", "compute_dtype"), + ("weight", "scheme"), + ("weight", "algorithm"), + ("weight", "granularity"), +] tuning_sampler_dict = 
ClassRegister() + class TuningOrder: """Not displayed in API Docs.""" @@ -47,11 +55,13 @@ class TuningSampler: Basic class of tuning sampler. """ - def __init__(self, - tuning_space: TuningSpace, - tuning_order_lst: List[TuningOrder], - initial_op_tuning_cfg: Dict, - kwargs: Dict = {}): + def __init__( + self, + tuning_space: TuningSpace, + tuning_order_lst: List[TuningOrder], + initial_op_tuning_cfg: Dict, + kwargs: Dict = {}, + ): """Init tuning sampler. Args: @@ -72,23 +82,25 @@ def __iter__(self, tune_cfg=None): pass def _set_dtype(self, op_name_type, config_args): - has_weight = op_name_type in self.tuning_space.ops_attr['weight'] - path = self.op_complete_path[op_name_type].get('activation', None) - config_args['activation_dtype'] = self.tuning_space.ops_data_type[op_name_type][path] + has_weight = op_name_type in self.tuning_space.ops_attr["weight"] + path = self.op_complete_path[op_name_type].get("activation", None) + config_args["activation_dtype"] = self.tuning_space.ops_data_type[op_name_type][path] if has_weight: - path = self.op_complete_path[op_name_type].get('weight', None) - config_args['weight_dtype'] = self.tuning_space.ops_data_type[op_name_type][path] + path = self.op_complete_path[op_name_type].get("weight", None) + config_args["weight_dtype"] = self.tuning_space.ops_data_type[op_name_type][path] class ModelWiseTuningSampler(TuningSampler): """Not displayed in API Docs.""" - def __init__(self, - tuning_space: TuningSpace, - tuning_items_priority: List[str], - tuning_order_lst: List[TuningOrder], - op_dtype_dict: Dict[tuple, str], - initial_op_tuning_cfg: Dict[tuple, OpTuningConfig]): + def __init__( + self, + tuning_space: TuningSpace, + tuning_items_priority: List[str], + tuning_order_lst: List[TuningOrder], + op_dtype_dict: Dict[tuple, str], + initial_op_tuning_cfg: Dict[tuple, OpTuningConfig], + ): """Model type wise tuning sampler. step1. create a default tuning config for each op @@ -102,25 +114,26 @@ def __init__(self, tuning_order_lst: The tuning orders. op_dtype_dict: The (op name, op type) and its target data type. initial_op_tuning_cfg: The initial tuning config. 
- """ super().__init__(tuning_space, tuning_order_lst, initial_op_tuning_cfg) self.op_dtype_dict = op_dtype_dict self.tuning_space = tuning_space self.default_op_config = {} - tuning_items = defaultdict(set) # item name: options + tuning_items = defaultdict(set) # item name: options for op_name_type, quant_mode in op_dtype_dict.items(): full_path = self.tuning_space.get_op_default_path_by_pattern(op_name_type, quant_mode) self.op_complete_path[op_name_type] = copy.deepcopy(full_path) # step1, set the default config for each op self.default_op_config[op_name_type] = tuning_space.get_default_config(op_name_type, quant_mode) - if quant_mode[0] == 'precision': continue - mode_items = copy.deepcopy(full_path) # TODO refactor the initialization method + if quant_mode[0] == "precision": + continue + mode_items = copy.deepcopy(full_path) # TODO refactor the initialization method # step2, collect all tuning items and their options for att in mode_items: - if att not in full_path: continue - quant_mode_item = self.tuning_space.query_quant_mode_item_by_full_path(op_name_type ,full_path[att]) + if att not in full_path: + continue + quant_mode_item = self.tuning_space.query_quant_mode_item_by_full_path(op_name_type, full_path[att]) for tuning_item in quant_mode_item.options: tuning_items[tuning_item.name] = tuning_items[tuning_item.name].union(tuning_item.options) self.tuning_items = tuning_items @@ -136,39 +149,41 @@ def __iter__(self): # traverse all possible combinations by model-wise level tune_cfg = copy.deepcopy(self.initial_op_tuning_cfg) for op_name_type, quant_mode in self.op_dtype_dict.items(): - if quant_mode[0] == 'precision': continue + if quant_mode[0] == "precision": + continue all_exist_flag = True for method_name, method_val in zip(keys, vals): full_path = self.op_complete_path[op_name_type] - if method_name[0] not in full_path: continue - if not self.tuning_space.query_item_option(op_name_type, - full_path[method_name[0]], - method_name, method_val): + if method_name[0] not in full_path: + continue + if not self.tuning_space.query_item_option( + op_name_type, full_path[method_name[0]], method_name, method_val + ): all_exist_flag = False tune_cfg[op_name_type] = self.default_op_config[op_name_type] break if all_exist_flag: config_args = dict(zip(keys, vals)) - self._set_dtype( op_name_type, config_args) + self._set_dtype(op_name_type, config_args) internal_pattern = pattern_to_internal(quant_mode) quant_mode = quant_mode_from_pattern(internal_pattern) - tune_cfg[op_name_type] = OpTuningConfig(op_name_type[0], - op_name_type[1], - quant_mode, - self.tuning_space, - kwargs=config_args) + tune_cfg[op_name_type] = OpTuningConfig( + op_name_type[0], op_name_type[1], quant_mode, self.tuning_space, kwargs=config_args + ) yield tune_cfg class OpTypeWiseTuningSampler(TuningSampler): """Not displayed in API Docs.""" - def __init__(self, - tuning_space: TuningSpace, - tuning_items_priority: List[str], - tuning_order_lst: List[TuningOrder], - op_dtype_dict: Dict[tuple, str], - initial_op_tuning_cfg: Dict[tuple, OpTuningConfig]): + def __init__( + self, + tuning_space: TuningSpace, + tuning_items_priority: List[str], + tuning_order_lst: List[TuningOrder], + op_dtype_dict: Dict[tuple, str], + initial_op_tuning_cfg: Dict[tuple, OpTuningConfig], + ): """Op type wise tuning sampler. 
Args: @@ -194,15 +209,16 @@ def __init__(self, self.op_complete_path[op_name_type] = copy.deepcopy(full_path) self.default_op_config[op_name_type] = self.tuning_space.get_default_config(op_name_type, quant_mode) op_name, op_type = op_name_type - if quant_mode[0] == 'precision': continue - mode_items = copy.deepcopy(full_path) # TODO refactor the initialization method + if quant_mode[0] == "precision": + continue + mode_items = copy.deepcopy(full_path) # TODO refactor the initialization method op_type_quant_mode = (op_type, quant_mode) filtered_tuning_items = [] for item_name in tuning_items_priority: att, method_name = item_name if att not in mode_items: continue - quant_mode_item = self.tuning_space.query_quant_mode_item_by_full_path(op_name_type ,full_path[att]) + quant_mode_item = self.tuning_space.query_quant_mode_item_by_full_path(op_name_type, full_path[att]) item = quant_mode_item.get_option_by_name(item_name) if item: if op_type_quant_mode not in self.optype_quant_mode_option: @@ -232,41 +248,40 @@ def __iter__(self): for index, op_type_quant_mode in enumerate(self.op_type_quant_mode_wise_combination.keys()): for op_name_type, quant_mode in self.op_dtype_dict.items(): if op_name_type[1] == op_type_quant_mode[0] and quant_mode == op_type_quant_mode[1]: - op_tuning_items = [item.name for item in \ - self.optype_quant_mode_items_name[op_type_quant_mode]] + op_tuning_items = [item.name for item in self.optype_quant_mode_items_name[op_type_quant_mode]] op_tuning_item_vals = options_lst[index] all_exist_flag = True for method_name, method_val in zip(op_tuning_items, op_tuning_item_vals): full_path = self.op_complete_path[op_name_type] - if not self.tuning_space.query_item_option(op_name_type, - full_path[method_name[0]], - method_name, - method_val): + if not self.tuning_space.query_item_option( + op_name_type, full_path[method_name[0]], method_name, method_val + ): all_exist_flag = False op_tuning_config = self.default_op_config[op_name_type] break if all_exist_flag: config_args = dict(zip(op_tuning_items, op_tuning_item_vals)) - self._set_dtype( op_name_type, config_args) + self._set_dtype(op_name_type, config_args) internal_pattern = pattern_to_internal(quant_mode) quant_mode = quant_mode_from_pattern(internal_pattern) - op_tuning_config = OpTuningConfig(op_name_type[0], - op_name_type[1], - quant_mode, - self.tuning_space, - kwargs=config_args) + op_tuning_config = OpTuningConfig( + op_name_type[0], op_name_type[1], quant_mode, self.tuning_space, kwargs=config_args + ) new_tune_cfg.update({op_name_type: op_tuning_config}) yield new_tune_cfg + class OpWiseTuningSampler(TuningSampler): """Not displayed in API Docs.""" - def __init__(self, - tuning_space: TuningSpace, - tuning_items_priority: List[str], - tuning_order_lst: List[TuningOrder], - op_dtype_dict: Dict[tuple, str], - initial_op_tuning_cfg: Dict): + def __init__( + self, + tuning_space: TuningSpace, + tuning_items_priority: List[str], + tuning_order_lst: List[TuningOrder], + op_dtype_dict: Dict[tuple, str], + initial_op_tuning_cfg: Dict, + ): """Op wise tuning config sampler. 
Args: @@ -288,13 +303,14 @@ def __init__(self, mode_items = copy.deepcopy(full_path) internal_pattern = pattern_to_internal(op_quant_mode) op_quant_mode = quant_mode_from_pattern(internal_pattern) - if internal_pattern[0] == 'precision': continue + if internal_pattern[0] == "precision": + continue filtered_tuning_items = [] for item_name in tuning_items_priority: att, method_name = item_name if att not in mode_items: continue - quant_mode_item = self.tuning_space.query_quant_mode_item_by_full_path(op_name_type ,full_path[att]) + quant_mode_item = self.tuning_space.query_quant_mode_item_by_full_path(op_name_type, full_path[att]) item = quant_mode_item.get_option_by_name(item_name) if item: filtered_tuning_items.append(item) @@ -318,9 +334,9 @@ def __iter__(self): self._set_dtype(op_name_type, config_args) internal_pattern = pattern_to_internal(op_quant_mode) quant_mode = quant_mode_from_pattern(internal_pattern) - op_tuning_config = OpTuningConfig(op_name_type[0], op_name_type[1], - quant_mode, self.tuning_space, - kwargs=config_args) + op_tuning_config = OpTuningConfig( + op_name_type[0], op_name_type[1], quant_mode, self.tuning_space, kwargs=config_args + ) new_tune_cfg.update({op_name_type: op_tuning_config}) yield new_tune_cfg @@ -339,12 +355,12 @@ def get_opwise_candidate(self): self.op_complete_path[op_name_type] = copy.deepcopy(full_path) op_wise_configs[op_name_type] = [] # For precision - if internal_pattern[0] == 'precision': + if internal_pattern[0] == "precision": config_args = {} self._set_dtype(op_name_type, config_args) - op_tuning_config = OpTuningConfig(op_name_type[0], op_name_type[1], - quant_mode, self.tuning_space, - kwargs=config_args) + op_tuning_config = OpTuningConfig( + op_name_type[0], op_name_type[1], quant_mode, self.tuning_space, kwargs=config_args + ) op_wise_configs[op_name_type].append(op_tuning_config) continue # For quantization @@ -353,10 +369,10 @@ def get_opwise_candidate(self): for op_tuning_item_vals in op_options: config_args = dict(zip(op_tuning_items, op_tuning_item_vals)) - self._set_dtype( op_name_type, config_args) - op_tuning_config = OpTuningConfig(op_name_type[0], op_name_type[1], - quant_mode, self.tuning_space, - kwargs=config_args) + self._set_dtype(op_name_type, config_args) + op_tuning_config = OpTuningConfig( + op_name_type[0], op_name_type[1], quant_mode, self.tuning_space, kwargs=config_args + ) op_wise_configs[op_name_type].append(op_tuning_config) return op_wise_configs @@ -364,14 +380,15 @@ def get_opwise_candidate(self): class FallbackTuningSampler(TuningSampler): """Not displayed in API Docs.""" - def __init__(self, - tuning_space: TuningSpace, - tuning_order_lst: List[TuningOrder], - initial_op_tuning_cfg: Dict[Tuple, Any], - op_dtypes: Dict[Union[Tuple, Tuple[Tuple]], str], - accumulate: bool, - skip_first: bool = True - ): + def __init__( + self, + tuning_space: TuningSpace, + tuning_order_lst: List[TuningOrder], + initial_op_tuning_cfg: Dict[Tuple, Any], + op_dtypes: Dict[Union[Tuple, Tuple[Tuple]], str], + accumulate: bool, + skip_first: bool = True, + ): """Sampler for generate the tuning config of fallback stage. Args: @@ -399,8 +416,9 @@ def __iter__(self): # Only support fallback to lower precision. 
if not self.accumulate: new_tune_cfg = copy.deepcopy(self.initial_op_tuning_cfg) - op_name_type_lst = [op_name_type] if len(op_name_type) != 1 and \ - isinstance(op_name_type[1], str) else op_name_type + op_name_type_lst = ( + [op_name_type] if len(op_name_type) != 1 and isinstance(op_name_type[1], str) else op_name_type + ) for op_name_type in op_name_type_lst: full_path = self.tuning_space.get_op_default_path_by_pattern(op_name_type, target_dtype) self.op_complete_path[op_name_type] = copy.deepcopy(full_path) @@ -408,8 +426,9 @@ def __iter__(self): self._set_dtype(op_name_type, config_args) internal_pattern = pattern_to_internal(target_dtype) quant_mode = quant_mode_from_pattern(internal_pattern) - new_op_config = OpTuningConfig(op_name_type[0], op_name_type[1], quant_mode, \ - self.tuning_space, kwargs=config_args) + new_op_config = OpTuningConfig( + op_name_type[0], op_name_type[1], quant_mode, self.tuning_space, kwargs=config_args + ) new_tune_cfg.update({op_name_type: new_op_config}) if self.accumulate and skip_first: # skip the first one @@ -418,17 +437,19 @@ def __iter__(self): logger.info(f"fallback {op_name_type_lst} to {target_dtype}") yield new_tune_cfg # need to skip the first one + class LowerBitsSampler(TuningSampler): """Not displayed in API Docs.""" - def __init__(self, - tuning_space: TuningSpace, - tuning_order_lst: List[TuningOrder], - initial_op_tuning_cfg: Dict[tuple, Any], - op_dtypes: Dict[str, str], - accumulate: bool, - skip_first: bool = True - ): + def __init__( + self, + tuning_space: TuningSpace, + tuning_order_lst: List[TuningOrder], + initial_op_tuning_cfg: Dict[tuple, Any], + op_dtypes: Dict[str, str], + accumulate: bool, + skip_first: bool = True, + ): """Generate tuning config with lower bits. Args: @@ -468,40 +489,43 @@ def __iter__(self): logger.debug(f"Quantize {op_name_type} to {target_dtype}") yield new_tune_cfg # need to skip the first one + def _get_default_config_by_path(op_name_type, tuning_space, full_path): """Get default config according to path.""" from .constant import TUNING_ITEMS_LST - has_weight = op_name_type in tuning_space.ops_attr['weight'] + + has_weight = op_name_type in tuning_space.ops_attr["weight"] config_args = {} - att_lst = ['activation', 'weight'] if has_weight else ['activation'] + att_lst = ["activation", "weight"] if has_weight else ["activation"] for att in att_lst: att_full_path = tuning_space.get_default_full_path(op_name_type, full_path[att]) - config_args[att + '_dtype'] = tuning_space.ops_data_type[op_name_type].get(att_full_path, None) + config_args[att + "_dtype"] = tuning_space.ops_data_type[op_name_type].get(att_full_path, None) mode_item = tuning_space.get_item_by_path((op_name_type, *att_full_path)) if mode_item: - method_args = {method_item.name: method_item.options[0] for method_item in mode_item.options \ - if method_item.name in TUNING_ITEMS_LST} + method_args = { + method_item.name: method_item.options[0] + for method_item in mode_item.options + if method_item.name in TUNING_ITEMS_LST + } config_args.update(method_args) - quant_mode = full_path['weight'][0] + quant_mode = full_path["weight"][0] # set the first option as the default for each tuning item - op_tuning_config = OpTuningConfig(op_name_type[0], - op_name_type[1], - quant_mode, - tuning_space, - kwargs=config_args) + op_tuning_config = OpTuningConfig(op_name_type[0], op_name_type[1], quant_mode, tuning_space, kwargs=config_args) return op_tuning_config class BlockFallbackTuningSampler(TuningSampler): """Not displayed in API Docs.""" - def 
__init__(self, - tuning_space: TuningSpace, - tuning_order_lst: List[TuningOrder], - initial_op_tuning_cfg: Dict[tuple, Any], - op_block_lst: List[List[tuple]], - accumulate: bool, - target_dtype: str - ): + + def __init__( + self, + tuning_space: TuningSpace, + tuning_order_lst: List[TuningOrder], + initial_op_tuning_cfg: Dict[tuple, Any], + op_block_lst: List[List[tuple]], + accumulate: bool, + target_dtype: str, + ): """Sampler for generate the tuning config of fallback stage. Args: @@ -528,33 +552,37 @@ def __iter__(self): for op_block in self.op_block_lst: # Only support fallback to lower precision. if not self.accumulate: - new_tune_cfg = copy.deepcopy(self.initial_op_tuning_cfg) + new_tune_cfg = copy.deepcopy(self.initial_op_tuning_cfg) logger.debug(f"[BlockFallbackTuningSampler] op_block: {op_block}") - for op_name_type in op_block: + for op_name_type in op_block: full_path = self.tuning_space.get_op_default_path_by_pattern(op_name_type, self.target_dtype) self.op_complete_path[op_name_type] = copy.deepcopy(full_path) config_args = {} self._set_dtype(op_name_type, config_args) internal_pattern = pattern_to_internal(self.target_dtype) quant_mode = quant_mode_from_pattern(internal_pattern) - new_op_config = OpTuningConfig(op_name_type[0], op_name_type[1], - quant_mode, self.tuning_space, - kwargs=config_args) + new_op_config = OpTuningConfig( + op_name_type[0], op_name_type[1], quant_mode, self.tuning_space, kwargs=config_args + ) new_tune_cfg.update({op_name_type: new_op_config}) logger.debug(f"[BlockFallbackTuningSampler] updated_tuning_cfg {op_name_type}: {new_op_config}") logger.debug(f"[BlockFallbackTuningSampler] fallback {op_name_type} to {self.target_dtype}") yield new_tune_cfg + @tuning_sampler_dict("smooth_quant") class SmoothQuantSampler(TuningSampler): """Not displayed in API Docs.""" - def __init__(self, - tuning_space: TuningSpace, - tuning_order_lst: List[TuningOrder], - initial_op_tuning_cfg: Dict, - alpha_list: List[float], - kwargs: Dict = {}): + + def __init__( + self, + tuning_space: TuningSpace, + tuning_order_lst: List[TuningOrder], + initial_op_tuning_cfg: Dict, + alpha_list: List[float], + kwargs: Dict = {}, + ): """Init tuning sampler. Args: @@ -567,7 +595,6 @@ def __init__(self, super().__init__(tuning_space, tuning_order_lst, initial_op_tuning_cfg) self.sq_alpha_list = alpha_list - def __iter__(self): """Yield the next tuning config. 
@@ -582,5 +609,3 @@ def __iter__(self): recipe_cfgs["smooth_quant_args"] = {"alpha": alpha} logger.debug(f"[STRATEGY] set smooth quant alpha with: {alpha:.4f}") yield new_tune_cfg - - diff --git a/neural_compressor/strategy/utils/tuning_space.py b/neural_compressor/strategy/utils/tuning_space.py index 06b9a0ef815..a05a8be2c2a 100644 --- a/neural_compressor/strategy/utils/tuning_space.py +++ b/neural_compressor/strategy/utils/tuning_space.py @@ -17,16 +17,17 @@ """Tuning space.""" -from collections import defaultdict, OrderedDict +import itertools import re -from typing import Dict, Tuple, List +from collections import OrderedDict, defaultdict from copy import deepcopy -import itertools +from typing import Dict, List, Tuple + from ...utils import logger -from .utility import OrderedDefaultDict, preprocess_user_cfg, quant_options +from .constant import TUNING_ITEMS_LST, WEIGHT_ONLY_TUNING_ITEMS_LST from .tuning_structs import OpTuningConfig +from .utility import OrderedDefaultDict, preprocess_user_cfg, quant_options -from .constant import TUNING_ITEMS_LST, WEIGHT_ONLY_TUNING_ITEMS_LST class TuningItem: """Not displayed in API Docs.""" @@ -96,14 +97,14 @@ def get_details(self, depth=0): Returns: The tuning item and its options as a string. """ - details = ['\t' * depth + f"{self.name}, {self.item_type}"] + details = ["\t" * depth + f"{self.name}, {self.item_type}"] for option in self.options: if isinstance(option, int) or isinstance(option, str): details.append("\t" * depth + str(option)) else: details.append(option.get_details(depth + 1)) return "\n".join(details) - + def __repr__(self) -> str: """Display the tuning item as string. @@ -117,7 +118,6 @@ class TuningSpace: """Not displayed in API Docs. 1) capability -> internal format -> merge -> tuning space (tree) - """ def __init__(self, capability, conf, framework=None): @@ -130,7 +130,7 @@ def __init__(self, capability, conf, framework=None): """ self.capability = capability self.conf = conf - self.root_item = TuningItem(name='root', options=[], item_type='root') + self.root_item = TuningItem(name="root", options=[], item_type="root") self.quant_mode_wise_items = defaultdict(list) # quant_mode/precision_name: {(op_name, op_type),...} self.op_type_wise_items = defaultdict(list) # op_type: {(op_name, op_type), ...} self.framework = framework @@ -139,17 +139,17 @@ def __init__(self, capability, conf, framework=None): self.op_items = {} # {(op_name, op_type): {(path): data type}} self.ops_data_type = OrderedDefaultDict() - self.ops_attr = {'activation': set(), 'weight': set()} + self.ops_attr = {"activation": set(), "weight": set()} # {(op_name, op_type): {path1, path2, ...} self.ops_path_set = defaultdict(set) self._create_tuning_space(capability, self._usr_cfg) def _init_usr_cfg(self): """Init user config.""" - usr_cfg = {'quantization': {}} - usr_cfg['quantization']['model_wise'] = None - usr_cfg['quantization']['optype_wise'] = self.conf.op_type_dict if self.conf else None - usr_cfg['quantization']['op_wise'] = self.conf.op_name_dict if self.conf else None + usr_cfg = {"quantization": {}} + usr_cfg["quantization"]["model_wise"] = None + usr_cfg["quantization"]["optype_wise"] = self.conf.op_type_dict if self.conf else None + usr_cfg["quantization"]["op_wise"] = self.conf.op_name_dict if self.conf else None return usr_cfg def _parse_capability(self, capability: Dict) -> None: @@ -158,37 +158,40 @@ def _parse_capability(self, capability: Dict) -> None: Args: capability: merged framework capability. 
""" - calib = TuningItem(name='calib_sampling_size', - options=capability['calib']['calib_sampling_size'], - item_type='calib_sampling_size') + calib = TuningItem( + name="calib_sampling_size", + options=capability["calib"]["calib_sampling_size"], + item_type="calib_sampling_size", + ) self.root_item.append(calib) + def _parse(cap, root, path, op_name_type): if isinstance(cap, dict): for key, val in cap.items(): if isinstance(val, dict): - if len(path) > 1 and path[-2] == 'precision': + if len(path) > 1 and path[-2] == "precision": self.ops_path_set[op_name_type].add(tuple(path + [key])) tuning_item = TuningItem(name=key, options=[], item_type=key) root.append(tuning_item) _parse(val, tuning_item, path + [key], op_name_type) elif isinstance(val, list): - new_key = ('activation', key) if 'activation' in path else ('weight', key) - tuning_item = TuningItem(name=new_key, options=val, item_type='method') + new_key = ("activation", key) if "activation" in path else ("weight", key) + tuning_item = TuningItem(name=new_key, options=val, item_type="method") self.ops_path_set[op_name_type].add(tuple(path)) root.append(tuning_item) else: return - for op_name_type, op_cap in capability['op'].items(): + for op_name_type, op_cap in capability["op"].items(): op_name, op_type = op_name_type - op_item = TuningItem(name=op_name_type, options=[], item_type='op') + op_item = TuningItem(name=op_name_type, options=[], item_type="op") self.op_type_wise_items[op_type].append(op_item) self.root_item.append(op_item) self.op_items[op_name_type] = op_item _parse(op_cap, op_item, [], op_name_type) for q_option in op_item.options: - if q_option and q_option.name == 'precision': - acc_item = q_option.get_option_by_name('activation') + if q_option and q_option.name == "precision": + acc_item = q_option.get_option_by_name("activation") if acc_item and acc_item.options: for dtype_item in acc_item.options: self.quant_mode_wise_items[dtype_item.name].append(op_item) @@ -223,75 +226,81 @@ def _merge_op_cfg(self, cur_op_cap, op_user_cfg, fw_op_cap): Return the merged capability. """ from .utility import extract_data_type, reverted_data_type + fw_op_cap = deepcopy(fw_op_cap) new_op_cap = deepcopy(cur_op_cap) op_user_cfg = preprocess_user_cfg(op_user_cfg) - for att in ['activation', 'weight']: + for att in ["activation", "weight"]: if op_user_cfg.get(att, None) is not None: - user_dtype_lst = op_user_cfg[att]['dtype'] if op_user_cfg[att].get('dtype', None) is not None else [] + user_dtype_lst = op_user_cfg[att]["dtype"] if op_user_cfg[att].get("dtype", None) is not None else [] # Merge the precision part. - fwk_att_precision_cap = fw_op_cap['precision'].get(att, {}) + fwk_att_precision_cap = fw_op_cap["precision"].get(att, {}) fwk_precision_set = set(fwk_att_precision_cap.keys()) # The intersection of user cfg and fwk capability. 
valid_precision_set = set(fwk_precision_set).intersection(set(user_dtype_lst)) if len(valid_precision_set) != 0: - new_op_cap = dict(filter(lambda item: item[0] == 'precision', new_op_cap.items())) - new_op_cap['precision'][att] = dict(filter(lambda item: item[0] in valid_precision_set,\ - fw_op_cap['precision'][att].items())) + new_op_cap = dict(filter(lambda item: item[0] == "precision", new_op_cap.items())) + new_op_cap["precision"][att] = dict( + filter(lambda item: item[0] in valid_precision_set, fw_op_cap["precision"][att].items()) + ) else: # Filter the valid options for tuning item for quant_mode in fw_op_cap: if quant_mode not in new_op_cap: new_op_cap[quant_mode] = deepcopy(fw_op_cap[quant_mode]) - if quant_mode == 'precision': continue + if quant_mode == "precision": + continue for data_type in new_op_cap[quant_mode][att]: for signed_flag in new_op_cap[quant_mode][att][data_type]: cur_items = new_op_cap[quant_mode][att][data_type][signed_flag] fwk_items = fw_op_cap[quant_mode][att][data_type][signed_flag] for method_name, method_options in op_user_cfg[att].items(): - skip_list = ['dtype', 'quant_mode'] - if data_type == 'weight_only': - skip_list = ['quant_mode'] + skip_list = ["dtype", "quant_mode"] + if data_type == "weight_only": + skip_list = ["quant_mode"] if method_name not in skip_list and method_options: # filter the method options - options_intersection = set(fwk_items[method_name]\ - ).intersection(set(method_options)) + options_intersection = set(fwk_items[method_name]).intersection( + set(method_options) + ) # merge with fwk, if intersection -> use intersection if len(options_intersection) > 0: - cur_items[method_name] = [option for option in fwk_items[method_name] if\ - option in options_intersection] + cur_items[method_name] = [ + option + for option in fwk_items[method_name] + if option in options_intersection + ] return new_op_cap def _merge_optype_wise_cfg(self, cap: Dict, optype_wise_usr_cfg: Dict, fw_cap: Dict): for op_type, op_user_cfg in optype_wise_usr_cfg.items(): op_type_pattern = re.compile(op_type) - op_lst = [op_name_type for op_name_type in cap['op'] if op_type_pattern.fullmatch(op_name_type[1])] + op_lst = [op_name_type for op_name_type in cap["op"] if op_type_pattern.fullmatch(op_name_type[1])] for op_name_type in op_lst: - cap['op'][op_name_type] = self._merge_op_cfg(cap['op'][op_name_type], - op_user_cfg, - fw_cap['op'][op_name_type]) + cap["op"][op_name_type] = self._merge_op_cfg( + cap["op"][op_name_type], op_user_cfg, fw_cap["op"][op_name_type] + ) def _merge_model_wise_cfg(self, cap: Dict, model_wise_usr_cfg: Dict, fw_cap: Dict): - for op_name_type in cap['op'].keys(): - cap['op'][op_name_type] = self._merge_op_cfg(cap['op'][op_name_type], - model_wise_usr_cfg, - fw_cap['op'][op_name_type]) + for op_name_type in cap["op"].keys(): + cap["op"][op_name_type] = self._merge_op_cfg( + cap["op"][op_name_type], model_wise_usr_cfg, fw_cap["op"][op_name_type] + ) def _merge_op_wise_cfg(self, cap: Dict, op_wise_usr_cfg: Dict, fw_cap: Dict): - op_name_types = {key[0]: key for key in cap['op'].keys()} + op_name_types = {key[0]: key for key in cap["op"].keys()} for op_name_pattern, op_user_cfg in op_wise_usr_cfg.items(): if isinstance(op_name_pattern, str): op_name_pattern = re.compile(op_name_pattern) - str_flag=True + str_flag = True else: - str_flag=False + str_flag = False for op_name in op_name_types: - if str_flag and op_name_pattern.fullmatch(str(op_name)) \ - or op_name_pattern == op_name: + if str_flag and op_name_pattern.fullmatch(str(op_name)) or 
op_name_pattern == op_name: op_name_type = op_name_types[op_name] - cap['op'][op_name_type] = self._merge_op_cfg(cap['op'][op_name_type], - op_user_cfg, - fw_cap['op'][op_name_type]) + cap["op"][op_name_type] = self._merge_op_cfg( + cap["op"][op_name_type], op_user_cfg, fw_cap["op"][op_name_type] + ) def _merge_with_user_cfg(self, capability: Dict, user_cfg: Dict): """Merge the capability with user config. @@ -399,12 +408,12 @@ def _merge_with_user_cfg(self, capability: Dict, user_cfg: Dict): :return: """ fw_capability = deepcopy(capability) - if user_cfg['model_wise'] is not None: - self._merge_model_wise_cfg(capability, user_cfg['model_wise'], fw_capability) - if user_cfg['optype_wise'] is not None: - self._merge_optype_wise_cfg(capability, user_cfg['optype_wise'], fw_capability) - if user_cfg['op_wise'] is not None: - self._merge_op_wise_cfg(capability, user_cfg['op_wise'], fw_capability) + if user_cfg["model_wise"] is not None: + self._merge_model_wise_cfg(capability, user_cfg["model_wise"], fw_capability) + if user_cfg["optype_wise"] is not None: + self._merge_optype_wise_cfg(capability, user_cfg["optype_wise"], fw_capability) + if user_cfg["op_wise"] is not None: + self._merge_op_wise_cfg(capability, user_cfg["op_wise"], fw_capability) def _parse_cap_helper(self, cap): """Convert the cpa to internal format. @@ -460,40 +469,43 @@ def _parse_cap_helper(self, cap): } """ from .utility import OrderedDefaultDict, extract_data_type + cap = deepcopy(cap) - parsed_cap = OrderedDict() # {(op_name, op_type): parsed_op_cap} + parsed_cap = OrderedDict() # {(op_name, op_type): parsed_op_cap} for op_name_type, op_cap_lst in cap.items(): - parsed_op_cap = OrderedDefaultDict() # {ptq_type/precision, {}} - parsed_op_cap['precision'] = OrderedDefaultDict() + parsed_op_cap = OrderedDefaultDict() # {ptq_type/precision, {}} + parsed_op_cap["precision"] = OrderedDefaultDict() # WA for some op have extra weight dtype. - has_weight = all(['weight' in op_cap for op_cap in op_cap_lst]) - if has_weight: self.ops_attr['weight'].add(op_name_type) + has_weight = all(["weight" in op_cap for op_cap in op_cap_lst]) + if has_weight: + self.ops_attr["weight"].add(op_name_type) for op_cap in op_cap_lst: - if 'activation' in op_cap: - self.ops_attr['activation'].add(op_name_type) - attrs_lst = ['activation', 'weight'] if has_weight else ['activation'] + if "activation" in op_cap: + self.ops_attr["activation"].add(op_name_type) + attrs_lst = ["activation", "weight"] if has_weight else ["activation"] for att in attrs_lst: # Parse the data info for item that has options. - if 'activation' in op_cap and 'quant_mode' in op_cap['activation']: - quant_mode = op_cap['activation']['quant_mode'] - att_dtype = op_cap[att]['dtype'][0] + if "activation" in op_cap and "quant_mode" in op_cap["activation"]: + quant_mode = op_cap["activation"]["quant_mode"] + att_dtype = op_cap[att]["dtype"][0] signed_flag, _data_type = extract_data_type(att_dtype) if quant_options.quant_type == 3: - _data_type = 'weight_only' + _data_type = "weight_only" for item_name, item_options in op_cap[att].items(): - if item_name == 'dtype': + if item_name == "dtype": # The dtype should be a string, need to align with fwk.yaml. 
- self.ops_data_type[op_name_type][(quant_mode, att, _data_type, signed_flag)] = \ + self.ops_data_type[op_name_type][(quant_mode, att, _data_type, signed_flag)] = ( item_options[0] if isinstance(item_options, list) else item_options - if item_name not in ['quant_mode']: + ) + if item_name not in ["quant_mode"]: parsed_op_cap[quant_mode][att][_data_type][signed_flag][item_name] = item_options else: # Parse the data info for item with unique value. - att_dtype = op_cap[att]['dtype'] + att_dtype = op_cap[att]["dtype"] if isinstance(att_dtype, list): att_dtype = att_dtype[0] - parsed_op_cap['precision'][att][att_dtype] = {'dtype': att_dtype} - self.ops_data_type[op_name_type][('precision', att, att_dtype)] = att_dtype + parsed_op_cap["precision"][att][att_dtype] = {"dtype": att_dtype} + self.ops_data_type[op_name_type][("precision", att, att_dtype)] = att_dtype parsed_cap[op_name_type] = parsed_op_cap return parsed_cap @@ -508,10 +520,10 @@ def _create_tuning_space(self, capability, usr_cfg): :param usr_cfg: :return: """ - capability['op'] = self._parse_cap_helper(deepcopy(capability['op'])) + capability["op"] = self._parse_cap_helper(deepcopy(capability["op"])) if usr_cfg: - self._merge_with_user_cfg(capability, usr_cfg['quantization']) - logger.debug(f"*********** After Merged with user cfg ***********") + self._merge_with_user_cfg(capability, usr_cfg["quantization"]) + logger.debug("*********** After Merged with user cfg ***********") logger.debug(capability) self._parse_capability(capability) @@ -528,7 +540,8 @@ def query_item_option(self, op_name_type, path, method_name, method_val): Return the query result if exist. """ mode_item = self.get_item_by_path((op_name_type, *path)) - if not mode_item: return None + if not mode_item: + return None method_item = mode_item.get_option_by_name(method_name) return method_item is not None and method_val in method_item.options @@ -543,28 +556,28 @@ def get_default_config(self, op_name_type, quant_mode): op_tuning_config: the default config according to the specified quantization mode. 
""" from .tuning_structs import OpTuningConfig + # For quant_mode static/dynamic/((static, int8), (dynamic, int4)) # set the first option as the default if the not support the required quant mode full_path = self.get_op_default_path_by_pattern(op_name_type, quant_mode) config_args = {} - has_weight = op_name_type in self.ops_attr['weight'] - config_args['activation_dtype'] = self.ops_data_type[op_name_type].get(full_path['activation']) + has_weight = op_name_type in self.ops_attr["weight"] + config_args["activation_dtype"] = self.ops_data_type[op_name_type].get(full_path["activation"]) if has_weight: - config_args['weight_dtype'] = self.ops_data_type[op_name_type].get(full_path['weight']) + config_args["weight_dtype"] = self.ops_data_type[op_name_type].get(full_path["weight"]) for att in full_path: - mode_item = self.query_quant_mode_item_by_full_path(op_name_type ,full_path[att]) + mode_item = self.query_quant_mode_item_by_full_path(op_name_type, full_path[att]) if mode_item: - method_args = {method_item.name: method_item.options[0] for method_item in mode_item.options \ - if method_item.name in TUNING_ITEMS_LST} + method_args = { + method_item.name: method_item.options[0] + for method_item in mode_item.options + if method_item.name in TUNING_ITEMS_LST + } config_args.update(method_args) quant_mode = quant_mode if isinstance(quant_mode, str) else quant_mode[0] # set the first option as the default for each tuning item - op_tuning_config = OpTuningConfig(op_name_type[0], - op_name_type[1], - quant_mode, - self, - kwargs=config_args) + op_tuning_config = OpTuningConfig(op_name_type[0], op_name_type[1], quant_mode, self, kwargs=config_args) return op_tuning_config def get_item_by_path(self, path, default=None): @@ -590,9 +603,10 @@ def get_default_full_path(self, op_name_type, path): new_path: the complete path. """ # For precision - if path[0] == 'precision': + if path[0] == "precision": # If the path is ('precision', 'activation', dtype), return it directly. - if len(path) == 3: return path + if len(path) == 3: + return path assert len(path) == 2, f"Got the path: {path}, please provide the path include activation or weight." att_item = self.get_item_by_path((op_name_type, *path)) if not att_item or len(att_item.options) == 0: @@ -603,14 +617,18 @@ def get_default_full_path(self, op_name_type, path): else: # For quantization assert len(path) >= 2, f"Got the path: {path}, please provide the path include activation or weight." - if path[-1] == None: path = path[:-1] + if path[-1] is None: + path = path[:-1] item = self.get_item_by_path((op_name_type, *path)) new_path = path # For path ('static', 'activation', ...) while item: item_options = item.options - if len(item_options) > 0 and isinstance(item_options[0], TuningItem) and \ - item_options[0].item_type != 'method': + if ( + len(item_options) > 0 + and isinstance(item_options[0], TuningItem) + and item_options[0].item_type != "method" + ): new_path = new_path + (item_options[0].name,) item = item_options[0] else: @@ -645,11 +663,11 @@ def get_op_default_path_by_pattern(self, op_name_type, pattern): result(Dict): The default full path of activation and weight if have. 
""" internal_pattern = pattern_to_internal(pattern) - full_path = {'activation': None, 'weight': None} - full_path['activation'], full_path['weight'] = pattern_to_path(internal_pattern) + full_path = {"activation": None, "weight": None} + full_path["activation"], full_path["weight"] = pattern_to_path(internal_pattern) result = {} - has_weight = op_name_type in self.ops_attr['weight'] - att_lst = ['activation', 'weight'] if has_weight else ['activation'] + has_weight = op_name_type in self.ops_attr["weight"] + att_lst = ["activation", "weight"] if has_weight else ["activation"] for att in att_lst: result[att] = self.get_default_full_path(op_name_type, full_path[att]) return result @@ -664,11 +682,13 @@ def get_op_default_path_by_quant_bits(self, op_name_type, quant_bits): Returns: A dict includes the full path. """ - quant_modes = ['static', 'dynamic'] - attribute_options = ['activation', 'weight'] + quant_modes = ["static", "dynamic"] + attribute_options = ["activation", "weight"] quant_bits = [quant_bits] - support_attributes = {'activation': ('precision', 'activation', 'fp32'),\ - 'weight': ('precision', 'weight', 'fp32')} + support_attributes = { + "activation": ("precision", "activation", "fp32"), + "weight": ("precision", "weight", "fp32"), + } for path in itertools.product(quant_modes, attribute_options, quant_bits): if self.query_quant_mode_item_by_full_path(op_name_type, path): support_attributes[path[1]] = path @@ -683,11 +703,11 @@ def collect_op_by_quant_bits(self, quant_bits: str) -> List[TuningItem]: Args: quant_bits: the target quantization bits, like int4, int8. """ - quant_modes = ['static', 'dynamic'] - attribute_options = ['activation', 'weight'] + quant_modes = ["static", "dynamic"] + attribute_options = ["activation", "weight"] quant_bits = [quant_bits] - quant_op_items = set(self.query_items_by_quant_mode('static')).union(self.query_items_by_quant_mode('dynamic')) + quant_op_items = set(self.query_items_by_quant_mode("static")).union(self.query_items_by_quant_mode("dynamic")) op_items = [] for op in quant_op_items: for path in itertools.product(quant_modes, attribute_options, quant_bits): @@ -696,7 +716,8 @@ def collect_op_by_quant_bits(self, quant_bits: str) -> List[TuningItem]: break return op_items -def pattern_to_internal(pattern, default_dtype='int8'): + +def pattern_to_internal(pattern, default_dtype="int8"): """Convert pattern to internal format. 
'static' -> ('static', (('int8'),('int8'))) @@ -709,25 +730,29 @@ def pattern_to_internal(pattern, default_dtype='int8'): #TODO to add the support for mixed data type of weight and activation """ from .constant import PRECISION_SET_V2_0 + pattern_bk = pattern if isinstance(pattern, str): - pattern = ('precision', pattern) if pattern in PRECISION_SET_V2_0 else (pattern, (None)) + pattern = ("precision", pattern) if pattern in PRECISION_SET_V2_0 else (pattern, (None)) internal_pattern = (pattern[0], ((pattern[1],), (pattern[1],))) return internal_pattern + def pattern_to_path(pattern): """Convert pattern to path.""" - act_path = (pattern[0], 'activation', *pattern[1][0]) - weight_path = (pattern[0], 'weight', *pattern[1][1]) + act_path = (pattern[0], "activation", *pattern[1][0]) + weight_path = (pattern[0], "weight", *pattern[1][1]) return act_path, weight_path + def quant_mode_from_pattern(internal_pattern): """Get quant mode from internal pattern.""" - if internal_pattern[0] == 'precision': + if internal_pattern[0] == "precision": return internal_pattern[1][0] else: return internal_pattern[0] + def initial_tuning_cfg_with_quant_mode(op_name_type, quant_mode, tuning_space: TuningSpace) -> OpTuningConfig: """Initialize the tuning cfg. @@ -746,26 +771,25 @@ def initial_tuning_cfg_with_quant_mode(op_name_type, quant_mode, tuning_space: T The initial tuning config. """ internal_pattern = pattern_to_internal(quant_mode) - full_path = {'activation': None, 'weight': None} - full_path['activation'], full_path['weight'] = pattern_to_path(internal_pattern) - has_weight = op_name_type in tuning_space.ops_attr['weight'] + full_path = {"activation": None, "weight": None} + full_path["activation"], full_path["weight"] = pattern_to_path(internal_pattern) + has_weight = op_name_type in tuning_space.ops_attr["weight"] config_args = {} - att_lst = ['activation', 'weight'] if has_weight else ['activation'] + att_lst = ["activation", "weight"] if has_weight else ["activation"] for att in att_lst: att_full_path = tuning_space.get_default_full_path(op_name_type, full_path[att]) - config_args[att + '_dtype'] = tuning_space.ops_data_type[op_name_type].get(att_full_path, None) + config_args[att + "_dtype"] = tuning_space.ops_data_type[op_name_type].get(att_full_path, None) mode_item = tuning_space.get_item_by_path((op_name_type, *att_full_path)) if mode_item: - item_list = WEIGHT_ONLY_TUNING_ITEMS_LST if att_full_path[0] == 'weight_only' else TUNING_ITEMS_LST - method_args = {method_item.name: method_item.options[0] for method_item in mode_item.options \ - if method_item.name in item_list} + item_list = WEIGHT_ONLY_TUNING_ITEMS_LST if att_full_path[0] == "weight_only" else TUNING_ITEMS_LST + method_args = { + method_item.name: method_item.options[0] + for method_item in mode_item.options + if method_item.name in item_list + } config_args.update(method_args) quant_mode = internal_pattern[0] # set the first option as the default for each tuning item - op_tuning_config = OpTuningConfig(op_name_type[0], - op_name_type[1], - quant_mode, - tuning_space, - kwargs=config_args) + op_tuning_config = OpTuningConfig(op_name_type[0], op_name_type[1], quant_mode, tuning_space, kwargs=config_args) return op_tuning_config diff --git a/neural_compressor/strategy/utils/tuning_structs.py b/neural_compressor/strategy/utils/tuning_structs.py index f9d7ceba830..0c00df29409 100644 --- a/neural_compressor/strategy/utils/tuning_structs.py +++ b/neural_compressor/strategy/utils/tuning_structs.py @@ -18,7 +18,9 @@ """Tuning structure.""" 
from typing import Dict -from .constant import TUNING_ITEMS_LST, PRECISION_LIST, WEIGHT_ONLY_TUNING_ITEMS_LST + +from .constant import PRECISION_LIST, TUNING_ITEMS_LST, WEIGHT_ONLY_TUNING_ITEMS_LST + class OpTuningConfig: """Op tuning config.""" @@ -40,42 +42,41 @@ def __init__(self, op_name, op_type, op_quant_mode, tuning_space, kwargs={}): self.kwargs = kwargs self.act_dtype = None self.weight_dtype = None - self.has_weight = self.op_name_type in tuning_space.ops_attr['weight'] + self.has_weight = self.op_name_type in tuning_space.ops_attr["weight"] self._set_dtype() - self.tune_list = WEIGHT_ONLY_TUNING_ITEMS_LST if self.op_quant_mode == \ - 'weight_only' else TUNING_ITEMS_LST + self.tune_list = WEIGHT_ONLY_TUNING_ITEMS_LST if self.op_quant_mode == "weight_only" else TUNING_ITEMS_LST def _set_dtype(self): """Set the date type.""" if self.op_quant_mode in PRECISION_LIST: self.act_dtype, self.weight_dtype = self.op_quant_mode, self.op_quant_mode else: - self.act_dtype = self.kwargs.get('activation_dtype', None) - if ('weight', 'dtype') in self.kwargs: - self.weight_dtype = self.kwargs[('weight', 'dtype')] + self.act_dtype = self.kwargs.get("activation_dtype", None) + if ("weight", "dtype") in self.kwargs: + self.weight_dtype = self.kwargs[("weight", "dtype")] else: - self.weight_dtype = self.kwargs.get('weight_dtype', None) + self.weight_dtype = self.kwargs.get("weight_dtype", None) - assert self.act_dtype and isinstance(self.act_dtype, str),\ - (f"Didn't assign the activation data type for {self.op_name, self.op_type}", \ - f"with quant_mode {self.op_quant_mode}") + assert self.act_dtype and isinstance(self.act_dtype, str), ( + f"Didn't assign the activation data type for {self.op_name, self.op_type}", + f"with quant_mode {self.op_quant_mode}", + ) # if self.has_weight: # assert self.weight_dtype, \ # (f"Didn't assign the weight data type for {self.op_name, self.op_type}", \ # f"with quant_mode {self.op_quant_mode}") - def __repr__(self) -> str: """Display the tuning config as string. Returns: msg: the tuning config as string. """ - msg = f"op name: {self.op_name}, op type : {self.op_type} \n" + msg = f"op name: {self.op_name}, op type : {self.op_type} \n" msg += f"\t activation dtype: {self.act_dtype} \n" - if self.op_quant_mode != 'weight_only': + if self.op_quant_mode != "weight_only": # weight_dtype is contained in self.tune_list - msg += f"\t weight dtype: {self.weight_dtype} \n" if self.has_weight else "" + msg += f"\t weight dtype: {self.weight_dtype} \n" if self.has_weight else "" for key, val in self.kwargs.items(): if key in self.tune_list: msg += f"\t {key[0]} {key[1]}: {val}\n" @@ -89,13 +90,13 @@ def get_state(self): """ result = {} if self.has_weight: - result['weight'] = { - 'dtype': self.weight_dtype, - } - result['activation'] = { - 'dtype': self.act_dtype, - 'quant_mode': self.op_quant_mode, + result["weight"] = { + "dtype": self.weight_dtype, } + result["activation"] = { + "dtype": self.act_dtype, + "quant_mode": self.op_quant_mode, + } for key, val in self.kwargs.items(): if key in self.tune_list: result[key[0]][key[1]] = val diff --git a/neural_compressor/strategy/utils/utility.py b/neural_compressor/strategy/utils/utility.py index 533cf4b00f2..6680b73f164 100644 --- a/neural_compressor/strategy/utils/utility.py +++ b/neural_compressor/strategy/utils/utility.py @@ -14,57 +14,62 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- """Tuning utility.""" +import enum from collections import OrderedDict from copy import deepcopy -import enum from typing import Dict + class QuantType(enum.IntEnum): """Quantization type.""" + DYNAMIC = 0 STATIC = 1 QAT = 2 WEIGHT_ONLY = 3 AUTO = 4 + class QuantOptions: """Option Class for Quantization. - This class is used for configuring global variable related to quantization. + This class is used for configuring global variable related to quantization. The global variable quant_options is created with this class. Args: quant_type(int): Quantization type. Default value is 1. """ + def __init__(self, quant_type=1): """Init an QuantOptions object.""" self._quant_type = quant_type - + @property def quant_type(self): """Get quant type.""" return self._quant_type - + @quant_type.setter def quant_type(self, quant_type): """Set quant type. - + Args: quant_type(int): Quantization type. Default value is 1. """ self._quant_type = quant_type + quant_options = QuantOptions() + def preprocess_user_cfg(op_user_cfg: Dict): """Preprocess the op user config for weight only. Args: - op_user_cfg: The original user config. + op_user_cfg: The original user config. - Example: + Example: op_user_cfg = {'activation': {'bits': [4]}} op_user_cfg_modified = {'activation': {'bits': [4], 'group_size': [32]}} @@ -74,12 +79,13 @@ def preprocess_user_cfg(op_user_cfg: Dict): op_user_cfg_modified = deepcopy(op_user_cfg) if quant_options.quant_type == QuantType.WEIGHT_ONLY: for att, att_cfg in op_user_cfg.items(): - if 'bits' not in att_cfg: - op_user_cfg_modified[att]['bits'] = [4] - if 'group_size' not in att_cfg: - op_user_cfg_modified[att]['group_size'] = [32] + if "bits" not in att_cfg: + op_user_cfg_modified[att]["bits"] = [4] + if "group_size" not in att_cfg: + op_user_cfg_modified[att]["group_size"] = [32] return op_user_cfg_modified + class OrderedDefaultDict(OrderedDict): """Ordered default dict.""" @@ -98,12 +104,12 @@ def extract_data_type(data_type: str) -> str: Returns: (signed or unsigned, data type without signed) """ - return ('signed', data_type) if data_type[0] != 'u' else ('unsigned', data_type[1:]) + return ("signed", data_type) if data_type[0] != "u" else ("unsigned", data_type[1:]) def reverted_data_type(signed_flag: str, data_type: str) -> str: """Revert the data type.""" - return data_type if signed_flag == 'signed' else 'u' + data_type + return data_type if signed_flag == "signed" else "u" + data_type def get_adaptor_name(adaptor): @@ -113,7 +119,7 @@ def get_adaptor_name(adaptor): adaptor: adaptor instance. """ adaptor_name = type(adaptor).__name__.lower() - adaptor_name_lst = ['onnx', 'tensorflow', 'pytorch'] + adaptor_name_lst = ["onnx", "tensorflow", "pytorch"] for name in adaptor_name_lst: if adaptor_name.startswith(name): return name @@ -127,8 +133,8 @@ def build_slave_faker_model(): object: a class object where all properties and methods are virtual. 
""" from ...utils import logger - class FakerModel: + class FakerModel: def __call__(self, *args, **kwargs): logger.warning("Slave node has no quantized model, please handle it yourself.") @@ -141,18 +147,21 @@ def __getattr__(self, name): return FakerModel() + class ClassRegister: """Class register.""" - + def __init__(self): """Init class register.""" self.register = {} def __call__(self, name): """Call the class register.""" + def decorator(func): self.register[name] = func return func + return decorator def get_class(self, name): diff --git a/neural_compressor/template/__init__.py b/neural_compressor/template/__init__.py index e69de29bb2d..8989ae9d722 100644 --- a/neural_compressor/template/__init__.py +++ b/neural_compressor/template/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/neural_compressor/template/api_doc_example.py b/neural_compressor/template/api_doc_example.py index c6525f38d08..dd98ae43844 100644 --- a/neural_compressor/template/api_doc_example.py +++ b/neural_compressor/template/api_doc_example.py @@ -1,5 +1,17 @@ -""" -This module is only used as reference to convert Python docstring to API document. +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This module is only used as reference to convert Python docstring to API document. The created the API document is in `API Doc`_. @@ -26,11 +38,9 @@ def fun(a): .. _API Doc: https://intel.github.io/neural-compressor/latest/autoapi/neural_compressor/api_doc_example/index.html - """ module_debug_level1 = 1 - """int: Module debug level document. """ @@ -59,7 +69,7 @@ def function1(param1, param2): def function2(param1: str, param2: float) -> bool: - """function with PEP 484 type annotations. + """Function with PEP 484 type annotations. Args: param1: The parameter1. @@ -83,7 +93,6 @@ def eval_func(model): Returns: The return value. True|False. - """ @@ -113,10 +122,9 @@ def function3(param1, param2=None, *args, **kwargs): Raises: AttributeError: The ``Raises`` section is a list of exceptions. ValueError: If `param2` is equal to `param1`. - """ if param1 == param2: - raise ValueError('param1 may not be equal to param2') + raise ValueError("param1 may not be equal to param2") return True @@ -133,7 +141,6 @@ def generator1(n): >>> print([i for i in example_generator(4)]) [0, 1, 2, 3] - """ yield from range(n) @@ -147,7 +154,6 @@ class ExampleClass: Attributes: attr1 (str): Description of `attr1`. 
attr2 (:obj:`int`, optional): Description of `attr2`. - """ def __init__(self, param1, param2, param3): @@ -161,14 +167,13 @@ def __init__(self, param1, param2, param3): param2 (:obj:`int`, optional): Description of `param2`. Multiple lines are supported. param3 (list(str)): Description of `param3`. - """ self.attr1 = param1 self.attr2 = param2 self.attr3 = param3 #: Doc comment *inline* #: list(str): Doc comment *before* attribute, with type specified - self.attr4 = ['attr4'] + self.attr4 = ["attr4"] self.attr5 = None """str: Docstring *after* attribute, with type specified.""" @@ -176,11 +181,10 @@ def __init__(self, param1, param2, param3): @property def property1(self): """str: Property is documented.""" - return 'property1' - + return "property1" def method1(self, param1, param2): - """method1 for execute. + """Method1 for execute. Note: It's public. @@ -191,18 +195,14 @@ def method1(self, param1, param2): Returns: True|False. - """ return True def __special__(self): """This function won't be documented that start with and - end with a double underscore. - """ + end with a double underscore.""" pass def _private(self): - """private members are not included. - """ + """Private members are not included.""" pass - diff --git a/neural_compressor/template/graph_optimization.yaml b/neural_compressor/template/graph_optimization.yaml index 4832b3a3464..a84712bdbb2 100644 --- a/neural_compressor/template/graph_optimization.yaml +++ b/neural_compressor/template/graph_optimization.yaml @@ -26,7 +26,7 @@ device: cpu # optional. default value i graph_optimization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. precisions: fp32, bf16 - op_wise: { # optional. tuning constraints on op-wise for advance user to reduce tuning space. + op_wise: { # optional. tuning constraints on op-wise for advance user to reduce tuning space. 'conv1': { 'activation': {'dtype': ['bf16']} }, diff --git a/neural_compressor/template/pruning.yaml b/neural_compressor/template/pruning.yaml index cefa8aabeda..6a1adb7d3c2 100644 --- a/neural_compressor/template/pruning.yaml +++ b/neural_compressor/template/pruning.yaml @@ -41,7 +41,7 @@ pruning: # mandatory only for prunin ToTensor: Normalize: mean: [0.485, 0.456, 0.406] - std: [0.229, 0.224, 0.225] + std: [0.229, 0.224, 0.225] criterion: CrossEntropyLoss: reduction: None @@ -66,7 +66,7 @@ pruning: # mandatory only for prunin end_epoch: 2 update_frequency: 0.1 -evaluation: # optional. used to config evaluation process. +evaluation: # optional. used to config evaluation process. accuracy: # optional. used to evaluate accuracy of passing model. metric: # optional. required if user doesn't provide eval_func in neural_compressor.Pruning. topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. @@ -112,4 +112,3 @@ tuning: workspace: path: /path/to/saving/directory # optional. default workspace is ./nc_workspace/current_time_stamp, saving tuning history and deploy yaml. resume: /path/to/a/specified/snapshot/file # optional. if specified, resume from tuning history. - diff --git a/neural_compressor/template/ptq.yaml b/neural_compressor/template/ptq.yaml index 939c8dcfd3a..a432ca8066e 100644 --- a/neural_compressor/template/ptq.yaml +++ b/neural_compressor/template/ptq.yaml @@ -53,7 +53,7 @@ quantization: # optional. tuning constrai scheme: asym dtype: int8, fp32 algorithm: minmax, kl - op_wise: { # optional. tuning constraints on op-wise for advance user to reduce tuning space. + op_wise: { # optional. 
tuning constraints on op-wise for advance user to reduce tuning space. 'conv1': { 'activation': {'dtype': ['uint8', 'fp32'], 'algorithm': ['minmax', 'kl'], 'scheme':['sym']}, 'weight': {'dtype': ['int8', 'fp32'], 'algorithm': ['minmax']} diff --git a/neural_compressor/template/qat.yaml b/neural_compressor/template/qat.yaml index 21280a50c63..9572fc0772a 100644 --- a/neural_compressor/template/qat.yaml +++ b/neural_compressor/template/qat.yaml @@ -51,7 +51,7 @@ quantization: # optional. required for QA learning_rate: 0.1 momentum: 0.9 weight_decay: 0.0004 - nesterov: False + nesterov: False model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space. weight: granularity: per_channel @@ -63,7 +63,7 @@ quantization: # optional. required for QA scheme: asym dtype: int8 algorithm: minmax - op_wise: { # optional. tuning constraints on op-wise for advance user to reduce tuning space. + op_wise: { # optional. tuning constraints on op-wise for advance user to reduce tuning space. 'conv1': { 'activation': {'dtype': ['uint8', 'fp32'], 'algorithm': ['minmax', 'kl'], 'scheme':['sym']}, 'weight': {'dtype': ['int8', 'fp32'], 'algorithm': ['minmax']} @@ -129,11 +129,11 @@ tuning: workspace: path: /path/to/saving/directory # optional. default workspace is ./nc_workspace/current_time_stamp, saving tuning history and deploy yaml. resume: /path/to/a/specified/snapshot/file # optional. if specified, resume from tuning history. - + diagnosis: diagnosis_after_tuning: False # optional. bool, defaults to False, whether or not dump tensor and show in Bench after tuning finish. op_list: [] # optional. List[str], defaults to [], the op(s) to be dumped to reduce local disk consumption. the default setting means dump all quantized op instead of not dump anything. iteration_list: [1] # optional. List[int], defaults to [1], the iteration that needs to dump activation, the default value is [1] which means dump the activation of the first iteration. inspect_type: activation # optional. str, defaults to activation, dump weight, activation or all. can be one of 'weight', 'activation' or 'all'. - save_to_disk: True # optional. bool, defaults to True, whether or not to save the dumped tensor. - save_path: './nc_workspace/inspect_saved/' # optional. str, defaults to './nc_workspace/inspect_saved/', a path to save the dumped tensor. \ No newline at end of file + save_to_disk: True # optional. bool, defaults to True, whether or not to save the dumped tensor. + save_path: './nc_workspace/inspect_saved/' # optional. str, defaults to './nc_workspace/inspect_saved/', a path to save the dumped tensor. 
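A minimal usage sketch of the strategy utilities reformatted above. The import path is assumed to follow the file path in the hunks (neural_compressor/strategy/utils/utility.py), and the expected values are read directly off the reformatted code; this is an illustration, not part of the patch.

from neural_compressor.strategy.utils.utility import (
    QuantType,
    extract_data_type,
    preprocess_user_cfg,
    quant_options,
    reverted_data_type,
)

# extract_data_type() splits a dtype string into (signedness, base type);
# reverted_data_type() is its inverse.
assert extract_data_type("uint8") == ("unsigned", "int8")
assert extract_data_type("int8") == ("signed", "int8")
assert reverted_data_type("unsigned", "int8") == "uint8"

# preprocess_user_cfg() only fills in the weight-only defaults
# (bits=[4], group_size=[32]) when the global quant type is WEIGHT_ONLY.
quant_options.quant_type = QuantType.WEIGHT_ONLY
cfg = preprocess_user_cfg({"weight": {"bits": [8]}})
assert cfg == {"weight": {"bits": [8], "group_size": [32]}}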
diff --git a/neural_compressor/training.py b/neural_compressor/training.py index 28905f1bd78..32055c736ed 100644 --- a/neural_compressor/training.py +++ b/neural_compressor/training.py @@ -17,20 +17,21 @@ """The configuration of the training loop.""" import os import pickle -import numpy as np import random +from typing import Callable, List, Union + +import numpy as np + +from neural_compressor import DistillationConfig, QuantizationAwareTrainingConfig, WeightPruningConfig +from neural_compressor.strategy.strategy import STRATEGIES from .adaptor import FRAMEWORKS -from .compression.callbacks import QuantizationAwareTrainingCallbacks, DistillationCallbacks, PruningCallbacks +from .compression.callbacks import DistillationCallbacks, PruningCallbacks, QuantizationAwareTrainingCallbacks from .config import _Config, options from .metric import register_customer_metric from .model.model import Model from .utils import logger from .utils.utility import time_limit -from neural_compressor.strategy.strategy import STRATEGIES -from neural_compressor import (DistillationConfig, QuantizationAwareTrainingConfig, - WeightPruningConfig) -from typing import Callable, List, Union class CompressionManager: @@ -64,6 +65,7 @@ class CompressionManager: compression_manager.callbacks.on_train_end() compression_manager.save("path_to_save") """ + def __init__(self, model: Callable, confs: Union[Callable, List], **kwargs): """Initialize the CompressionManager's parameters. @@ -90,23 +92,21 @@ def __init__(self, model: Callable, confs: Union[Callable, List], **kwargs): q_conf = conf framework_specific_info = { - 'device': conf.device, - 'random_seed': options.random_seed, - 'workspace_path': options.workspace, - 'q_dataloader': None, - 'backend': getattr(confs, "backend", 'default'), - 'format': getattr(confs, "quant_format", 'default'), - 'approach': conf.approach, + "device": conf.device, + "random_seed": options.random_seed, + "workspace_path": options.workspace, + "q_dataloader": None, + "backend": getattr(confs, "backend", "default"), + "format": getattr(confs, "quant_format", "default"), + "approach": conf.approach, } - if 'tensorflow' in conf.framework: - framework_specific_info.update( - {"inputs": conf.inputs, - "outputs": conf.outputs}) - - # TODO: will be removed once 'op_type_dict' and 'op_name_dicts' + if "tensorflow" in conf.framework: + framework_specific_info.update({"inputs": conf.inputs, "outputs": conf.outputs}) + + # TODO: will be removed once 'op_type_dict' and 'op_name_dicts' # for quant_aware_training can be handled in strategy - framework_specific_info['qat_optype_wise'] = conf.op_type_dict - framework_specific_info['qat_op_wise'] = conf.op_name_dict + framework_specific_info["qat_optype_wise"] = conf.op_type_dict + framework_specific_info["qat_op_wise"] = conf.op_name_dict self.adaptor = FRAMEWORKS[conf.framework](framework_specific_info) self.adaptor.model = self.model @@ -127,23 +127,21 @@ def __init__(self, model: Callable, confs: Union[Callable, List], **kwargs): self.model = Model(model, conf=confs) framework_specific_info = { - 'device': confs.device, - 'random_seed': options.random_seed, - 'workspace_path': options.workspace, - 'q_dataloader': None, - 'backend': getattr(confs, "backend", 'default'), - 'format': getattr(confs, "quant_format", 'default'), - 'approach': confs.approach, + "device": confs.device, + "random_seed": options.random_seed, + "workspace_path": options.workspace, + "q_dataloader": None, + "backend": getattr(confs, "backend", "default"), + "format": getattr(confs, 
"quant_format", "default"), + "approach": confs.approach, } - if 'tensorflow' in confs.framework: - framework_specific_info.update( - {"inputs": confs.inputs, - "outputs": confs.outputs}) - - # TODO: will be removed once 'op_type_dict' and 'op_name_dicts' + if "tensorflow" in confs.framework: + framework_specific_info.update({"inputs": confs.inputs, "outputs": confs.outputs}) + + # TODO: will be removed once 'op_type_dict' and 'op_name_dicts' # for quant_aware_training can be handled in strategy - framework_specific_info['qat_optype_wise'] = confs.op_type_dict - framework_specific_info['qat_op_wise'] = confs.op_name_dict + framework_specific_info["qat_optype_wise"] = confs.op_type_dict + framework_specific_info["qat_op_wise"] = confs.op_name_dict self.adaptor = FRAMEWORKS[confs.framework](framework_specific_info) self.adaptor.model = self.model @@ -195,12 +193,7 @@ def export( self.model.export(save_path, conf) # pylint: disable=no-member -def fit(compression_manager, - train_func, - eval_func=None, - eval_dataloader=None, - eval_metric=None, - **kwargs): +def fit(compression_manager, train_func, eval_func=None, eval_dataloader=None, eval_metric=None, **kwargs): """Compress the model with accuracy tuning for quantization. Args: @@ -297,11 +290,14 @@ def eval_func(model): strategy_name = "conservative" if strategy_name == "mse_v2": - if not (compression_manager.conf.quantization.framework.startswith("tensorflow") - or compression_manager.conf.quantization.framework == 'pytorch_fx'): # pragma: no cover + if not ( + compression_manager.conf.quantization.framework.startswith("tensorflow") + or compression_manager.conf.quantization.framework == "pytorch_fx" + ): # pragma: no cover strategy_name = "basic" - logger.warning(f"MSE_v2 does not support {compression_manager.conf.quantization.framework} now," - "use basic instead.") + logger.warning( + f"MSE_v2 does not support {compression_manager.conf.quantization.framework} now," "use basic instead." + ) logger.warning("Only tensorflow, pytorch_fx is supported by MSE_v2 currently.") assert strategy_name in STRATEGIES, "Tuning strategy {} is NOT supported".format(strategy_name) @@ -309,12 +305,12 @@ def eval_func(model): _resume = None # check if interrupted tuning procedure exists. if yes, it will resume the # whole auto tune process. - resume_file = os.path.abspath(os.path.expanduser(options.resume_from)) \ - if options.workspace and options.resume_from else None + resume_file = ( + os.path.abspath(os.path.expanduser(options.resume_from)) if options.workspace and options.resume_from else None + ) if resume_file: - assert os.path.exists(resume_file), \ - "The specified resume file {} doesn't exist!".format(resume_file) - with open(resume_file, 'rb') as f: + assert os.path.exists(resume_file), "The specified resume file {} doesn't exist!".format(resume_file) + with open(resume_file, "rb") as f: _resume = pickle.load(f).__dict__ if eval_func is None and eval_dataloader is None: # pragma: no cover @@ -329,7 +325,7 @@ def eval_func(model): eval_dataloader=eval_dataloader, eval_metric=metric, resume=_resume, - q_hooks=None + q_hooks=None, ) try: with time_limit(compression_manager.conf.quantization.tuning_criterion.timeout): @@ -341,17 +337,19 @@ def eval_func(model): except Exception as e: logger.error("Unexpected exception {} happened during tuning.".format(repr(e))) import traceback + traceback.print_exc() finally: if strategy.best_qmodel: logger.info( - "Specified timeout or max trials is reached! 
" - "Found a quantized model which meet accuracy goal. Exit.") + "Specified timeout or max trials is reached! " "Found a quantized model which meet accuracy goal. Exit." + ) strategy.deploy_config() else: logger.error( "Specified timeout or max trials is reached! " - "Not found any quantized model which meet accuracy goal. Exit.") + "Not found any quantized model which meet accuracy goal. Exit." + ) compression_manager.model = strategy.best_qmodel @@ -400,6 +398,7 @@ def prepare_compression(model: Callable, confs: Union[Callable, List], **kwargs) class CallBacks: """Define the basic command for the training loop.""" + def __init__(self, callbacks_list): """Callbacks list are used for execute the training procedure. diff --git a/neural_compressor/utils/__init__.py b/neural_compressor/utils/__init__.py index 032235e886f..6ddc5c84856 100644 --- a/neural_compressor/utils/__init__.py +++ b/neural_compressor/utils/__init__.py @@ -14,7 +14,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Utils: provide useful methods and auxiliary functionalities.""" from .collect_layer_histogram import LayerHistogramCollector @@ -22,5 +21,15 @@ from .options import OPTIONS from .utility import alias_param -__all__ = ["LayerHistogramCollector", "log", "info", "debug", "warn", "warning", "error", "fatal", - "OPTIONS", "alias_param"] +__all__ = [ + "LayerHistogramCollector", + "log", + "info", + "debug", + "warn", + "warning", + "error", + "fatal", + "OPTIONS", + "alias_param", +] diff --git a/neural_compressor/utils/collect_layer_histogram.py b/neural_compressor/utils/collect_layer_histogram.py index f27a79c6ed1..32bc36ac684 100644 --- a/neural_compressor/utils/collect_layer_histogram.py +++ b/neural_compressor/utils/collect_layer_histogram.py @@ -14,15 +14,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """LayerHistogramCollector: save the histogram by layer.""" import numpy as np + from neural_compressor.utils.utility import combine_histogram + class LayerHistogramCollector(object): """The collector of the histogram by layer. - + Saves layer histogram in a dict with layer names as keys and lists of NDArrays as values. The collected histogram will be used for calculating the optimal thresholds for quantization using KL divergence. @@ -30,7 +31,7 @@ class LayerHistogramCollector(object): def __init__(self, num_bins=8001, layer_tensor=None, include_layer=None, logger=None): """Init a LayerHistogramCollector object. 
- + Args: num_bins: Number of bins for the histogram layer_tensor: A dict with layer names as keys and lists of NDArrays as values @@ -55,15 +56,12 @@ def collect(self): continue for arr in self.layer_tensor[name]: if self.logger: - self.logger.debug( - "Collect layer {} histogram of shape {}.".format(name, arr.shape)) + self.logger.debug("Collect layer {} histogram of shape {}.".format(name, arr.shape)) min_range = np.min(arr) max_range = np.max(arr) th = max(abs(min_range), abs(max_range)) if name in self.hist_dict: self.hist_dict[name] = combine_histogram(self.hist_dict[name], arr) else: - hist, hist_edges = np.histogram( - arr, bins=self.num_bins, range=(-th, th)) - self.hist_dict[name] = ( - hist, hist_edges, min_range, max_range, th) + hist, hist_edges = np.histogram(arr, bins=self.num_bins, range=(-th, th)) + self.hist_dict[name] = (hist, hist_edges, min_range, max_range, th) diff --git a/neural_compressor/utils/constant.py b/neural_compressor/utils/constant.py index d255be1db16..651d5b02a7d 100644 --- a/neural_compressor/utils/constant.py +++ b/neural_compressor/utils/constant.py @@ -14,81 +14,73 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Constants used for the configuration.""" -FP32 = {'weight': {'dtype': ['fp32']}, 'activation': {'dtype': ['fp32']}} -BF16 = {'weight': {'dtype': ['bf16']}, 'activation': {'dtype': ['bf16']}} +FP32 = {"weight": {"dtype": ["fp32"]}, "activation": {"dtype": ["fp32"]}} +BF16 = {"weight": {"dtype": ["bf16"]}, "activation": {"dtype": ["bf16"]}} + +INT8_SYM_MINMAX_PERCHANNEL = { + "dtype": ["int8"], + "scheme": ["sym"], + "algorithm": ["minmax"], + "granularity": ["per_channel"], +} + +INT8_SYM_MINMAX_PERTENSOR = { + "dtype": ["int8"], + "scheme": ["sym"], + "algorithm": ["minmax"], + "granularity": ["per_tensor"], +} -INT8_SYM_MINMAX_PERCHANNEL = {'dtype': ['int8'], - 'scheme': ['sym'], - 'algorithm': ['minmax'], - 'granularity': ['per_channel']} +INT8_SYM_KL_PERTENSOR = {"dtype": ["int8"], "scheme": ["sym"], "algorithm": ["kl"], "granularity": ["per_tensor"]} -INT8_SYM_MINMAX_PERTENSOR = {'dtype': ['int8'], - 'scheme': ['sym'], - 'algorithm': ['minmax'], - 'granularity': ['per_tensor']} - -INT8_SYM_KL_PERTENSOR = {'dtype': ['int8'], - 'scheme': ['sym'], - 'algorithm': ['kl'], - 'granularity': ['per_tensor']} - -INT8_SYM_KL_PERCHANNEL = {'dtype': ['int8'], - 'scheme': ['sym'], - 'algorithm': ['kl'], - 'granularity': ['per_channel']} +INT8_SYM_KL_PERCHANNEL = {"dtype": ["int8"], "scheme": ["sym"], "algorithm": ["kl"], "granularity": ["per_channel"]} + +UINT8_ASYM_MINMAX_PERCHANNEL = { + "dtype": ["uint8"], + "scheme": ["asym"], + "algorithm": ["minmax"], + "granularity": ["per_channel"], +} + +UINT8_ASYM_MINMAX_PERTENSOR = { + "dtype": ["uint8"], + "scheme": ["asym"], + "algorithm": ["minmax"], + "granularity": ["per_tensor"], +} -UINT8_ASYM_MINMAX_PERCHANNEL = {'dtype': ['uint8'], - 'scheme': ['asym'], - 'algorithm': ['minmax'], - 'granularity': ['per_channel']} +UINT8_ASYM_KL_PERTENSOR = {"dtype": ["uint8"], "scheme": ["asym"], "algorithm": ["kl"], "granularity": ["per_tensor"]} -UINT8_ASYM_MINMAX_PERTENSOR = {'dtype': ['uint8'], - 'scheme': ['asym'], - 'algorithm': ['minmax'], - 'granularity': ['per_tensor']} - -UINT8_ASYM_KL_PERTENSOR = {'dtype': ['uint8'], - 'scheme': ['asym'], - 'algorithm': ['kl'], - 'granularity': ['per_tensor']} - -UINT8_ASYM_KL_PERCHANNEL = {'dtype': ['uint8'], - 'scheme': ['asym'], - 
'algorithm': ['kl'], - 'granularity': ['per_channel']} +UINT8_ASYM_KL_PERCHANNEL = {"dtype": ["uint8"], "scheme": ["asym"], "algorithm": ["kl"], "granularity": ["per_channel"]} # Options for recipes, the first options is the default value. RECIPES = { - "common":{ + "common": { # 'fast_bias_correction' : [False, True], # Disable it first # 'weight_correction' : [False, True], # Disable it first - }, + }, "tensorflow": { - 'smooth_quant': [False, True], - 'first_conv_or_matmul_quantization' : [True, False], - 'last_conv_or_matmul_quantization' : [True, False], - }, + "smooth_quant": [False, True], + "first_conv_or_matmul_quantization": [True, False], + "last_conv_or_matmul_quantization": [True, False], + }, "onnx": { - 'smooth_quant': [False, True], - 'first_conv_or_matmul_quantization' : [True, False], - 'last_conv_or_matmul_quantization' : [True, False], - 'pre_post_process_quantization' : [True, False], - }, - "pytorch": { - 'smooth_quant': [False, True], - 'layer_wise_quant': [False, True] - }, + "smooth_quant": [False, True], + "first_conv_or_matmul_quantization": [True, False], + "last_conv_or_matmul_quantization": [True, False], + "pre_post_process_quantization": [True, False], + }, + "pytorch": {"smooth_quant": [False, True], "layer_wise_quant": [False, True]}, } RECIPES_PRIORITY = [ - "smooth_quant", #Only support by ort/pt currently + "smooth_quant", # Only support by ort/pt currently # "fast_bias_correction", # Disable it first # "weight_correction", # Disable it first "first_conv_or_matmul_quantization", "last_conv_or_matmul_quantization", "pre_post_process_quantization", - ] \ No newline at end of file +] diff --git a/neural_compressor/utils/create_obj_from_config.py b/neural_compressor/utils/create_obj_from_config.py index 760ff9703ca..20204b5fa49 100644 --- a/neural_compressor/utils/create_obj_from_config.py +++ b/neural_compressor/utils/create_obj_from_config.py @@ -14,14 +14,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Utility methods to create corresponding objects from configuration.""" -from neural_compressor.metric import METRICS -from neural_compressor.data import Datasets, TRANSFORMS, FILTERS, DATALOADERS -from collections import OrderedDict import copy import gc +from collections import OrderedDict + +from neural_compressor.data import DATALOADERS, FILTERS, TRANSFORMS, Datasets +from neural_compressor.metric import METRICS DEFAULT_BATCH_SIZE = 64 @@ -38,8 +38,7 @@ def get_func_from_config(func_dict, cfg, compose=True): func_args.append(func_value) func_list.append(func_dict[func_name](*func_args, **func_kwargs)) - func = func_dict['Compose'](func_list) if compose else \ - (func_list[0] if len(func_list) > 0 else None) + func = func_dict["Compose"](func_list) if compose else (func_list[0] if len(func_list) > 0 else None) return func @@ -52,16 +51,18 @@ def get_metrics(metrics, cfg, compose=True): """Get the metrics function from configuration.""" return get_func_from_config(metrics, cfg, compose) + def get_postprocess(postprocesses, cfg, compose=True): """Get the postprocess function from configuration.""" return get_func_from_config(postprocesses, cfg, compose) + def get_algorithm(algorithms, cfg, compose=False): """Get the algorithms from configuration. - + Args: algorithms: the algorithm management. - cfg: a dict contain the algo name and use it or not. + cfg: a dict contain the algo name and use it or not. 
compose: compose all algo or not. Defaults to False. Returns: @@ -69,16 +70,17 @@ def get_algorithm(algorithms, cfg, compose=False): """ # recipes contains quantization part, only use algorithms in that algo_conf = algorithms.support_algorithms().intersection(set(cfg.keys())) - #(TODO) only support open/close according to cfg + # (TODO) only support open/close according to cfg return [algorithms()[algo] for algo in algo_conf if cfg[algo]] + def create_dataset(framework, data_source, cfg_preprocess, cfg_filter): """Create the dataset from the data source.""" transform_list = [] # generate framework specific transforms preprocess = None if cfg_preprocess is not None: - preprocesses = TRANSFORMS(framework, 'preprocess') + preprocesses = TRANSFORMS(framework, "preprocess") preprocess = get_preprocess(preprocesses, cfg_preprocess) # even we can unify transform, how can we handle the IO, or we do the transform here datasets = Datasets(framework) @@ -91,38 +93,34 @@ def create_dataset(framework, data_source, cfg_preprocess, cfg_filter): filter_dataset_type = filter_type + dataset_type filter = filters[filter_dataset_type](**cfg_filter[filter_type]) # in this case we should prepare eval_data and calib_data sperately - dataset = datasets[dataset_type](**data_source[dataset_type], - transform=preprocess, filter=filter) + dataset = datasets[dataset_type](**data_source[dataset_type], transform=preprocess, filter=filter) return dataset def create_dataloader(framework, dataloader_cfg): """Create the dataloader according to the framework.""" - batch_size = int(dataloader_cfg['batch_size']) \ - if dataloader_cfg.get('batch_size') is not None else DEFAULT_BATCH_SIZE - last_batch = dataloader_cfg['last_batch'] \ - if dataloader_cfg.get('last_batch') is not None else 'rollover' - shuffle = dataloader_cfg['shuffle'] \ - if dataloader_cfg.get('shuffle') is not None else False - distributed = dataloader_cfg['distributed'] \ - if dataloader_cfg.get('distributed') is not None else False - - dataset = create_dataset(framework, - copy.deepcopy(dataloader_cfg['dataset']), - copy.deepcopy(dataloader_cfg['transform']), - copy.deepcopy(dataloader_cfg['filter']),) - - return DATALOADERS[framework](dataset=dataset, - batch_size=batch_size, - last_batch=last_batch, - shuffle=shuffle, - distributed=distributed) - - -def create_eval_func(framework, dataloader, adaptor, - metric, postprocess_cfg=None, - iteration=-1, tensorboard=False, - fp32_baseline=False): + batch_size = ( + int(dataloader_cfg["batch_size"]) if dataloader_cfg.get("batch_size") is not None else DEFAULT_BATCH_SIZE + ) + last_batch = dataloader_cfg["last_batch"] if dataloader_cfg.get("last_batch") is not None else "rollover" + shuffle = dataloader_cfg["shuffle"] if dataloader_cfg.get("shuffle") is not None else False + distributed = dataloader_cfg["distributed"] if dataloader_cfg.get("distributed") is not None else False + + dataset = create_dataset( + framework, + copy.deepcopy(dataloader_cfg["dataset"]), + copy.deepcopy(dataloader_cfg["transform"]), + copy.deepcopy(dataloader_cfg["filter"]), + ) + + return DATALOADERS[framework]( + dataset=dataset, batch_size=batch_size, last_batch=last_batch, shuffle=shuffle, distributed=distributed + ) + + +def create_eval_func( + framework, dataloader, adaptor, metric, postprocess_cfg=None, iteration=-1, tensorboard=False, fp32_baseline=False +): """The interface to create evaluate function from config. 
Args: @@ -144,24 +142,27 @@ def create_eval_func(framework, dataloader, adaptor, postprocess = None if postprocess_cfg is not None: postprocesses = TRANSFORMS(framework, "postprocess") - postprocess = get_postprocess(postprocesses, postprocess_cfg['transform']) + postprocess = get_postprocess(postprocesses, postprocess_cfg["transform"]) if isinstance(metric, dict): fwk_metrics = METRICS(framework) metrics = [] for name, val in metric.items(): - if isinstance(val, int) and \ - len([i for i in gc.get_objects() if id(i) == val]) > 0 and \ - 'user_' + type([i for i in gc.get_objects() if id(i) == val][0]).__name__ == name: + if ( + isinstance(val, int) + and len([i for i in gc.get_objects() if id(i) == val]) > 0 + and "user_" + type([i for i in gc.get_objects() if id(i) == val][0]).__name__ == name + ): metrics.extend([i for i in gc.get_objects() if id(i) == val]) - elif name not in ['weight', 'higher_is_better']: + elif name not in ["weight", "higher_is_better"]: metrics.append(get_metrics(fwk_metrics, {name: val}, compose=False)) else: metrics = metric def eval_func(model, measurer=None): - return adaptor.evaluate(model, dataloader, postprocess, - metrics, measurer, iteration, - tensorboard, fp32_baseline) + return adaptor.evaluate( + model, dataloader, postprocess, metrics, measurer, iteration, tensorboard, fp32_baseline + ) + # TODO: to find a better way eval_func.builtin = True @@ -188,42 +189,47 @@ def create_train_func(framework, dataloader, adaptor, train_cfg, hooks=None, cal assert dataloader, "dataloader should NOT be empty when train_func is None" assert adaptor, "adaptor should NOT be empty" - from neural_compressor.experimental.common import Optimizers, Criterions + from neural_compressor.experimental.common import Criterions, Optimizers + postprocess_cfg = train_cfg.postprocess if postprocess_cfg is not None: postprocesses = TRANSFORMS(framework, "postprocess") - postprocess = get_postprocess(postprocesses, postprocess_cfg['transform']) + postprocess = get_postprocess(postprocesses, postprocess_cfg["transform"]) else: postprocess = None if isinstance(train_cfg.optimizer, dict): - assert train_cfg.optimizer and len(train_cfg.optimizer) == 1, \ - "optimizer should only set once" + assert train_cfg.optimizer and len(train_cfg.optimizer) == 1, "optimizer should only set once" key, value = next(iter(train_cfg.optimizer.items())) optimizer = Optimizers(framework)[key](value) optimizer = optimizer() else: if framework == "pytorch": - optimizer = (lambda mp, p: train_cfg.optimizer, {'p':0}) - elif framework == 'tensorflow': - optimizer = (lambda p: train_cfg.optimizer, {'p':0}) + optimizer = (lambda mp, p: train_cfg.optimizer, {"p": 0}) + elif framework == "tensorflow": + optimizer = (lambda p: train_cfg.optimizer, {"p": 0}) if isinstance(train_cfg.criterion, dict): - assert train_cfg.criterion and len(train_cfg.criterion) == 1, \ - "criterion should only set once" + assert train_cfg.criterion and len(train_cfg.criterion) == 1, "criterion should only set once" key, value = next(iter(train_cfg.criterion.items())) criterion = Criterions(framework)[next(iter(train_cfg.criterion))](value) criterion = criterion() else: - criterion = (lambda p: train_cfg.criterion, {'p':0}) + criterion = (lambda p: train_cfg.criterion, {"p": 0}) - default_dict = {k: train_cfg[k] for k in train_cfg.keys() - {'optimizer', 'criterion', - 'dataloader'}} - default_dict['callbacks'] = callbacks + default_dict = {k: train_cfg[k] for k in train_cfg.keys() - {"optimizer", "criterion", "dataloader"}} + 
default_dict["callbacks"] = callbacks def train_func(model): - return adaptor.train(model, dataloader, optimizer_tuple=optimizer, - criterion_tuple=criterion, hooks=hooks, - postprocess=postprocess, kwargs=default_dict) + return adaptor.train( + model, + dataloader, + optimizer_tuple=optimizer, + criterion_tuple=criterion, + hooks=hooks, + postprocess=postprocess, + kwargs=default_dict, + ) + # TODO: to find a better way train_func.builtin = True diff --git a/neural_compressor/utils/kl_divergence.py b/neural_compressor/utils/kl_divergence.py index 5c960a9b7cd..6e0691f50bf 100644 --- a/neural_compressor/utils/kl_divergence.py +++ b/neural_compressor/utils/kl_divergence.py @@ -14,11 +14,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """KL Divergence: measure probability distribution difference to determine the thresholds per quantized op.""" -class KL_Divergence(object): # pragma: no cover + +class KL_Divergence(object): # pragma: no cover """The class of supporting KL divergence calibration algorithm.""" + def __init__(self): """Init a KL Divergence object.""" pass @@ -35,11 +36,9 @@ def expand_quantized_bins(self, quantized_bins, reference_bins): if zero_count == num_merged_bins: avg_bin_ele = 0 else: - avg_bin_ele = quantized_bins[idx] / (num_merged_bins - - zero_count + 0.0) + avg_bin_ele = quantized_bins[idx] / (num_merged_bins - zero_count + 0.0) for idx1 in range(j_start, j_end): - expanded_quantized_bins[ - idx1] = 0 if reference_bins[idx1] == 0 else avg_bin_ele + expanded_quantized_bins[idx1] = 0 if reference_bins[idx1] == 0 else avg_bin_ele j_start += num_merged_bins j_end += num_merged_bins if idx + 1 == len(quantized_bins) - 1: @@ -59,21 +58,14 @@ def safe_entropy(self, reference_distr_P, P_sum, candidate_distr_Q, Q_sum): tmp_sum2 += 0 else: if q_idx == 0: - print("Fatal error!, idx = " + str(idx) + - " qindex = 0! p_idx = " + str(p_idx)) + print("Fatal error!, idx = " + str(idx) + " qindex = 0! 
p_idx = " + str(p_idx)) import math + tmp_sum1 += p_idx * (math.log(Q_sum * p_idx)) tmp_sum2 += p_idx * (math.log(P_sum * q_idx)) return (tmp_sum1 - tmp_sum2) / P_sum - def get_threshold(self, - hist, - hist_edges, - min_val, - max_val, - num_bins, - quantized_type, - num_quantized_bins=255): + def get_threshold(self, hist, hist_edges, min_val, max_val, num_bins, quantized_type, num_quantized_bins=255): """The interface of getting threshold per op using KL divergency algorithm.""" if min_val >= 0: ending_iter = num_bins - 1 @@ -118,18 +110,15 @@ def get_threshold(self, j_end = num_merged_bins for idx in range(num_quantized_bins): - candidate_distr_Q_quantized[idx] = sum( - candidate_distr_Q[j_start:j_end]) + candidate_distr_Q_quantized[idx] = sum(candidate_distr_Q[j_start:j_end]) j_start += num_merged_bins j_end += num_merged_bins if idx + 1 == num_quantized_bins - 1: j_end = i - candidate_distr_Q = self.expand_quantized_bins( - candidate_distr_Q_quantized, reference_distr_bins) + candidate_distr_Q = self.expand_quantized_bins(candidate_distr_Q_quantized, reference_distr_bins) P_sum = sum(reference_distr_P) Q_sum = sum(candidate_distr_Q) - kl_divergence = self.safe_entropy(reference_distr_P, P_sum, - candidate_distr_Q, Q_sum) + kl_divergence = self.safe_entropy(reference_distr_P, P_sum, candidate_distr_Q, Q_sum) if not kl_inited: min_kl_divergence = kl_divergence min_kl_index = i @@ -148,4 +137,4 @@ def get_threshold(self, else: break min_kl_index = starting_iter - return (min_kl_index + 0.5) * bin_width \ No newline at end of file + return (min_kl_index + 0.5) * bin_width diff --git a/neural_compressor/utils/load_huggingface.py b/neural_compressor/utils/load_huggingface.py index cf4190811b2..c68259a4abc 100644 --- a/neural_compressor/utils/load_huggingface.py +++ b/neural_compressor/utils/load_huggingface.py @@ -14,7 +14,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Huggingface Loader: provides access to Huggingface pretrained models.""" import copy @@ -31,7 +30,8 @@ class OptimizedModel: """The class provides a method from_pretrained to access Huggingface models.""" - def __init__(self, *args, **kwargs): # pragma: no cover + + def __init__(self, *args, **kwargs): # pragma: no cover """Init method (Not used).""" raise EnvironmentError( f"{self.__class__.__name__} is designed to be instantiated using the" @@ -39,11 +39,7 @@ def __init__(self, *args, **kwargs): # pragma: no cover ) @classmethod - def from_pretrained( - cls, - model_name_or_path: str, - **kwargs - ) -> torch.nn.Module: + def from_pretrained(cls, model_name_or_path: str, **kwargs) -> torch.nn.Module: """Instantiate a quantized pytorch model from a given Intel Neural Compressor (INC) configuration file. Args: @@ -67,6 +63,7 @@ def from_pretrained( q_model: Quantized model. 
""" from neural_compressor.utils.pytorch import load + config = kwargs.pop("config", None) cache_dir = kwargs.pop("cache_dir", None) force_download = kwargs.pop("force_download", False) @@ -85,7 +82,7 @@ def from_pretrained( **kwargs, ) - model_class = eval(f'transformers.{config.architectures[0]}') + model_class = eval(f"transformers.{config.architectures[0]}") if config.torch_dtype is not torch.int8: model = model_class.from_pretrained( model_name_or_path, @@ -102,23 +99,27 @@ def from_pretrained( keys_to_ignore_on_load_unexpected = copy.deepcopy( getattr(model_class, "_keys_to_ignore_on_load_unexpected", None) ) - keys_to_ignore_on_load_missing = \ - copy.deepcopy(getattr(model_class, "_keys_to_ignore_on_load_missing", None)) + keys_to_ignore_on_load_missing = copy.deepcopy( + getattr(model_class, "_keys_to_ignore_on_load_missing", None) + ) # Avoid unnecessary warnings resulting from quantized model initialization - quantized_keys_to_ignore_on_load = [r"zero_point", r"scale", - r"packed_params", r"constant", - r"module", r"best_configure"] + quantized_keys_to_ignore_on_load = [ + r"zero_point", + r"scale", + r"packed_params", + r"constant", + r"module", + r"best_configure", + ] if keys_to_ignore_on_load_unexpected is None: model_class._keys_to_ignore_on_load_unexpected = quantized_keys_to_ignore_on_load else: - model_class._keys_to_ignore_on_load_unexpected.extend( - quantized_keys_to_ignore_on_load - ) + model_class._keys_to_ignore_on_load_unexpected.extend(quantized_keys_to_ignore_on_load) missing_keys_to_ignore_on_load = [r"weight", r"bias"] if keys_to_ignore_on_load_missing is None: model_class._keys_to_ignore_on_load_missing = missing_keys_to_ignore_on_load - else: # pragma: no cover + else: # pragma: no cover model_class._keys_to_ignore_on_load_missing.extend(missing_keys_to_ignore_on_load) model = model_class.from_pretrained( @@ -129,20 +130,19 @@ def from_pretrained( use_auth_token=use_auth_token, revision=revision, **kwargs, - ) + ) model_class._keys_to_ignore_on_load_unexpected = keys_to_ignore_on_load_unexpected model_class._keys_to_ignore_on_load_missing = keys_to_ignore_on_load_missing - if not os.path.isdir(model_name_or_path) and \ - not os.path.isfile(model_name_or_path): # pragma: no cover + if not os.path.isdir(model_name_or_path) and not os.path.isfile(model_name_or_path): # pragma: no cover # pylint: disable=E0611 from packaging.version import Version - if Version(transformers.__version__) < Version('4.22.0'): + + if Version(transformers.__version__) < Version("4.22.0"): from transformers.file_utils import cached_path, hf_bucket_url - weights_file = hf_bucket_url(model_name_or_path, - filename=WEIGHTS_NAME, - revision=revision) + + weights_file = hf_bucket_url(model_name_or_path, filename=WEIGHTS_NAME, revision=revision) try: # Load from URL or cache if already cached resolved_weights_file = cached_path( @@ -152,7 +152,7 @@ def from_pretrained( resume_download=resume_download, use_auth_token=use_auth_token, ) - except EnvironmentError as err: # pragma: no cover + except EnvironmentError as err: # pragma: no cover logger.error(err) msg = ( f"Can't load weights for '{model_name_or_path}'. 
Make sure that:\n\n" @@ -164,16 +164,19 @@ def from_pretrained( f"named one of {WEIGHTS_NAME}\n\n" ) if revision is not None: - msg += (f"- or '{revision}' is a valid git identifier " - f"(branch name, a tag name, or a commit id) that " - f"exists for this model name as listed on its model " - f"page on 'https://huggingface.co/models'\n\n" - ) + msg += ( + f"- or '{revision}' is a valid git identifier " + f"(branch name, a tag name, or a commit id) that " + f"exists for this model name as listed on its model " + f"page on 'https://huggingface.co/models'\n\n" + ) raise EnvironmentError(msg) else: from pathlib import Path + from huggingface_hub import hf_hub_download from transformers.utils import TRANSFORMERS_CACHE, is_offline_mode + local_files_only = False if is_offline_mode(): logger.info("Offline mode: forcing local_files_only=True") @@ -202,17 +205,17 @@ def from_pretrained( f"named one of {WEIGHTS_NAME}\n\n" ) if revision is not None: - msg += (f"- or '{revision}' is a valid git identifier " - f"(branch name, a tag name, or a commit id) that " - f"exists for this model name as listed on its model " - f"page on 'https://huggingface.co/models'\n\n" - ) + msg += ( + f"- or '{revision}' is a valid git identifier " + f"(branch name, a tag name, or a commit id) that " + f"exists for this model name as listed on its model " + f"page on 'https://huggingface.co/models'\n\n" + ) raise EnvironmentError(msg) q_model = load(resolved_weights_file, model) else: - weights_file = os.path.join(os.path.abspath( - os.path.expanduser(model_name_or_path)), WEIGHTS_NAME) + weights_file = os.path.join(os.path.abspath(os.path.expanduser(model_name_or_path)), WEIGHTS_NAME) q_model = load(weights_file, model) del model diff --git a/neural_compressor/utils/logger.py b/neural_compressor/utils/logger.py index deb8fdc97f9..643c25ca24b 100644 --- a/neural_compressor/utils/logger.py +++ b/neural_compressor/utils/logger.py @@ -14,15 +14,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- """Logger: handles logging functionalities.""" -import os import logging +import os class Logger(object): """Logger class.""" + __instance = None def __new__(cls): @@ -34,13 +34,11 @@ def __new__(cls): def _log(self): """Setup the logger format and handler.""" - LOGLEVEL = os.environ.get('LOGLEVEL', 'INFO').upper() + LOGLEVEL = os.environ.get("LOGLEVEL", "INFO").upper() self._logger = logging.getLogger("neural_compressor") self._logger.handlers.clear() self._logger.setLevel(LOGLEVEL) - formatter = logging.Formatter( - '%(asctime)s [%(levelname)s] %(message)s', - "%Y-%m-%d %H:%M:%S") + formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s", "%Y-%m-%d %H:%M:%S") streamHandler = logging.StreamHandler() streamHandler.setFormatter(formatter) self._logger.addHandler(streamHandler) @@ -53,25 +51,16 @@ def get_logger(self): def _pretty_dict(value, indent=0): """Make the logger dict pretty.""" - prefix = '\n' + ' ' * (indent + 4) + prefix = "\n" + " " * (indent + 4) if isinstance(value, dict): - items = [ - prefix + repr(key) + ': ' + _pretty_dict(value[key], indent + 4) - for key in value - ] - return '{%s}' % (','.join(items) + '\n' + ' ' * indent) + items = [prefix + repr(key) + ": " + _pretty_dict(value[key], indent + 4) for key in value] + return "{%s}" % (",".join(items) + "\n" + " " * indent) elif isinstance(value, list): - items = [ - prefix + _pretty_dict(item, indent + 4) - for item in value - ] - return '[%s]' % (','.join(items) + '\n' + ' ' * indent) + items = [prefix + _pretty_dict(item, indent + 4) for item in value] + return "[%s]" % (",".join(items) + "\n" + " " * indent) elif isinstance(value, tuple): - items = [ - prefix + _pretty_dict(item, indent + 4) - for item in value - ] - return '(%s)' % (','.join(items) + '\n' + ' ' * indent) + items = [prefix + _pretty_dict(item, indent + 4) for item in value] + return "(%s)" % (",".join(items) + "\n" + " " * indent) else: return repr(value) @@ -83,7 +72,7 @@ def _pretty_dict(value, indent=0): def log(level, msg, *args, **kwargs): """Output log with the level as a parameter.""" if isinstance(msg, dict): - for _, line in enumerate(_pretty_dict(msg).split('\n')): + for _, line in enumerate(_pretty_dict(msg).split("\n")): Logger().get_logger().log(level, line, *args, **kwargs) else: Logger().get_logger().log(level, msg, *args, **kwargs) @@ -92,7 +81,7 @@ def log(level, msg, *args, **kwargs): def debug(msg, *args, **kwargs): """Output log with the debug level.""" if isinstance(msg, dict): - for _, line in enumerate(_pretty_dict(msg).split('\n')): + for _, line in enumerate(_pretty_dict(msg).split("\n")): Logger().get_logger().debug(line, *args, **kwargs) else: Logger().get_logger().debug(msg, *args, **kwargs) @@ -101,7 +90,7 @@ def debug(msg, *args, **kwargs): def error(msg, *args, **kwargs): """Output log with the error level.""" if isinstance(msg, dict): - for _, line in enumerate(_pretty_dict(msg).split('\n')): + for _, line in enumerate(_pretty_dict(msg).split("\n")): Logger().get_logger().error(line, *args, **kwargs) else: Logger().get_logger().error(msg, *args, **kwargs) @@ -110,7 +99,7 @@ def error(msg, *args, **kwargs): def fatal(msg, *args, **kwargs): """Output log with the fatal level.""" if isinstance(msg, dict): - for _, line in enumerate(_pretty_dict(msg).split('\n')): + for _, line in enumerate(_pretty_dict(msg).split("\n")): Logger().get_logger().fatal(line, *args, **kwargs) else: Logger().get_logger().fatal(msg, *args, **kwargs) @@ -119,7 +108,7 @@ def fatal(msg, *args, **kwargs): def info(msg, *args, 
**kwargs): """Output log with the info level.""" if isinstance(msg, dict): - for _, line in enumerate(_pretty_dict(msg).split('\n')): + for _, line in enumerate(_pretty_dict(msg).split("\n")): Logger().get_logger().info(line, *args, **kwargs) else: Logger().get_logger().info(msg, *args, **kwargs) @@ -128,7 +117,7 @@ def info(msg, *args, **kwargs): def warn(msg, *args, **kwargs): """Output log with the warning level.""" if isinstance(msg, dict): - for _, line in enumerate(_pretty_dict(msg).split('\n')): + for _, line in enumerate(_pretty_dict(msg).split("\n")): Logger().get_logger().warning(line, *args, **kwargs) else: Logger().get_logger().warning(msg, *args, **kwargs) @@ -137,7 +126,7 @@ def warn(msg, *args, **kwargs): def warning(msg, *args, **kwargs): """Output log with the warining level (Alias of the method warn).""" if isinstance(msg, dict): - for _, line in enumerate(_pretty_dict(msg).split('\n')): + for _, line in enumerate(_pretty_dict(msg).split("\n")): Logger().get_logger().warning(line, *args, **kwargs) else: Logger().get_logger().warning(msg, *args, **kwargs) diff --git a/neural_compressor/utils/neural_insights_utils.py b/neural_compressor/utils/neural_insights_utils.py index f85abda3a29..32d5b4f46c6 100644 --- a/neural_compressor/utils/neural_insights_utils.py +++ b/neural_compressor/utils/neural_insights_utils.py @@ -14,16 +14,16 @@ # See the License for the specific language governing permissions and # limitations under the License. """Neural Insights utils functions.""" -from typing import Optional, Any +from typing import Any, Optional from neural_compressor.model.onnx_model import ONNXModel from neural_compressor.utils import logger def register_neural_insights_workload( - workload_location: str, - model: Any, - workload_mode: str, + workload_location: str, + model: Any, + workload_mode: str, ) -> Optional[str]: """Register workload to Neural Insights. @@ -37,8 +37,9 @@ def register_neural_insights_workload( """ try: import os + from neural_insights import NeuralInsights - from neural_insights.utils.consts import WorkloadModes, WORKDIR_LOCATION + from neural_insights.utils.consts import WORKDIR_LOCATION, WorkloadModes try: mode = WorkloadModes(workload_mode) @@ -50,10 +51,11 @@ def register_neural_insights_workload( model_path: str = os.path.abspath(model.model_path) elif isinstance(model, ONNXModel): import onnx + model_path: str = os.path.join(workload_location, "input_model.onnx") os.makedirs(workload_location, exist_ok=True) onnx.save(model.model, model_path) - assert isinstance(model_path, str), 'Model path not detected' + assert isinstance(model_path, str), "Model path not detected" neural_insights = NeuralInsights(workdir_location=WORKDIR_LOCATION) ni_workload_uuid = neural_insights.add_workload( @@ -83,6 +85,7 @@ def update_neural_insights_workload(workload_uuid: str, status: str) -> None: try: from neural_insights import NeuralInsights from neural_insights.utils.consts import WORKDIR_LOCATION + neural_insights = NeuralInsights(workdir_location=WORKDIR_LOCATION) neural_insights.update_workload_status(workload_uuid, status) except ImportError: @@ -92,9 +95,9 @@ def update_neural_insights_workload(workload_uuid: str, status: str) -> None: def update_neural_insights_workload_accuracy_data( - workload_uuid: str, - baseline_accuracy: float, - optimized_accuracy: float, + workload_uuid: str, + baseline_accuracy: float, + optimized_accuracy: float, ) -> None: """Update accuracy data of specific workload. 
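Illustrative sketch (not part of the patch): the three `neural_insights_utils` helpers touched in this file are normally called in sequence around a tuning run, and each falls back gracefully via the `ImportError` branches above when Neural Insights is not installed. The workspace path, the stand-in model object, the mode and status strings, and the accuracy numbers below are placeholders for illustration.

```python
from types import SimpleNamespace

from neural_compressor.utils.neural_insights_utils import (
    register_neural_insights_workload,
    update_neural_insights_workload,
    update_neural_insights_workload_accuracy_data,
)

# Stand-in for a Neural Compressor model object; the sketch assumes only model_path is needed.
model = SimpleNamespace(model_path="./mobilenet_v1_1.0_224_frozen.pb")

workload_uuid = register_neural_insights_workload(
    workload_location="./nc_workspace/quantization",  # placeholder workspace path
    model=model,
    workload_mode="quantization",  # assumed to map to a valid WorkloadModes value
)

if workload_uuid is not None:
    update_neural_insights_workload(workload_uuid, "wip")  # placeholder status string
    # ... run tuning, then report the measured accuracies:
    update_neural_insights_workload_accuracy_data(
        workload_uuid, baseline_accuracy=0.76, optimized_accuracy=0.75
    )
```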
@@ -109,6 +112,7 @@ def update_neural_insights_workload_accuracy_data( try: from neural_insights import NeuralInsights from neural_insights.utils.consts import WORKDIR_LOCATION + neural_insights = NeuralInsights(workdir_location=WORKDIR_LOCATION) neural_insights.update_workload_accuracy_data( workload_uuid, diff --git a/neural_compressor/utils/options.py b/neural_compressor/utils/options.py index b683ecf7389..12f959e8e4e 100644 --- a/neural_compressor/utils/options.py +++ b/neural_compressor/utils/options.py @@ -14,28 +14,29 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """ONNX options.""" from ..conf.dotdict import DotDict + class onnxrt: """ONNX helper configuration.""" - graph_optimization = DotDict({'level': None, 'gemm2matmul': True}) - qdq_setting = DotDict({'OpTypesToExcludeOutputQuantizatioin': [], - 'AddQDQPairToWeight': False, - 'DedicatedQDQPair': False}) -OPTIONS = {'tensorflow': None, - 'tensorflow_itex': None, - 'pytorch': None, - 'pytorch_fx': None, - 'pytorch_ipex': None, - 'mxnet': None, - 'onnxrt_integerops': onnxrt, - 'onnxrt_qlinearops': onnxrt, - 'onnxrt_qdq': onnxrt, - 'onnxruntime': onnxrt, - } + graph_optimization = DotDict({"level": None, "gemm2matmul": True}) + qdq_setting = DotDict( + {"OpTypesToExcludeOutputQuantizatioin": [], "AddQDQPairToWeight": False, "DedicatedQDQPair": False} + ) +OPTIONS = { + "tensorflow": None, + "tensorflow_itex": None, + "pytorch": None, + "pytorch_fx": None, + "pytorch_ipex": None, + "mxnet": None, + "onnxrt_integerops": onnxrt, + "onnxrt_qlinearops": onnxrt, + "onnxrt_qdq": onnxrt, + "onnxruntime": onnxrt, +} diff --git a/neural_compressor/utils/pytorch.py b/neural_compressor/utils/pytorch.py index fb06ec0e2a0..3a7709ee72d 100644 --- a/neural_compressor/utils/pytorch.py +++ b/neural_compressor/utils/pytorch.py @@ -14,25 +14,30 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Pytorch utilities.""" -from ..adaptor.pytorch import _cfg_to_qconfig, _cfgs_to_fx_cfgs -from ..adaptor.pytorch import _propagate_qconfig, get_torch_version -from ..adaptor.pytorch import PyTorch_FXAdaptor -from ..adaptor.torch_utils import util -from . import logger -from packaging.version import Version -from torch.quantization import convert +import json +import os + import torch import torch.quantization as tq import yaml -import os -import json +from packaging.version import Version +from torch.quantization import convert +from ..adaptor.pytorch import ( + PyTorch_FXAdaptor, + _cfg_to_qconfig, + _cfgs_to_fx_cfgs, + _propagate_qconfig, + get_torch_version, +) +from ..adaptor.torch_utils import util +from . import logger -yaml.SafeLoader.add_constructor('tag:yaml.org,2002:python/tuple', - lambda loader, node: tuple(loader.construct_sequence(node))) +yaml.SafeLoader.add_constructor( + "tag:yaml.org,2002:python/tuple", lambda loader, node: tuple(loader.construct_sequence(node)) +) def is_int8_model(model): @@ -44,9 +49,10 @@ def is_int8_model(model): Returns: result(bool): Return True if the input model is a int8 model. 
""" + def _is_int8_value(value): """Check whether the input tensor is a int8 tensor.""" - if hasattr(value, 'dtype') and 'int8' in str(value.dtype): + if hasattr(value, "dtype") and "int8" in str(value.dtype): return True else: return False @@ -62,11 +68,12 @@ def _is_int8_value(value): return True return False -def _set_sub_module_scale_zeropoint(model, tune_cfg, prefix=''): + +def _set_sub_module_scale_zeropoint(model, tune_cfg, prefix=""): """Set activation scale and zero_point for converted sub modules recursively. Args: - q_model (dir): Int8 model converted from fp32 model. + q_model (dir): Int8 model converted from fp32 model. scale=1, zero_point=0 for each module tune_cfg (object): This file provides scale and zero_point of \ output activation of each quantized module. @@ -76,12 +83,12 @@ def _set_sub_module_scale_zeropoint(model, tune_cfg, prefix=''): (object): quantized model with scale and zero_point """ for name, module in model.named_children(): - op_name = prefix + '.' + name if prefix != '' else name - if op_name in tune_cfg['fx_sub_module_list']: - for key_name in tune_cfg['get_attr'].keys(): - node_name, node_target = key_name.split('--') + op_name = prefix + "." + name if prefix != "" else name + if op_name in tune_cfg["fx_sub_module_list"]: + for key_name in tune_cfg["get_attr"].keys(): + node_name, node_target = key_name.split("--") if op_name == node_name: - setattr(model, node_target, torch.tensor(tune_cfg['get_attr'][key_name])) + setattr(model, node_target, torch.tensor(tune_cfg["get_attr"][key_name])) else: _set_sub_module_scale_zeropoint(module, tune_cfg, op_name) @@ -90,7 +97,7 @@ def _set_activation_scale_zeropoint(q_model, tune_cfg): """Set activation scale and zero_point for converted model. Args: - q_model (dir): Int8 model converted from fp32 model. + q_model (dir): Int8 model converted from fp32 model. scale=1, zero_point=0 for each module tune_cfg (object): This file provides scale and zero_point of \ output activation of each quantized module. @@ -100,81 +107,87 @@ def _set_activation_scale_zeropoint(q_model, tune_cfg): """ # pylint: disable=not-callable # tune_ops splits tune_cfg['op'].keys() into {op_name: op_type} - if tune_cfg['approach'] == "post_training_dynamic_quant": + if tune_cfg["approach"] == "post_training_dynamic_quant": return tune_ops = dict() - for key in tune_cfg['op']: + for key in tune_cfg["op"]: tune_ops[key[0]] = key[1] for name, module in q_model.named_modules(): if name in tune_ops.keys(): key = (name, tune_ops[name]) - value = tune_cfg['op'][key] + value = tune_cfg["op"][key] assert isinstance(value, dict) - if 'scale' in value['activation'].keys(): - module.scale = torch.tensor(value['activation']['scale']) - if 'zero_point' in value['activation'].keys(): - module.zero_point = torch.tensor(value['activation']['zero_point']) + if "scale" in value["activation"].keys(): + module.scale = torch.tensor(value["activation"]["scale"]) + if "zero_point" in value["activation"].keys(): + module.zero_point = torch.tensor(value["activation"]["zero_point"]) - if tune_cfg['framework'] == "pytorch_fx": + if tune_cfg["framework"] == "pytorch_fx": # get scale and zero_point of getattr ops. 
- if not tune_cfg['fx_sub_module_list']: - for node_target in tune_cfg['get_attr'].keys(): - setattr(q_model, node_target, - torch.tensor(tune_cfg['get_attr'][node_target])) + if not tune_cfg["fx_sub_module_list"]: + for node_target in tune_cfg["get_attr"].keys(): + setattr(q_model, node_target, torch.tensor(tune_cfg["get_attr"][node_target])) else: _set_sub_module_scale_zeropoint(q_model, tune_cfg) def _load_int8_orchestration(model, tune_cfg, stat_dict, example_inputs, **kwargs): q_cfgs = torch.quantization.QConfig( - activation=torch.quantization.FakeQuantize.with_args( - dtype=torch.quint8, - qscheme=torch.per_tensor_affine, - reduce_range=tune_cfg['reduce_range']), - weight=torch.quantization.default_weight_fake_quant) - if tune_cfg['framework'] == 'pytorch_fx': - from torch.quantization.quantize_fx import prepare_qat_fx, convert_fx - quantized_ops = {op[0]: q_cfgs for op in tune_cfg['quantizable_ops']} + activation=torch.quantization.FakeQuantize.with_args( + dtype=torch.quint8, qscheme=torch.per_tensor_affine, reduce_range=tune_cfg["reduce_range"] + ), + weight=torch.quantization.default_weight_fake_quant, + ) + if tune_cfg["framework"] == "pytorch_fx": + from torch.quantization.quantize_fx import convert_fx, prepare_qat_fx + + quantized_ops = {op[0]: q_cfgs for op in tune_cfg["quantizable_ops"]} version = get_torch_version() if version.release < Version("1.11.0").release: quantized_ops["default_qconfig"] = None else: from torch.ao.quantization import default_embedding_qat_qconfig - for op in tune_cfg['quantizable_ops']: - if op[1] in ['Embedding', 'EmbeddingBag']: + + for op in tune_cfg["quantizable_ops"]: + if op[1] in ["Embedding", "EmbeddingBag"]: quantized_ops[op[0]] = default_embedding_qat_qconfig - fx_op_cfgs = _cfgs_to_fx_cfgs(quantized_ops, 'quant_aware_training') + fx_op_cfgs = _cfgs_to_fx_cfgs(quantized_ops, "quant_aware_training") model.train() - if tune_cfg['sub_module_list'] is None: + if tune_cfg["sub_module_list"] is None: # pylint: disable=E1123 if version.release >= Version("1.13.0").release: # pragma: no cover model = prepare_qat_fx( model, fx_op_cfgs, example_inputs=example_inputs, - prepare_custom_config=kwargs.get('prepare_custom_config_dict', None) - if kwargs is not None else None) + prepare_custom_config=kwargs.get("prepare_custom_config_dict", None) + if kwargs is not None + else None, + ) model = convert_fx( model, - convert_custom_config=kwargs.get('convert_custom_config_dict', None) - if kwargs is not None else None) + convert_custom_config=kwargs.get("convert_custom_config_dict", None) + if kwargs is not None + else None, + ) else: model = prepare_qat_fx( model, fx_op_cfgs, - prepare_custom_config_dict=kwargs.get('prepare_custom_config_dict', None) - if kwargs is not None else None) + prepare_custom_config_dict=kwargs.get("prepare_custom_config_dict", None) + if kwargs is not None + else None, + ) model = convert_fx( model, - convert_custom_config_dict=kwargs.get('convert_custom_config_dict', None) - if kwargs is not None else None) + convert_custom_config_dict=kwargs.get("convert_custom_config_dict", None) + if kwargs is not None + else None, + ) else: - logger.info('Fx trace of the entire model failed. ' + \ - 'We will conduct auto quantization') - PyTorch_FXAdaptor.prepare_sub_graph(tune_cfg['sub_module_list'], fx_op_cfgs, \ - model, prefix='', is_qat=True) - PyTorch_FXAdaptor.convert_sub_graph(tune_cfg['sub_module_list'], \ - model, prefix='') + logger.info("Fx trace of the entire model failed. 
" + "We will conduct auto quantization") + PyTorch_FXAdaptor.prepare_sub_graph(tune_cfg["sub_module_list"], fx_op_cfgs, model, prefix="", is_qat=True) + PyTorch_FXAdaptor.convert_sub_graph(tune_cfg["sub_module_list"], model, prefix="") else: model.training = True model.qconfig = q_cfgs @@ -188,34 +201,33 @@ def load_weight_only(checkpoint_dir, model): """Load model in weight_only mode. Args: - checkpoint_dir (dir/file/dict): The folder of checkpoint. 'qconfig.json' and - 'best_model.pt' are needed in This directory. - 'checkpoint' dir is under workspace folder and + checkpoint_dir (dir/file/dict): The folder of checkpoint. 'qconfig.json' and + 'best_model.pt' are needed in This directory. + 'checkpoint' dir is under workspace folder and workspace folder is define in configure yaml file. model (object): fp32 model need to do quantization. Returns: (object): quantized model """ - import neural_compressor # for eval(config['module_type']) + import neural_compressor # for eval(config['module_type']) from neural_compressor.adaptor.torch_utils.model_wrapper import MulLinear - weights_file = os.path.join(os.path.abspath(os.path.expanduser(checkpoint_dir)), - 'best_model.pt') + + weights_file = os.path.join(os.path.abspath(os.path.expanduser(checkpoint_dir)), "best_model.pt") # for weight only quantized model. - weights_only_config_file = os.path.join( - os.path.abspath(os.path.expanduser(checkpoint_dir)),'qconfig.json') - with open(weights_only_config_file, 'r') as f: + weights_only_config_file = os.path.join(os.path.abspath(os.path.expanduser(checkpoint_dir)), "qconfig.json") + with open(weights_only_config_file, "r") as f: weight_only_config = json.load(f) for op_name, config in weight_only_config.items(): - if config['dtype'] == 'fp32': + if config["dtype"] == "fp32": continue - if eval(config['module_type']) == MulLinear: + if eval(config["module_type"]) == MulLinear: # op should be repleced by MulLinear module = util.fetch_module(model, op_name) new_module = MulLinear(module) util.set_module(model, op_name, new_module) model.load_state_dict(torch.load(weights_file)) - logger.info('Load weight_only quantized model') + logger.info("Load weight_only quantized model") return model @@ -223,9 +235,9 @@ def load(checkpoint_dir=None, model=None, history_cfg=None, **kwargs): """Execute the quantize process on the specified model. Args: - checkpoint_dir (dir/file/dict): The folder of checkpoint. 'best_configure.yaml' and - 'best_model_weights.pt' are needed in This directory. - 'checkpoint' dir is under workspace folder and + checkpoint_dir (dir/file/dict): The folder of checkpoint. 'best_configure.yaml' and + 'best_model_weights.pt' are needed in This directory. + 'checkpoint' dir is under workspace folder and workspace folder is define in configure yaml file. model (object): fp32 model need to do quantization. history_cfg (object): configurations from history.snapshot file. 
@@ -234,7 +246,7 @@ def load(checkpoint_dir=None, model=None, history_cfg=None, **kwargs): Returns: (object): quantized model """ - weigth_only = kwargs.get('weight_only', False) + weigth_only = kwargs.get("weight_only", False) if weigth_only: return load_weight_only(checkpoint_dir, model) if checkpoint_dir is not None: @@ -249,34 +261,34 @@ def load(checkpoint_dir=None, model=None, history_cfg=None, **kwargs): stat_dict = torch.load(weights_file) elif os.path.isdir(checkpoint_dir): try: - weights_file = os.path.join(os.path.abspath(os.path.expanduser(checkpoint_dir)), - 'best_model.pt') + weights_file = os.path.join(os.path.abspath(os.path.expanduser(checkpoint_dir)), "best_model.pt") try: stat_dict = torch.jit.load(weights_file) logger.info("torch.jit.load is used to recovery the int8 model quantized by INC IPEX backend") except: stat_dict = torch.load(weights_file) except: - tune_cfg_file = os.path.join(os.path.abspath(os.path.expanduser(checkpoint_dir)), - 'best_configure.yaml') - weights_file = os.path.join(os.path.abspath(os.path.expanduser(checkpoint_dir)), - 'best_model_weights.pt') + tune_cfg_file = os.path.join(os.path.abspath(os.path.expanduser(checkpoint_dir)), "best_configure.yaml") + weights_file = os.path.join( + os.path.abspath(os.path.expanduser(checkpoint_dir)), "best_model_weights.pt" + ) stat_dict = torch.load(weights_file) - with open(tune_cfg_file, 'r') as f: + with open(tune_cfg_file, "r") as f: tune_cfg = yaml.safe_load(f) - stat_dict['best_configure'] = tune_cfg + stat_dict["best_configure"] = tune_cfg else: - logger.error("Unexpected checkpoint type:{}. \ - Only file dir/path or state_dict is acceptable") + logger.error( + "Unexpected checkpoint type:{}. \ + Only file dir/path or state_dict is acceptable" + ) if not isinstance(stat_dict, torch.jit._script.RecursiveScriptModule): - assert 'best_configure' in stat_dict, \ - "No best_configure found in the model file, " \ - "please use the int8 model file generated by INC." - tune_cfg = stat_dict.pop('best_configure') + assert "best_configure" in stat_dict, ( + "No best_configure found in the model file, " "please use the int8 model file generated by INC." 
+ ) + tune_cfg = stat_dict.pop("best_configure") else: - assert history_cfg is not None, \ - "Need chieckpoint_dir or history_cfg to rebuild int8 model" + assert history_cfg is not None, "Need chieckpoint_dir or history_cfg to rebuild int8 model" tune_cfg = history_cfg stat_dict = None @@ -289,137 +301,131 @@ def load(checkpoint_dir=None, model=None, history_cfg=None, **kwargs): logger.info("Finish load the model quantized by INC IPEX backend.") return q_model - if 'is_oneshot' in tune_cfg and tune_cfg['is_oneshot']: + if "is_oneshot" in tune_cfg and tune_cfg["is_oneshot"]: return _load_int8_orchestration(model, tune_cfg, stat_dict, example_inputs, **kwargs) model.eval() approach_quant_mode = None - if tune_cfg['approach'] == "post_training_dynamic_quant": - approach_quant_mode = 'dynamic' - elif tune_cfg['approach'] == "post_training_static_quant": - approach_quant_mode = 'static' - - recipe_cfgs = tune_cfg.get('recipe_cfgs', None) - if recipe_cfgs and recipe_cfgs.get('smooth_quant', False) \ - and not recipe_cfgs['smooth_quant_args']['folding'] \ - and approach_quant_mode != 'dynamic': - from ..adaptor.torch_utils.model_wrapper import _wrapper_sq_linear, _wrapper_qdq_linear - model = _wrapper_sq_linear(model, recipe_cfgs['smoothquant_op_info']['sq_linear']) - model = _wrapper_qdq_linear(model, recipe_cfgs['smoothquant_op_info']['qdq_linear']) + if tune_cfg["approach"] == "post_training_dynamic_quant": + approach_quant_mode = "dynamic" + elif tune_cfg["approach"] == "post_training_static_quant": + approach_quant_mode = "static" + + recipe_cfgs = tune_cfg.get("recipe_cfgs", None) + if ( + recipe_cfgs + and recipe_cfgs.get("smooth_quant", False) + and not recipe_cfgs["smooth_quant_args"]["folding"] + and approach_quant_mode != "dynamic" + ): + from ..adaptor.torch_utils.model_wrapper import _wrapper_qdq_linear, _wrapper_sq_linear + + model = _wrapper_sq_linear(model, recipe_cfgs["smoothquant_op_info"]["sq_linear"]) + model = _wrapper_qdq_linear(model, recipe_cfgs["smoothquant_op_info"]["qdq_linear"]) model.load_state_dict(stat_dict) return model - if recipe_cfgs and recipe_cfgs.get('layer_wise_quant', False) \ - and approach_quant_mode != 'dynamic': + if recipe_cfgs and recipe_cfgs.get("layer_wise_quant", False) and approach_quant_mode != "dynamic": from ..adaptor.torch_utils.model_wrapper import _wrap_lwq_layer - op_cfgs = _cfg_to_qconfig(tune_cfg, tune_cfg['approach']) - fx_op_cfgs = _cfgs_to_fx_cfgs(op_cfgs, tune_cfg['approach']) - model = _wrap_lwq_layer(model, recipe_cfgs['lwq_layers'], fx_op_cfgs) + + op_cfgs = _cfg_to_qconfig(tune_cfg, tune_cfg["approach"]) + fx_op_cfgs = _cfgs_to_fx_cfgs(op_cfgs, tune_cfg["approach"]) + model = _wrap_lwq_layer(model, recipe_cfgs["lwq_layers"], fx_op_cfgs) model.load_state_dict(stat_dict) return model - for _, op_cfg in tune_cfg['op'].items(): - if 'quant_mode' not in op_cfg['activation']: - op_cfg['activation']['quant_mode'] = approach_quant_mode + for _, op_cfg in tune_cfg["op"].items(): + if "quant_mode" not in op_cfg["activation"]: + op_cfg["activation"]["quant_mode"] = approach_quant_mode - if tune_cfg['approach'] != "post_training_dynamic_quant": - if version.release < Version("1.7.0").release: # pragma: no cover + if tune_cfg["approach"] != "post_training_dynamic_quant": + if version.release < Version("1.7.0").release: # pragma: no cover q_mapping = tq.default_mappings.DEFAULT_MODULE_MAPPING - elif version.release < Version("1.8.0").release: # pragma: no cover - q_mapping = \ - tq.quantization_mappings.get_static_quant_module_mappings() + elif 
version.release < Version("1.8.0").release: # pragma: no cover + q_mapping = tq.quantization_mappings.get_static_quant_module_mappings() else: - q_mapping = \ - tq.quantization_mappings.get_default_static_quant_module_mappings() + q_mapping = tq.quantization_mappings.get_default_static_quant_module_mappings() else: - if version.release < Version("1.7.0").release: # pragma: no cover - q_mapping = \ - tq.default_mappings.DEFAULT_DYNAMIC_MODULE_MAPPING - elif version.release < Version("1.8.0").release: # pragma: no cover - q_mapping = \ - tq.quantization_mappings.get_dynamic_quant_module_mappings() + if version.release < Version("1.7.0").release: # pragma: no cover + q_mapping = tq.default_mappings.DEFAULT_DYNAMIC_MODULE_MAPPING + elif version.release < Version("1.8.0").release: # pragma: no cover + q_mapping = tq.quantization_mappings.get_dynamic_quant_module_mappings() else: - q_mapping = \ - tq.quantization_mappings.get_default_dynamic_quant_module_mappings() + q_mapping = tq.quantization_mappings.get_default_dynamic_quant_module_mappings() - if tune_cfg['framework'] == "pytorch_fx": # pragma: no cover + if tune_cfg["framework"] == "pytorch_fx": # pragma: no cover # For torch.fx approach - assert version.release >= Version("1.8.0").release, \ - "Please use PyTroch 1.8 or higher version with pytorch_fx backend" - from torch.quantization.quantize_fx import prepare_fx, convert_fx, prepare_qat_fx + assert ( + version.release >= Version("1.8.0").release + ), "Please use PyTroch 1.8 or higher version with pytorch_fx backend" + from torch.quantization.quantize_fx import convert_fx, prepare_fx, prepare_qat_fx + if kwargs is None: kwargs = {} - prepare_custom_config_dict = kwargs.get( - 'prepare_custom_config_dict', None) - convert_custom_config_dict = kwargs.get( - 'convert_custom_config_dict', None) - - op_cfgs = _cfg_to_qconfig(tune_cfg, tune_cfg['approach']) - fx_op_cfgs = _cfgs_to_fx_cfgs(op_cfgs, tune_cfg['approach']) - if not tune_cfg['fx_sub_module_list']: + prepare_custom_config_dict = kwargs.get("prepare_custom_config_dict", None) + convert_custom_config_dict = kwargs.get("convert_custom_config_dict", None) + + op_cfgs = _cfg_to_qconfig(tune_cfg, tune_cfg["approach"]) + fx_op_cfgs = _cfgs_to_fx_cfgs(op_cfgs, tune_cfg["approach"]) + if not tune_cfg["fx_sub_module_list"]: tmp_model = model - if tune_cfg['approach'] == "quant_aware_training": + if tune_cfg["approach"] == "quant_aware_training": model.train() if version.release > Version("1.12.1").release: # pragma: no cover # pylint: disable=E1123 - model = prepare_qat_fx(model, - fx_op_cfgs, - prepare_custom_config=prepare_custom_config_dict, - example_inputs=example_inputs) + model = prepare_qat_fx( + model, + fx_op_cfgs, + prepare_custom_config=prepare_custom_config_dict, + example_inputs=example_inputs, + ) else: - model = prepare_qat_fx(model, - fx_op_cfgs, - prepare_custom_config_dict=prepare_custom_config_dict) + model = prepare_qat_fx(model, fx_op_cfgs, prepare_custom_config_dict=prepare_custom_config_dict) else: if version.release > Version("1.12.1").release: # pragma: no cover # pylint: disable=E1123 - model = prepare_fx(model, - fx_op_cfgs, - prepare_custom_config=prepare_custom_config_dict, - example_inputs=example_inputs) + model = prepare_fx( + model, + fx_op_cfgs, + prepare_custom_config=prepare_custom_config_dict, + example_inputs=example_inputs, + ) else: - model = prepare_fx(model, - fx_op_cfgs, - prepare_custom_config_dict=prepare_custom_config_dict) + model = prepare_fx(model, fx_op_cfgs, 
prepare_custom_config_dict=prepare_custom_config_dict) if version.release > Version("1.12.1").release: # pragma: no cover # pylint: disable=E1123 - model = convert_fx(model, - convert_custom_config=convert_custom_config_dict) + model = convert_fx(model, convert_custom_config=convert_custom_config_dict) else: - model = convert_fx(model, - convert_custom_config_dict=convert_custom_config_dict) + model = convert_fx(model, convert_custom_config_dict=convert_custom_config_dict) util.append_attr(model, tmp_model) del tmp_model else: - sub_module_list = tune_cfg['fx_sub_module_list'] - if tune_cfg['approach'] == "quant_aware_training": + sub_module_list = tune_cfg["fx_sub_module_list"] + if tune_cfg["approach"] == "quant_aware_training": model.train() - PyTorch_FXAdaptor.prepare_sub_graph(sub_module_list, - fx_op_cfgs, - model, - prefix='', - is_qat=True, - example_inputs=example_inputs) + PyTorch_FXAdaptor.prepare_sub_graph( + sub_module_list, fx_op_cfgs, model, prefix="", is_qat=True, example_inputs=example_inputs + ) else: - PyTorch_FXAdaptor.prepare_sub_graph(sub_module_list, - fx_op_cfgs, - model, - prefix='', - example_inputs=example_inputs) - PyTorch_FXAdaptor.convert_sub_graph(sub_module_list, model, prefix='') + PyTorch_FXAdaptor.prepare_sub_graph( + sub_module_list, fx_op_cfgs, model, prefix="", example_inputs=example_inputs + ) + PyTorch_FXAdaptor.convert_sub_graph(sub_module_list, model, prefix="") else: - if tune_cfg['approach'] == "post_training_dynamic_quant": - op_cfgs = _cfg_to_qconfig(tune_cfg, tune_cfg['approach']) + if tune_cfg["approach"] == "post_training_dynamic_quant": + op_cfgs = _cfg_to_qconfig(tune_cfg, tune_cfg["approach"]) else: op_cfgs = _cfg_to_qconfig(tune_cfg) - _propagate_qconfig(model, op_cfgs, approach=tune_cfg['approach']) + _propagate_qconfig(model, op_cfgs, approach=tune_cfg["approach"]) # sanity check common API misusage - if not any(hasattr(m, 'qconfig') and m.qconfig for m in model.modules()): - logger.warn("None of the submodule got qconfig applied. Make sure you " - "passed correct configuration through `qconfig_dict` or " - "by assigning the `.qconfig` attribute directly on submodules") - if tune_cfg['approach'] != "post_training_dynamic_quant": + if not any(hasattr(m, "qconfig") and m.qconfig for m in model.modules()): + logger.warn( + "None of the submodule got qconfig applied. 
Make sure you " + "passed correct configuration through `qconfig_dict` or " + "by assigning the `.qconfig` attribute directly on submodules" + ) + if tune_cfg["approach"] != "post_training_dynamic_quant": if version.release < Version("2.0.0").release: from torch.quantization.quantize import add_observer_ else: @@ -427,11 +433,12 @@ def load(checkpoint_dir=None, model=None, history_cfg=None, **kwargs): add_observer_(model) model = convert(model, mapping=q_mapping, inplace=True) - bf16_ops_list = tune_cfg['bf16_ops_list'] if 'bf16_ops_list' in tune_cfg.keys() else [] + bf16_ops_list = tune_cfg["bf16_ops_list"] if "bf16_ops_list" in tune_cfg.keys() else [] if len(bf16_ops_list) > 0 and (version >= Version("1.11.0-rc1")): from ..adaptor.torch_utils.bf16_convert import Convert + model = Convert(model, tune_cfg) - if not is_int8_model(model): # pragma: no cover + if not is_int8_model(model): # pragma: no cover logger.warning("The loaded model is not a int8 model.") if checkpoint_dir is None and history_cfg is not None: _set_activation_scale_zeropoint(model, history_cfg) @@ -441,7 +448,6 @@ def load(checkpoint_dir=None, model=None, history_cfg=None, **kwargs): except: # set strict=False to avoid loading linked tensors, ignore missing_keys. mismatch_log = model.load_state_dict(stat_dict, strict=False) - assert len(mismatch_log.unexpected_keys) == 0, \ - "Loading state_dict failed: {}".format(mismatch_log) + assert len(mismatch_log.unexpected_keys) == 0, "Loading state_dict failed: {}".format(mismatch_log) util.get_embedding_contiguous(model) return model diff --git a/neural_compressor/utils/utility.py b/neural_compressor/utils/utility.py index fd82bb7f0fe..72a67aeb4a0 100644 --- a/neural_compressor/utils/utility.py +++ b/neural_compressor/utils/utility.py @@ -14,8 +14,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -"""quantization auto-tuning config system. +"""Quantization auto-tuning config system. This file specifies default config options for quantization auto-tuning tool. User should not change values in this file. Instead, user should write a config @@ -38,7 +37,7 @@ from enum import Enum from functools import wraps from tempfile import NamedTemporaryFile -from typing import Dict, List, Any, Optional +from typing import Any, Dict, List, Optional import cpuinfo import numpy as np @@ -49,14 +48,14 @@ from neural_compressor.utils import logger required_libs = { - 'tensorflow': ['tensorflow'], - 'pytorch': ['torch'], - 'pytorch_fx': ['torch'], - 'pytorch_ipex': ['torch', 'intel_extension_for_pytorch'], - 'onnxrt_qlinearops': ['onnx', 'onnxruntime'], - 'onnxrt_integerops': ['onnx', 'onnxruntime'], - 'onnxruntime': ['onnx', 'onnxruntime'], - 'mxnet': ['mxnet'], + "tensorflow": ["tensorflow"], + "pytorch": ["torch"], + "pytorch_fx": ["torch"], + "pytorch_ipex": ["torch", "intel_extension_for_pytorch"], + "onnxrt_qlinearops": ["onnx", "onnxruntime"], + "onnxrt_integerops": ["onnx", "onnxruntime"], + "onnxruntime": ["onnx", "onnxruntime"], + "mxnet": ["mxnet"], } @@ -103,15 +102,15 @@ def __getattr__(self, name): self.module = importlib.import_module(self.module_name) mod = getattr(self.module, name) except: - spec = importlib.util.find_spec(str(self.module_name + '.' + name)) + spec = importlib.util.find_spec(str(self.module_name + "." 
+ name)) mod = importlib.util.module_from_spec(spec) spec.loader.exec_module(mod) return mod def __call__(self, *args, **kwargs): """Call the function in that module.""" - function_name = self.module_name.split('.')[-1] - module_name = self.module_name.split(f'.{function_name}')[0] + function_name = self.module_name.split(".")[-1] + module_name = self.module_name.split(f".{function_name}")[0] self.module = importlib.import_module(module_name) function = getattr(self.module, function_name) return function(*args, **kwargs) @@ -129,6 +128,7 @@ def _singleton(*args, **kw): if cls not in instances: instances[cls] = cls(*args, **kw) return instances[cls] + return _singleton @@ -136,7 +136,7 @@ def _singleton(*args, **kw): def time_limit(seconds): """Limit the time for context execution.""" if seconds == 0: - #seconds = threading.TIMEOUT_MAX + # seconds = threading.TIMEOUT_MAX # TODO WA for fixed the crash for py 3.11.3 seconds = 3600 * 24 * 365 timer = threading.Timer(seconds, lambda: _thread.interrupt_main()) @@ -147,6 +147,7 @@ def time_limit(seconds): # if the action ends in specified time, timer is canceled timer.cancel() + def get_size(obj, seen=None): """Recursively finds size of objects.""" tf = LazyImport("tensorflow") @@ -162,8 +163,7 @@ def get_size(obj, seen=None): # for Tensorflow case if isinstance(obj, tf.Graph): _graph_def = obj.as_graph_def() - _graph_node = _graph_def.node if isinstance(_graph_def, tf.compat.v1.GraphDef) \ - else _graph_def.graph_def.node + _graph_node = _graph_def.node if isinstance(_graph_def, tf.compat.v1.GraphDef) else _graph_def.graph_def.node for node in _graph_node: if node.op == "Const": input_tensor = node.attr["value"].tensor @@ -203,8 +203,7 @@ def compute_sparsity(tensor): def fault_tolerant_file(name): """Make another temporary copy of the file.""" dirpath, filename = osp.split(name) - with NamedTemporaryFile(dir=os.path.abspath(os.path.expanduser(dirpath)), - delete=False, suffix='.tmp') as f: + with NamedTemporaryFile(dir=os.path.abspath(os.path.expanduser(dirpath)), delete=False, suffix=".tmp") as f: yield f f.flush() os.fsync(f) @@ -215,14 +214,14 @@ def fault_tolerant_file(name): def equal_dicts(d1, d2, compare_keys=None, ignore_keys=None): """Check whether two dicts are same except for those ignored keys.""" assert not (compare_keys and ignore_keys) - if compare_keys == None and ignore_keys == None: + if compare_keys is None and ignore_keys is None: return d1 == d2 - elif compare_keys == None and ignore_keys != None: - return {k: v for k, v in d1.items() if k not in ignore_keys} == \ - {k: v for k, v in d2.items() if k not in ignore_keys} - elif compare_keys != None and ignore_keys == None: - return {k: v for k, v in d1.items() if k in compare_keys} == \ - {k: v for k, v in d2.items() if k in compare_keys} + elif compare_keys is None and ignore_keys is not None: + return {k: v for k, v in d1.items() if k not in ignore_keys} == { + k: v for k, v in d2.items() if k not in ignore_keys + } + elif compare_keys is not None and ignore_keys is None: + return {k: v for k, v in d1.items() if k in compare_keys} == {k: v for k, v in d2.items() if k in compare_keys} else: assert False @@ -230,28 +229,24 @@ def equal_dicts(d1, d2, compare_keys=None, ignore_keys=None): @singleton class CpuInfo(object): """Get CPU Info.""" + def __init__(self): """Get whether the cpu numerical format is bf16, the number of sockets, cores and cores per socket.""" self._bf16 = False self._vnni = False info = cpuinfo.get_cpu_info() - if 'arch' in info and 'X86' in info['arch']: 
+ if "arch" in info and "X86" in info["arch"]: cpuid = cpuinfo.CPUID() max_extension_support = cpuid.get_max_extension_support() if max_extension_support >= 7: ecx = cpuid._run_asm( - b"\x31\xC9", # xor ecx, ecx - b"\xB8\x07\x00\x00\x00" # mov eax, 7 - b"\x0f\xa2" # cpuid - b"\x89\xC8" # mov ax, cx - b"\xC3" # ret + b"\x31\xC9", # xor ecx, ecx + b"\xB8\x07\x00\x00\x00" b"\x0f\xa2" b"\x89\xC8" b"\xC3", # mov eax, 7 # cpuid # mov ax, cx # ret ) self._vnni = bool(ecx & (1 << 11)) eax = cpuid._run_asm( b"\xB9\x01\x00\x00\x00", # mov ecx, 1 - b"\xB8\x07\x00\x00\x00" # mov eax, 7 - b"\x0f\xa2" # cpuid - b"\xC3" # ret + b"\xB8\x07\x00\x00\x00" b"\x0f\xa2" b"\xC3", # mov eax, 7 # cpuid # ret ) self._bf16 = bool(eax & (1 << 5)) self._sockets = self.get_number_of_sockets() @@ -299,16 +294,20 @@ def dump_elapsed_time(customized_msg=""): Args: customized_msg (string, optional): The parameter passed to decorator. Defaults to None. """ + def f(func): def fi(*args, **kwargs): start = time.time() res = func(*args, **kwargs) end = time.time() - logging.getLogger("neural_compressor").info('%s elapsed time: %s ms' % - (customized_msg if customized_msg else func.__qualname__, - round((end - start) * 1000, 2))) + logging.getLogger("neural_compressor").info( + "%s elapsed time: %s ms" + % (customized_msg if customized_msg else func.__qualname__, round((end - start) * 1000, 2)) + ) return res + return fi + return f @@ -319,23 +318,17 @@ def combine_histogram(old_hist, arr): new_th = max(abs(new_min), abs(new_max)) (old_hist, old_hist_edges, old_min, old_max, old_th) = old_hist if new_th <= old_th: - hist, _ = np.histogram(arr, - bins=len(old_hist), - range=(-old_th, old_th)) - return (old_hist + hist, old_hist_edges, min(old_min, new_min), - max(old_max, new_max), old_th) + hist, _ = np.histogram(arr, bins=len(old_hist), range=(-old_th, old_th)) + return (old_hist + hist, old_hist_edges, min(old_min, new_min), max(old_max, new_max), old_th) else: old_num_bins = len(old_hist) old_step = 2 * old_th / old_num_bins half_increased_bins = int((new_th - old_th) // old_step + 1) new_num_bins = half_increased_bins * 2 + old_num_bins new_th = half_increased_bins * old_step + old_th - hist, hist_edges = np.histogram(arr, - bins=new_num_bins, - range=(-new_th, new_th)) - hist[half_increased_bins:new_num_bins - half_increased_bins] += old_hist - return (hist, hist_edges, min(old_min, new_min), max(old_max, - new_max), new_th) + hist, hist_edges = np.histogram(arr, bins=new_num_bins, range=(-new_th, new_th)) + hist[half_increased_bins : new_num_bins - half_increased_bins] += old_hist + return (hist, hist_edges, min(old_min, new_min), max(old_max, new_max), new_th) def get_tensor_histogram(tensor_data, bins=2048): @@ -349,7 +342,7 @@ def get_tensor_histogram(tensor_data, bins=2048): def get_all_fp32_data(data): """Get all the fp32 data.""" - return [float(i) for i in data.replace('[', ' ').replace(']', ' ').split(' ') if i.strip() and len(i) < 32] + return [float(i) for i in data.replace("[", " ").replace("]", " ").split(" ") if i.strip() and len(i) < 32] def get_tuning_history(tuning_history_path): @@ -358,7 +351,7 @@ def get_tuning_history(tuning_history_path): Args: tuning_history_path: The tuning history path, which need users to assign """ - with open(tuning_history_path, 'rb') as f: + with open(tuning_history_path, "rb") as f: strategy_object = pickle.load(f) tuning_history = strategy_object.tuning_history return tuning_history @@ -373,32 +366,37 @@ def recover(fp32_model, tuning_history_path, num, **kwargs): num: tune 
index """ tuning_history = get_tuning_history(tuning_history_path) - target_history = tuning_history[0]['history'] - q_config = target_history[num]['q_config'] + target_history = tuning_history[0]["history"] + q_config = target_history[num]["q_config"] try: - framework = tuning_history[0]['cfg']['model']['framework'] + framework = tuning_history[0]["cfg"]["model"]["framework"] except Exception as e: - framework = tuning_history[0]['cfg'].quantization.framework + framework = tuning_history[0]["cfg"].quantization.framework - if 'pytorch' in framework: + if "pytorch" in framework: from neural_compressor.utils.pytorch import load + tune_index_qmodel = load(model=fp32_model, history_cfg=q_config, **kwargs) return tune_index_qmodel from neural_compressor.adaptor import FRAMEWORKS - adaptor = FRAMEWORKS[framework](q_config['framework_specific_info']) - if 'onnx' in framework: + + adaptor = FRAMEWORKS[framework](q_config["framework_specific_info"]) + if "onnx" in framework: from neural_compressor.model import Model + ox_fp32_model = Model(fp32_model) tune_index_qmodel = adaptor.recover(ox_fp32_model, q_config) return tune_index_qmodel - elif 'tensorflow' in framework: + elif "tensorflow" in framework: from neural_compressor.model import Model + tf_fp32_model = Model(fp32_model) tune_index_qmodel = adaptor.recover_tuned_model(tf_fp32_model, q_config) return tune_index_qmodel - elif 'mxnet' in framework: + elif "mxnet" in framework: from neural_compressor.model import Model + mx_fp32_model = Model(fp32_model) tune_index_qmodel = adaptor.recover_tuned_model(mx_fp32_model, q_config) return tune_index_qmodel @@ -406,9 +404,9 @@ def recover(fp32_model, tuning_history_path, num, **kwargs): def str2array(s): """Get the array of the string.""" - s = re.sub(r'\[ +', '[', s.strip()) - s = re.sub(r'[,\s]+', ', ', s) - s = re.sub(r'\]\[', '], [', s) + s = re.sub(r"\[ +", "[", s.strip()) + s = re.sub(r"[,\s]+", ", ", s) + s = re.sub(r"\]\[", "], [", s) return np.array(ast.literal_eval(s)) @@ -417,16 +415,20 @@ def dequantize_weight(weight_tensor, min_filter_tensor, max_filter_tensor): """Dequantize the weight with min-max filter tensors.""" weight_channel = weight_tensor.shape[-1] if len(min_filter_tensor) == 1: - weight_tensor = weight_tensor * ((max_filter_tensor[0] - min_filter_tensor[0])/ 127.0) + weight_tensor = weight_tensor * ((max_filter_tensor[0] - min_filter_tensor[0]) / 127.0) else: # TODO to calculate the de-quantized result in a parallel way for i in range(weight_channel): - weight_tensor[:,:,:,i] = weight_tensor[:,:,:,i] * ((max_filter_tensor[i] - min_filter_tensor[i])/ 127.0) + weight_tensor[:, :, :, i] = weight_tensor[:, :, :, i] * ( + (max_filter_tensor[i] - min_filter_tensor[i]) / 127.0 + ) return weight_tensor + def Dequantize(data, scale_info): """Dequantize the data with the scale_info.""" import numpy as np + original_shape = data.shape max_value = 255.0 if scale_info[0].find("Relu") != -1.0 else 127.0 _scale = (np.array(scale_info[2]) - np.array(scale_info[1])) / max_value @@ -440,10 +442,11 @@ class CaptureOutputToFile(object): Capture the output to file. 
""" + def __init__(self, tmp_file_path, stream=sys.stderr): """Open a temporary file.""" self.orig_stream_fileno = stream.fileno() - self.tmp_file = open(tmp_file_path, 'w') + self.tmp_file = open(tmp_file_path, "w") def __enter__(self): """Duplicate the file desciptor to the stream.""" @@ -458,8 +461,9 @@ def __exit__(self, type, value, traceback): self.tmp_file.close() -class Statistics(): +class Statistics: """The statistics printer.""" + def __init__(self, data, header, field_names, output_handle=logger.info): """Init a Statistics object. @@ -493,21 +497,23 @@ def print_stat(self): tmp_data.append(value) if any(tmp_data[1:]): self.tb.add_row(tmp_data) - lines = self.tb.get_string().split('\n') - self.output_handle('|' + self.header.center(len(lines[0]) - 2, "*") + '|') + lines = self.tb.get_string().split("\n") + self.output_handle("|" + self.header.center(len(lines[0]) - 2, "*") + "|") for i in lines: self.output_handle(i) class MODE(Enum): """Mode: Quantization, Benchmark or Pruning.""" + QUANTIZATION = 1 BENCHMARK = 2 PRUNING = 3 -class GLOBAL_STATE(): +class GLOBAL_STATE: """Access the global model.""" + STATE = MODE.QUANTIZATION @@ -520,11 +526,11 @@ def load_data_from_pkl(path, filename): """ try: file_path = os.path.join(path, filename) - with open(file_path, 'rb') as fp: + with open(file_path, "rb") as fp: data = pickle.load(fp) return data except FileExistsError: - logging.getLogger("neural_compressor").info('Can not open %s.' % path) + logging.getLogger("neural_compressor").info("Can not open %s." % path) def dump_data_to_local(data, path, filename): @@ -539,10 +545,11 @@ def dump_data_to_local(data, path, filename): loaded data """ from pathlib import Path + if not os.path.exists(path): Path(path).mkdir(parents=True, exist_ok=True) file_path = os.path.join(path, filename) - with open(file_path, 'wb') as fp: + with open(file_path, "wb") as fp: pickle.dump(data, fp) logging.getLogger("neural_compressor").info("Dumped data to %s" % file_path) @@ -550,24 +557,28 @@ def dump_data_to_local(data, path, filename): def set_random_seed(seed: int): """Set the random seed in config.""" from neural_compressor.config import options + options.random_seed = seed def set_workspace(workspace: str): """Set the workspace in config.""" from neural_compressor.config import options + options.workspace = workspace def set_resume_from(resume_from: str): """Set the resume_from in config.""" from neural_compressor.config import options + options.resume_from = resume_from def set_tensorboard(tensorboard: bool): """Set the tensorboard in config.""" from neural_compressor.config import options + options.tensorboard = tensorboard @@ -577,8 +588,8 @@ def show_memory_info(hint): p = psutil.Process(pid) info = p.memory_full_info() - memory = info.uss / 1024. 
/ 1024 - print('{} memory used: {} MB'.format(hint, memory)) + memory = info.uss / 1024.0 / 1024 + print("{} memory used: {} MB".format(hint, memory)) def dump_class_attrs(obj, result={}): @@ -595,19 +606,18 @@ def dump_class_attrs(obj, result={}): if not attr.startswith("__"): value = getattr(obj, attr) value_class_name = value.__class__.__name__ - if 'Config' in value_class_name or 'Criterion' in value_class_name: + if "Config" in value_class_name or "Criterion" in value_class_name: dump_class_attrs(value, result=result[obj_name]) else: - attr = attr[1:] if attr.startswith('_') else attr + attr = attr[1:] if attr.startswith("_") else attr result[obj_name][attr] = value class DotDict(dict): - """access yaml using attributes instead of using the dictionary notation. + """Access yaml using attributes instead of using the dictionary notation. Args: value (dict): The dict object to access. - """ def __init__(self, value=None): @@ -622,7 +632,7 @@ def __init__(self, value=None): for key in value: self.__setitem__(key, value[key]) else: - raise TypeError('expected dict') + raise TypeError("expected dict") def __getitem__(self, key): """Get value by key. @@ -642,11 +652,9 @@ def __setitem__(self, key, value): """ if isinstance(value, dict) and not isinstance(value, DotDict): value = DotDict(value) - if isinstance(value, list) and len(value) == 1 and isinstance( - value[0], dict): + if isinstance(value, list) and len(value) == 1 and isinstance(value[0], dict): value = DotDict(value[0]) - if isinstance(value, list) and len(value) > 1 and all(isinstance( - v, dict) for v in value): + if isinstance(value, list) and len(value) > 1 and all(isinstance(v, dict) for v in value): value = DotDict({k: v for d in value for k, v in d.items()}) super(DotDict, self).__setitem__(key, value) @@ -695,6 +703,7 @@ def alias_param(param_name: str, param_alias: str): param_name: Name of param in function to alias. param_alias: Alias that can be used for this param. """ + def decorator(func): @wraps(func) def wrapper(*args, **kwargs): @@ -704,17 +713,19 @@ def wrapper(*args, **kwargs): del kwargs[param_alias] result = func(*args, **kwargs) return result + return wrapper + return decorator def print_table( - column_mapping: Dict[str, str], - table_entries: List[Any], - output_handler=logger.info, - title: Optional[str] = None, - insert_newlines=False, - precision: Optional[int] = None, + column_mapping: Dict[str, str], + table_entries: List[Any], + output_handler=logger.info, + title: Optional[str] = None, + insert_newlines=False, + precision: Optional[int] = None, ) -> None: """Print table with prettytable. 
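Illustrative sketch (not part of the patch): `print_table()`, whose signature is reflowed above, resolves each column through an attribute path on the entry objects (the `reduce(getattr, ...)` call in the hunk below). A small self-contained example; `OpEntry` is a made-up type used only for illustration.

```python
from dataclasses import dataclass

from neural_compressor.utils.utility import print_table


@dataclass
class OpEntry:
    op_name: str
    mse: float


entries = [OpEntry("conv1", 1.5e-4), OpEntry("fc", 3.2e-5)]
print_table(
    column_mapping={"OP name": "op_name", "MSE": "mse"},  # column header -> attribute path
    table_entries=entries,
    precision=5,  # floats rounded/formatted to 5 digits, per the hunk below
)
```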
@@ -732,7 +743,9 @@ def print_table( None """ from functools import reduce + import numpy as np + table = pt.PrettyTable(min_table_width=40) if title is not None: table.title = title @@ -746,12 +759,12 @@ def print_table( value = reduce(getattr, [entry] + attribute.split(".")) if (isinstance(value, np.floating) or isinstance(value, float)) and isinstance(precision, int): if "e" in str(value): - value = f'{value:.{precision}e}' + value = f"{value:.{precision}e}" else: - value = round(value, precision) + value = round(value, precision) table_row.append(value) table.add_row(table_row) - lines = table.get_string().split('\n') + lines = table.get_string().split("\n") for i in lines: if insert_newlines: i += "\n" @@ -798,16 +811,14 @@ def get_weights_details(workload_location: str) -> list: list of WeightDetails objects """ from neural_compressor.utils.weights_details import WeightsDetails + weights_details = [] input_model_tensors: dict = get_tensors_info(workload_location, model_type="input")["weight"] - optimized_model_tensors: dict = get_tensors_info(workload_location, model_type="optimized")[ - "weight" - ] + optimized_model_tensors: dict = get_tensors_info(workload_location, model_type="optimized")["weight"] common_ops = list(set(input_model_tensors.keys()) & set(optimized_model_tensors.keys())) for op_name in common_ops: - input_model_op_tensors = input_model_tensors[op_name] optimized_model_op_tensors = optimized_model_tensors[op_name] @@ -827,10 +838,10 @@ def get_weights_details(workload_location: str) -> list: def dump_table( - filepath: str, - column_mapping: Dict[str, str], - table_entries: List[Any], - file_type: str = "csv", + filepath: str, + column_mapping: Dict[str, str], + table_entries: List[Any], + file_type: str = "csv", ) -> None: """Dump table data to file. @@ -953,11 +964,15 @@ def print_op_list(workload_location: str): input_model_tensors = get_tensors_info( workload_location, model_type="input", - )["activation"][0] + )[ + "activation" + ][0] optimized_model_tensors = get_tensors_info( workload_location, model_type="optimized", - )["activation"][0] + )[ + "activation" + ][0] op_list = get_op_list(minmax_file_path, input_model_tensors, optimized_model_tensors) sorted_op_list = sorted(op_list, key=lambda x: x.mse, reverse=True) if len(op_list) <= 0: diff --git a/neural_compressor/utils/weights_details.py b/neural_compressor/utils/weights_details.py index d78ae0876fb..3f2a7db5cb2 100644 --- a/neural_compressor/utils/weights_details.py +++ b/neural_compressor/utils/weights_details.py @@ -21,6 +21,7 @@ PRECISION = 5 + class WeightsDetails: """Weights details class.""" diff --git a/neural_compressor/version.py b/neural_compressor/version.py index c107ed24647..b4408882b7d 100644 --- a/neural_compressor/version.py +++ b/neural_compressor/version.py @@ -14,6 +14,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Intel® Neural Compressor: An open-source Python library supporting popular model compression techniques.""" __version__ = "2.3" diff --git a/neural_insights/README.md b/neural_insights/README.md index 5e6bd9eed78..673d886ecfb 100644 --- a/neural_insights/README.md +++ b/neural_insights/README.md @@ -1,121 +1,122 @@ -
    - -Neural Insights -=========================== -
    - -Neural Insights is a web application for easier use of Intel® Neural Compressor [diagnosis](/docs/source/diagnosis.md) feature. -It provides the capability to show the model graph, histograms of weights and activations, quantization configs, etc. -The workflow shows the relationship of Neural Insights and diagnosis. -![workflow](/docs/source/imgs/workflow.jpg) - -## Installation - -Installation of Neural Insights is possible in one of following ways: - -### Install from pypi -```Shell -pip install neural-insights -``` - -### Install from Source - - ```Shell - # Install Neural Compressor - git clone https://github.com/intel/neural-compressor.git - cd neural-compressor - pip install -r requirements.txt - python setup.py install - - # Install Neural Insights - pip install -r neural_insights/requirements.txt - python setup.py install neural_insights - ``` - - -## Getting Started - -### Start the Neural Insights - -To start the Neural Insights server execute `neural_insights` command: - -```shell -neural_insights -``` -The server generates a self-signed TLS certificate and prints instruction how to access the Web UI. - -```text -Neural Insights Server started. - -Open address https://10.11.12.13:5000/?token=338174d13706855fc6924cec7b3a8ae8 - -``` - -Server generated certificate is not trusted by your web browser, you will need to accept usage of such certificate. - - -You might also use additional parameters and settings: -* Neural Insights listens on port 5000. -Make sure that port 5000 is accessible to your browser (you might need to open it in your firewall), -or specify different port that is already opened, for example 8080: - ```shell - neural_insights -p 8080 - ``` - - -* To start the Neural Insights server with your own TLS certificate add `--cert` and `--key` parameters: - - ```shell - neural_insights --cert path_to_cert.crt --key path_to_private_key.key - ``` - -* To start the Neural Insights server without TLS encryption use `--allow-insecure-connections` parameter: - - ```shell - neural_insights --allow-insecure-connections - ``` - - This enables access to the server from any machine in your local network (or the whole Internet if your server is exposed to it). - - You are forfeiting security, confidentiality and integrity of all client-server communication. Your server is exposed to external threats. -### Quantization with Python API - -```shell -# Install Intel Neural Compressor and TensorFlow -pip install neural-compressor -pip install neural-insights -pip install tensorflow -# Prepare fp32 model -wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/mobilenet_v1_1.0_224_frozen.pb -``` - -```python -from neural_compressor import Metric -from neural_compressor.config import PostTrainingQuantConfig -from neural_compressor.data import DataLoader -from neural_compressor.data import Datasets - -top1 = Metric(name="topk", k=1) -dataset = Datasets('tensorflow')['dummy'](shape=(1, 224, 224, 3)) -dataloader = DataLoader(framework='tensorflow', dataset=dataset) - -from neural_compressor.quantization import fit -q_model = fit( - model="./mobilenet_v1_1.0_224_frozen.pb", - conf=PostTrainingQuantConfig(diagnosis=True), - calib_dataloader=dataloader, - eval_dataloader=dataloader, - eval_metric=top1 -) -``` - -When the quantization is started, the workload should appear on the Neural Insights page and successively, new information should be available while quantization is in progress (such as weights distribution and accuracy data). 
- -> Note that above example uses dummy data which is used to describe usage of Neural Insights. For diagnosis purposes you should use real dataset specific for your use case. - -## Step by Step Diagnosis Example -Refer to [Step by Step Diagnosis Example with TensorFlow](https://github.com/intel/neural-compressor/tree/master/neural_insights/docs/source/tf_accuracy_debug.md) and [Step by Step Diagnosis Example with ONNXRT](https://github.com/intel/neural-compressor/tree/master/neural_insights/docs/source/onnx_accuracy_debug.md) to get started with some basic quantization accuracy diagnostic skills. - -## Research Collaborations - -Welcome to raise any interesting research ideas on model compression techniques and feel free to reach us (inc.maintainers@intel.com). Look forward to our collaborations on Neural Insights! +
    + +Neural Insights +=========================== +
    + +Neural Insights is a web application for easier use of Intel® Neural Compressor [diagnosis](/docs/source/diagnosis.md) feature. +It provides the capability to show the model graph, histograms of weights and activations, quantization configs, etc. +The workflow shows the relationship of Neural Insights and diagnosis. +![workflow](/docs/source/imgs/workflow.jpg) + +## Installation + +Installation of Neural Insights is possible in one of following ways: + +### Install from pypi +```Shell +pip install neural-insights +``` + +### Install from Source + + ```Shell + # Install Neural Compressor + git clone https://github.com/intel/neural-compressor.git + cd neural-compressor + pip install -r requirements.txt + python setup.py install + + # Install Neural Insights + pip install -r neural_insights/requirements.txt + python setup.py install neural_insights + ``` + + +## Getting Started + +### Start the Neural Insights + +To start the Neural Insights server execute `neural_insights` command: + +```shell +neural_insights +``` +The server generates a self-signed TLS certificate and prints instruction how to access the Web UI. + +```text +Neural Insights Server started. + +Open address https://10.11.12.13:5000/?token=338174d13706855fc6924cec7b3a8ae8 + +``` + +Server generated certificate is not trusted by your web browser, you will need to accept usage of such certificate. + + +You might also use additional parameters and settings: +* Neural Insights listens on port 5000. +Make sure that port 5000 is accessible to your browser (you might need to open it in your firewall), +or specify different port that is already opened, for example 8080: + ```shell + neural_insights -p 8080 + ``` + + +* To start the Neural Insights server with your own TLS certificate add `--cert` and `--key` parameters: + + ```shell + neural_insights --cert path_to_cert.crt --key path_to_private_key.key + ``` + +* To start the Neural Insights server without TLS encryption use `--allow-insecure-connections` parameter: + + ```shell + neural_insights --allow-insecure-connections + ``` + + This enables access to the server from any machine in your local network (or the whole Internet if your server is exposed to it). + + You are forfeiting security, confidentiality and integrity of all client-server communication. Your server is exposed to external threats. +### Quantization with Python API + +```shell +# Install Intel Neural Compressor and TensorFlow +pip install neural-compressor +pip install neural-insights +pip install tensorflow +# Prepare fp32 model +wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/mobilenet_v1_1.0_224_frozen.pb +``` + +```python +from neural_compressor import Metric +from neural_compressor.config import PostTrainingQuantConfig +from neural_compressor.data import DataLoader +from neural_compressor.data import Datasets + +top1 = Metric(name="topk", k=1) +dataset = Datasets("tensorflow")["dummy"](shape=(1, 224, 224, 3)) +dataloader = DataLoader(framework="tensorflow", dataset=dataset) + +from neural_compressor.quantization import fit + +q_model = fit( + model="./mobilenet_v1_1.0_224_frozen.pb", + conf=PostTrainingQuantConfig(diagnosis=True), + calib_dataloader=dataloader, + eval_dataloader=dataloader, + eval_metric=top1, +) +``` + +When the quantization is started, the workload should appear on the Neural Insights page and successively, new information should be available while quantization is in progress (such as weights distribution and accuracy data). 
+ +> Note that above example uses dummy data which is used to describe usage of Neural Insights. For diagnosis purposes you should use real dataset specific for your use case. + +## Step by Step Diagnosis Example +Refer to [Step by Step Diagnosis Example with TensorFlow](https://github.com/intel/neural-compressor/tree/master/neural_insights/docs/source/tf_accuracy_debug.md) and [Step by Step Diagnosis Example with ONNXRT](https://github.com/intel/neural-compressor/tree/master/neural_insights/docs/source/onnx_accuracy_debug.md) to get started with some basic quantization accuracy diagnostic skills. + +## Research Collaborations + +Welcome to raise any interesting research ideas on model compression techniques and feel free to reach us (inc.maintainers@intel.com). Look forward to our collaborations on Neural Insights! diff --git a/neural_insights/bin/neural_insights.py b/neural_insights/bin/neural_insights.py index c2c3e5a55a4..388038027cf 100644 --- a/neural_insights/bin/neural_insights.py +++ b/neural_insights/bin/neural_insights.py @@ -13,7 +13,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """WSGI Web Server.""" import gevent.monkey @@ -22,6 +21,7 @@ def execute(): from neural_insights.main import main + main() diff --git a/neural_insights/components/diagnosis/diagnosis.py b/neural_insights/components/diagnosis/diagnosis.py index a3d47c5871a..db2eae4c5e5 100644 --- a/neural_insights/components/diagnosis/diagnosis.py +++ b/neural_insights/components/diagnosis/diagnosis.py @@ -89,11 +89,8 @@ def get_op_list(self) -> List[dict]: op_list: List[dict] = [] input_model_tensors: dict = self.get_tensors_info(model_type="input")["activation"][0] - optimized_model_tensors: dict = self.get_tensors_info(model_type="optimized")[ - "activation" - ][0] + optimized_model_tensors: dict = self.get_tensors_info(model_type="optimized")["activation"][0] for op_name, min_max in min_max_data.items(): - mse = self.calculate_mse(op_name, input_model_tensors, optimized_model_tensors) if mse is None or np.isnan(mse): continue @@ -116,15 +113,12 @@ def get_weights_details(self, inspect_type: str) -> List[WeightsDetails]: min_max_data: dict = pickle.load(min_max_file) input_model_tensors: dict = self.get_tensors_info(model_type="input")[inspect_type] - optimized_model_tensors: dict = self.get_tensors_info(model_type="optimized")[ - inspect_type - ] + optimized_model_tensors: dict = self.get_tensors_info(model_type="optimized")[inspect_type] if inspect_type == "activation": input_model_tensors = input_model_tensors[0] optimized_model_tensors = optimized_model_tensors[0] common_ops = list(set(input_model_tensors.keys()) & set(optimized_model_tensors.keys())) for op_name in common_ops: - input_model_op_tensors = input_model_tensors[op_name] optimized_model_op_tensors = optimized_model_tensors[op_name] @@ -132,8 +126,9 @@ def get_weights_details(self, inspect_type: str) -> List[WeightsDetails]: continue if isinstance(input_model_op_tensors, dict): - for (input_op_name, input_op_values), (optimized_op_name, optimized_op_values) in\ - zip(input_model_op_tensors.items(), optimized_model_op_tensors.items()): + for (input_op_name, input_op_values), (optimized_op_name, optimized_op_values) in zip( + input_model_op_tensors.items(), optimized_model_op_tensors.items() + ): if input_op_values.ndim != 4 or optimized_op_values.ndim != 4: continue @@ -237,9 +232,7 @@ def mse_metric_gap(fp32_tensor: 
Any, dequantize_tensor: Any) -> float: # Normalize tensor values fp32_tensor = (fp32_tensor - fp32_min) / (fp32_max - fp32_min) - dequantize_tensor = (dequantize_tensor - dequantize_min) / ( - dequantize_max - dequantize_min - ) + dequantize_tensor = (dequantize_tensor - dequantize_min) / (dequantize_max - dequantize_min) diff_tensor = fp32_tensor - dequantize_tensor euclidean_dist = np.sum(diff_tensor**2) # type: ignore @@ -248,6 +241,7 @@ def mse_metric_gap(fp32_tensor: Any, dequantize_tensor: Any) -> float: def get_weights_data(self, op_name: str, channel_normalization=True) -> list: """Get weights data for optimized model.""" from PIL import Image + check_module("numpy") import numpy as np diff --git a/neural_insights/components/diagnosis/factory.py b/neural_insights/components/diagnosis/factory.py index 353cb7534bf..6aa3baadfb6 100644 --- a/neural_insights/components/diagnosis/factory.py +++ b/neural_insights/components/diagnosis/factory.py @@ -14,13 +14,8 @@ # limitations under the License. """Diagnosis class factory.""" from neural_insights.components.diagnosis.diagnosis import Diagnosis -from neural_insights.components.diagnosis.onnx_diagnosis.onnxrt_diagnosis import ( - OnnxRtDiagnosis, -) -from neural_insights.components.diagnosis.tensorflow_diagnosis.tensorflow_diagnosis import ( - TensorflowDiagnosis, -) - +from neural_insights.components.diagnosis.onnx_diagnosis.onnxrt_diagnosis import OnnxRtDiagnosis +from neural_insights.components.diagnosis.tensorflow_diagnosis.tensorflow_diagnosis import TensorflowDiagnosis from neural_insights.components.workload_manager.workload import Workload from neural_insights.utils.consts import Frameworks from neural_insights.utils.exceptions import InternalException diff --git a/neural_insights/components/diagnosis/weights_details.py b/neural_insights/components/diagnosis/weights_details.py index 7c09c71553e..68a64662668 100644 --- a/neural_insights/components/diagnosis/weights_details.py +++ b/neural_insights/components/diagnosis/weights_details.py @@ -54,6 +54,7 @@ def __init__(self, tensor_data) -> None: """Initialize Weights details.""" check_module("numpy") import numpy as np + super().__init__() self.min: float = np.min(tensor_data) self.max: float = np.max(tensor_data) @@ -62,8 +63,8 @@ def __init__(self, tensor_data) -> None: self.var: float = np.var(tensor_data) def serialize( - self, - serialization_type: str = "default", + self, + serialization_type: str = "default", ) -> Dict[str, Any]: """Serialize Weights details.""" return { diff --git a/neural_insights/components/graph/collapser.py b/neural_insights/components/graph/collapser.py index af6f24c04f2..40d4ac2f038 100644 --- a/neural_insights/components/graph/collapser.py +++ b/neural_insights/components/graph/collapser.py @@ -74,9 +74,7 @@ def _add_edges_to_collapsed_graph(self, collapsed_graph: Graph, graph: Graph) -> # skip edges inside collapsed node continue - collapsed_edge_designation = ( - f"collapsed edge between {source_node_id} ans {target_node_id}" - ) + collapsed_edge_designation = f"collapsed edge between {source_node_id} ans {target_node_id}" is_collapsed_already_added = collapsed_edges_repository.get( collapsed_edge_designation, False, @@ -131,4 +129,4 @@ def _unprepare_group_name(self, name: str) -> str: if not name.startswith(self.GROUP_NAME_PREFIX): return name - return name[len(self.GROUP_NAME_PREFIX):] + return name[len(self.GROUP_NAME_PREFIX) :] diff --git a/neural_insights/components/graph/graph.py b/neural_insights/components/graph/graph.py index 
2c470dbcc39..3d3093bb92f 100644 --- a/neural_insights/components/graph/graph.py +++ b/neural_insights/components/graph/graph.py @@ -53,8 +53,7 @@ def add_edge(self, source_id: str, target_id: str) -> bool: target = self.get_node(target_id) except NotFoundException as err: log.debug( - f"Got an error: {str(err)} while attempted " - f"to add an Edge from {source_id} to {target_id}", + f"Got an error: {str(err)} while attempted " f"to add an Edge from {source_id} to {target_id}", ) return False self._edges.append(Edge(source, target)) diff --git a/neural_insights/components/model/onnxrt/model.py b/neural_insights/components/model/onnxrt/model.py index a3e5a60fa58..a421966f501 100644 --- a/neural_insights/components/model/onnxrt/model.py +++ b/neural_insights/components/model/onnxrt/model.py @@ -43,24 +43,13 @@ def __init__(self, path: str) -> None: def domain(self) -> Domain: """Get model domain.""" try: - input_node_names = { - node.name for node in self.nc_model_instance.graph().input # pylint: disable=E1101 - } - node_names = { - node.name for node in self.nc_model_instance.nodes() # pylint: disable=E1101 - } - boundary_nodes = [ - node.name for node in self.nc_model_instance.graph().input # pylint: disable=E1101 - ] + input_node_names = {node.name for node in self.nc_model_instance.graph().input} # pylint: disable=E1101 + node_names = {node.name for node in self.nc_model_instance.nodes()} # pylint: disable=E1101 + boundary_nodes = [node.name for node in self.nc_model_instance.graph().input] # pylint: disable=E1101 boundary_nodes.extend( - [ - node.name - for node in self.nc_model_instance.graph().output # pylint: disable=E1101 - ], + [node.name for node in self.nc_model_instance.graph().output], # pylint: disable=E1101 ) - op_names = { - node.op_type for node in self.nc_model_instance.nodes() # pylint: disable=E1101 - } + op_names = {node.op_type for node in self.nc_model_instance.nodes()} # pylint: disable=E1101 except Exception: return Domain() @@ -152,9 +141,7 @@ def input_shape(self) -> Shape: for input_node in self.filtered_input_nodes: node_dict = MessageToDict(input_node) - dimensions = ( - node_dict.get("type", {}).get("tensorType", {}).get("shape", {}).get("dim", []) - ) + dimensions = node_dict.get("type", {}).get("tensorType", {}).get("shape", {}).get("dim", []) input_shape = [] for dim in dimensions: if dim.get("dimValue", None) is not None: diff --git a/neural_insights/components/model/tensorflow/utils.py b/neural_insights/components/model/tensorflow/utils.py index b70c95c1337..c0cf391a4a6 100644 --- a/neural_insights/components/model/tensorflow/utils.py +++ b/neural_insights/components/model/tensorflow/utils.py @@ -54,9 +54,7 @@ def _convert_shape_to_list( if tf_module.__version__ >= "2.0.0": shape = [item if item is not None else fix_dynamic_shape for item in _shape] else: - shape = [ - item.value if item.value is not None else fix_dynamic_shape for item in _shape - ] + shape = [item.value if item.value is not None else fix_dynamic_shape for item in _shape] # if shape dimension > 1, suppose first dimension is batch-size if isinstance(shape, list) and len(shape) > 1: return shape[1:] diff --git a/neural_insights/components/workload_manager/quantization_workload.py b/neural_insights/components/workload_manager/quantization_workload.py index da6d60007c9..63979d61a57 100644 --- a/neural_insights/components/workload_manager/quantization_workload.py +++ b/neural_insights/components/workload_manager/quantization_workload.py @@ -13,7 +13,7 @@ # See the License for the specific 
language governing permissions and # limitations under the License. """The quantization_workload module for Neural Insights quantization workloads.""" -from typing import Optional, Dict, Any +from typing import Any, Dict, Optional from neural_insights.components.workload_manager.workload import Workload from neural_insights.utils.json_serializer import JsonSerializer diff --git a/neural_insights/components/workload_manager/workload.py b/neural_insights/components/workload_manager/workload.py index 17cce059d89..9d6241355d4 100644 --- a/neural_insights/components/workload_manager/workload.py +++ b/neural_insights/components/workload_manager/workload.py @@ -16,10 +16,10 @@ import os from datetime import datetime -from typing import Optional, Dict, Any +from typing import Any, Dict, Optional from uuid import uuid4 -from neural_insights.utils.consts import WorkloadModes, Frameworks, WorkloadStatus +from neural_insights.utils.consts import Frameworks, WorkloadModes, WorkloadStatus from neural_insights.utils.exceptions import InternalException from neural_insights.utils.json_serializer import JsonSerializer from neural_insights.utils.utils import get_framework_from_path diff --git a/neural_insights/components/workload_manager/workload_manager.py b/neural_insights/components/workload_manager/workload_manager.py index 7c887906af0..8f65f9523ce 100644 --- a/neural_insights/components/workload_manager/workload_manager.py +++ b/neural_insights/components/workload_manager/workload_manager.py @@ -20,11 +20,10 @@ from os import PathLike from typing import List, Optional -from neural_insights.components.workload_manager.quantization_workload import \ - QuantizationWorkload, AccuracyData +from neural_insights.components.workload_manager.quantization_workload import AccuracyData, QuantizationWorkload from neural_insights.components.workload_manager.workload import Workload -from neural_insights.utils.consts import WORKDIR_LOCATION, WorkloadStatus, WorkloadModes -from neural_insights.utils.exceptions import InternalException, ClientErrorException +from neural_insights.utils.consts import WORKDIR_LOCATION, WorkloadModes, WorkloadStatus +from neural_insights.utils.exceptions import ClientErrorException, InternalException from neural_insights.utils.json_serializer import JsonSerializer from neural_insights.utils.logger import log from neural_insights.utils.singleton import Singleton @@ -64,10 +63,10 @@ def update_workload_status(self, workload_uuid: str, status: str) -> None: self.dump_config() def update_workload_accuracy_data( - self, - workload_uuid: str, - baseline_accuracy: float, - optimized_accuracy: float, + self, + workload_uuid: str, + baseline_accuracy: float, + optimized_accuracy: float, ) -> None: """Update workload status.""" workload = self.get_workload(workload_uuid) @@ -87,8 +86,8 @@ def update_workload_accuracy_data( @staticmethod def validate_status_flow( - prev_status: WorkloadStatus, - new_status: WorkloadStatus, + prev_status: WorkloadStatus, + new_status: WorkloadStatus, ) -> None: """Validate status flow.""" status_indices = { @@ -101,7 +100,7 @@ def validate_status_flow( # Initialize adjacency matrix with zeros adjacency_matrix = [[0 for _ in status_indices] for _ in status_indices] - for (source, target) in status_connections: + for source, target in status_connections: adjacency_matrix[source][target] = 1 prev_status_indice = status_indices[prev_status] diff --git a/neural_insights/docs/source/onnx_accuracy_debug.md b/neural_insights/docs/source/onnx_accuracy_debug.md index 
9e1e4b993cb..78ddece14d3 100644 --- a/neural_insights/docs/source/onnx_accuracy_debug.md +++ b/neural_insights/docs/source/onnx_accuracy_debug.md @@ -28,10 +28,10 @@ Generate a quantized model. ```python onnx_model = onnx.load(input_model) calib_dataset = IncDataset(eval_dataset, onnx_model) -config = PostTrainingQuantConfig(approach='static', quant_format="QOperator") -q_model = quantization.fit(onnx_model, - config, - calib_dataloader=DataLoader(framework='onnxruntime', dataset=calib_dataset)) +config = PostTrainingQuantConfig(approach="static", quant_format="QOperator") +q_model = quantization.fit( + onnx_model, config, calib_dataloader=DataLoader(framework="onnxruntime", dataset=calib_dataset) +) ``` Execute benchmark to get the F1 score of both FP32 and INT8 models and then compute the relative accuracy ratio. @@ -45,11 +45,12 @@ fp32 f1 = 0.9049, int8 f1 = 0.2989, accuracy ratio = -66.9631% In this section, the diagnosis tool is used for debugging to achieve higher INT8 model accuracy. We need to set `diagnosis` parameter to `True` as shown below. ```python -config = PostTrainingQuantConfig(approach="static", quant_format="QOperator", quant_level=1, diagnosis=True) # set 'diagnosis' to True -q_model = quantization.fit(onnx_model, - config, - eval_func=eval_func, - calib_dataloader=DataLoader(framework='onnxruntime', dataset=calib_dataset)) +config = PostTrainingQuantConfig( + approach="static", quant_format="QOperator", quant_level=1, diagnosis=True +) # set 'diagnosis' to True +q_model = quantization.fit( + onnx_model, config, eval_func=eval_func, calib_dataloader=DataLoader(framework="onnxruntime", dataset=calib_dataset) +) ``` The diagnosis tool will output `Activations summary` and `Weights summary` in terminal. @@ -57,21 +58,22 @@ For easy to check, here we reload them to .csv files as shown below. ```python import glob import pandas as pd -pd.set_option('display.max_rows',None) -pd.set_option('display.max_columns',None) + +pd.set_option("display.max_rows", None) +pd.set_option("display.max_columns", None) subfolders = glob.glob("./nc_workspace" + "/*/") subfolders.sort(key=os.path.getmtime, reverse=True) if subfolders: activations_table = os.path.join(subfolders[0], "activations_table.csv") weights_table = os.path.join(subfolders[0], "weights_table.csv") - + activations_table = pd.read_csv(activations_table) weights_table = pd.read_csv(weights_table) - + print("Activations summary") display(activations_table) - + print("\nWeights summary") display(weights_table) ``` @@ -104,17 +106,22 @@ Refer to [diagnosis.md](https://github.com/intel/neural-compressor/blob/master/d ```python from neural_compressor.utils.constant import FP32 -config = PostTrainingQuantConfig(approach="static", - quant_format="QOperator", - op_name_dict={"/layoutlmv3/encoder/layer.\d+/output/dense/MatMul":FP32, - "/layoutlmv3/encoder/layer.\d+/output/Add":FP32}) -q_model = quantization.fit(onnx_model, - config, - calib_dataloader=DataLoader(framework='onnxruntime', dataset=calib_dataset)) + +config = PostTrainingQuantConfig( + approach="static", + quant_format="QOperator", + op_name_dict={ + "/layoutlmv3/encoder/layer.\d+/output/dense/MatMul": FP32, + "/layoutlmv3/encoder/layer.\d+/output/Add": FP32, + }, +) +q_model = quantization.fit( + onnx_model, config, calib_dataloader=DataLoader(framework="onnxruntime", dataset=calib_dataset) +) q_model.save(output_model) ``` Execute benchmark on the new quantized model again and the accuracy ratio is improved to <1%. 
``` fp32 f1 = 0.9049, int8 f1 = 0.8981, accuracy ratio = -0.7502% -``` \ No newline at end of file +``` diff --git a/neural_insights/docs/source/tf_accuracy_debug.md b/neural_insights/docs/source/tf_accuracy_debug.md index be2622e1745..8422aa57f11 100644 --- a/neural_insights/docs/source/tf_accuracy_debug.md +++ b/neural_insights/docs/source/tf_accuracy_debug.md @@ -103,8 +103,7 @@ As is shown in the chart, the distribution of weights often concentrates in a sm Therefore, you can disable this Op: ```python -op_name_dict = {'v0/cg/conv0/conv2d/Conv2D': { - 'activation': {'dtype': ['fp32']}}} +op_name_dict = {"v0/cg/conv0/conv2d/Conv2D": {"activation": {"dtype": ["fp32"]}}} conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100], op_name_dict=op_name_dict) ``` diff --git a/neural_insights/main.py b/neural_insights/main.py index f9e2e405909..e139cbd38fa 100644 --- a/neural_insights/main.py +++ b/neural_insights/main.py @@ -13,7 +13,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """WSGI Web Server.""" import sys @@ -44,8 +43,7 @@ def main() -> None: if configuration.allow_insecure_connections: log.warning( - "Running in insecure mode.\n" - "Everyone in your network may attempt to access this server.\n", + "Running in insecure mode.\n" "Everyone in your network may attempt to access this server.\n", ) log.info(f"Open address {configuration.get_url()}") diff --git a/neural_insights/ni.py b/neural_insights/ni.py index 57f28051435..4550ad2c4a6 100644 --- a/neural_insights/ni.py +++ b/neural_insights/ni.py @@ -16,23 +16,21 @@ """Neural Insights main class.""" from os import PathLike -from neural_insights.components.workload_manager.quantization_workload import \ - QuantizationWorkload -from neural_insights.components.workload_manager.workload_manager import WorkloadManager +from neural_insights.components.workload_manager.quantization_workload import QuantizationWorkload from neural_insights.components.workload_manager.workload import Workload +from neural_insights.components.workload_manager.workload_manager import WorkloadManager from neural_insights.utils.consts import WorkloadModes class NeuralInsights: - def __init__(self, workdir_location: PathLike) -> None: self.workdir_location: PathLike = workdir_location def add_workload( - self, - workload_location: str, - model_path: str, - workload_mode: WorkloadModes, + self, + workload_location: str, + model_path: str, + workload_mode: WorkloadModes, ) -> str: """Add workload to Neural Insights.""" if workload_mode == WorkloadModes.QUANTIZATION: @@ -54,10 +52,10 @@ def update_workload_status(self, workload_uuid: str, status: str) -> None: workload_manager.update_workload_status(workload_uuid, status) def update_workload_accuracy_data( - self, - workload_uuid: str, - baseline_accuracy: float, - optimized_accuracy: float, + self, + workload_uuid: str, + baseline_accuracy: float, + optimized_accuracy: float, ) -> None: """Update accuracy data of specified workload.""" workload_manager = WorkloadManager(workdir_location=self.workdir_location) diff --git a/neural_insights/requirements.txt b/neural_insights/requirements.txt index 0c43c687ef9..9b44bc2d804 100644 --- a/neural_insights/requirements.txt +++ b/neural_insights/requirements.txt @@ -1,8 +1,8 @@ -neural_compressor>=2.2 +cryptography Flask Flask-Cors Flask-SocketIO gevent gevent-websocket -cryptography +neural_compressor>=2.2 pywin32; sys_platform != 
'linux' diff --git a/neural_insights/utils/consts.py b/neural_insights/utils/consts.py index cad373a0cc6..e74fa43aea5 100644 --- a/neural_insights/utils/consts.py +++ b/neural_insights/utils/consts.py @@ -12,7 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Constant values.""" import os diff --git a/neural_insights/utils/exceptions.py b/neural_insights/utils/exceptions.py index d6b056036c0..80baa1057b0 100644 --- a/neural_insights/utils/exceptions.py +++ b/neural_insights/utils/exceptions.py @@ -12,7 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Domain Exceptions.""" diff --git a/neural_insights/utils/expiring_dict.py b/neural_insights/utils/expiring_dict.py index 10e07976510..69ad958715f 100644 --- a/neural_insights/utils/expiring_dict.py +++ b/neural_insights/utils/expiring_dict.py @@ -41,7 +41,7 @@ def __init__(self, initial_value: Optional[dict] = None, ttl: int = 120) -> None if initial_value is None: initial_value = {} - for (key, value) in initial_value.items(): + for key, value in initial_value.items(): self[key] = value def __setitem__(self, key: str, item: Any) -> None: diff --git a/neural_insights/utils/logger.py b/neural_insights/utils/logger.py index 076ec9dd907..578bee33ae0 100644 --- a/neural_insights/utils/logger.py +++ b/neural_insights/utils/logger.py @@ -15,7 +15,6 @@ """Neural Insights Logger module.""" import logging - log = logging.getLogger("Neural Insights") handler = logging.StreamHandler() diff --git a/neural_insights/utils/utils.py b/neural_insights/utils/utils.py index 8c372f7180d..7a8da311b6f 100644 --- a/neural_insights/utils/utils.py +++ b/neural_insights/utils/utils.py @@ -17,7 +17,7 @@ import socket from importlib.util import find_spec from pathlib import Path -from typing import Union, Optional +from typing import Optional, Union from neural_insights.utils.exceptions import ClientErrorException, NotFoundException @@ -61,7 +61,10 @@ def get_size(path: str, unit: str = "MB", add_unit: bool = False) -> Union[str, def check_module(module_name: str) -> None: - """Check if module exists. Raise exception when not found.""" + """Check if module exists. + + Raise exception when not found. + """ if module_name == "onnxrt": module_name = "onnxruntime" if module_name == "pytorch": diff --git a/neural_insights/web/communication.py b/neural_insights/web/communication.py index e6e8527e515..5d335442080 100644 --- a/neural_insights/web/communication.py +++ b/neural_insights/web/communication.py @@ -12,7 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Objects to communicate between domain logic and outside layers.""" from queue import Queue diff --git a/neural_insights/web/configuration.py b/neural_insights/web/configuration.py index 16e6d2f2248..fabb92f92bf 100644 --- a/neural_insights/web/configuration.py +++ b/neural_insights/web/configuration.py @@ -12,7 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- """Configuration module for Neural Insights server.""" import argparse @@ -23,9 +22,9 @@ import sys from typing import Dict -from neural_compressor.utils.utility import singleton from numpy.random import randint +from neural_compressor.utils.utility import singleton from neural_insights.utils.consts import WORKDIR_LOCATION from neural_insights.utils.exceptions import NotFoundException from neural_insights.utils.logger import log diff --git a/neural_insights/web/exceptions.py b/neural_insights/web/exceptions.py index e711a8fb31e..9d8340527d7 100644 --- a/neural_insights/web/exceptions.py +++ b/neural_insights/web/exceptions.py @@ -12,7 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Web Exceptions.""" from neural_insights.utils.exceptions import NotFoundException diff --git a/neural_insights/web/router.py b/neural_insights/web/router.py index bb8caa138c5..e59b763afa4 100644 --- a/neural_insights/web/router.py +++ b/neural_insights/web/router.py @@ -12,7 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Connector between api.py and components.""" import json import os @@ -218,8 +217,7 @@ def get_histogram(data: Dict[str, Any]) -> list: parsed_histogram_type: Optional[str] = histogram_type_map.get(histogram_type, None) if parsed_histogram_type is None: raise ClientErrorException( - f"Histogram type not supported. " - f"Use one of following: {histogram_type_map.keys()}", + f"Histogram type not supported. " f"Use one of following: {histogram_type_map.keys()}", ) histogram_data = diagnosis.get_histogram_data(op_name, parsed_histogram_type) diff --git a/neural_insights/web/server.py b/neural_insights/web/server.py index 1c701bee651..d1cc43aef54 100644 --- a/neural_insights/web/server.py +++ b/neural_insights/web/server.py @@ -12,7 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Main endpoint for GUI.""" import os import time diff --git a/neural_insights/web/service/__init__.py b/neural_insights/web/service/__init__.py index e5c3ec684e0..51720d2b907 100644 --- a/neural_insights/web/service/__init__.py +++ b/neural_insights/web/service/__init__.py @@ -12,5 +12,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Web services.""" diff --git a/neural_insights/web/service/request_data_processor.py b/neural_insights/web/service/request_data_processor.py index 326f73102a3..e6cc24f9529 100644 --- a/neural_insights/web/service/request_data_processor.py +++ b/neural_insights/web/service/request_data_processor.py @@ -12,7 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- """Request data processor.""" from typing import Any, Dict diff --git a/neural_insights/web/service/response_generator.py b/neural_insights/web/service/response_generator.py index 02c39f6d897..f782c91824b 100644 --- a/neural_insights/web/service/response_generator.py +++ b/neural_insights/web/service/response_generator.py @@ -12,7 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Response generator.""" from werkzeug.wrappers import Response diff --git a/neural_solution/README.md b/neural_solution/README.md index 578c4b5143d..bb88539e4af 100644 --- a/neural_solution/README.md +++ b/neural_solution/README.md @@ -54,5 +54,3 @@ python setup.py neural_solution install # Contact Please contact us at [inc.maintainers@intel.com](mailto:inc.maintainers@intel.com) for any Neural Solution related question. - - diff --git a/neural_solution/__init__.py b/neural_solution/__init__.py index 300d6010498..67e0ac52f38 100644 --- a/neural_solution/__init__.py +++ b/neural_solution/__init__.py @@ -11,6 +11,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Neural Solution.""" from neural_solution.utils import logger diff --git a/neural_solution/backend/__init__.py b/neural_solution/backend/__init__.py index c93847d4e0b..0fea45c0091 100644 --- a/neural_solution/backend/__init__.py +++ b/neural_solution/backend/__init__.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Neural Solution backend.""" from neural_solution.backend.cluster import Cluster from neural_solution.backend.result_monitor import ResultMonitor diff --git a/neural_solution/backend/cluster.py b/neural_solution/backend/cluster.py index 3c3d17e8ba2..f742bde0726 100644 --- a/neural_solution/backend/cluster.py +++ b/neural_solution/backend/cluster.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Neural Solution cluster.""" import sqlite3 import threading @@ -104,7 +103,7 @@ def get_free_socket(self, num_sockets: int) -> List[str]: booked_socket_lst = [] # detect and append new resource - self.cursor.execute(f"SELECT id, name, total_sockets FROM cluster where status = 'join'") + self.cursor.execute("SELECT id, name, total_sockets FROM cluster where status = 'join'") new_node_lst = self.cursor.fetchall() for index, name, total_sockets in new_node_lst: sql = """ @@ -112,7 +111,7 @@ def get_free_socket(self, num_sockets: int) -> List[str]: SET status = ? WHERE id = ? 
""" - self.cursor.execute(sql, ('alive', index)) + self.cursor.execute(sql, ("alive", index)) self.conn.commit() self.socket_queue += [str(index) + " " + name] * total_sockets logger.info(f"[Cluster] add new node-id {index} to socket_queue: {self.socket_queue}") @@ -166,7 +165,12 @@ def initial_cluster_from_node_lst(self, node_lst): self.cursor.execute( r"insert into cluster(name, node_info, status, free_sockets, busy_sockets, total_sockets)" + "values ('{}', '{}', '{}', {}, {}, {})".format( - node.name, repr(node).replace("Node", f"Node{index+1}"), "alive", node.num_sockets, 0, node.num_sockets + node.name, + repr(node).replace("Node", f"Node{index+1}"), + "alive", + node.num_sockets, + 0, + node.num_sockets, ) ) diff --git a/neural_solution/backend/result_monitor.py b/neural_solution/backend/result_monitor.py index 0972889f890..fb99a34f409 100644 --- a/neural_solution/backend/result_monitor.py +++ b/neural_solution/backend/result_monitor.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Neural Solution result monitor.""" import socket diff --git a/neural_solution/backend/runner.py b/neural_solution/backend/runner.py index 8162b7b524f..ebf013137c8 100644 --- a/neural_solution/backend/runner.py +++ b/neural_solution/backend/runner.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Main backend runner.""" import argparse import threading diff --git a/neural_solution/backend/scheduler.py b/neural_solution/backend/scheduler.py index 1a494122cbf..15eb3b05f98 100644 --- a/neural_solution/backend/scheduler.py +++ b/neural_solution/backend/scheduler.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Neural Solution scheduler.""" import glob import json @@ -89,7 +88,7 @@ def prepare_env(self, task: Task): if requirement == [""]: return env_prefix # Construct the command to list all the conda environments - cmd = f"conda env list" + cmd = "conda env list" output = subprocess.getoutput(cmd) # Parse the output to get a list of conda environment names env_list = [line.strip().split()[0] for line in output.splitlines()[2:]] diff --git a/neural_solution/backend/task.py b/neural_solution/backend/task.py index 00a6ec22a9a..669f1e4a99b 100644 --- a/neural_solution/backend/task.py +++ b/neural_solution/backend/task.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Neural Solution task.""" diff --git a/neural_solution/backend/task_db.py b/neural_solution/backend/task_db.py index 35708d63264..08636c12b8c 100644 --- a/neural_solution/backend/task_db.py +++ b/neural_solution/backend/task_db.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- """Neural Solution task database.""" import sqlite3 import threading diff --git a/neural_solution/backend/task_monitor.py b/neural_solution/backend/task_monitor.py index c6aa3745072..02da9124369 100644 --- a/neural_solution/backend/task_monitor.py +++ b/neural_solution/backend/task_monitor.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Neural Solution task monitor.""" import socket diff --git a/neural_solution/backend/utils/__init__.py b/neural_solution/backend/utils/__init__.py index a716ddb1a6e..108ea605857 100644 --- a/neural_solution/backend/utils/__init__.py +++ b/neural_solution/backend/utils/__init__.py @@ -11,5 +11,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Neural Solution backend utils.""" diff --git a/neural_solution/backend/utils/utility.py b/neural_solution/backend/utils/utility.py index 31a65984b07..48c3703cd9a 100644 --- a/neural_solution/backend/utils/utility.py +++ b/neural_solution/backend/utils/utility.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Neural Solution backend utils.""" import json import os diff --git a/neural_solution/bin/__init__.py b/neural_solution/bin/__init__.py index 63ed8c79470..7f80f09b1f0 100644 --- a/neural_solution/bin/__init__.py +++ b/neural_solution/bin/__init__.py @@ -11,5 +11,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Neural Solution.""" diff --git a/neural_solution/bin/neural_solution.py b/neural_solution/bin/neural_solution.py index dccaa9bfdd8..a562313febe 100644 --- a/neural_solution/bin/neural_solution.py +++ b/neural_solution/bin/neural_solution.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Neural Solution entry point.""" diff --git a/neural_solution/config.py b/neural_solution/config.py index 8c30b4c6118..8ab52da571f 100644 --- a/neural_solution/config.py +++ b/neural_solution/config.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Config for both frontend and backend.""" INTERVAL_TIME_BETWEEN_DISPATCH_TASK = 3 diff --git a/neural_solution/docs/source/README.md b/neural_solution/docs/source/README.md index 2e599268405..cf5e2cc87f7 100644 --- a/neural_solution/docs/source/README.md +++ b/neural_solution/docs/source/README.md @@ -167,4 +167,4 @@ This command is used to remove nodes from the cluster based on the IDs obtained ```shell neural_solution cluster --remove ``` -Please note that the above commands are just examples and may require additional parameters or configurations based on your specific setup. \ No newline at end of file +Please note that the above commands are just examples and may require additional parameters or configurations based on your specific setup. 
diff --git a/neural_solution/docs/source/description_api.md b/neural_solution/docs/source/description_api.md index e63cc41bf62..7940a6c2768 100644 --- a/neural_solution/docs/source/description_api.md +++ b/neural_solution/docs/source/description_api.md @@ -178,4 +178,4 @@ curl -X GET {host_ip}:port/description | Status Code | Description | Content | | ----------- | ----------- | ---------------- | -| 200 | User-facing API. | `msg`: The user-facing API. | \ No newline at end of file +| 200 | User-facing API. | `msg`: The user-facing API. | diff --git a/neural_solution/docs/source/ns_design_doc.md b/neural_solution/docs/source/ns_design_doc.md index 38db1683961..3a3872b7b48 100644 --- a/neural_solution/docs/source/ns_design_doc.md +++ b/neural_solution/docs/source/ns_design_doc.md @@ -121,4 +121,3 @@ class Cluster{ ### Extensibility - The service can be deployed on various resource pool, including a set of worker nodes, such as a local cluster or cloud cluster (AWS and GCP). - diff --git a/neural_solution/docs/source/template/task_request_description.md b/neural_solution/docs/source/template/task_request_description.md index 75eafe901d5..137c66129b3 100644 --- a/neural_solution/docs/source/template/task_request_description.md +++ b/neural_solution/docs/source/template/task_request_description.md @@ -22,4 +22,4 @@ An example: ], "priority": 1 } -``` \ No newline at end of file +``` diff --git a/neural_solution/examples/README.md b/neural_solution/examples/README.md index a58efe09845..85fe48fd54f 100644 --- a/neural_solution/examples/README.md +++ b/neural_solution/examples/README.md @@ -24,4 +24,4 @@ hf_models_grpc - \ No newline at end of file + diff --git a/neural_solution/examples/custom_models_optimized/tf_example1/test.py b/neural_solution/examples/custom_models_optimized/tf_example1/test.py index 006e241551d..3ba86ebbb38 100644 --- a/neural_solution/examples/custom_models_optimized/tf_example1/test.py +++ b/neural_solution/examples/custom_models_optimized/tf_example1/test.py @@ -1,5 +1,19 @@ +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Running script.""" import tensorflow as tf + from neural_compressor import Metric from neural_compressor.config import PostTrainingQuantConfig from neural_compressor.data import BilinearImagenetTransform, ComposeTransform, DefaultDataLoader, TensorflowImageRecord diff --git a/neural_solution/examples/hf_models/README.md b/neural_solution/examples/hf_models/README.md index 97331484e3f..afa2e83e698 100644 --- a/neural_solution/examples/hf_models/README.md +++ b/neural_solution/examples/hf_models/README.md @@ -139,4 +139,4 @@ neural_solution cluster --remove ### Stop the service ```shell neural_solution stop -``` \ No newline at end of file +``` diff --git a/neural_solution/examples/hf_models_grpc/README.md b/neural_solution/examples/hf_models_grpc/README.md index ccbf200652e..1be2ddb2019 100644 --- a/neural_solution/examples/hf_models_grpc/README.md +++ b/neural_solution/examples/hf_models_grpc/README.md @@ -103,4 +103,4 @@ optional arguments: ### Stop the service ```shell neural_solution stop -``` \ No newline at end of file +``` diff --git a/neural_solution/launcher.py b/neural_solution/launcher.py index 218c631b97d..767597b985a 100644 --- a/neural_solution/launcher.py +++ b/neural_solution/launcher.py @@ -11,11 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """The entry of Neural Solution.""" import argparse import os -import psutil import shlex import socket import sqlite3 @@ -23,9 +21,11 @@ import sys import time from datetime import datetime -from neural_solution.utils.utility import get_db_path + +import psutil from prettytable import PrettyTable +from neural_solution.utils.utility import get_db_path def check_ports(args): @@ -129,7 +129,7 @@ def start_service(args): sys.exit(1) else: print( - f"No environment specified, use environment activated:" + "No environment specified, use environment activated:" + f" ({conda_env}) as the task runtime environment." ) conda_env_name = conda_env @@ -257,6 +257,7 @@ def start_service(args): print(f"To submit task at: {ip_address}:{args.restful_api_port}/task/submit/") print("[For information] neural_solution -h") + def query_cluster(db_path: str): """Query cluster information from database. @@ -291,11 +292,13 @@ def create_node(line: str): Node: node object """ from neural_solution.backend.cluster import Node + hostname, num_sockets, num_cores_per_socket = line.strip().split(" ") num_sockets, num_cores_per_socket = int(num_sockets), int(num_cores_per_socket) node = Node(name=hostname, num_sockets=num_sockets, num_cores_per_socket=num_cores_per_socket) return node + def join_node_to_cluster(db_path: str, args): """Append new node into cluster. 
@@ -306,7 +309,7 @@ def join_node_to_cluster(db_path: str, args): node_lst = [] if is_file: num_threads_per_process = 5 - with open(args.join, 'r') as f: + with open(args.join, "r") as f: for line in f: node_lst.append(create_node(line)) else: @@ -323,13 +326,12 @@ def join_node_to_cluster(db_path: str, args): result = cursor.fetchone() index = result[0] if result else 0 - cursor.execute(r"insert into cluster(name, node_info, status, free_sockets, busy_sockets, total_sockets)" + - "values ('{}', '{}', '{}', {}, {}, {})".format(node.name, - repr(node).replace("Node", f"Node{index+1}"), - "join", - node.num_sockets, - 0, - node.num_sockets)) + cursor.execute( + r"insert into cluster(name, node_info, status, free_sockets, busy_sockets, total_sockets)" + + "values ('{}', '{}', '{}', {}, {}, {})".format( + node.name, repr(node).replace("Node", f"Node{index+1}"), "join", node.num_sockets, 0, node.num_sockets + ) + ) conn.commit() index += 1 print(f"Insert node-id: {index} successfully!") @@ -337,6 +339,7 @@ def join_node_to_cluster(db_path: str, args): cursor.close() conn.close() + def remove_node_from_cluster(db_path: str, node_id: int): """Remove one node from cluster table. In the future, it will be deleted in the Cluster class. @@ -360,12 +363,13 @@ def remove_node_from_cluster(db_path: str, node_id: int): else: sql = f"UPDATE cluster SET status = 'remove' WHERE id = {node_id}" cursor.execute(sql) - print(f"Resource occupied, will be removed after resource release") + print("Resource occupied, will be removed after resource release") conn.commit() cursor.close() conn.close() + def manage_cluster(args): """Neural Solution resource management. query/join/remove node. @@ -384,9 +388,7 @@ def manage_cluster(args): def main(): """Implement the main function.""" parser = argparse.ArgumentParser(description="Neural Solution") - parser.add_argument( - 'action', choices=['start', 'stop', "cluster"], help='start/stop/management service' - ) + parser.add_argument("action", choices=["start", "stop", "cluster"], help="start/stop/management service") parser.add_argument( "--hostfile", default=None, help="start backend serve host file which contains all available nodes" ) @@ -408,27 +410,15 @@ def main(): default=2222, help="start serve for task monitor at {task_monitor_port}, default 2222", ) - parser.add_argument( - "--api_type", default="all", help="start web serve with all/grpc/restful, default all" - ) + parser.add_argument("--api_type", default="all", help="start web serve with all/grpc/restful, default all") parser.add_argument( "--workspace", default="./ns_workspace", help='neural solution workspace, default "./ns_workspace"' ) - parser.add_argument( - "--conda_env", default=None, help="specify the running environment for the task" - ) - parser.add_argument( - "--upload_path", default="examples", help="specify the file path for the tasks" - ) - parser.add_argument( - "--query", action="store_true", help="[cluster parameter] query cluster information" - ) - parser.add_argument( - "--join", help="[cluster parameter] add new node into cluster" - ) - parser.add_argument( - "--remove", help="[cluster parameter] remove from cluster" - ) + parser.add_argument("--conda_env", default=None, help="specify the running environment for the task") + parser.add_argument("--upload_path", default="examples", help="specify the file path for the tasks") + parser.add_argument("--query", action="store_true", help="[cluster parameter] query cluster information") + parser.add_argument("--join", help="[cluster parameter] 
add new node into cluster") + parser.add_argument("--remove", help="[cluster parameter] remove from cluster") args = parser.parse_args() # Check parameters ending in '_port' @@ -440,6 +430,7 @@ def main(): stop_service() elif args.action == "cluster": manage_cluster(args) - + + if __name__ == "__main__": main() diff --git a/neural_solution/requirements.txt b/neural_solution/requirements.txt index daabaf43bb3..975b78686e1 100644 --- a/neural_solution/requirements.txt +++ b/neural_solution/requirements.txt @@ -1,8 +1,8 @@ +fastapi +grpcio +mpi4py neural_compressor>=2.2 +protobuf pydantic -fastapi uvicorn[standard] watchdog -protobuf -grpcio -mpi4py diff --git a/neural_solution/scripts/prepare_deps.py b/neural_solution/scripts/prepare_deps.py index c7df3524ac2..da7a716d4a6 100644 --- a/neural_solution/scripts/prepare_deps.py +++ b/neural_solution/scripts/prepare_deps.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Prepare all dependencies.""" """ diff --git a/neural_solution/utils/__init__.py b/neural_solution/utils/__init__.py index 0415332cb3f..1e1ab4fbdf4 100644 --- a/neural_solution/utils/__init__.py +++ b/neural_solution/utils/__init__.py @@ -14,5 +14,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """All common functions for both backend and frontend.""" diff --git a/neural_solution/utils/logger.py b/neural_solution/utils/logger.py index 03c8b86be56..13b3e8ca44e 100644 --- a/neural_solution/utils/logger.py +++ b/neural_solution/utils/logger.py @@ -14,7 +14,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Logger: handles logging functionalities.""" import logging diff --git a/neural_solution/utils/utility.py b/neural_solution/utils/utility.py index 13ef5da1558..371963b9004 100644 --- a/neural_solution/utils/utility.py +++ b/neural_solution/utils/utility.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Neural Solution utility.""" import json diff --git a/neural_solution/version.py b/neural_solution/version.py index cef9e3af87a..106f241e994 100644 --- a/neural_solution/version.py +++ b/neural_solution/version.py @@ -14,7 +14,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Neural Solution.""" from neural_compressor.version import __version__ diff --git a/pyproject.toml b/pyproject.toml index e38031d1034..c23a7761950 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,17 +1,3 @@ -# Copyright (c) 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - [tool.isort] profile = "black" line_length = 120 diff --git a/requirements.txt b/requirements.txt index dbf38f564b8..9ef1b26a999 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,14 +1,14 @@ +deprecated >= 1.2.13 numpy +opencv-python-headless pandas -pyyaml -scikit-learn -schema -py-cpuinfo -requests -psutil Pillow -pycocotools-windows; sys_platform == 'win32' and python_version <= '3.8' -pycocotools; sys_platform != 'win32' or python_version > '3.8' -opencv-python-headless prettytable -deprecated >= 1.2.13 +psutil +py-cpuinfo +pycocotools; sys_platform != 'win32' or python_version > '3.8' +pycocotools-windows; sys_platform == 'win32' and python_version <= '3.8' +pyyaml +requests +schema +scikit-learn diff --git a/setup.py b/setup.py index bc3885e1c3b..81e585eab2a 100644 --- a/setup.py +++ b/setup.py @@ -1,20 +1,21 @@ -from io import open -from setuptools import find_packages, setup import re import sys +from io import open + +from setuptools import find_packages, setup def fetch_requirements(path): - with open(path, 'r') as fd: + with open(path, "r") as fd: return [r.strip() for r in fd.readlines()] try: - filepath = './neural_compressor/version.py' + filepath = "./neural_compressor/version.py" with open(filepath) as version_file: - __version__, = re.findall('__version__ = "(.*)"', version_file.read()) + (__version__,) = re.findall('__version__ = "(.*)"', version_file.read()) except Exception as error: - assert False, "Error: Could not open '%s' due %s\n" % (filepath, error) + assert False, "Error: Could not open '%s' due %s\n" % (filepath, error) neural_insights = False if "neural_insights" in sys.argv: @@ -27,32 +28,36 @@ def fetch_requirements(path): sys.argv.remove("neural_solution") # define include packages -include_packages = find_packages(include=['neural_compressor', 'neural_compressor.*', - 'neural_coder', 'neural_coder.*'], - exclude=["neural_compressor.template"]) -neural_insights_packages = find_packages(include=['neural_insights', 'neural_insights.*'], - exclude=["test.*", "test"]) -neural_solution_packages = find_packages(include=['neural_solution', 'neural_solution.*']) +include_packages = find_packages( + include=["neural_compressor", "neural_compressor.*", "neural_coder", "neural_coder.*"], + exclude=["neural_compressor.template"], +) +neural_insights_packages = find_packages(include=["neural_insights", "neural_insights.*"], exclude=["test.*", "test"]) +neural_solution_packages = find_packages(include=["neural_solution", "neural_solution.*"]) # define package data -package_data = {'': ['*.yaml']} -neural_insights_data = {'neural_insights': [ - 'bin/*', - '*.yaml', - 'web/app/*.*', - 'web/app/static/css/*', - 'web/app/static/js/*', - 'web/app/static/media/*', -]} -neural_solution_data = {'neural_solution': [ - 'scripts/*.*', - "frontend/*.json", - ]} +package_data = {"": ["*.yaml"]} +neural_insights_data = { + "neural_insights": [ + "bin/*", + "*.yaml", + "web/app/*.*", + "web/app/static/css/*", + "web/app/static/js/*", + "web/app/static/media/*", + ] +} +neural_solution_data = { + "neural_solution": [ + "scripts/*.*", + "frontend/*.json", + ] +} # define install requirements -install_requires_list = fetch_requirements('requirements.txt') -neural_insights_requires = fetch_requirements('neural_insights/requirements.txt') -neural_solution_requires = fetch_requirements('neural_solution/requirements.txt') +install_requires_list = 
fetch_requirements("requirements.txt") +neural_insights_requires = fetch_requirements("neural_insights/requirements.txt") +neural_solution_requires = fetch_requirements("neural_solution/requirements.txt") # define entry points entry_points = {} @@ -62,47 +67,38 @@ def fetch_requirements(path): package_data = neural_insights_data install_requires_list = neural_insights_requires include_packages = neural_insights_packages - entry_points = { - 'console_scripts': [ - 'neural_insights = neural_insights.bin.neural_insights:execute' - ] - } + entry_points = {"console_scripts": ["neural_insights = neural_insights.bin.neural_insights:execute"]} elif neural_solution: project_name = "neural_solution" package_data = neural_solution_data install_requires_list = neural_solution_requires include_packages = neural_solution_packages - entry_points = { - 'console_scripts': [ - 'neural_solution = neural_solution.bin.neural_solution:exec' - ] - } + entry_points = {"console_scripts": ["neural_solution = neural_solution.bin.neural_solution:exec"]} else: project_name = "neural_compressor" -if __name__ == '__main__': - +if __name__ == "__main__": setup( name=project_name, version=__version__, author="Intel AIA Team", author_email="feng.tian@intel.com, haihao.shen@intel.com, suyue.chen@intel.com", description="Repository of Intel® Neural Compressor", - long_description=open("README.md", "r", encoding='utf-8').read(), + long_description=open("README.md", "r", encoding="utf-8").read(), long_description_content_type="text/markdown", - keywords='quantization, auto-tuning, post-training static quantization, post-training dynamic quantization, quantization-aware training', - license='Apache 2.0', + keywords="quantization, auto-tuning, post-training static quantization, post-training dynamic quantization, quantization-aware training", + license="Apache 2.0", url="https://github.com/intel/neural-compressor", packages=include_packages, include_package_data=True, package_data=package_data, install_requires=install_requires_list, entry_points=entry_points, - python_requires='>=3.6.0', + python_requires=">=3.6.0", classifiers=[ - 'Intended Audience :: Science/Research', - 'Programming Language :: Python :: 3', - 'Topic :: Scientific/Engineering :: Artificial Intelligence', - 'License :: OSI Approved :: Apache Software License', + "Intended Audience :: Science/Research", + "Programming Language :: Python :: 3", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "License :: OSI Approved :: Apache Software License", ], ) diff --git a/test/adaptor/mxnet_adaptor/test_adaptor_mxnet.py b/test/adaptor/mxnet_adaptor/test_adaptor_mxnet.py index c0d5605de64..b576a1f6eb0 100644 --- a/test/adaptor/mxnet_adaptor/test_adaptor_mxnet.py +++ b/test/adaptor/mxnet_adaptor/test_adaptor_mxnet.py @@ -1,28 +1,29 @@ -import unittest -import os -import sys -import shutil -import yaml import json +import os import platform -import numpy as np -import mxnet as mx -import mxnet.gluon.nn as nn +import shutil +import sys +import unittest from pathlib import Path from tempfile import TemporaryDirectory +import mxnet as mx +import mxnet.gluon.nn as nn +import numpy as np +import yaml + +from neural_compressor.adaptor.mxnet_utils.util import check_mx_version from neural_compressor.experimental import Quantization, common from neural_compressor.experimental.metric.metric import MXNetMetrics from neural_compressor.utils.utility import recover -from neural_compressor.adaptor.mxnet_utils.util import check_mx_version -WORKSPACE_DIR = Path('./saved') 
+WORKSPACE_DIR = Path("./saved") -MX_NAMESPACE = mx.np if check_mx_version('2.0.0') else mx.nd +MX_NAMESPACE = mx.np if check_mx_version("2.0.0") else mx.nd def build_mxnet(): - fake_yaml = ''' + fake_yaml = """ model: name: imagenet framework: mxnet @@ -40,15 +41,17 @@ def build_mxnet(): random_seed: 9527 workspace: path: {} - '''.format(str(WORKSPACE_DIR)) - configs = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('mxnet.yaml', "w", encoding="utf-8") as f: - yaml.dump(configs, f) - f.close() + """.format( + str(WORKSPACE_DIR) + ) + configs = yaml.load(fake_yaml, Loader=yaml.SafeLoader) + with open("mxnet.yaml", "w", encoding="utf-8") as f: + yaml.dump(configs, f) + f.close() def build_mxnet_kl(): - fake_yaml = ''' + fake_yaml = """ model: name: imagenet framework: mxnet @@ -66,231 +69,230 @@ def build_mxnet_kl(): random_seed: 9527 workspace: path: {} - '''.format(str(WORKSPACE_DIR)) - configs = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('mxnet_kl.yaml', "w", encoding="utf-8") as f: - yaml.dump(configs, f) - f.close() + """.format( + str(WORKSPACE_DIR) + ) + configs = yaml.load(fake_yaml, Loader=yaml.SafeLoader) + with open("mxnet_kl.yaml", "w", encoding="utf-8") as f: + yaml.dump(configs, f) + f.close() def are_models_equal(tester, model_a, model_b): - symnet_a, args_a, auxs_a = model_a - symnet_b, args_b, auxs_b = model_b + symnet_a, args_a, auxs_a = model_a + symnet_b, args_b, auxs_b = model_b - nodes_a = [(node['op'], node['inputs']) for node in json.loads(symnet_a.tojson())['nodes']] - nodes_b = [(node['op'], node['inputs']) for node in json.loads(symnet_b.tojson())['nodes']] - tester.assertEqual(nodes_a, nodes_b) + nodes_a = [(node["op"], node["inputs"]) for node in json.loads(symnet_a.tojson())["nodes"]] + nodes_b = [(node["op"], node["inputs"]) for node in json.loads(symnet_b.tojson())["nodes"]] + tester.assertEqual(nodes_a, nodes_b) - args_a = dict(sorted(args_a.items(), key=lambda x: x[0])) - args_b = dict(sorted(args_b.items(), key=lambda x: x[0])) - auxs_a = dict(sorted(auxs_a.items(), key=lambda x: x[0])) - auxs_b = dict(sorted(auxs_b.items(), key=lambda x: x[0])) + args_a = dict(sorted(args_a.items(), key=lambda x: x[0])) + args_b = dict(sorted(args_b.items(), key=lambda x: x[0])) + auxs_a = dict(sorted(auxs_a.items(), key=lambda x: x[0])) + auxs_b = dict(sorted(auxs_b.items(), key=lambda x: x[0])) - assert len(args_a) == len(args_b) - for val_a, val_b in zip(args_a.values(), args_b.values()): - tester.assertTrue(np.all((val_a == val_b).asnumpy())) + assert len(args_a) == len(args_b) + for val_a, val_b in zip(args_a.values(), args_b.values()): + tester.assertTrue(np.all((val_a == val_b).asnumpy())) - assert len(auxs_a) == len(auxs_b) - for val_a, val_b in zip(auxs_a.values(), auxs_b.values()): - tester.assertTrue(np.all((val_a == val_b).asnumpy())) + assert len(auxs_a) == len(auxs_b) + for val_a, val_b in zip(auxs_a.values(), auxs_b.values()): + tester.assertTrue(np.all((val_a == val_b).asnumpy())) class TestAdaptorMXNet(unittest.TestCase): - """ - Test MXNet adaptor functions. 
- """ + """Test MXNet adaptor functions.""" + @classmethod def setUpClass(self): - if platform.system().lower() == "windows": - self.skipTest(self, "not support mxnet on windows yet") - build_mxnet() - build_mxnet_kl() + if platform.system().lower() == "windows": + self.skipTest(self, "not support mxnet on windows yet") + build_mxnet() + build_mxnet_kl() - self.data_low = -1000 - self.data_high = 1000 + self.data_low = -1000 + self.data_high = 1000 @classmethod def tearDownClass(self): - os.remove('mxnet.yaml') - os.remove('mxnet_kl.yaml') - shutil.rmtree(WORKSPACE_DIR, ignore_errors=True) - shutil.rmtree('runs', ignore_errors=True) + os.remove("mxnet.yaml") + os.remove("mxnet_kl.yaml") + shutil.rmtree(WORKSPACE_DIR, ignore_errors=True) + shutil.rmtree("runs", ignore_errors=True) def test_utils(self): - import neural_compressor.adaptor.mxnet_utils.util as utils - self.assertTrue(utils.isiterable([1, 2, 3])) - self.assertFalse(utils.isiterable(123)) + import neural_compressor.adaptor.mxnet_utils.util as utils - def test_mlp_model_quantization(self): - """ - Use MLP model to test minmax calibration and built-in evaluate function. - """ - mlp_input = mx.symbol.Variable('data') - mlp_model = mx.symbol.FullyConnected(data=mlp_input, name='fc1', num_hidden=32) - mlp_model = mx.symbol.Activation(data=mlp_model, act_type='relu') - mlp_model = mx.symbol.FullyConnected(data=mlp_model, name='fc2', num_hidden=16) - mlp_model = mx.symbol.Softmax(mlp_model, name='softmax') - - for shape in [(32, 64), ]: - data = MX_NAMESPACE.random.uniform( - self.data_low, self.data_high, shape).astype('float32') - labels = MX_NAMESPACE.ones((shape[0],)) - calib_data = mx.io.NDArrayIter(data=data, label=labels, batch_size=shape[0]) - - with TemporaryDirectory() as tmpdirname: - prefix = str(Path(tmpdirname)/'tmp') - sym_block = mx.gluon.SymbolBlock(mlp_model, [mlp_input]) - sym_block.initialize() - sym_block.forward(data) - sym_block.export(prefix, epoch=0) - fp32_model = mx.model.load_checkpoint(prefix, 0) + self.assertTrue(utils.isiterable([1, 2, 3])) + self.assertFalse(utils.isiterable(123)) - quantizer = Quantization("./mxnet.yaml") - quantizer.model = fp32_model - quantizer.calib_dataloader = calib_data - quantizer.eval_dataloader = calib_data - qmodel = quantizer.fit() - self.assertIsInstance(qmodel.model[0], mx.symbol.Symbol) - - # test inspect_tensor - inspect_tensor = quantizer.strategy.adaptor.inspect_tensor - quantizer.model = fp32_model - - fc_op_name = 'sg_{}_fully_connected'.format( - 'onednn' if check_mx_version('2.0.0') else 'mkldnn') - fc_node_name1 = fc_op_name + '_eltwise_0' - fc_node_name2 = fc_op_name + '_1' - - insp = inspect_tensor(quantizer.model, quantizer.calib_dataloader, - op_list=[fc_node_name1, - fc_node_name2 ], iteration_list=[1, 3]) - qinsp = inspect_tensor(qmodel, quantizer.calib_dataloader, - op_list=[fc_node_name1, - fc_node_name2], iteration_list=[1, 3]) - - self.assertNotEqual(len(insp['activation']), 0) - self.assertEqual(len(insp['activation']), len(qinsp['activation'])) - - for tensors, qtensors in zip(insp['activation'], qinsp['activation']): - for k in (set(tensors.keys()) & set(qtensors.keys())): - tensor, qtensor = tensors[k][k], qtensors[k][k] - self.assertEqual(tensor.shape, qtensor.shape) - - #test inspect with an empty iteration_list - inspect_tensor(qmodel, quantizer.calib_dataloader, - op_list=[fc_node_name1], - iteration_list=[]) - - # test recovery for symbolic model - qmodel_r = recover(fp32_model, WORKSPACE_DIR/'history.snapshot', -1) - are_models_equal(self, 
qmodel.model, qmodel_r.model) - - # test symbolic model saving - qmodel_r.save(WORKSPACE_DIR/'save_test') + def test_mlp_model_quantization(self): + """Use MLP model to test minmax calibration and built-in evaluate function.""" + mlp_input = mx.symbol.Variable("data") + mlp_model = mx.symbol.FullyConnected(data=mlp_input, name="fc1", num_hidden=32) + mlp_model = mx.symbol.Activation(data=mlp_model, act_type="relu") + mlp_model = mx.symbol.FullyConnected(data=mlp_model, name="fc2", num_hidden=16) + mlp_model = mx.symbol.Softmax(mlp_model, name="softmax") + + for shape in [ + (32, 64), + ]: + data = MX_NAMESPACE.random.uniform(self.data_low, self.data_high, shape).astype("float32") + labels = MX_NAMESPACE.ones((shape[0],)) + calib_data = mx.io.NDArrayIter(data=data, label=labels, batch_size=shape[0]) + + with TemporaryDirectory() as tmpdirname: + prefix = str(Path(tmpdirname) / "tmp") + sym_block = mx.gluon.SymbolBlock(mlp_model, [mlp_input]) + sym_block.initialize() + sym_block.forward(data) + sym_block.export(prefix, epoch=0) + fp32_model = mx.model.load_checkpoint(prefix, 0) + + quantizer = Quantization("./mxnet.yaml") + quantizer.model = fp32_model + quantizer.calib_dataloader = calib_data + quantizer.eval_dataloader = calib_data + qmodel = quantizer.fit() + self.assertIsInstance(qmodel.model[0], mx.symbol.Symbol) + + # test inspect_tensor + inspect_tensor = quantizer.strategy.adaptor.inspect_tensor + quantizer.model = fp32_model + + fc_op_name = "sg_{}_fully_connected".format("onednn" if check_mx_version("2.0.0") else "mkldnn") + fc_node_name1 = fc_op_name + "_eltwise_0" + fc_node_name2 = fc_op_name + "_1" + + insp = inspect_tensor( + quantizer.model, + quantizer.calib_dataloader, + op_list=[fc_node_name1, fc_node_name2], + iteration_list=[1, 3], + ) + qinsp = inspect_tensor( + qmodel, quantizer.calib_dataloader, op_list=[fc_node_name1, fc_node_name2], iteration_list=[1, 3] + ) + + self.assertNotEqual(len(insp["activation"]), 0) + self.assertEqual(len(insp["activation"]), len(qinsp["activation"])) + + for tensors, qtensors in zip(insp["activation"], qinsp["activation"]): + for k in set(tensors.keys()) & set(qtensors.keys()): + tensor, qtensor = tensors[k][k], qtensors[k][k] + self.assertEqual(tensor.shape, qtensor.shape) + + # test inspect with an empty iteration_list + inspect_tensor(qmodel, quantizer.calib_dataloader, op_list=[fc_node_name1], iteration_list=[]) + + # test recovery for symbolic model + qmodel_r = recover(fp32_model, WORKSPACE_DIR / "history.snapshot", -1) + are_models_equal(self, qmodel.model, qmodel_r.model) + + # test symbolic model saving + qmodel_r.save(WORKSPACE_DIR / "save_test") def test_conv_model_quantization(self): - """ - Use Conv model to test KL calibration and user specific evaluate function. 
- """ - conv_net = nn.HybridSequential() - conv_net.add(nn.Conv2D(channels=3, kernel_size=(1, 1))) - conv_net.add(nn.BatchNorm()) - conv_net.add(nn.Activation('relu')) - conv_net.add(nn.AvgPool2D(pool_size=(4, 4))) - conv_net.add(nn.Dense(1, activation='sigmoid')) - conv_net.initialize() - - for shape in [(32, 3, 224, 224), ]: - dataShape = (shape[0]*5, *shape[1:]) - data = MX_NAMESPACE.random.uniform(self.data_low, self.data_high, dataShape, - dtype='float32') - label = MX_NAMESPACE.random.randint(0, 2, (dataShape[0], 1)).astype('float32') - dataset = mx.gluon.data.ArrayDataset(data, label) - - def eval(model): - eval_dataloader = mx.gluon.data.DataLoader(dataset, batch_size=8) - metric = MXNetMetrics().metrics['Accuracy']() - for batch in eval_dataloader: - data, labels = batch - preds = model.forward(data) - metric.update(labels.asnumpy(), preds.asnumpy()) - return metric.result() - - calib_dataloader = mx.gluon.data.DataLoader(dataset, batch_size=8) - calib_dataloader.batch_size = 8 - quantizer = Quantization("./mxnet_kl.yaml") - quantizer.model = conv_net - quantizer.calib_dataloader = calib_dataloader - quantizer.eval_func = eval - qnet = quantizer.fit().model - self.assertIsInstance(qnet, mx.gluon.HybridBlock) + """Use Conv model to test KL calibration and user specific evaluate function.""" + conv_net = nn.HybridSequential() + conv_net.add(nn.Conv2D(channels=3, kernel_size=(1, 1))) + conv_net.add(nn.BatchNorm()) + conv_net.add(nn.Activation("relu")) + conv_net.add(nn.AvgPool2D(pool_size=(4, 4))) + conv_net.add(nn.Dense(1, activation="sigmoid")) + conv_net.initialize() + + for shape in [ + (32, 3, 224, 224), + ]: + dataShape = (shape[0] * 5, *shape[1:]) + data = MX_NAMESPACE.random.uniform(self.data_low, self.data_high, dataShape, dtype="float32") + label = MX_NAMESPACE.random.randint(0, 2, (dataShape[0], 1)).astype("float32") + dataset = mx.gluon.data.ArrayDataset(data, label) + + def eval(model): + eval_dataloader = mx.gluon.data.DataLoader(dataset, batch_size=8) + metric = MXNetMetrics().metrics["Accuracy"]() + for batch in eval_dataloader: + data, labels = batch + preds = model.forward(data) + metric.update(labels.asnumpy(), preds.asnumpy()) + return metric.result() + + calib_dataloader = mx.gluon.data.DataLoader(dataset, batch_size=8) + calib_dataloader.batch_size = 8 + quantizer = Quantization("./mxnet_kl.yaml") + quantizer.model = conv_net + quantizer.calib_dataloader = calib_dataloader + quantizer.eval_func = eval + qnet = quantizer.fit().model + self.assertIsInstance(qnet, mx.gluon.HybridBlock) def test_gluon_model(self): - """ - Use gluon model to test gluon related functions in mxnet adaptor. 
- """ - # create gluon model - def create_model(params=None): - net = nn.HybridSequential() - net.add(nn.Conv2D(1, (1, 1), activation="relu")) - net.add(nn.Flatten()) - net.add(nn.Dense(64, activation="relu")) - net.add(nn.Dense(10)) - if params is not None: - if check_mx_version('2.0.0'): - net.load_dict({k: v.data() for k, v in params.items()}) + """Use gluon model to test gluon related functions in mxnet adaptor.""" + + # create gluon model + def create_model(params=None): + net = nn.HybridSequential() + net.add(nn.Conv2D(1, (1, 1), activation="relu")) + net.add(nn.Flatten()) + net.add(nn.Dense(64, activation="relu")) + net.add(nn.Dense(10)) + if params is not None: + if check_mx_version("2.0.0"): + net.load_dict({k: v.data() for k, v in params.items()}) + else: + param_keys = sorted(net.collect_params().keys()) + param_values = sorted(params.items(), key=lambda x: x[0]) + params = {k: v.data() for k, (old_k, v) in zip(param_keys, param_values)} + net.collect_params().load_dict(params) else: - param_keys = sorted(net.collect_params().keys()) - param_values = sorted(params.items(), key=lambda x: x[0]) - params = {k: v.data() for k, (old_k, v) in zip(param_keys, param_values)} - net.collect_params().load_dict(params) - else: - net.initialize() - return net - - class CalibDataset(): - def __init__(self, dataset): - self.dataset = dataset - - def __getitem__(self, idx): - if check_mx_version('2.0.0'): - mx_namespace = mx.np - else: - mx_namespace = mx.nd - data, label = self.dataset[idx] - data = mx_namespace.reshape( - data, (data.shape[-1], *data.shape[:-1])).astype('float32') - return data, label - - def __len__(self): - return len(self.dataset) - - net = create_model() - dataset = CalibDataset(mx.gluon.data.vision.datasets.FashionMNIST(train=False)) - dataloader = common.DataLoader(dataset, batch_size=8) - quantizer = Quantization("./mxnet.yaml") - quantizer.model = net - quantizer.calib_dataloader = dataloader - quantizer.eval_dataloader = dataloader - qnet = quantizer.fit() - self.assertIsInstance(qnet.model, mx.gluon.HybridBlock) - - # test recovery for gluon model - net = create_model(net.collect_params()) - qnet_r = recover(net, WORKSPACE_DIR/'history.snapshot', -1) - - from neural_compressor.adaptor.mxnet_utils.util import prepare_model, prepare_dataloader - dataloader = prepare_dataloader(qnet, mx.cpu(), quantizer.calib_dataloader) - - # test calling prepare_dataloader for already prepared dataloader - self.assertIs(dataloader, prepare_dataloader(qnet, mx.cpu(), dataloader)) - - model_a = prepare_model(qnet, mx.cpu(), dataloader.input_desc) - model_b = prepare_model(qnet_r, mx.cpu(), dataloader.input_desc) - are_models_equal(self, model_a, model_b) - - # test gluon model saving - qnet_r.save(WORKSPACE_DIR/'save_test') + net.initialize() + return net + + class CalibDataset: + def __init__(self, dataset): + self.dataset = dataset + + def __getitem__(self, idx): + if check_mx_version("2.0.0"): + mx_namespace = mx.np + else: + mx_namespace = mx.nd + data, label = self.dataset[idx] + data = mx_namespace.reshape(data, (data.shape[-1], *data.shape[:-1])).astype("float32") + return data, label + + def __len__(self): + return len(self.dataset) + + net = create_model() + dataset = CalibDataset(mx.gluon.data.vision.datasets.FashionMNIST(train=False)) + dataloader = common.DataLoader(dataset, batch_size=8) + quantizer = Quantization("./mxnet.yaml") + quantizer.model = net + quantizer.calib_dataloader = dataloader + quantizer.eval_dataloader = dataloader + qnet = quantizer.fit() + 
self.assertIsInstance(qnet.model, mx.gluon.HybridBlock) + + # test recovery for gluon model + net = create_model(net.collect_params()) + qnet_r = recover(net, WORKSPACE_DIR / "history.snapshot", -1) + + from neural_compressor.adaptor.mxnet_utils.util import prepare_dataloader, prepare_model + + dataloader = prepare_dataloader(qnet, mx.cpu(), quantizer.calib_dataloader) + + # test calling prepare_dataloader for already prepared dataloader + self.assertIs(dataloader, prepare_dataloader(qnet, mx.cpu(), dataloader)) + + model_a = prepare_model(qnet, mx.cpu(), dataloader.input_desc) + model_b = prepare_model(qnet_r, mx.cpu(), dataloader.input_desc) + are_models_equal(self, model_a, model_b) + + # test gluon model saving + qnet_r.save(WORKSPACE_DIR / "save_test") if __name__ == "__main__": diff --git a/test/adaptor/mxnet_adaptor/test_mxnet_query_fwk.py b/test/adaptor/mxnet_adaptor/test_mxnet_query_fwk.py index 227b03d8312..977274d6d4a 100644 --- a/test/adaptor/mxnet_adaptor/test_mxnet_query_fwk.py +++ b/test/adaptor/mxnet_adaptor/test_mxnet_query_fwk.py @@ -2,25 +2,29 @@ # -*- coding: utf-8 -*- # import os +import platform import sys import unittest + import yaml -import platform -sys.path.append('..') + +sys.path.append("..") import mxnet as mx + import neural_compressor import neural_compressor.adaptor -class TestMXNetQuery(unittest.TestCase): +class TestMXNetQuery(unittest.TestCase): @classmethod def setUpClass(self): if platform.system().lower() == "windows": self.skipTest(self, "not support mxnet on windows yet") import importlib - nc_path = os.path.dirname(importlib.util.find_spec('neural_compressor').origin) - self.yaml_path = os.path.join(nc_path, 'adaptor/mxnet.yaml') + + nc_path = os.path.dirname(importlib.util.find_spec("neural_compressor").origin) + self.yaml_path = os.path.join(nc_path, "adaptor/mxnet.yaml") self.Queryhandler = neural_compressor.adaptor.mxnet.MXNetQuery(self.yaml_path) self.version = mx.__version__ @@ -37,11 +41,11 @@ def test_one_shot_query(self): def test_get_version(self): Query_version = self.Queryhandler.get_version() # if the mxnet version not in cfgs, the default maybe be ok. 
- self.assertNotIn([mx.__version__, 'default'], [Query_version]) + self.assertNotIn([mx.__version__, "default"], [Query_version]) def test_get_precisions(self): Query_precisions = self.Queryhandler.get_precisions() - res = Query_precisions.split(',') + res = Query_precisions.split(",") self.assertEqual(len(res), len(set(res))) def test_get_op_types(self): @@ -58,7 +62,8 @@ def test_get_quantization_capability(self): def test_get_mixed_precision_combination(self): Query_mixed_precision = self.Queryhandler.get_mixed_precision_combination() - self.assertNotIn(['int8', 'bf16'], Query_mixed_precision) + self.assertNotIn(["int8", "bf16"], Query_mixed_precision) + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py b/test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py index 01d148c433b..4e06f8f3bf8 100644 --- a/test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py +++ b/test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py @@ -1,35 +1,36 @@ import os import shutil import unittest +from collections import OrderedDict from unittest.mock import patch + +import numpy as np +import onnx import onnxruntime as ort import torch import torchvision -import onnx -import numpy as np -from packaging.version import Version -from collections import OrderedDict +from onnx import TensorProto, helper, numpy_helper from onnx import onnx_pb as onnx_proto -from onnx import helper, TensorProto, numpy_helper from packaging.version import Version from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer + +from neural_compressor import PostTrainingQuantConfig, quantization from neural_compressor.adaptor import FRAMEWORKS -from neural_compressor.data import Datasets, DATALOADERS, DataLoader -from neural_compressor.experimental import Quantization, common -from neural_compressor.experimental import Benchmark, common from neural_compressor.adaptor.pytorch import get_torch_version from neural_compressor.conf.config import conf -from neural_compressor import quantization, PostTrainingQuantConfig +from neural_compressor.data import DATALOADERS, DataLoader, Datasets +from neural_compressor.experimental import Benchmark, Quantization, common from neural_compressor.model import Model + def build_static_yaml(): fake_yaml = """ model: name: imagenet framework: onnxrt_qlinearops - quantization: - approach: post_training_static_quant + quantization: + approach: post_training_static_quant calibration: sampling_size: 50 op_wise: { @@ -38,7 +39,7 @@ def build_static_yaml(): 'weight': {'dtype': ['fp32'], 'scheme':['sym']} } } - + evaluation: accuracy: metric: @@ -51,7 +52,7 @@ def build_static_yaml(): exit_policy: timeout: 0 random_seed: 9527 - workspace: + workspace: path: ./nc_workspace/recover/ """ with open("qlinear.yaml", "w", encoding="utf-8") as f: @@ -62,8 +63,8 @@ def build_static_yaml(): name: imagenet framework: onnxrt_qdq - quantization: - approach: post_training_static_quant + quantization: + approach: post_training_static_quant calibration: sampling_size: 50 op_wise: { @@ -72,7 +73,7 @@ def build_static_yaml(): 'weight': {'dtype': ['fp32'], 'scheme':['sym']} } } - + evaluation: accuracy: metric: @@ -85,12 +86,13 @@ def build_static_yaml(): exit_policy: timeout: 0 random_seed: 9527 - workspace: + workspace: path: ./nc_workspace/recover/ """ with open("qdq.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) + def build_benchmark_yaml(): fake_yaml = """ model: @@ -122,14 +124,15 @@ def build_benchmark_yaml(): with 
open("benchmark.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) + def build_dynamic_yaml(): fake_yaml = """ model: name: imagenet framework: onnxrt_integerops - quantization: - approach: post_training_dynamic_quant + quantization: + approach: post_training_dynamic_quant calibration: sampling_size: 50 @@ -145,21 +148,22 @@ def build_dynamic_yaml(): exit_policy: timeout: 0 random_seed: 9527 - workspace: + workspace: path: ./nc_workspace/recover/ - + """ with open("dynamic.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) + def build_recipe_yaml(): fake_yaml = """ model: name: imagenet framework: onnxrt_qlinearops - quantization: - approach: post_training_static_quant + quantization: + approach: post_training_static_quant recipes: first_conv_or_matmul_quantization: False last_conv_or_matmul_quantization: False @@ -190,14 +194,15 @@ def build_recipe_yaml(): with open("recipe.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) + def build_recipe2_yaml(): fake_yaml = """ model: name: imagenet framework: onnxrt_qlinearops - quantization: - approach: post_training_static_quant + quantization: + approach: post_training_static_quant recipes: last_conv_or_matmul_quantization: False pre_post_process_quantization: False @@ -228,14 +233,15 @@ def build_recipe2_yaml(): with open("recipe2.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) + def build_gather_yaml(): fake_yaml = """ model: name: imagenet framework: onnxrt_qlinearops - quantization: - approach: post_training_static_quant + quantization: + approach: post_training_static_quant calibration: sampling_size: 1 dataloader: @@ -265,14 +271,15 @@ def build_gather_yaml(): with open("gather.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) + def build_rename_yaml(): fake_yaml = """ model: name: test framework: onnxrt_integerops - quantization: - approach: post_training_dynamic_quant + quantization: + approach: post_training_dynamic_quant calibration: sampling_size: 1 @@ -291,6 +298,7 @@ def build_rename_yaml(): with open("rename.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) + def build_non_MSE_yaml(): fake_yaml = """ model: @@ -311,7 +319,7 @@ def build_non_MSE_yaml(): evaluation: accuracy: metric: - MSE: + MSE: compare_label: False performance: warmup: 5 @@ -323,13 +331,14 @@ def build_non_MSE_yaml(): exit_policy: timeout: 0 random_seed: 9527 - workspace: + workspace: path: ./nc_workspace/recover/ - + """ with open("non_MSE.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) + def eval_func(model): return 1.0 @@ -339,227 +348,250 @@ def export_onnx_cv_model(model, path, opset=12): torch_out = model(x) # Export the model - torch.onnx.export(model, # model being run - x, # model input (or a tuple for multiple inputs) - path, # where to save the model (can be a file or file-like object) - export_params=True, # store the trained parameter weights inside the model file - opset_version=opset, # the ONNX version to export the model to, please ensure at least 11. 
- do_constant_folding=True, # whether to execute constant folding for optimization - input_names = ["input"], # the model"s input names - output_names = ["output"], # the model"s output names - dynamic_axes={"input" : {0 : "batch_size"}, # variable length axes - "output" : {0 : "batch_size"}}) + torch.onnx.export( + model, # model being run + x, # model input (or a tuple for multiple inputs) + path, # where to save the model (can be a file or file-like object) + export_params=True, # store the trained parameter weights inside the model file + opset_version=opset, # the ONNX version to export the model to, please ensure at least 11. + do_constant_folding=True, # whether to execute constant folding for optimization + input_names=["input"], # the model"s input names + output_names=["output"], # the model"s output names + dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}}, # variable length axes + ) + def export_onnx_nlp_model(model, path, opset=12): - symbolic_names = {0: 'batch_size', 1: 'max_seq_len'} - inputs = {'input_ids': torch.ones(1, 128, dtype=torch.int64), - 'attention_mask': torch.ones(1, 128, dtype=torch.int64)} - torch.onnx.export(model, # model being run - (inputs['input_ids'], # model input (or a tuple for multiple inputs) - inputs['attention_mask']), - path, # where to save the model (can be a file or file-like object) - opset_version=opset, # the ONNX version to export the model - do_constant_folding=True, # whether to execute constant folding - input_names=['input_ids', # the model's input names - 'attention_mask'], - output_names=['logits'], - dynamic_axes={'input_ids': symbolic_names, # variable length axes - 'attention_mask' : symbolic_names}) + symbolic_names = {0: "batch_size", 1: "max_seq_len"} + inputs = { + "input_ids": torch.ones(1, 128, dtype=torch.int64), + "attention_mask": torch.ones(1, 128, dtype=torch.int64), + } + torch.onnx.export( + model, # model being run + (inputs["input_ids"], inputs["attention_mask"]), # model input (or a tuple for multiple inputs) + path, # where to save the model (can be a file or file-like object) + opset_version=opset, # the ONNX version to export the model + do_constant_folding=True, # whether to execute constant folding + input_names=["input_ids", "attention_mask"], # the model's input names + output_names=["logits"], + dynamic_axes={"input_ids": symbolic_names, "attention_mask": symbolic_names}, # variable length axes + ) + def generate_input_initializer(tensor_shape, tensor_dtype, input_name): - ''' - Helper function to generate initializers for test inputs - ''' + """Helper function to generate initializers for test inputs.""" tensor = np.random.ranf(tensor_shape).astype(tensor_dtype) init = numpy_helper.from_array(tensor, input_name) - return init + return init + def build_ir3_model(): - input0 = helper.make_tensor_value_info('input0', TensorProto.FLOAT, [1, 2048]) - output = helper.make_tensor_value_info('output', TensorProto.FLOAT, [1, 1000]) - weight = helper.make_tensor_value_info('X1_weight', TensorProto.FLOAT, [1000, 2048]) + input0 = helper.make_tensor_value_info("input0", TensorProto.FLOAT, [1, 2048]) + output = helper.make_tensor_value_info("output", TensorProto.FLOAT, [1, 1000]) + weight = helper.make_tensor_value_info("X1_weight", TensorProto.FLOAT, [1000, 2048]) - X1_weight = generate_input_initializer([1000, 2048], np.float32, 'X1_weight') - kwargs = {'alpha':1.0, 'beta':1.0, 'transA':0, 'transB':1} - gemm = helper.make_node('Gemm', ['input0', 'X1_weight'], ['output'], name='gemm', **kwargs) + 
X1_weight = generate_input_initializer([1000, 2048], np.float32, "X1_weight") + kwargs = {"alpha": 1.0, "beta": 1.0, "transA": 0, "transB": 1} + gemm = helper.make_node("Gemm", ["input0", "X1_weight"], ["output"], name="gemm", **kwargs) - graph = helper.make_graph([gemm], 'test_graph_6', [input0], [output]) + graph = helper.make_graph([gemm], "test_graph_6", [input0], [output]) graph.initializer.add().CopyFrom(X1_weight) graph.input.extend([weight]) model = helper.make_model(graph) - model = helper.make_model(graph, **{'opset_imports': [helper.make_opsetid('', 11)]}) + model = helper.make_model(graph, **{"opset_imports": [helper.make_opsetid("", 11)]}) model.ir_version = 3 return model + def build_matmul_model(): - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 5, 5]) - C = helper.make_tensor_value_info('C', TensorProto.FLOAT, [1, 5, 2]) - D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [1, 5, 2]) - H = helper.make_tensor_value_info('H', TensorProto.FLOAT, [1, 5, 2]) + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 5, 5]) + C = helper.make_tensor_value_info("C", TensorProto.FLOAT, [1, 5, 2]) + D = helper.make_tensor_value_info("D", TensorProto.FLOAT, [1, 5, 2]) + H = helper.make_tensor_value_info("H", TensorProto.FLOAT, [1, 5, 2]) e_value = np.random.randint(2, size=(10)).astype(np.float32) - B_init = helper.make_tensor('B', TensorProto.FLOAT, [5, 2], e_value.reshape(10).tolist()) - E_init = helper.make_tensor('E', TensorProto.FLOAT, [1, 5, 2], e_value.reshape(10).tolist()) + B_init = helper.make_tensor("B", TensorProto.FLOAT, [5, 2], e_value.reshape(10).tolist()) + E_init = helper.make_tensor("E", TensorProto.FLOAT, [1, 5, 2], e_value.reshape(10).tolist()) + + matmul_node = onnx.helper.make_node("MatMul", ["A", "B"], ["C"], name="Matmul") + add = onnx.helper.make_node("Add", ["C", "E"], ["D"], name="add") - matmul_node = onnx.helper.make_node('MatMul', ['A', 'B'], ['C'], name='Matmul') - add = onnx.helper.make_node('Add', ['C', 'E'], ['D'], name='add') - f_value = np.random.randint(2, size=(10)).astype(np.float32) - F_init = helper.make_tensor('F', TensorProto.FLOAT, [1, 5, 2], e_value.reshape(10).tolist()) - add2 = onnx.helper.make_node('Add', ['D', 'F'], ['H'], name='add2') - - graph = helper.make_graph([matmul_node, add, add2], 'test_graph_1', [A], [H], [B_init, E_init, F_init]) + F_init = helper.make_tensor("F", TensorProto.FLOAT, [1, 5, 2], e_value.reshape(10).tolist()) + add2 = onnx.helper.make_node("Add", ["D", "F"], ["H"], name="add2") + + graph = helper.make_graph([matmul_node, add, add2], "test_graph_1", [A], [H], [B_init, E_init, F_init]) model = helper.make_model(graph) - model = helper.make_model(graph, **{'opset_imports': [helper.make_opsetid('', 13)]}) + model = helper.make_model(graph, **{"opset_imports": [helper.make_opsetid("", 13)]}) return model + def build_matmul_model2(): - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 1, 5, 5]) - B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1, 1, 5, 1]) - H = helper.make_tensor_value_info('H', TensorProto.FLOAT, [1, 1, 5, 1]) - - C1_init = helper.make_tensor('C1', TensorProto.FLOAT, [1, 1, 5, 5], np.random.random(25).tolist()) - matmul_node = onnx.helper.make_node('MatMul', ['A', 'B'], ['C'], name='Matmul') - matmul_node2 = onnx.helper.make_node('MatMul', ['C1', 'C'], ['C2'], name='Matmul2') - matmul_node3 = onnx.helper.make_node('MatMul', ['A', 'C2'], ['C3'], name='Matmul3') + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 1, 5, 5]) + B = 
helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 1, 5, 1]) + H = helper.make_tensor_value_info("H", TensorProto.FLOAT, [1, 1, 5, 1]) + + C1_init = helper.make_tensor("C1", TensorProto.FLOAT, [1, 1, 5, 5], np.random.random(25).tolist()) + matmul_node = onnx.helper.make_node("MatMul", ["A", "B"], ["C"], name="Matmul") + matmul_node2 = onnx.helper.make_node("MatMul", ["C1", "C"], ["C2"], name="Matmul2") + matmul_node3 = onnx.helper.make_node("MatMul", ["A", "C2"], ["C3"], name="Matmul3") e_value = np.random.randint(2, size=(5)).astype(np.float32) - E_init = helper.make_tensor('E', TensorProto.FLOAT, [1, 1, 5, 1], e_value.reshape(5).tolist()) - add = onnx.helper.make_node('Add', ['C3', 'E'], ['D'], name='add') - + E_init = helper.make_tensor("E", TensorProto.FLOAT, [1, 1, 5, 1], e_value.reshape(5).tolist()) + add = onnx.helper.make_node("Add", ["C3", "E"], ["D"], name="add") + f_value = np.random.randint(2, size=(5)).astype(np.float32) - F_init = helper.make_tensor('F', TensorProto.FLOAT, [1, 1, 5, 1], e_value.reshape(5).tolist()) - add2 = onnx.helper.make_node('Add', ['D', 'F'], ['H'], name='add2') - - graph = helper.make_graph([matmul_node, matmul_node2, matmul_node3, add, add2], 'test_graph_1', [A, B], [H], [E_init, F_init, C1_init]) + F_init = helper.make_tensor("F", TensorProto.FLOAT, [1, 1, 5, 1], e_value.reshape(5).tolist()) + add2 = onnx.helper.make_node("Add", ["D", "F"], ["H"], name="add2") + + graph = helper.make_graph( + [matmul_node, matmul_node2, matmul_node3, add, add2], "test_graph_1", [A, B], [H], [E_init, F_init, C1_init] + ) model = helper.make_model(graph) - model = helper.make_model(graph, **{'opset_imports': [helper.make_opsetid('', 13)]}) - return model + model = helper.make_model(graph, **{"opset_imports": [helper.make_opsetid("", 13)]}) + return model + def build_matmul_gather_model(): - input = helper.make_tensor_value_info('input0', TensorProto.INT64, [1, 1]) - output = helper.make_tensor_value_info('output0', TensorProto.FLOAT, [1, 1]) + input = helper.make_tensor_value_info("input0", TensorProto.INT64, [1, 1]) + output = helper.make_tensor_value_info("output0", TensorProto.FLOAT, [1, 1]) - axes = helper.make_tensor('axes', TensorProto.INT64, [1], [1]) - squeeze = onnx.helper.make_node('Squeeze', ['input0', 'axes'], ['A'], name='squeeze') + axes = helper.make_tensor("axes", TensorProto.INT64, [1], [1]) + squeeze = onnx.helper.make_node("Squeeze", ["input0", "axes"], ["A"], name="squeeze") b_value = np.random.random((1, 2048)) - B_init = helper.make_tensor('B', TensorProto.FLOAT, [1, 2048], b_value.reshape(2048).tolist()) + B_init = helper.make_tensor("B", TensorProto.FLOAT, [1, 2048], b_value.reshape(2048).tolist()) - gather = onnx.helper.make_node('Gather', ['B', 'A'], ['C'], name='gather') + gather = onnx.helper.make_node("Gather", ["B", "A"], ["C"], name="gather") - d_value = np.random.random((2048, 1)).astype('float32') - D_init = helper.make_tensor('D', TensorProto.FLOAT, [2048, 1], d_value.reshape(2048).tolist()) - matmul = onnx.helper.make_node('MatMul', ['C', 'D'], ['output0']) + d_value = np.random.random((2048, 1)).astype("float32") + D_init = helper.make_tensor("D", TensorProto.FLOAT, [2048, 1], d_value.reshape(2048).tolist()) + matmul = onnx.helper.make_node("MatMul", ["C", "D"], ["output0"]) - graph = helper.make_graph([squeeze, gather, matmul], 'test_graph_1', [input], [output], [B_init, D_init, axes]) - model = helper.make_model(graph, **{'opset_imports': [helper.make_opsetid('', 13)]}) + graph = helper.make_graph([squeeze, gather, matmul], 
"test_graph_1", [input], [output], [B_init, D_init, axes]) + model = helper.make_model(graph, **{"opset_imports": [helper.make_opsetid("", 13)]}) return model + def build_model_with_gather(): b_value = np.random.randint(2, size=(10)).astype(np.int32) - B_init = helper.make_tensor('B', TensorProto.INT32, [10], b_value.reshape(10).tolist()) - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 100, 4]) - D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [100, 4]) - squeeze = onnx.helper.make_node('Squeeze', ['A'], ['D'], name='squeeze') - B = helper.make_tensor_value_info('B', TensorProto.INT32, [10]) - C = helper.make_tensor_value_info('C', TensorProto.FLOAT, [10, 4]) - node = onnx.helper.make_node('Gather', ['D', 'B'], ['C'], name='gather') + B_init = helper.make_tensor("B", TensorProto.INT32, [10], b_value.reshape(10).tolist()) + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 100, 4]) + D = helper.make_tensor_value_info("D", TensorProto.FLOAT, [100, 4]) + squeeze = onnx.helper.make_node("Squeeze", ["A"], ["D"], name="squeeze") + B = helper.make_tensor_value_info("B", TensorProto.INT32, [10]) + C = helper.make_tensor_value_info("C", TensorProto.FLOAT, [10, 4]) + node = onnx.helper.make_node("Gather", ["D", "B"], ["C"], name="gather") e_value = np.random.randint(2, size=(10)).astype(np.float32) - E_init = helper.make_tensor('E', TensorProto.FLOAT, [10, 1], e_value.reshape(10).tolist()) - F = helper.make_tensor_value_info('F', TensorProto.FLOAT, [10, 4]) - add = onnx.helper.make_node('Add', ['C', 'E'], ['F'], name='add') - graph = helper.make_graph([squeeze, node, add], 'test_graph_1', [A], [F], [B_init, E_init]) - model = helper.make_model(graph, **{'opset_imports': [helper.make_opsetid('', 13)]}) + E_init = helper.make_tensor("E", TensorProto.FLOAT, [10, 1], e_value.reshape(10).tolist()) + F = helper.make_tensor_value_info("F", TensorProto.FLOAT, [10, 4]) + add = onnx.helper.make_node("Add", ["C", "E"], ["F"], name="add") + graph = helper.make_graph([squeeze, node, add], "test_graph_1", [A], [F], [B_init, E_init]) + model = helper.make_model(graph, **{"opset_imports": [helper.make_opsetid("", 13)]}) return model + def build_rename_model(): input_shape = [1, 1, 200] - input_tensor = helper.make_tensor_value_info('input', TensorProto.FLOAT, input_shape) + input_tensor = helper.make_tensor_value_info("input", TensorProto.FLOAT, input_shape) w_shape = [2, 400, 200] - w_weights = np.random.random_sample(w_shape).astype(dtype='float32') - w_init = onnx.numpy_helper.from_array(w_weights, name='w') + w_weights = np.random.random_sample(w_shape).astype(dtype="float32") + w_init = onnx.numpy_helper.from_array(w_weights, name="w") r_shape = [2, 400, 100] - r_weights = np.random.random_sample(r_shape).astype(dtype='float32') - r_init = onnx.numpy_helper.from_array(r_weights, name='r') + r_weights = np.random.random_sample(r_shape).astype(dtype="float32") + r_init = onnx.numpy_helper.from_array(r_weights, name="r") b_shape = [2, 800] - b_weights = np.random.random_sample(b_shape).astype(dtype='float32') - b_init = onnx.numpy_helper.from_array(b_weights, name='b') + b_weights = np.random.random_sample(b_shape).astype(dtype="float32") + b_init = onnx.numpy_helper.from_array(b_weights, name="b") kwargs = {} - kwargs['direction'] = "bidirectional" - kwargs['activations'] = ["Sigmoid", "Tanh", "Tanh", "Sigmoid", "Tanh", "Tanh"] - kwargs['hidden_size'] = 100 - kwargs['input_forget'] = 0 - lstm_node = helper.make_node('LSTM', ['input', 'w', 'r', 'b'], ['out'], name='lstm', 
**kwargs) + kwargs["direction"] = "bidirectional" + kwargs["activations"] = ["Sigmoid", "Tanh", "Tanh", "Sigmoid", "Tanh", "Tanh"] + kwargs["hidden_size"] = 100 + kwargs["input_forget"] = 0 + lstm_node = helper.make_node("LSTM", ["input", "w", "r", "b"], ["out"], name="lstm", **kwargs) b_value = np.random.randint(2, size=(1)).astype(np.int32) - B_init = helper.make_tensor('B', TensorProto.INT32, [1], b_value.reshape(1).tolist()) - squeeze = onnx.helper.make_node('Squeeze', ['out'], ['D'], name='') - B = helper.make_tensor_value_info('B', TensorProto.INT32, [1]) - node = onnx.helper.make_node('Gather', ['D', 'B'], ['C'], name='') + B_init = helper.make_tensor("B", TensorProto.INT32, [1], b_value.reshape(1).tolist()) + squeeze = onnx.helper.make_node("Squeeze", ["out"], ["D"], name="") + B = helper.make_tensor_value_info("B", TensorProto.INT32, [1]) + node = onnx.helper.make_node("Gather", ["D", "B"], ["C"], name="") e_value = np.random.randint(2, size=(100)).astype(np.float32) - E_init = helper.make_tensor('E', TensorProto.FLOAT, [1, 1, 100], e_value.reshape(100).tolist()) - F = helper.make_tensor_value_info('F', TensorProto.FLOAT, [1, 1, 100]) - add = onnx.helper.make_node('Add', ['C', 'E'], ['F'], name='') - graph = helper.make_graph([lstm_node, squeeze, node, add], 'test_graph_1', [input_tensor], [F], - [B_init, E_init, w_init, r_init, b_init]) - model = helper.make_model(graph, **{'opset_imports': [helper.make_opsetid('', 13)]}) + E_init = helper.make_tensor("E", TensorProto.FLOAT, [1, 1, 100], e_value.reshape(100).tolist()) + F = helper.make_tensor_value_info("F", TensorProto.FLOAT, [1, 1, 100]) + add = onnx.helper.make_node("Add", ["C", "E"], ["F"], name="") + graph = helper.make_graph( + [lstm_node, squeeze, node, add], "test_graph_1", [input_tensor], [F], [B_init, E_init, w_init, r_init, b_init] + ) + model = helper.make_model(graph, **{"opset_imports": [helper.make_opsetid("", 13)]}) return model + def build_conv_model(): initializers = [] - input = helper.make_tensor_value_info('input', TensorProto.FLOAT, [1, 3, 224, 224]) + input = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 224, 224]) conv1_weight_initializer = numpy_helper.from_array( - np.random.randint(-1, 2, [3, 3, 3, 3]).astype(np.float32), name='conv1_weight') - conv1_node = helper.make_node('Conv', ['input', 'conv1_weight'], ['conv1_output'], name='conv1') + np.random.randint(-1, 2, [3, 3, 3, 3]).astype(np.float32), name="conv1_weight" + ) + conv1_node = helper.make_node("Conv", ["input", "conv1_weight"], ["conv1_output"], name="conv1") conv2_weight_initializer = numpy_helper.from_array( - np.random.randint(-1, 2, [5, 3, 3, 3]).astype(np.float32), name='conv2_weight') - conv2_node = helper.make_node('Conv', ['conv1_output', 'conv2_weight'], ['conv2_output'], name='conv2') + np.random.randint(-1, 2, [5, 3, 3, 3]).astype(np.float32), name="conv2_weight" + ) + conv2_node = helper.make_node("Conv", ["conv1_output", "conv2_weight"], ["conv2_output"], name="conv2") conv3_weight_initializer = numpy_helper.from_array( - np.random.randint(-1, 2, [3, 3, 3, 3]).astype(np.float32), name='conv3_weight') - conv3_node = helper.make_node('Conv', ['input', 'conv3_weight'], ['conv3_output'], name='conv3') + np.random.randint(-1, 2, [3, 3, 3, 3]).astype(np.float32), name="conv3_weight" + ) + conv3_node = helper.make_node("Conv", ["input", "conv3_weight"], ["conv3_output"], name="conv3") - avg_args = {'kernel_shape': [3, 3]} - avgpool_node = helper.make_node('AveragePool', ['conv3_output'], ['avg_output'], 
name='AveragePool', **avg_args) + avg_args = {"kernel_shape": [3, 3]} + avgpool_node = helper.make_node("AveragePool", ["conv3_output"], ["avg_output"], name="AveragePool", **avg_args) - concat_node = helper.make_node('Concat', ['avg_output', 'conv2_output'], - ['concat_output'], name='Concat', axis=1) - output = helper.make_tensor_value_info('concat_output', TensorProto.FLOAT, [1, 8, 220, 220]) + concat_node = helper.make_node("Concat", ["avg_output", "conv2_output"], ["concat_output"], name="Concat", axis=1) + output = helper.make_tensor_value_info("concat_output", TensorProto.FLOAT, [1, 8, 220, 220]) initializers = [conv1_weight_initializer, conv2_weight_initializer, conv3_weight_initializer] - graph = helper.make_graph([conv1_node, conv2_node, conv3_node, concat_node, avgpool_node], - 'test', [input], [output], initializer=initializers) + graph = helper.make_graph( + [conv1_node, conv2_node, conv3_node, concat_node, avgpool_node], + "test", + [input], + [output], + initializer=initializers, + ) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) return model + def build_conv_model2(): - input0 = helper.make_tensor_value_info('input0', TensorProto.FLOAT, [1, 3, 1, 3]) - output = helper.make_tensor_value_info('output', TensorProto.FLOAT, [1, 3, 1, 3]) - - X1_weight = generate_input_initializer([3, 3, 1, 1], np.float32, 'X1_weight') - X1_bias = generate_input_initializer([3], np.float32, 'X1_bias') - X3_weight = generate_input_initializer([3, 3, 1, 1], np.float32, 'X3_weight') - X3_bias = generate_input_initializer([3],np.float32, 'X3_bias') - X5_weight = generate_input_initializer([3, 3, 1, 1], np.float32, 'X5_weight') - X5_bias = generate_input_initializer([3],np.float32,'X5_bias') - - relu_node_1 = onnx.helper.make_node('Relu', ['input0'], ['X1'], name='Relu1') - conv_node_1 = onnx.helper.make_node('Conv', ['X1', 'X1_weight', 'X1_bias'], ['X2'], name='Conv1') - relu_node_2 = onnx.helper.make_node('Relu', ['X2'], ['X3'], name= 'Relu2') - conv_node_2 = onnx.helper.make_node('Conv', ['X3', 'X3_weight', 'X3_bias'], ['X4'], name='Conv2') - conv_node_3 = onnx.helper.make_node('Conv', ['X1', 'X5_weight', 'X5_bias'], ['X5'], name='Conv3') - add_node = onnx.helper.make_node('Add', ['X4', 'X5'], ['output'], name='Add') - - graph = helper.make_graph([relu_node_1, conv_node_1, relu_node_2, conv_node_2, conv_node_3, add_node], 'test_graph_1', [input0], [output]) + input0 = helper.make_tensor_value_info("input0", TensorProto.FLOAT, [1, 3, 1, 3]) + output = helper.make_tensor_value_info("output", TensorProto.FLOAT, [1, 3, 1, 3]) + + X1_weight = generate_input_initializer([3, 3, 1, 1], np.float32, "X1_weight") + X1_bias = generate_input_initializer([3], np.float32, "X1_bias") + X3_weight = generate_input_initializer([3, 3, 1, 1], np.float32, "X3_weight") + X3_bias = generate_input_initializer([3], np.float32, "X3_bias") + X5_weight = generate_input_initializer([3, 3, 1, 1], np.float32, "X5_weight") + X5_bias = generate_input_initializer([3], np.float32, "X5_bias") + + relu_node_1 = onnx.helper.make_node("Relu", ["input0"], ["X1"], name="Relu1") + conv_node_1 = onnx.helper.make_node("Conv", ["X1", "X1_weight", "X1_bias"], ["X2"], name="Conv1") + relu_node_2 = onnx.helper.make_node("Relu", ["X2"], ["X3"], name="Relu2") + conv_node_2 = onnx.helper.make_node("Conv", ["X3", "X3_weight", "X3_bias"], ["X4"], name="Conv2") + conv_node_3 = onnx.helper.make_node("Conv", ["X1", "X5_weight", "X5_bias"], ["X5"], name="Conv3") + add_node = onnx.helper.make_node("Add", ["X4", "X5"], 
["output"], name="Add") + + graph = helper.make_graph( + [relu_node_1, conv_node_1, relu_node_2, conv_node_2, conv_node_3, add_node], "test_graph_1", [input0], [output] + ) graph.initializer.add().CopyFrom(X1_weight) graph.initializer.add().CopyFrom(X1_bias) graph.initializer.add().CopyFrom(X3_weight) graph.initializer.add().CopyFrom(X3_bias) graph.initializer.add().CopyFrom(X5_weight) graph.initializer.add().CopyFrom(X5_bias) - model = helper.make_model(graph, **{'opset_imports': [helper.make_opsetid('', 13)]}) + model = helper.make_model(graph, **{"opset_imports": [helper.make_opsetid("", 13)]}) return model + def build_gemm_model(): initializers = [] input_tensor = helper.make_tensor_value_info("input", TensorProto.FLOAT, [-1, 2048]) @@ -568,16 +600,18 @@ def build_gemm_model(): bias_data = np.random.normal(0, 0.1, [10]).astype(np.float32) initializers.append(onnx.numpy_helper.from_array(bias_data, name="bias")) output_tensor = helper.make_tensor_value_info("output", TensorProto.FLOAT, [-1, 10]) - gemm = onnx.helper.make_node("Gemm", ["input", "weight", "bias"], - ["output"], alpha=1.0, beta=1.0, transB=1, name="gemm") + gemm = onnx.helper.make_node( + "Gemm", ["input", "weight", "bias"], ["output"], alpha=1.0, beta=1.0, transB=1, name="gemm" + ) - graph = helper.make_graph([gemm], 'test', [input_tensor], [output_tensor], initializer=initializers) + graph = helper.make_graph([gemm], "test", [input_tensor], [output_tensor], initializer=initializers) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) model.ir_version = 7 return model + def build_benchmark(): - seq = ''' + seq = """ from neural_compressor.experimental import Benchmark from neural_compressor.data import Datasets, DATALOADERS from neural_compressor import conf @@ -615,56 +649,63 @@ def reverse_matrix(x): evaluator.b_dataloader = ext_dataloader evaluator.model = model evaluator('performance') - ''' - with open('benchmark.py', "w", encoding="utf-8") as f: + """ + with open("benchmark.py", "w", encoding="utf-8") as f: f.writelines(seq) + class MatmulDataset: def __init__(self): self.data = [] self.label = [] for i in range(3): - self.data.append(np.random.randn(5,5).astype('float32')) - self.label.append(np.random.randn(5,1).astype('float32')) + self.data.append(np.random.randn(5, 5).astype("float32")) + self.label.append(np.random.randn(5, 1).astype("float32")) def __getitem__(self, idx): return self.data[idx], self.label[idx] def __len__(self): return len(self.data) - + + class DummyNLPDataloader(object): def __init__(self, model_name): self.tokenizer = AutoTokenizer.from_pretrained(model_name) self.sequence_a = "intel-extension-for-transformers is based in SH" self.sequence_b = "Where is intel-extension-for-transformers based? 
NYC or SH" - self.encoded_dict = self.tokenizer(self.sequence_a, self.sequence_b, return_tensors='pt') - self.encoded_dict['labels'] = 1 + self.encoded_dict = self.tokenizer(self.sequence_a, self.sequence_b, return_tensors="pt") + self.encoded_dict["labels"] = 1 self.batch_size = 1 + class DummyNLPDataloader_list(DummyNLPDataloader): def __init__(self, model_name): super().__init__(model_name) - + def __iter__(self): - yield [self.encoded_dict['input_ids'], self.encoded_dict['attention_mask']], self.encoded_dict['labels'] + yield [self.encoded_dict["input_ids"], self.encoded_dict["attention_mask"]], self.encoded_dict["labels"] + class DummyNLPDataloader_dict(DummyNLPDataloader): def __init__(self, model_name): super().__init__(model_name) - + def __iter__(self): - yield {k: v.numpy().tolist() for k, v in self.encoded_dict.items() if k != 'labels'}, self.encoded_dict['labels'] + yield {k: v.numpy().tolist() for k, v in self.encoded_dict.items() if k != "labels"}, self.encoded_dict[ + "labels" + ] + class DummyCVDataset(object): def __init__(self, shape): np.random.seed(9527) self.label = True self.shape = [shape] - self.low = [0.] - self.high = [1.] + self.low = [0.0] + self.high = [1.0] self.dataset = [] - + def __len__(self): return len(self.dataset) @@ -674,31 +715,33 @@ def __getitem__(self, index): return sample, 0 else: return sample - + + class DummyCVDataset_list(DummyCVDataset): def __init__(self, shape): super().__init__(shape) self.process() - + def process(self): for idx in range(0, len(self.shape)): tensor = np.random.uniform(low=self.low[idx], high=self.high[idx], size=self.shape[idx]) tensor = tensor.astype(np.float32) self.dataset.append(tensor) + class DummyCVDataset_dict(DummyCVDataset): def __init__(self, shape): super().__init__(shape) self.process() - + def process(self): for idx in range(0, len(self.shape)): tensor = np.random.uniform(low=self.low[idx], high=self.high[idx], size=self.shape[idx]) tensor = tensor.astype(np.float32) - self.dataset.append({'input': tensor}) + self.dataset.append({"input": tensor}) -class TestAdaptorONNXRT(unittest.TestCase): +class TestAdaptorONNXRT(unittest.TestCase): mb_v2_export_path = "mb_v2.onnx" mb_v2_model = torchvision.models.mobilenet_v2() rn50_export_path = "rn50.onnx" @@ -706,31 +749,31 @@ class TestAdaptorONNXRT(unittest.TestCase): model_name_or_path = "distilbert-base-uncased-finetuned-sst-2-english" distilbert_model = AutoModelForSequenceClassification.from_pretrained( - model_name_or_path, - config=AutoConfig.from_pretrained(model_name_or_path)) + model_name_or_path, config=AutoConfig.from_pretrained(model_name_or_path) + ) distilbert_export_path = "distilbert.onnx" - datasets = Datasets('onnxrt_qlinearops') - cv_dataset = datasets['dummy'](shape=(10, 3, 224, 224), low=0., high=1., label=True) - cv_dataloader = DATALOADERS['onnxrt_qlinearops'](cv_dataset) - - ir3_dataset = datasets['dummy'](shape=(10, 2048), low=0., high=1., label=True) - ir3_dataloader = DATALOADERS['onnxrt_qlinearops'](ir3_dataset) + datasets = Datasets("onnxrt_qlinearops") + cv_dataset = datasets["dummy"](shape=(10, 3, 224, 224), low=0.0, high=1.0, label=True) + cv_dataloader = DATALOADERS["onnxrt_qlinearops"](cv_dataset) + + ir3_dataset = datasets["dummy"](shape=(10, 2048), low=0.0, high=1.0, label=True) + ir3_dataloader = DATALOADERS["onnxrt_qlinearops"](ir3_dataset) - gather_dataset = Datasets('onnxrt_qlinearops')['dummy'](shape=(5, 100, 4), label=True) - gather_dataloader = DATALOADERS['onnxrt_qlinearops'](gather_dataset) + gather_dataset = 
Datasets("onnxrt_qlinearops")["dummy"](shape=(5, 100, 4), label=True) + gather_dataloader = DATALOADERS["onnxrt_qlinearops"](gather_dataset) - ext_dataset = datasets['dummy'](shape=(10, 2), low=0., high=1., label=True) - ext_dataloader = DATALOADERS['onnxrt_qlinearops'](ext_dataset) + ext_dataset = datasets["dummy"](shape=(10, 2), low=0.0, high=1.0, label=True) + ext_dataloader = DATALOADERS["onnxrt_qlinearops"](ext_dataset) - rename_dataset = Datasets('onnxrt_qlinearops')['dummy'](shape=(5, 1, 200), label=True) - rename_dataloader = DATALOADERS['onnxrt_qlinearops'](rename_dataset) + rename_dataset = Datasets("onnxrt_qlinearops")["dummy"](shape=(5, 1, 200), label=True) + rename_dataloader = DATALOADERS["onnxrt_qlinearops"](rename_dataset) matmul_dataset = MatmulDataset() - matmul_dataloader = DATALOADERS['onnxrt_qlinearops'](matmul_dataset) + matmul_dataloader = DATALOADERS["onnxrt_qlinearops"](matmul_dataset) - conv_dataset = Datasets('onnxrt_qlinearops')['dummy'](shape=(10, 3, 1, 3), label=True) - conv_dataloader = DATALOADERS['onnxrt_qlinearops'](conv_dataset) + conv_dataset = Datasets("onnxrt_qlinearops")["dummy"](shape=(10, 3, 1, 3), label=True) + conv_dataloader = DATALOADERS["onnxrt_qlinearops"](conv_dataset) @classmethod def setUpClass(self): @@ -745,7 +788,7 @@ def setUpClass(self): export_onnx_cv_model(self.mb_v2_model, self.mb_v2_export_path) self.mb_v2_model = onnx.load(self.mb_v2_export_path) export_onnx_cv_model(self.rn50_model, self.rn50_export_path, 12) - export_onnx_cv_model(self.rn50_model, 'rn50_9.onnx', 9) + export_onnx_cv_model(self.rn50_model, "rn50_9.onnx", 9) self.rn50_model = onnx.load(self.rn50_export_path) self.ir3_model = build_ir3_model() self.gather_model = build_model_with_gather() @@ -782,11 +825,13 @@ def tearDownClass(self): def test_ext_model(self): import sys - if sys.version_info < (3,10): - os.system("python benchmark.py" ) + + if sys.version_info < (3, 10): + os.system("python benchmark.py") def test_adaptor_register(self): from neural_compressor.adaptor.adaptor import adaptor_registry + def test(): @adaptor_registry class ONNXRT_QLinearOpsAdaptor: @@ -795,33 +840,36 @@ def quantize(self): def evaluate(self): pass + with self.assertRaises(ValueError): test() - @unittest.skipIf(Version(ort.__version__) == Version("1.13.1"), - "This function does not work with ONNX Runtime 1.13.1 for QDQ format quantization of ONNX models.") + @unittest.skipIf( + Version(ort.__version__) == Version("1.13.1"), + "This function does not work with ONNX Runtime 1.13.1 for QDQ format quantization of ONNX models.", + ) def test_inspect_tensor(self): - framework_specific_info = {"device": "cpu", - "approach": "post_training_static_quant", - "random_seed": 1234, - "q_dataloader": None, - "backend": "default", - "format": "default", - "domain": "auto", - "recipes": {}, - "workspace_path": './nc_workspace/{}/{}/'.format( - 'onnxrt', - 'imagenet')} + framework_specific_info = { + "device": "cpu", + "approach": "post_training_static_quant", + "random_seed": 1234, + "q_dataloader": None, + "backend": "default", + "format": "default", + "domain": "auto", + "recipes": {}, + "workspace_path": "./nc_workspace/{}/{}/".format("onnxrt", "imagenet"), + } framework = "onnxrt_qlinearops" adaptor = FRAMEWORKS[framework](framework_specific_info) - adaptor.inspect_tensor(self.rn50_model, self.cv_dataloader, inspect_type='activation') - adaptor.inspect_tensor(self.rn50_model, self.cv_dataloader, inspect_type='activation', save_to_disk=True) - adaptor.inspect_tensor(self.rn50_model, 
self.cv_dataloader, inspect_type='weight') - adaptor.inspect_tensor(self.rn50_model, self.cv_dataloader, inspect_type='all') - adaptor.inspect_tensor(self.rn50_model, self.cv_dataloader, ["Conv_0"], inspect_type='activation') + adaptor.inspect_tensor(self.rn50_model, self.cv_dataloader, inspect_type="activation") + adaptor.inspect_tensor(self.rn50_model, self.cv_dataloader, inspect_type="activation", save_to_disk=True) + adaptor.inspect_tensor(self.rn50_model, self.cv_dataloader, inspect_type="weight") + adaptor.inspect_tensor(self.rn50_model, self.cv_dataloader, inspect_type="all") + adaptor.inspect_tensor(self.rn50_model, self.cv_dataloader, ["Conv_0"], inspect_type="activation") op_list = OrderedDict() op_list[("Conv_0", "Conv")] = None - adaptor.inspect_tensor(self.rn50_model, self.cv_dataloader, op_list.keys(), inspect_type='activation') + adaptor.inspect_tensor(self.rn50_model, self.cv_dataloader, op_list.keys(), inspect_type="activation") for fake_yaml in ["qlinear.yaml", "qdq.yaml"]: quantizer = Quantization(fake_yaml) @@ -834,102 +882,140 @@ def test_inspect_tensor(self): quantizer.strategy.adaptor.inspect_tensor adaptor._pre_optimize(common.Model(self.rn50_model)) opt_model = quantizer.strategy.adaptor.pre_optimized_model - - op_list, _ = quantizer.strategy.adaptor.diagnosis_helper(opt_model, q_model, None, './nc_workspace/recover/') + + op_list, _ = quantizer.strategy.adaptor.diagnosis_helper( + opt_model, q_model, None, "./nc_workspace/recover/" + ) fp32_tensor = quantizer.strategy.adaptor.inspect_tensor(opt_model.model, self.cv_dataloader, op_list) int8_tensor = quantizer.strategy.adaptor.inspect_tensor(q_model.model, self.cv_dataloader, op_list) - self.assertTrue(len(fp32_tensor['activation']) == len(int8_tensor['activation'])) - self.assertTrue(sorted(fp32_tensor['activation'][0].keys()) == sorted(int8_tensor['activation'][0].keys())) + self.assertTrue(len(fp32_tensor["activation"]) == len(int8_tensor["activation"])) + self.assertTrue(sorted(fp32_tensor["activation"][0].keys()) == sorted(int8_tensor["activation"][0].keys())) for op in op_list: - for x, y in zip(fp32_tensor['activation'][0][op].values(), int8_tensor['activation'][0][op].values()): + for x, y in zip(fp32_tensor["activation"][0][op].values(), int8_tensor["activation"][0][op].values()): self.assertTrue(x.shape == y.shape) if fake_yaml == "qlinear.yaml": - fp32_tensor = quantizer.strategy.adaptor.inspect_tensor(opt_model.model, self.cv_dataloader, op_list, inspect_type='weight') - int8_tensor = quantizer.strategy.adaptor.inspect_tensor(q_model.model, self.cv_dataloader, op_list, inspect_type='weight') - self.assertTrue(len(fp32_tensor['weight']) == len(int8_tensor['weight'])) - self.assertTrue(sorted(fp32_tensor['weight'].keys()) == sorted(int8_tensor['weight'].keys())) - ai_onnx_domain = [opset for opset in q_model.model.opset_import if not opset.domain or opset.domain == "ai.onnx"] + fp32_tensor = quantizer.strategy.adaptor.inspect_tensor( + opt_model.model, self.cv_dataloader, op_list, inspect_type="weight" + ) + int8_tensor = quantizer.strategy.adaptor.inspect_tensor( + q_model.model, self.cv_dataloader, op_list, inspect_type="weight" + ) + self.assertTrue(len(fp32_tensor["weight"]) == len(int8_tensor["weight"])) + self.assertTrue(sorted(fp32_tensor["weight"].keys()) == sorted(int8_tensor["weight"].keys())) + ai_onnx_domain = [ + opset for opset in q_model.model.opset_import if not opset.domain or opset.domain == "ai.onnx" + ] if ai_onnx_domain[0].version > 12 or Version(ort.__version__) < Version("1.12.0"): 
- for op in fp32_tensor['weight'].keys(): - self.assertTrue(sorted(fp32_tensor['weight'][op].keys()) == sorted(int8_tensor['weight'][op].keys())) - fp32_tensor = quantizer.strategy.adaptor.inspect_tensor(opt_model.model, self.cv_dataloader, op_list, inspect_type='all') - int8_tensor = quantizer.strategy.adaptor.inspect_tensor(q_model.model, self.cv_dataloader, op_list, inspect_type='all') - self.assertTrue(len(fp32_tensor['weight']) == len(int8_tensor['weight'])) - self.assertTrue(len(fp32_tensor['activation']) == len(int8_tensor['activation'])) - self.assertTrue(sorted(fp32_tensor['weight'].keys()) == sorted(int8_tensor['weight'].keys())) + for op in fp32_tensor["weight"].keys(): + self.assertTrue( + sorted(fp32_tensor["weight"][op].keys()) == sorted(int8_tensor["weight"][op].keys()) + ) + fp32_tensor = quantizer.strategy.adaptor.inspect_tensor( + opt_model.model, self.cv_dataloader, op_list, inspect_type="all" + ) + int8_tensor = quantizer.strategy.adaptor.inspect_tensor( + q_model.model, self.cv_dataloader, op_list, inspect_type="all" + ) + self.assertTrue(len(fp32_tensor["weight"]) == len(int8_tensor["weight"])) + self.assertTrue(len(fp32_tensor["activation"]) == len(int8_tensor["activation"])) + self.assertTrue(sorted(fp32_tensor["weight"].keys()) == sorted(int8_tensor["weight"].keys())) if ai_onnx_domain[0].version > 12 or Version(ort.__version__) < Version("1.12.0"): - for op in fp32_tensor['weight'].keys(): - self.assertTrue(sorted(fp32_tensor['weight'][op].keys()) == sorted(int8_tensor['weight'][op].keys())) - self.assertTrue(sorted(fp32_tensor['activation'][0].keys()) == sorted(int8_tensor['activation'][0].keys())) + for op in fp32_tensor["weight"].keys(): + self.assertTrue( + sorted(fp32_tensor["weight"][op].keys()) == sorted(int8_tensor["weight"][op].keys()) + ) + self.assertTrue( + sorted(fp32_tensor["activation"][0].keys()) == sorted(int8_tensor["activation"][0].keys()) + ) if ai_onnx_domain[0].version > 12 or Version(ort.__version__) < Version("1.12.0"): for op in op_list: - self.assertTrue(sorted(fp32_tensor['activation'][0][op].keys()) == sorted(int8_tensor['activation'][0][op].keys())) - - config = PostTrainingQuantConfig(approach='static', recipes={'gemm_to_matmul': False}) - q_model = quantization.fit(self.gemm_model, config, - calib_dataloader=self.ir3_dataloader) - - fp32_tensor = quantizer.strategy.adaptor.inspect_tensor(self.gemm_model, self.ir3_dataloader, ['gemm'], inspect_type='weight') - int8_tensor = quantizer.strategy.adaptor.inspect_tensor(q_model.model, self.ir3_dataloader, ['gemm'], inspect_type='weight') - self.assertTrue(len(fp32_tensor['weight']) == len(int8_tensor['weight'])) - self.assertTrue(sorted(fp32_tensor['weight'].keys()) == sorted(int8_tensor['weight'].keys())) - + self.assertTrue( + sorted(fp32_tensor["activation"][0][op].keys()) + == sorted(int8_tensor["activation"][0][op].keys()) + ) + + config = PostTrainingQuantConfig(approach="static", recipes={"gemm_to_matmul": False}) + q_model = quantization.fit(self.gemm_model, config, calib_dataloader=self.ir3_dataloader) + + fp32_tensor = quantizer.strategy.adaptor.inspect_tensor( + self.gemm_model, self.ir3_dataloader, ["gemm"], inspect_type="weight" + ) + int8_tensor = quantizer.strategy.adaptor.inspect_tensor( + q_model.model, self.ir3_dataloader, ["gemm"], inspect_type="weight" + ) + self.assertTrue(len(fp32_tensor["weight"]) == len(int8_tensor["weight"])) + self.assertTrue(sorted(fp32_tensor["weight"].keys()) == sorted(int8_tensor["weight"].keys())) + def test_set_tensor(self): - config = 
PostTrainingQuantConfig(approach='static', recipes={'gemm_to_matmul': False, 'graph_optimization_level': 'ENABLE_EXTENDED'}) - q_model = quantization.fit(self.mb_v2_model, config, - calib_dataloader=self.cv_dataloader) - - framework_specific_info = {"device": "cpu", - "approach": "post_training_static_quant", - "random_seed": 1234, - "q_dataloader": None, - "backend": "default", - "format": "default", - "domain": "auto", - "recipes": {}, - "workspace_path": './nc_workspace/{}/{}/'.format( - 'onnxrt', - 'imagenet')} + config = PostTrainingQuantConfig( + approach="static", recipes={"gemm_to_matmul": False, "graph_optimization_level": "ENABLE_EXTENDED"} + ) + q_model = quantization.fit(self.mb_v2_model, config, calib_dataloader=self.cv_dataloader) + + framework_specific_info = { + "device": "cpu", + "approach": "post_training_static_quant", + "random_seed": 1234, + "q_dataloader": None, + "backend": "default", + "format": "default", + "domain": "auto", + "recipes": {}, + "workspace_path": "./nc_workspace/{}/{}/".format("onnxrt", "imagenet"), + } framework = "onnxrt_qlinearops" - adaptor = FRAMEWORKS[framework](framework_specific_info) - q_config = {q_model.nodes()[1].name.split('_quant')[0]: {'weight': {'granularity': 'per_channel', 'dtype': onnx_proto.TensorProto.INT8, 'scheme': 'sym'}}} + adaptor = FRAMEWORKS[framework](framework_specific_info) + q_config = { + q_model.nodes()[1].name.split("_quant")[0]: { + "weight": {"granularity": "per_channel", "dtype": onnx_proto.TensorProto.INT8, "scheme": "sym"} + } + } adaptor.quantize_config = q_config version = get_torch_version() - q_model.save('./best_model.onnx') - ai_onnx_domain = [opset for opset in q_model.model.opset_import if not opset.domain or opset.domain == "ai.onnx"] + q_model.save("./best_model.onnx") + ai_onnx_domain = [ + opset for opset in q_model.model.opset_import if not opset.domain or opset.domain == "ai.onnx" + ] if version >= Version("1.7.0-rc1"): if ai_onnx_domain[0].version > 12 or Version(ort.__version__) < Version("1.12.0"): - adaptor.set_tensor(onnx.load("best_model.onnx"), - {self.mb_v2_model.graph.node[0].input[1]: np.random.random([32, 3, 3, 3])}) - adaptor.set_tensor(q_model, - {self.mb_v2_model.graph.node[0].input[2]: np.random.random([32])}) + adaptor.set_tensor( + onnx.load("best_model.onnx"), + {self.mb_v2_model.graph.node[0].input[1]: np.random.random([32, 3, 3, 3])}, + ) + adaptor.set_tensor(q_model, {self.mb_v2_model.graph.node[0].input[2]: np.random.random([32])}) else: - adaptor.set_tensor(onnx.load("best_model.onnx"), - {self.mb_v2_model.graph.node[0].input[1]: np.random.random([32, 3, 3, 3])}) - adaptor.set_tensor(q_model, - {self.mb_v2_model.graph.node[0].input[2]: np.random.random(1)}) + adaptor.set_tensor( + onnx.load("best_model.onnx"), + {self.mb_v2_model.graph.node[0].input[1]: np.random.random([32, 3, 3, 3])}, + ) + adaptor.set_tensor(q_model, {self.mb_v2_model.graph.node[0].input[2]: np.random.random(1)}) else: if ai_onnx_domain[0].version > 12 or Version(ort.__version__) < Version("1.12.0"): - adaptor.set_tensor(onnx.load("best_model.onnx"), - {'ConvBnFusion_W_features.0.0.weight': np.random.random([32, 3, 3, 3])}) - adaptor.set_tensor(q_model, {'ConvBnFusion_BN_B_features.0.1.bias': np.random.random([32])}) + adaptor.set_tensor( + onnx.load("best_model.onnx"), + {"ConvBnFusion_W_features.0.0.weight": np.random.random([32, 3, 3, 3])}, + ) + adaptor.set_tensor(q_model, {"ConvBnFusion_BN_B_features.0.1.bias": np.random.random([32])}) else: - adaptor.set_tensor(onnx.load("best_model.onnx"), - 
{'ConvBnFusion_W_features.0.0.weight': np.random.random([32, 3, 3, 3])}) - adaptor.set_tensor(q_model, {'ConvBnFusion_BN_B_features.0.1.bias': np.random.random(1)}) + adaptor.set_tensor( + onnx.load("best_model.onnx"), + {"ConvBnFusion_W_features.0.0.weight": np.random.random([32, 3, 3, 3])}, + ) + adaptor.set_tensor(q_model, {"ConvBnFusion_BN_B_features.0.1.bias": np.random.random(1)}) def test_auto_quant(self): - conf.model.framework = 'onnxrt_qlinearops' - conf.quantization.approach = 'post_training_auto_quant' - conf.quantization.optype_wise ={"Add|MatMul|Conv": {'weight': {'algorithm': ['minmax']}, \ - 'activation': {'algorithm': ['minmax']}}} + conf.model.framework = "onnxrt_qlinearops" + conf.quantization.approach = "post_training_auto_quant" + conf.quantization.optype_wise = { + "Add|MatMul|Conv": {"weight": {"algorithm": ["minmax"]}, "activation": {"algorithm": ["minmax"]}} + } conf.quantization.calibration.sampling_size = 1 conf.tuning.exit_policy.timeout = 1000000 conf.tuning.exit_policy.max_trials = 8 - conf.evaluation.accuracy.metric = {'MSE': {'compare_label': False}} + conf.evaluation.accuracy.metric = {"MSE": {"compare_label": False}} quantizer = Quantization(conf) quantizer.calib_dataloader = self.cv_dataloader quantizer.eval_dataloader = self.cv_dataloader @@ -937,7 +1023,7 @@ def test_auto_quant(self): q_model = quantizer.fit() self.assertNotEqual(q_model, None) - conf.model.framework = 'onnxrt_qdq' + conf.model.framework = "onnxrt_qdq" quantizer = Quantization(conf) quantizer.calib_dataloader = self.cv_dataloader quantizer.eval_dataloader = self.cv_dataloader @@ -946,62 +1032,70 @@ def test_auto_quant(self): self.assertNotEqual(q_model, None) def test_auto_quant_v2(self): + from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig, TuningCriterion from neural_compressor.quantization import fit - from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion + tuning_criterion = TuningCriterion(max_trials=8, timeout=10000) accuracy_criterion = AccuracyCriterion(tolerable_loss=0.01) - conf = PostTrainingQuantConfig(quant_level=1, approach="auto", - op_type_dict={"Add|MatMul|Conv": {'weight': {'algorithm': ['minmax']},\ - 'activation': {'algorithm': ['minmax']}}}, - tuning_criterion=tuning_criterion, - accuracy_criterion=accuracy_criterion) + conf = PostTrainingQuantConfig( + quant_level=1, + approach="auto", + op_type_dict={ + "Add|MatMul|Conv": {"weight": {"algorithm": ["minmax"]}, "activation": {"algorithm": ["minmax"]}} + }, + tuning_criterion=tuning_criterion, + accuracy_criterion=accuracy_criterion, + ) conf.framework = "onnxrt_qlinearops" q_model = fit(model=self.rn50_model, conf=conf, calib_dataloader=self.cv_dataloader, eval_func=lambda model: 1) self.assertIsNotNone(q_model) - def test_quantize_data_per_channel(self): from neural_compressor.adaptor.ox_utils.util import quantize_data_per_channel + tensor_value = np.ones([2, 1]) qType = onnx_proto.TensorProto.INT8 scale_value = np.array([1, 1]) zo_value = np.array([0, 0]) - new_tensor_value = quantize_data_per_channel(tensor_value, 1, 254, qType, 'sym') + new_tensor_value = quantize_data_per_channel(tensor_value, 1, 254, qType, "sym") self.assertEqual(tensor_value.all(), new_tensor_value[-1].all()) def test_adaptor(self): from neural_compressor.utils.constant import FP32, INT8_SYM_MINMAX_PERTENSOR, UINT8_ASYM_MINMAX_PERTENSOR - conf.model.framework = 'onnxrt_qlinearops' - conf.quantization.approach = 'post_training_static_quant' + + conf.model.framework = 
"onnxrt_qlinearops" + conf.quantization.approach = "post_training_static_quant" conf.quantization.calibration.sampling_size = 1 - conf.quantization.optype_wise = {'Add': FP32} - conf.quantization.op_wise = {'add': {'weight': INT8_SYM_MINMAX_PERTENSOR, 'activation': UINT8_ASYM_MINMAX_PERTENSOR}} - conf.evaluation.accuracy.metric = {'MSE': {'compare_label': False}} + conf.quantization.optype_wise = {"Add": FP32} + conf.quantization.op_wise = { + "add": {"weight": INT8_SYM_MINMAX_PERTENSOR, "activation": UINT8_ASYM_MINMAX_PERTENSOR} + } + conf.evaluation.accuracy.metric = {"MSE": {"compare_label": False}} quantizer = Quantization(conf) quantizer.calib_dataloader = self.matmul_dataloader quantizer.eval_dataloader = self.matmul_dataloader quantizer.model = self.matmul_model q_model = quantizer.fit() - self.assertTrue('add2' in [i.name for i in q_model.nodes()]) - self.assertTrue('add_quant' in [i.name for i in q_model.nodes()]) + self.assertTrue("add2" in [i.name for i in q_model.nodes()]) + self.assertTrue("add_quant" in [i.name for i in q_model.nodes()]) - conf.quantization.pop('op_wise') - conf.quantization.model_wise = {'weight': INT8_SYM_MINMAX_PERTENSOR} - conf.quantization.optype_wise = {'MatMul': {'weight': {'granularity': ['per_channel']}}} + conf.quantization.pop("op_wise") + conf.quantization.model_wise = {"weight": INT8_SYM_MINMAX_PERTENSOR} + conf.quantization.optype_wise = {"MatMul": {"weight": {"granularity": ["per_channel"]}}} quantizer = Quantization(conf) quantizer.calib_dataloader = self.matmul_dataloader quantizer.eval_dataloader = self.matmul_dataloader quantizer.model = self.matmul_model q_model = quantizer.fit() - self.assertEqual(len([i for i in q_model.initializer() if i.name == 'B_scale'][0].float_data), 2) - - conf.quantization.pop('optype_wise') - conf.quantization.pop('model_wise') + self.assertEqual(len([i for i in q_model.initializer() if i.name == "B_scale"][0].float_data), 2) - conf.model.framework = 'onnxrt_integerops' - conf.quantization.approach = 'post_training_dynamic_quant' + conf.quantization.pop("optype_wise") + conf.quantization.pop("model_wise") + + conf.model.framework = "onnxrt_integerops" + conf.quantization.approach = "post_training_dynamic_quant" conf.quantization.calibration.sampling_size = 1 - conf.evaluation.accuracy.metric = {'MSE': {'compare_label': False}} + conf.evaluation.accuracy.metric = {"MSE": {"compare_label": False}} quantizer = Quantization(conf) quantizer.calib_dataloader = self.rename_dataloader quantizer.eval_dataloader = self.rename_dataloader @@ -1009,15 +1103,15 @@ def test_adaptor(self): q_model = quantizer.fit() self.assertNotEqual(q_model, None) - conf.model.framework = 'onnxrt_integerops' - conf.quantization.approach = 'post_training_dynamic_quant' + conf.model.framework = "onnxrt_integerops" + conf.quantization.approach = "post_training_dynamic_quant" conf.quantization.calibration.sampling_size = 1 - conf.evaluation.accuracy.metric = {'MSE': {'compare_label': False}} + conf.evaluation.accuracy.metric = {"MSE": {"compare_label": False}} quantizer = Quantization(conf) quantizer.calib_dataloader = self.rename_dataloader quantizer.eval_dataloader = self.rename_dataloader - onnx.save(self.rename_model, 'rename_model.onnx') - quantizer.model = 'rename_model.onnx' + onnx.save(self.rename_model, "rename_model.onnx") + quantizer.model = "rename_model.onnx" # force set the model to large model quantizer.model._is_large_model = True q_model = quantizer.fit() @@ -1031,57 +1125,72 @@ def test_adaptor(self): 
self.assertNotEqual(q_model, None) import copy + tmp_model = copy.deepcopy(self.rn50_model) tmp_model.opset_import[0].version = 10 quantizer.model = tmp_model q_model = quantizer.fit() self.assertNotEqual(q_model, None) - tmp_model.opset_import.extend([onnx.helper.make_opsetid("", 11)]) + tmp_model.opset_import.extend([onnx.helper.make_opsetid("", 11)]) quantizer.model = tmp_model q_model = quantizer.fit() self.assertEqual(q_model, None) - model = onnx.load('rn50_9.onnx') + model = onnx.load("rn50_9.onnx") quantizer.model = model q_model = quantizer.fit() self.assertNotEqual(q_model, None) - framework_specific_info = {"device": "cpu", - "approach": "post_training_static_quant", - "random_seed": 1234, - "q_dataloader": None, - "backend": "default", - "format": "default", - "domain": "auto", - "recipes": {}, - "workspace_path": './nc_workspace/{}/{}/'.format( - 'onnxrt', - 'imagenet')} + framework_specific_info = { + "device": "cpu", + "approach": "post_training_static_quant", + "random_seed": 1234, + "q_dataloader": None, + "backend": "default", + "format": "default", + "domain": "auto", + "recipes": {}, + "workspace_path": "./nc_workspace/{}/{}/".format("onnxrt", "imagenet"), + } framework = "onnxrt_qlinearops" - adaptor = FRAMEWORKS[framework](framework_specific_info) - tune_cfg = {'calib_iteration': 1, - 'op': {('gather', 'Gather'): {'activation': {'dtype': ['uint8'], 'quant_mode': 'static'}, - 'weight': {'dtype': ['uint8']}}, - ('add', 'Add'): {'activation': {'dtype': ['uint8'], 'quant_mode': 'static'}, - 'weight': {'dtype': ['int8']}}, - ('squeeze', 'Squeeze'): {'activation': {'dtype': ['uint8'], 'quant_mode': 'static'}, - 'weight': {'dtype': ['int8']}}}} + adaptor = FRAMEWORKS[framework](framework_specific_info) + tune_cfg = { + "calib_iteration": 1, + "op": { + ("gather", "Gather"): { + "activation": {"dtype": ["uint8"], "quant_mode": "static"}, + "weight": {"dtype": ["uint8"]}, + }, + ("add", "Add"): { + "activation": {"dtype": ["uint8"], "quant_mode": "static"}, + "weight": {"dtype": ["int8"]}, + }, + ("squeeze", "Squeeze"): { + "activation": {"dtype": ["uint8"], "quant_mode": "static"}, + "weight": {"dtype": ["int8"]}, + }, + }, + } adaptor.quantize(tune_cfg, common.Model(self.gather_model), self.gather_dataloader) self.assertTrue(len(adaptor.quantizable_ops), 2) - - framework_specific_info['device'] = 'gpu' - framework_specific_info['backend'] = 'onnxrt_cuda_ep' - - tune_cfg = {'calib_iteration': 1, - 'op': {('Matmul', 'MatMul'): {'activation': {'dtype': ['uint8'], 'quant_mode': 'static'}, - 'weight': {'dtype': ['int8']}}, - ('add', 'Add'): {'activation': {'dtype': 'fp16', 'quant_mode': 'static'}, - 'weight': {'dtype': 'fp16'}}, - ('add2', 'Add'): {'activation': {'dtype': 'fp16', 'quant_mode': 'static'}, - 'weight': {'dtype': 'fp16'}}}} - adaptor = FRAMEWORKS[framework](framework_specific_info) + + framework_specific_info["device"] = "gpu" + framework_specific_info["backend"] = "onnxrt_cuda_ep" + + tune_cfg = { + "calib_iteration": 1, + "op": { + ("Matmul", "MatMul"): { + "activation": {"dtype": ["uint8"], "quant_mode": "static"}, + "weight": {"dtype": ["int8"]}, + }, + ("add", "Add"): {"activation": {"dtype": "fp16", "quant_mode": "static"}, "weight": {"dtype": "fp16"}}, + ("add2", "Add"): {"activation": {"dtype": "fp16", "quant_mode": "static"}, "weight": {"dtype": "fp16"}}, + }, + } + adaptor = FRAMEWORKS[framework](framework_specific_info) model = adaptor.quantize(tune_cfg, common.Model(self.matmul_model), self.matmul_dataloader) - self.assertEqual(len([i for i in 
model.model.graph.node if i.op_type == 'Cast']), 2) - + self.assertEqual(len([i for i in model.model.graph.node if i.op_type == "Cast"]), 2) + for fake_yaml in ["gather.yaml"]: quantizer = Quantization(fake_yaml) quantizer.model = self.gather_model @@ -1089,11 +1198,11 @@ def test_adaptor(self): self.assertNotEqual(q_model, None) quantizer.model = self.matmul_model2 - q_model = quantizer.fit() # error input shape test + q_model = quantizer.fit() # error input shape test self.assertEqual(q_model, None) quantizer.eval_dataloader = self.matmul_dataloader - q_model = quantizer.fit() # error input shape test + q_model = quantizer.fit() # error input shape test self.assertEqual(q_model, None) quantizer.calib_dataloader = self.matmul_dataloader @@ -1102,14 +1211,14 @@ def test_adaptor(self): q_model = quantizer.fit() self.assertNotEqual(q_model, None) - quantizer = Quantization('recipe.yaml') + quantizer = Quantization("recipe.yaml") quantizer.model = self.matmul_model quantizer.calib_dataloader = self.matmul_dataloader quantizer.eval_dataloader = self.matmul_dataloader q_model = quantizer.fit() - self.assertTrue('Matmul' in [i.name for i in q_model.nodes()]) + self.assertTrue("Matmul" in [i.name for i in q_model.nodes()]) - quantizer = Quantization('recipe2.yaml') + quantizer = Quantization("recipe2.yaml") quantizer.model = self.conv_model2 quantizer.calib_dataloader = self.conv_dataloader quantizer.eval_dataloader = self.conv_dataloader @@ -1125,7 +1234,8 @@ def test_adaptor(self): self.assertNotEqual(q_model, None) from neural_compressor.utils.utility import recover - model = recover(self.mb_v2_model, './nc_workspace/recover/history.snapshot', 0) + + model = recover(self.mb_v2_model, "./nc_workspace/recover/history.snapshot", 0) self.assertTrue(model.model == q_model.model) for mode in ["accuracy"]: @@ -1136,40 +1246,41 @@ def test_adaptor(self): evaluator(mode) def test_qdq_settings(self): - config = PostTrainingQuantConfig(approach='static', quant_format='QDQ', - recipes={'add_qdq_pair_to_weight': True}) - q_model = quantization.fit(self.ir3_model, config, - calib_dataloader=self.ir3_dataloader) + config = PostTrainingQuantConfig( + approach="static", quant_format="QDQ", recipes={"add_qdq_pair_to_weight": True} + ) + q_model = quantization.fit(self.ir3_model, config, calib_dataloader=self.ir3_dataloader) self.assertNotEqual(q_model, None) - q_model = quantization.fit(self.matmul_model, config, - calib_dataloader=self.matmul_dataloader) + q_model = quantization.fit(self.matmul_model, config, calib_dataloader=self.matmul_dataloader) self.assertNotEqual(q_model, None) - config = PostTrainingQuantConfig(approach='static', quant_format='QDQ', - recipes={'dedicated_qdq_pair': True}) - q_model = quantization.fit(self.conv_model, config, - calib_dataloader=self.cv_dataloader) + config = PostTrainingQuantConfig(approach="static", quant_format="QDQ", recipes={"dedicated_qdq_pair": True}) + q_model = quantization.fit(self.conv_model, config, calib_dataloader=self.cv_dataloader) self.assertNotEqual(q_model, None) - config = PostTrainingQuantConfig(approach='static', quant_format='QDQ', - recipes={'optypes_to_exclude_output_quant': ['Conv']}) - q_model = quantization.fit(self.rn50_model, config, - calib_dataloader=self.cv_dataloader) + config = PostTrainingQuantConfig( + approach="static", quant_format="QDQ", recipes={"optypes_to_exclude_output_quant": ["Conv"]} + ) + q_model = quantization.fit(self.rn50_model, config, calib_dataloader=self.cv_dataloader) self.assertNotEqual(q_model, None) def 
test_lower_is_better_case(self): import time - conf.model.framework = 'onnxrt_qlinearops' - conf.quantization.approach = 'post_training_static_quant' - conf.quantization.model_wise = {'weight': {'granularity': ['per_tensor']}, 'activation': {'granularity': ['per_tensor']}} + + conf.model.framework = "onnxrt_qlinearops" + conf.quantization.approach = "post_training_static_quant" + conf.quantization.model_wise = { + "weight": {"granularity": ["per_tensor"]}, + "activation": {"granularity": ["per_tensor"]}, + } conf.tuning.exit_policy.max_trials = 5 conf.tuning.accuracy_criterion.relative = 0.01 conf.tuning.accuracy_criterion.higher_is_better = False conf.tuning.exit_policy.timeout = 100 - - result = [0., 0.1, 0.1005, 0.102, 0.1002, 0.102, 0.102] + result = [0.0, 0.1, 0.1005, 0.102, 0.1002, 0.102, 0.102] + def sub_eval(model, result): time.sleep(0.001 * len(result)) del result[0] @@ -1179,6 +1290,7 @@ def eval(model): return sub_eval(model, result) from neural_compressor.experimental import Quantization + quantizer = Quantization(conf) quantizer.model = self.matmul_model quantizer.calib_dataloader = self.matmul_dataloader @@ -1187,12 +1299,14 @@ def eval(model): node_names = [i.name for i in q_model.nodes()] # This assert it depends on the number of trials, disables it first. # self.assertTrue('Matmul_quant' in node_names) - # self.assertTrue('add' in node_names) - # self.assertTrue('add2' in node_names) - + # self.assertTrue('add' in node_names) + # self.assertTrue('add2' in node_names) + def test_new_API(self): import time + result = [0.1] + def sub_eval(model, result): time.sleep(0.001 * len(result)) return result[0] @@ -1200,100 +1314,96 @@ def sub_eval(model, result): def eval(model): return sub_eval(model, result) - dataset = Datasets("onnxrt_qdq")["dummy"]([(1,1,5,5), (1,1,5,1)]) + dataset = Datasets("onnxrt_qdq")["dummy"]([(1, 1, 5, 5), (1, 1, 5, 1)]) dataloader = DATALOADERS["onnxrt_qdq"](dataset) - config = PostTrainingQuantConfig(approach='static') - q_model = quantization.fit(self.matmul_model2, config, - calib_dataloader=dataloader, eval_func=eval) - self.assertEqual(len([i for i in q_model.nodes() if i.op_type == 'QLinearMatMul']), 2) - - config = PostTrainingQuantConfig(approach='static', quant_format='QDQ') - q_model = quantization.fit(self.matmul_model, config, - calib_dataloader=self.matmul_dataloader, eval_func=eval) - self.assertTrue('QLinearMatMul' not in [i.op_type for i in q_model.nodes()]) - - config = PostTrainingQuantConfig(approach='static') - q_model = quantization.fit(self.matmul_model, config, - calib_dataloader=self.matmul_dataloader, eval_func=eval) - self.assertTrue('QLinearMatMul' in [i.op_type for i in q_model.nodes()]) - - config = PostTrainingQuantConfig(approach='dynamic') - q_model = quantization.fit(self.matmul_model, config, - calib_dataloader=self.matmul_dataloader, eval_func=eval) - self.assertTrue('MatMulInteger' in [i.op_type for i in q_model.nodes()]) - - config = PostTrainingQuantConfig(approach='dynamic', quant_format='QDQ') - q_model = quantization.fit(self.matmul_model, config, - calib_dataloader=self.matmul_dataloader, eval_func=eval) - self.assertTrue('MatMulInteger' in [i.op_type for i in q_model.nodes()]) - - config = PostTrainingQuantConfig(approach='static', backend='onnxrt_trt_ep', device='gpu') - q_model = quantization.fit(self.matmul_model, config, - calib_dataloader=self.matmul_dataloader, eval_func=eval) - self.assertTrue('QLinearMatMul' not in [i.op_type for i in q_model.nodes()]) - - config = 
PostTrainingQuantConfig(approach='static', backend='onnxrt_cuda_ep', device='gpu', quant_level=1) - q_model = quantization.fit(self.distilbert_model, config, + config = PostTrainingQuantConfig(approach="static") + q_model = quantization.fit(self.matmul_model2, config, calib_dataloader=dataloader, eval_func=eval) + self.assertEqual(len([i for i in q_model.nodes() if i.op_type == "QLinearMatMul"]), 2) + + config = PostTrainingQuantConfig(approach="static", quant_format="QDQ") + q_model = quantization.fit(self.matmul_model, config, calib_dataloader=self.matmul_dataloader, eval_func=eval) + self.assertTrue("QLinearMatMul" not in [i.op_type for i in q_model.nodes()]) + + config = PostTrainingQuantConfig(approach="static") + q_model = quantization.fit(self.matmul_model, config, calib_dataloader=self.matmul_dataloader, eval_func=eval) + self.assertTrue("QLinearMatMul" in [i.op_type for i in q_model.nodes()]) + + config = PostTrainingQuantConfig(approach="dynamic") + q_model = quantization.fit(self.matmul_model, config, calib_dataloader=self.matmul_dataloader, eval_func=eval) + self.assertTrue("MatMulInteger" in [i.op_type for i in q_model.nodes()]) + + config = PostTrainingQuantConfig(approach="dynamic", quant_format="QDQ") + q_model = quantization.fit(self.matmul_model, config, calib_dataloader=self.matmul_dataloader, eval_func=eval) + self.assertTrue("MatMulInteger" in [i.op_type for i in q_model.nodes()]) + + config = PostTrainingQuantConfig(approach="static", backend="onnxrt_trt_ep", device="gpu") + q_model = quantization.fit(self.matmul_model, config, calib_dataloader=self.matmul_dataloader, eval_func=eval) + self.assertTrue("QLinearMatMul" not in [i.op_type for i in q_model.nodes()]) + + config = PostTrainingQuantConfig(approach="static", backend="onnxrt_cuda_ep", device="gpu", quant_level=1) + q_model = quantization.fit( + self.distilbert_model, + config, calib_dataloader=DummyNLPDataloader_dict("distilbert-base-uncased-finetuned-sst-2-english"), - eval_func=eval) - self.assertTrue('QLinearMatMul' in [i.op_type for i in q_model.nodes()]) + eval_func=eval, + ) + self.assertTrue("QLinearMatMul" in [i.op_type for i in q_model.nodes()]) - config = PostTrainingQuantConfig(approach='static', recipes={'optypes_to_exclude_output_quant': ['MatMul']}) - q_model = quantization.fit(self.matmul_model, config, - calib_dataloader=self.matmul_dataloader, eval_func=eval) - self.assertTrue('MatMulIntegerToFloat' in [i.op_type for i in q_model.nodes()]) + config = PostTrainingQuantConfig(approach="static", recipes={"optypes_to_exclude_output_quant": ["MatMul"]}) + q_model = quantization.fit(self.matmul_model, config, calib_dataloader=self.matmul_dataloader, eval_func=eval) + self.assertTrue("MatMulIntegerToFloat" in [i.op_type for i in q_model.nodes()]) - dataset = Datasets("onnxrt_qdq")["dummy"]((1,1), low=0., high=0., dtype='int64') + dataset = Datasets("onnxrt_qdq")["dummy"]((1, 1), low=0.0, high=0.0, dtype="int64") dataloader = DATALOADERS["onnxrt_qdq"](dataset) config = PostTrainingQuantConfig() - q_model = quantization.fit(self.gather_matmul_model, config, - calib_dataloader=dataloader, eval_func=eval) + q_model = quantization.fit(self.gather_matmul_model, config, calib_dataloader=dataloader, eval_func=eval) - config = PostTrainingQuantConfig(quant_format='QDQ') - q_model2 = quantization.fit(self.gather_matmul_model, config, - calib_dataloader=dataloader, eval_func=eval) + config = PostTrainingQuantConfig(quant_format="QDQ") + q_model2 = quantization.fit(self.gather_matmul_model, config, 
calib_dataloader=dataloader, eval_func=eval) - sess1 = ort.InferenceSession(q_model.model.SerializeToString(), providers=['CPUExecutionProvider']) - sess2 = ort.InferenceSession(q_model2.model.SerializeToString(), providers=['CPUExecutionProvider']) + sess1 = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"]) + sess2 = ort.InferenceSession(q_model2.model.SerializeToString(), providers=["CPUExecutionProvider"]) for data, _ in dataloader: - output1 = sess1.run(None, {'input0': data}) - output2 = sess2.run(None, {'input0': data}) + output1 = sess1.run(None, {"input0": data}) + output2 = sess2.run(None, {"input0": data}) self.assertAlmostEqual(output1[0][0], output2[0][0]) def test_smooth_quant(self): - config = PostTrainingQuantConfig(approach='static', recipes={'smooth_quant': True, \ - 'smooth_quant_args': {'alpha': 0.5}}) - q_model = quantization.fit(self.conv_model, config, - calib_dataloader=self.cv_dataloader) - self.assertEqual(len([i for i in q_model.nodes() if i.op_type == 'Mul']), 2) + config = PostTrainingQuantConfig( + approach="static", recipes={"smooth_quant": True, "smooth_quant_args": {"alpha": 0.5}} + ) + q_model = quantization.fit(self.conv_model, config, calib_dataloader=self.cv_dataloader) + self.assertEqual(len([i for i in q_model.nodes() if i.op_type == "Mul"]), 2) def test_smooth_quant_args(self): from neural_compressor.model.onnx_model import ONNXModel - framework_specific_info = {"device": "cpu", - "approach": "post_training_static_quant", - "random_seed": 1234, - "q_dataloader": None, - "backend": "default", - "format": "default", - "domain": "auto", - "recipes": {}, - "workspace_path": './nc_workspace/{}/{}/'.format( - 'onnxrt', - 'imagenet')} + + framework_specific_info = { + "device": "cpu", + "approach": "post_training_static_quant", + "random_seed": 1234, + "q_dataloader": None, + "backend": "default", + "format": "default", + "domain": "auto", + "recipes": {}, + "workspace_path": "./nc_workspace/{}/{}/".format("onnxrt", "imagenet"), + } framework = "onnxrt_qlinearops" adaptor = FRAMEWORKS[framework](framework_specific_info) adaptor.pre_optimized_model = ONNXModel(self.conv_model) # tune_cfg was removed, not need to set it to None # adaptor.smooth_quant(self.conv_model, self.cv_dataloader, 1, None, scales_per_op=False) adaptor.smooth_quant(self.conv_model, self.cv_dataloader, 1, scales_per_op=False) - self.assertEqual(len([i for i in adaptor.pre_optimized_model.nodes() if i.op_type == 'Mul']), 1) - + self.assertEqual(len([i for i in adaptor.pre_optimized_model.nodes() if i.op_type == "Mul"]), 1) + def test_multi_metrics(self): - conf.model.framework = 'onnxrt_qlinearops' - conf.quantization.approach = 'post_training_static_quant' - conf.evaluation.accuracy.multi_metrics = {'Accuracy': {}, 'MSE': {'compare_label': False}} - conf.evaluation.accuracy.pop('metric', None) + conf.model.framework = "onnxrt_qlinearops" + conf.quantization.approach = "post_training_static_quant" + conf.evaluation.accuracy.multi_metrics = {"Accuracy": {}, "MSE": {"compare_label": False}} + conf.evaluation.accuracy.pop("metric", None) from neural_compressor.experimental import Quantization + quantizer = Quantization(conf) quantizer.eval_dataloader = self.cv_dataloader quantizer.calib_dataloader = self.cv_dataloader @@ -1302,9 +1412,13 @@ def test_multi_metrics(self): self.assertNotEqual(q_model, None) conf.evaluation.accuracy.multi_metrics = { - 'Accuracy': {}, 'MSE': {'compare_label': False}, 'higher_is_better': [False, False]} + "Accuracy": {}, + 
"MSE": {"compare_label": False}, + "higher_is_better": [False, False], + } conf.tuning.exit_policy.max_trials = 1 from neural_compressor.experimental import Quantization + quantizer = Quantization(conf) quantizer.eval_dataloader = self.cv_dataloader quantizer.calib_dataloader = self.cv_dataloader @@ -1314,9 +1428,9 @@ def test_multi_metrics(self): conf.tuning.accuracy_criterion.relative = 0.01 conf.tuning.accuracy_criterion.higher_is_better = True - conf.evaluation.accuracy.multi_metrics = { - 'Accuracy': {}, 'MSE': {'compare_label': False}, 'weight': [0.5, 0.5]} + conf.evaluation.accuracy.multi_metrics = {"Accuracy": {}, "MSE": {"compare_label": False}, "weight": [0.5, 0.5]} from neural_compressor.experimental import Quantization + quantizer = Quantization(conf) quantizer.eval_dataloader = self.cv_dataloader quantizer.calib_dataloader = self.cv_dataloader @@ -1325,9 +1439,13 @@ def test_multi_metrics(self): self.assertNotEqual(q_model, None) conf.evaluation.accuracy.multi_metrics = { - 'Accuracy': {}, 'MSE': {'compare_label': False}, 'weight': [0.5, 0.5], - 'higher_is_better': [False, False]} + "Accuracy": {}, + "MSE": {"compare_label": False}, + "weight": [0.5, 0.5], + "higher_is_better": [False, False], + } from neural_compressor.experimental import Quantization + quantizer = Quantization(conf) quantizer.eval_dataloader = self.cv_dataloader quantizer.calib_dataloader = self.cv_dataloader @@ -1336,11 +1454,15 @@ def test_multi_metrics(self): self.assertNotEqual(q_model, None) conf.evaluation.accuracy.multi_metrics = { - 'Accuracy': {}, 'MSE': {'compare_label': False}, 'weight': [0.5, 0.5], - 'higher_is_better': [False, False]} + "Accuracy": {}, + "MSE": {"compare_label": False}, + "weight": [0.5, 0.5], + "higher_is_better": [False, False], + } conf.tuning.accuracy_criterion.higher_is_better = False conf.tuning.exit_policy.max_trials = 2 from neural_compressor.experimental import Quantization + quantizer = Quantization(conf) quantizer.eval_dataloader = self.cv_dataloader quantizer.calib_dataloader = self.cv_dataloader @@ -1349,7 +1471,9 @@ def test_multi_metrics(self): self.assertEqual(q_model, None) import time - result = [[0., 0.], [0., 0.], [0., 122.]] + + result = [[0.0, 0.0], [0.0, 0.0], [0.0, 122.0]] + def sub_eval(model, result): time.sleep(0.001 * len(result)) del result[0] @@ -1359,10 +1483,14 @@ def eval(model): return sub_eval(model, result) conf.evaluation.accuracy.multi_metrics = { - 'Accuracy': {}, 'MSE': {'compare_label': False}, 'higher_is_better': [False, False]} + "Accuracy": {}, + "MSE": {"compare_label": False}, + "higher_is_better": [False, False], + } conf.tuning.exit_policy.max_trials = 1 - conf.tuning.accuracy_criterion = {'absolute': 0.01, 'higher_is_better': False} + conf.tuning.accuracy_criterion = {"absolute": 0.01, "higher_is_better": False} from neural_compressor.experimental import Quantization + quantizer = Quantization(conf) quantizer.eval_func = eval quantizer.calib_dataloader = self.cv_dataloader @@ -1372,12 +1500,11 @@ def eval(model): def test_calibrator(self): from neural_compressor.adaptor.ox_utils.calibrator import CALIBRATOR - regular_data = [np.arange(15).reshape(3,5).astype('float32'), - np.arange(15).reshape(3,5).astype('float32')] - irregular_data = [np.arange(10).reshape(2,5).astype('float32'), - np.arange(5).reshape(1,5).astype('float32')] - - calibrator = CALIBRATOR['minmax']() + + regular_data = [np.arange(15).reshape(3, 5).astype("float32"), np.arange(15).reshape(3, 5).astype("float32")] + irregular_data = [np.arange(10).reshape(2, 
5).astype("float32"), np.arange(5).reshape(1, 5).astype("float32")] + + calibrator = CALIBRATOR["minmax"]() calibrator.collect(irregular_data) res = calibrator.calib_range self.assertEqual(res[0], np.array(0.0).astype(np.float32)) @@ -1392,7 +1519,7 @@ def test_calibrator(self): self.assertIsNone(res[1]) del calibrator - calibrator = CALIBRATOR['kl']() + calibrator = CALIBRATOR["kl"]() calibrator.collect(irregular_data) res = calibrator.calib_range self.assertEqual(res[0], np.array(0.0).astype(np.float32)) @@ -1407,7 +1534,7 @@ def test_calibrator(self): self.assertIsNone(res[1]) del calibrator - calibrator = CALIBRATOR['percentile']() + calibrator = CALIBRATOR["percentile"]() calibrator.collect(irregular_data) res = calibrator.calib_range self.assertEqual(res[0], np.array(0.0).astype(np.float32)) @@ -1423,33 +1550,33 @@ def test_calibrator(self): del calibrator def test_query_block_info(self): - framework_specific_info = {"device": "cpu", - "approach": "post_training_static_quant", - "random_seed": 1234, - "q_dataloader": None, - "backend": "default", - "format": "default", - "domain": "auto", - "recipes": {}, - "workspace_path": './nc_workspace/{}/{}/'.format( - 'onnxrt', - 'imagenet')} + framework_specific_info = { + "device": "cpu", + "approach": "post_training_static_quant", + "random_seed": 1234, + "q_dataloader": None, + "backend": "default", + "format": "default", + "domain": "auto", + "recipes": {}, + "workspace_path": "./nc_workspace/{}/{}/".format("onnxrt", "imagenet"), + } framework = "onnxrt_qlinearops" adaptor = FRAMEWORKS[framework](framework_specific_info) q_capability = adaptor.query_fw_capability(Model(self.distilbert_model)) - self.assertEqual(len(q_capability['block_wise']), 6) + self.assertEqual(len(q_capability["block_wise"]), 6) def test_dataloader_input(self): - cv_dataloader = DataLoader(framework='onnxruntime', dataset=DummyCVDataset_list(shape=(3, 224, 224))) - quantizer = Quantization('qlinear.yaml') + cv_dataloader = DataLoader(framework="onnxruntime", dataset=DummyCVDataset_list(shape=(3, 224, 224))) + quantizer = Quantization("qlinear.yaml") quantizer.calib_dataloader = cv_dataloader quantizer.eval_dataloader = cv_dataloader quantizer.model = self.rn50_model q_model = quantizer.fit() self.assertNotEqual(q_model, None) - cv_dataloader = DataLoader(framework='pytorch', dataset=DummyCVDataset_dict(shape=(3, 224, 224))) - quantizer = Quantization('qlinear.yaml') + cv_dataloader = DataLoader(framework="pytorch", dataset=DummyCVDataset_dict(shape=(3, 224, 224))) + quantizer = Quantization("qlinear.yaml") quantizer.calib_dataloader = cv_dataloader quantizer.eval_dataloader = cv_dataloader quantizer.model = self.rn50_model @@ -1457,37 +1584,44 @@ def test_dataloader_input(self): self.assertNotEqual(q_model, None) nlp_dataloader = DummyNLPDataloader_list("distilbert-base-uncased-finetuned-sst-2-english") - quantizer = Quantization('qlinear.yaml') + quantizer = Quantization("qlinear.yaml") quantizer.calib_dataloader = nlp_dataloader quantizer.eval_dataloader = nlp_dataloader quantizer.model = self.distilbert_model q_model = quantizer.fit() self.assertNotEqual(q_model, None) - + nlp_dataloader = DummyNLPDataloader_dict("distilbert-base-uncased-finetuned-sst-2-english") - quantizer = Quantization('qlinear.yaml') + quantizer = Quantization("qlinear.yaml") quantizer.calib_dataloader = nlp_dataloader quantizer.eval_dataloader = nlp_dataloader quantizer.model = self.distilbert_model q_model = quantizer.fit() self.assertNotEqual(q_model, None) - 
@patch('logging.Logger.warning') + @patch("logging.Logger.warning") def test_backend(self, mock_warning): - framework_specific_info = {"device": "cpu", - "backend": "test_backend", - "approach": "post_training_static_quant", - "workspace_path": './nc_workspace'} + framework_specific_info = { + "device": "cpu", + "backend": "test_backend", + "approach": "post_training_static_quant", + "workspace_path": "./nc_workspace", + } framework = "onnxrt_qlinearops" with self.assertRaises(AssertionError) as context: - adaptor = FRAMEWORKS[framework](framework_specific_info) - self.assertEqual(str(context.exception), "'test_backend' backend is not supported, "\ - "supported backends include ['default', 'onnxrt_trt_ep', 'onnxrt_dnnl_ep', 'onnxrt_cuda_ep', 'onnxrt_dml_ep']") - - framework_specific_info = {"device": "cpu", - "backend": "onnxrt_trt_ep", - "approach": "post_training_static_quant", - "workspace_path": './nc_workspace'} + adaptor = FRAMEWORKS[framework](framework_specific_info) + self.assertEqual( + str(context.exception), + "'test_backend' backend is not supported, " + "supported backends include ['default', 'onnxrt_trt_ep', 'onnxrt_dnnl_ep', 'onnxrt_cuda_ep', 'onnxrt_dml_ep']", + ) + + framework_specific_info = { + "device": "cpu", + "backend": "onnxrt_trt_ep", + "approach": "post_training_static_quant", + "workspace_path": "./nc_workspace", + } framework = "onnxrt_qlinearops" adaptor = FRAMEWORKS[framework](framework_specific_info) diff --git a/test/adaptor/onnxrt_adaptor/test_onnxrt_augment.py b/test/adaptor/onnxrt_adaptor/test_onnxrt_augment.py index 5653fc6c0b7..e65a4dafdfb 100644 --- a/test/adaptor/onnxrt_adaptor/test_onnxrt_augment.py +++ b/test/adaptor/onnxrt_adaptor/test_onnxrt_augment.py @@ -2,81 +2,77 @@ import shutil import sys import unittest + import numpy as np import onnx -from onnx import helper, TensorProto, numpy_helper - +from onnx import TensorProto, helper, numpy_helper -sys.path.append('..') -from neural_compressor.experimental.data.datasets.dataset import Dataset +sys.path.append("..") from neural_compressor.adaptor.ox_utils.calibration import ONNXRTAugment +from neural_compressor.data import DATALOADERS, Datasets +from neural_compressor.experimental.data.datasets.dataset import Dataset from neural_compressor.model.onnx_model import ONNXModel -from neural_compressor.data import Datasets, DATALOADERS + def generate_input_initializer(tensor_shape, tensor_dtype, input_name): - ''' - Helper function to generate initializers for test inputs - ''' + """Helper function to generate initializers for test inputs.""" tensor = np.random.ranf(tensor_shape).astype(tensor_dtype) init = numpy_helper.from_array(tensor, input_name) - return init + return init + def create_cv_session(): - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 1, 5, 5]) - B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1, 1, 3, 3]) + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 1, 5, 5]) + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 1, 3, 3]) b_value = np.random.randn(1, 1, 3, 3).astype(np.float32) - B_init = helper.make_tensor('B', TensorProto.FLOAT, [1, 1, 3, 3], - b_value.reshape(9).tolist()) - D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [1, 1, 5, 5]) - conv_node = onnx.helper.make_node('Conv', ['A', 'B'], ['C'], - name='conv', - kernel_shape=[3, 3], - pads=[1, 1, 1, 1]) - relu_node = onnx.helper.make_node('Relu', ['C'], ['D'], name='relu') - graph = helper.make_graph([conv_node, relu_node], 'test_graph_1', [A, B], [D], [B_init]) - 
model = helper.make_model(graph, **{'opset_imports': [helper.make_opsetid('', 13)]}) + B_init = helper.make_tensor("B", TensorProto.FLOAT, [1, 1, 3, 3], b_value.reshape(9).tolist()) + D = helper.make_tensor_value_info("D", TensorProto.FLOAT, [1, 1, 5, 5]) + conv_node = onnx.helper.make_node("Conv", ["A", "B"], ["C"], name="conv", kernel_shape=[3, 3], pads=[1, 1, 1, 1]) + relu_node = onnx.helper.make_node("Relu", ["C"], ["D"], name="relu") + graph = helper.make_graph([conv_node, relu_node], "test_graph_1", [A, B], [D], [B_init]) + model = helper.make_model(graph, **{"opset_imports": [helper.make_opsetid("", 13)]}) dataset = TestDataset2() - dataloader = DATALOADERS['onnxrt_qlinearops'](dataset) + dataloader = DATALOADERS["onnxrt_qlinearops"](dataset) return model, dataloader + def create_nlp_session(): a_value = np.random.randn(100, 4).astype(np.float32) - A_init = helper.make_tensor('A', TensorProto.FLOAT, [100, 4], - a_value.reshape(400).tolist()) + A_init = helper.make_tensor("A", TensorProto.FLOAT, [100, 4], a_value.reshape(400).tolist()) b_value = np.random.randint(2, size=(10)).astype(np.int32) - B_init = helper.make_tensor('B', TensorProto.INT32, [10], - b_value.reshape(10).tolist()) - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 100, 4]) - D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [100, 4]) - squeeze = onnx.helper.make_node('Squeeze', ['A'], ['D'], name='squeeze') - B = helper.make_tensor_value_info('B', TensorProto.INT32, [10]) - C = helper.make_tensor_value_info('C', TensorProto.FLOAT, [10, 4]) - node = onnx.helper.make_node('Gather', ['D', 'B'], ['C'], name='gather') - graph = helper.make_graph([squeeze, node], 'test_graph_1', [A], [C], [B_init]) - model = helper.make_model(graph, **{'opset_imports': [helper.make_opsetid('', 13)]}) - datasets = Datasets('onnxrt_qlinearops') - dataset = datasets['dummy_v2'](input_shape=(100, 4), label_shape=(1,)) - - dataloader = DATALOADERS['onnxrt_qlinearops'](dataset) - return model, dataloader + B_init = helper.make_tensor("B", TensorProto.INT32, [10], b_value.reshape(10).tolist()) + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 100, 4]) + D = helper.make_tensor_value_info("D", TensorProto.FLOAT, [100, 4]) + squeeze = onnx.helper.make_node("Squeeze", ["A"], ["D"], name="squeeze") + B = helper.make_tensor_value_info("B", TensorProto.INT32, [10]) + C = helper.make_tensor_value_info("C", TensorProto.FLOAT, [10, 4]) + node = onnx.helper.make_node("Gather", ["D", "B"], ["C"], name="gather") + graph = helper.make_graph([squeeze, node], "test_graph_1", [A], [C], [B_init]) + model = helper.make_model(graph, **{"opset_imports": [helper.make_opsetid("", 13)]}) + datasets = Datasets("onnxrt_qlinearops") + dataset = datasets["dummy_v2"](input_shape=(100, 4), label_shape=(1,)) + + dataloader = DATALOADERS["onnxrt_qlinearops"](dataset) + return model, dataloader + class TestDataset(Dataset): """Configuration for Imagenet dataset.""" def __init__(self): data_list = [] - data_list.append((np.array([[[[0.45,0.60,0.75]], - [[0.25,0.50,0.75]], - [[0.90,0.70,0.50]]]]).astype(np.float32), 0)) - data_list.append((np.array([[[[0.62,0.94,0.38]], - [[0.70,0.13,0.07]], - [[0.89,0.75,0.84]]]]).astype(np.float32), 0)) - data_list.append((np.array([[[[0.64,0.24,0.97]], - [[0.82,0.58,0.27]], - [[0.019,0.34,0.02]]]]).astype(np.float32), 0)) + data_list.append( + (np.array([[[[0.45, 0.60, 0.75]], [[0.25, 0.50, 0.75]], [[0.90, 0.70, 0.50]]]]).astype(np.float32), 0) + ) + data_list.append( + (np.array([[[[0.62, 0.94, 0.38]], 
[[0.70, 0.13, 0.07]], [[0.89, 0.75, 0.84]]]]).astype(np.float32), 0) + ) + data_list.append( + (np.array([[[[0.64, 0.24, 0.97]], [[0.82, 0.58, 0.27]], [[0.019, 0.34, 0.02]]]]).astype(np.float32), 0) + ) self.data_list = data_list - + def __len__(self): return len(self.data_list) @@ -84,14 +80,15 @@ def __getitem__(self, index): data = self.data_list[index] return data + class TestDataset2(Dataset): """Configuration for Imagenet dataset.""" def __init__(self): data_list = [] - data_list.append(np.random.random([1,5,5]).astype(np.float32)) - data_list.append(np.random.random([1,5,5]).astype(np.float32)) - data_list.append(np.random.random([1,5,5]).astype(np.float32)) + data_list.append(np.random.random([1, 5, 5]).astype(np.float32)) + data_list.append(np.random.random([1, 5, 5]).astype(np.float32)) + data_list.append(np.random.random([1, 5, 5]).astype(np.float32)) self.data_list = data_list def __len__(self): @@ -101,10 +98,10 @@ def __getitem__(self, index): data = self.data_list[index] return data, 0 + class TestAugment(unittest.TestCase): + work_space = "./onnxrt_calib_test" - work_space = './onnxrt_calib_test' - @classmethod def setUpClass(cls): os.makedirs(cls.work_space) @@ -117,11 +114,7 @@ def tearDownClass(cls): def test_dump_tensor(self): model, dataloader = self.cv_session - augment = ONNXRTAugment(ONNXModel(model), - dataloader, - [], - iterations=[0, 1], - white_nodes=["conv"]) + augment = ONNXRTAugment(ONNXModel(model), dataloader, [], iterations=[0, 1], white_nodes=["conv"]) map_dumped_tensors = augment.dump_tensor() assert "conv" in map_dumped_tensors["activation"][0] assert "A" in map_dumped_tensors["activation"][0]["conv"] @@ -129,65 +122,55 @@ def test_dump_tensor(self): assert "A" in map_dumped_tensors["activation"][1]["conv"] model, dataloader = self.cv_session - augment = ONNXRTAugment(ONNXModel(model), - dataloader, - [], - iterations=[0], - white_nodes=["conv", "relu"]) + augment = ONNXRTAugment(ONNXModel(model), dataloader, [], iterations=[0], white_nodes=["conv", "relu"]) map_dumped_tensors = augment.dump_tensor(weight=True) assert "conv" in map_dumped_tensors["activation"][0] assert "relu" in map_dumped_tensors["activation"][0] assert "conv" in map_dumped_tensors["weight"] model, dataloader = self.nlp_session - augment = ONNXRTAugment(ONNXModel(model), - dataloader, - [], - iterations=[0], - white_nodes=["gather"]) + augment = ONNXRTAugment(ONNXModel(model), dataloader, [], iterations=[0], white_nodes=["gather"]) map_dumped_tensors = augment.dump_tensor() assert "gather" in map_dumped_tensors["activation"][0] def test_dump_calibration(self): model, dataloader = self.cv_session - augment = ONNXRTAugment(ONNXModel(model), - dataloader, - ["Conv", "Relu"], - iterations=[0]) + augment = ONNXRTAugment(ONNXModel(model), dataloader, ["Conv", "Relu"], iterations=[0]) calib_params = augment.dump_calibration({}) assert "A" in calib_params and "B" in calib_params and "D" in calib_params and "C" in calib_params def test_augment_graph(self): + """TEST_CONFIG_1.""" - ''' TEST_CONFIG_1''' - - # Conv - # | + # Conv + # | # Clip - # | + # | # MatMul - - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 1, 5, 5]) - B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1, 1, 3, 3]) - E = helper.make_tensor_value_info('E', TensorProto.FLOAT, [1, 1, 5, 1]) - F = helper.make_tensor_value_info('F', TensorProto.FLOAT, [1, 1, 5, 1]) - conv_node = onnx.helper.make_node('Conv', ['A', 'B'], ['C'], name='Conv', kernel_shape=[3, 3], pads=[1, 1, 1, 1]) - clip_node = 
onnx.helper.make_node('Clip', ['C'], ['D'], name='Clip') - matmul_node = onnx.helper.make_node('MatMul', ['D', 'E'], ['F'], name='MatMul') - graph = helper.make_graph([conv_node, clip_node, matmul_node], 'test_graph_1', [A, B, E], [F]) + + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 1, 5, 5]) + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 1, 3, 3]) + E = helper.make_tensor_value_info("E", TensorProto.FLOAT, [1, 1, 5, 1]) + F = helper.make_tensor_value_info("F", TensorProto.FLOAT, [1, 1, 5, 1]) + conv_node = onnx.helper.make_node( + "Conv", ["A", "B"], ["C"], name="Conv", kernel_shape=[3, 3], pads=[1, 1, 1, 1] + ) + clip_node = onnx.helper.make_node("Clip", ["C"], ["D"], name="Clip") + matmul_node = onnx.helper.make_node("MatMul", ["D", "E"], ["F"], name="MatMul") + graph = helper.make_graph([conv_node, clip_node, matmul_node], "test_graph_1", [A, B, E], [F]) model = helper.make_model(graph) # Augmenting graph data_reader = None - augment = ONNXRTAugment(ONNXModel(model), data_reader, ['Conv', 'MatMul']) + augment = ONNXRTAugment(ONNXModel(model), data_reader, ["Conv", "MatMul"]) augment.augment_graph() augmented_model = augment.augmented_model # Checking if output exists augmented_model_node_names = [node.name for node in augmented_model.graph.node] augmented_model_outputs = [output.name for output in augmented_model.graph.output] - added_node_names = ['Conv', 'Clip', 'MatMul'] - added_outputs = ['A', 'B', 'C', 'D', 'E', 'F'] + added_node_names = ["Conv", "Clip", "MatMul"] + added_outputs = ["A", "B", "C", "D", "E", "F"] # Original 3 nodes (exlude graph input/output) self.assertEqual(len(augmented_model_node_names), 3) # Original 1 graph output + 5 intermediate outputs @@ -197,35 +180,40 @@ def test_augment_graph(self): for output in added_outputs: self.assertTrue(output in augmented_model_outputs) - print('Finished TEST_CONFIG_1') - - - '''TEST_CONFIG_2''' + print("Finished TEST_CONFIG_1") + """TEST_CONFIG_2.""" # Conv - # | + # | # Conv - G = helper.make_tensor_value_info('G', TensorProto.FLOAT, [1, 1, 5, 5]) - H = helper.make_tensor_value_info('H', TensorProto.FLOAT, [1, 1, 3, 3]) - J = helper.make_tensor_value_info('J', TensorProto.FLOAT, [1, 1, 3, 3]) - K = helper.make_tensor_value_info('K', TensorProto.FLOAT, [1, 1, 5, 5]) - conv_node_1 = onnx.helper.make_node('Conv', ['G', 'H'], ['I'], name='Conv', kernel_shape=[3, 3], pads=[1, 1, 1, 1]) - conv_node_2 = onnx.helper.make_node('Conv', ['I', 'J'], ['K'], name='Conv', kernel_shape=[3, 3], pads=[1, 1, 1, 1]) - graph = helper.make_graph([conv_node_1, conv_node_2], 'test_graph_2', [G, H, J], [K]) + G = helper.make_tensor_value_info("G", TensorProto.FLOAT, [1, 1, 5, 5]) + H = helper.make_tensor_value_info("H", TensorProto.FLOAT, [1, 1, 3, 3]) + J = helper.make_tensor_value_info("J", TensorProto.FLOAT, [1, 1, 3, 3]) + K = helper.make_tensor_value_info("K", TensorProto.FLOAT, [1, 1, 5, 5]) + conv_node_1 = onnx.helper.make_node( + "Conv", ["G", "H"], ["I"], name="Conv", kernel_shape=[3, 3], pads=[1, 1, 1, 1] + ) + conv_node_2 = onnx.helper.make_node( + "Conv", ["I", "J"], ["K"], name="Conv", kernel_shape=[3, 3], pads=[1, 1, 1, 1] + ) + graph = helper.make_graph([conv_node_1, conv_node_2], "test_graph_2", [G, H, J], [K]) model = helper.make_model(graph) # Augmenting graph data_reader = None - augment = ONNXRTAugment(ONNXModel(model), data_reader, ['Conv', 'MatMul'], ) + augment = ONNXRTAugment( + ONNXModel(model), + data_reader, + ["Conv", "MatMul"], + ) augment.augment_graph() augmented_model = 
augment.augmented_model - augmented_model_node_names = [node.name for node in augmented_model.graph.node] augmented_model_outputs = [output.name for output in augmented_model.graph.output] - added_node_names = ['Conv', 'Conv'] - added_outputs = ['I', 'J', 'H', 'G', 'K'] + added_node_names = ["Conv", "Conv"] + added_outputs = ["I", "J", "H", "G", "K"] # Original 2 nodes self.assertEqual(len(augmented_model_node_names), 2) # Original 1 graph output + 4 intermediate outputs @@ -235,39 +223,39 @@ def test_augment_graph(self): for output in added_outputs: self.assertTrue(output in augmented_model_outputs) - print('Finished TEST_CONFIG_2') + print("Finished TEST_CONFIG_2") + """TEST_CONFIG_3.""" - - '''TEST_CONFIG_3''' - # Relu - # | - # Conv \ + # | + # Conv \ # | | # Clip | # | / # MatMul - L = helper.make_tensor_value_info('L', TensorProto.FLOAT, [1, 1, 5, 5]) - N = helper.make_tensor_value_info('N', TensorProto.FLOAT, [1, 1, 3, 3]) - Q = helper.make_tensor_value_info('Q', TensorProto.FLOAT, [1, 1, 5, 5]) - relu_node = onnx.helper.make_node('Relu', ['L'], ['M'], name='Relu') - conv_node = onnx.helper.make_node('Conv', ['M', 'N'], ['O'], name='Conv', kernel_shape=[3, 3], pads=[1, 1, 1, 1]) - clip_node = onnx.helper.make_node('Clip', ['O'], ['P'], name='Clip') - matmul_node = onnx.helper.make_node('MatMul', ['P','M'], ['Q'], name='MatMul') - graph = helper.make_graph([relu_node, conv_node, clip_node, matmul_node], 'test_graph_3', [L, N], [Q]) + L = helper.make_tensor_value_info("L", TensorProto.FLOAT, [1, 1, 5, 5]) + N = helper.make_tensor_value_info("N", TensorProto.FLOAT, [1, 1, 3, 3]) + Q = helper.make_tensor_value_info("Q", TensorProto.FLOAT, [1, 1, 5, 5]) + relu_node = onnx.helper.make_node("Relu", ["L"], ["M"], name="Relu") + conv_node = onnx.helper.make_node( + "Conv", ["M", "N"], ["O"], name="Conv", kernel_shape=[3, 3], pads=[1, 1, 1, 1] + ) + clip_node = onnx.helper.make_node("Clip", ["O"], ["P"], name="Clip") + matmul_node = onnx.helper.make_node("MatMul", ["P", "M"], ["Q"], name="MatMul") + graph = helper.make_graph([relu_node, conv_node, clip_node, matmul_node], "test_graph_3", [L, N], [Q]) model = helper.make_model(graph) # Augmenting graph data_reader = None - augment = ONNXRTAugment(ONNXModel(model), data_reader, ['Conv', 'MatMul']) + augment = ONNXRTAugment(ONNXModel(model), data_reader, ["Conv", "MatMul"]) augment.augment_graph() augmented_model = augment.augmented_model augmented_model_node_names = [node.name for node in augmented_model.graph.node] augmented_model_outputs = [output.name for output in augmented_model.graph.output] - added_node_names = ['Relu', 'Conv', 'Clip', 'MatMul'] - added_outputs = ['P', 'M', 'N', 'O', 'Q'] + added_node_names = ["Relu", "Conv", "Clip", "MatMul"] + added_outputs = ["P", "M", "N", "O", "Q"] # Original 4 nodes self.assertEqual(len(augmented_model_node_names), 4) # Original 1 graph output + 4 intermediate outputs @@ -276,36 +264,38 @@ def test_augment_graph(self): self.assertTrue(name in augmented_model_node_names) for output in added_outputs: self.assertTrue(output in augmented_model_outputs) - - print('Finished TEST_CONFIG_3') - - '''TEST_CONFIG_4''' + print("Finished TEST_CONFIG_3") + """TEST_CONFIG_4.""" # Attention - # | + # | # MatMul - Attention_weight = helper.make_tensor_value_info('Attention_weight', TensorProto.FLOAT, [13,7 ]) - Attention_bias = helper.make_tensor_value_info('Attention_bias', TensorProto.FLOAT, [13, 7]) - Attention_mask = helper.make_tensor_value_info('Attention_mask', TensorProto.INT32, [13, 7]) - S = 
helper.make_tensor_value_info('S', TensorProto.FLOAT, [13, 7]) - T = helper.make_tensor_value_info('T', TensorProto.FLOAT, [13, 7]) - attention_node = onnx.helper.make_node('Attention', ['Attention_weight', 'Attention_bias', 'Attention_mask'], ['R'], name='Attention') - matmul_node = onnx.helper.make_node('MatMul', ['R', 'S'], ['T'], name='MatMul') - graph = helper.make_graph([attention_node, matmul_node], 'test_graph_4', [Attention_weight, Attention_bias, Attention_mask, S], [T]) + Attention_weight = helper.make_tensor_value_info("Attention_weight", TensorProto.FLOAT, [13, 7]) + Attention_bias = helper.make_tensor_value_info("Attention_bias", TensorProto.FLOAT, [13, 7]) + Attention_mask = helper.make_tensor_value_info("Attention_mask", TensorProto.INT32, [13, 7]) + S = helper.make_tensor_value_info("S", TensorProto.FLOAT, [13, 7]) + T = helper.make_tensor_value_info("T", TensorProto.FLOAT, [13, 7]) + attention_node = onnx.helper.make_node( + "Attention", ["Attention_weight", "Attention_bias", "Attention_mask"], ["R"], name="Attention" + ) + matmul_node = onnx.helper.make_node("MatMul", ["R", "S"], ["T"], name="MatMul") + graph = helper.make_graph( + [attention_node, matmul_node], "test_graph_4", [Attention_weight, Attention_bias, Attention_mask, S], [T] + ) model = helper.make_model(graph) # Augmenting graph data_reader = None - augment = ONNXRTAugment(ONNXModel(model), data_reader, ['Conv', 'MatMul', 'Attention']) + augment = ONNXRTAugment(ONNXModel(model), data_reader, ["Conv", "MatMul", "Attention"]) augment.augment_graph() augmented_model = augment.augmented_model augmented_model_node_names = [node.name for node in augmented_model.graph.node] augmented_model_outputs = [output.name for output in augmented_model.graph.output] - added_node_names = ['Attention', 'MatMul'] - added_outputs = ['R', 'Attention_mask', 'S', 'T', 'Attention_bias', 'Attention_weight'] + added_node_names = ["Attention", "MatMul"] + added_outputs = ["R", "Attention_mask", "S", "T", "Attention_bias", "Attention_weight"] # Original 2 nodes self.assertEqual(len(augmented_model_node_names), 2) # Original 1 graph output + 5 intermediate outputs @@ -315,58 +305,70 @@ def test_augment_graph(self): for output in added_outputs: self.assertTrue(output in augmented_model_outputs) - print('Finished TEST_CONFIG_4') + print("Finished TEST_CONFIG_4") # QAttention # | # QuantizeLinear - - Attention_input = helper.make_tensor_value_info('input_quantized', TensorProto.INT8, [7, 13]) - Attention_weight = helper.make_tensor_value_info('weight_quantized', TensorProto.INT8, [13,7]) - weight_quantized = generate_input_initializer([13, 7], np.int8, 'weight_quantized') - Attention_bias = helper.make_tensor_value_info('bias', TensorProto.FLOAT, [13, 7]) - bias = generate_input_initializer([13, 7], np.float32, 'bias') - Input_scale = helper.make_tensor_value_info('input_scale', TensorProto.FLOAT, [1]) - input_scale = generate_input_initializer([1], np.float32, 'input_scale') - Weight_scale = helper.make_tensor_value_info('weight_scale', TensorProto.FLOAT, [1]) - weight_scale = generate_input_initializer([1], np.float32, 'weight_scale') - Attention_mask = helper.make_tensor_value_info('mask', TensorProto.INT32, [13, 7]) - mask = generate_input_initializer([13, 7], np.int32, 'mask') - Input_zo = helper.make_tensor_value_info('input_zero_point', TensorProto.INT8, [1]) - input_zero_point = generate_input_initializer([1], np.int8, 'input_zero_point') - Weight_zo = helper.make_tensor_value_info('weight_zero_point', TensorProto.INT8, [1]) - 
weight_zero_point = generate_input_initializer([1], np.int8, 'weight_zero_point') - Q_scale = helper.make_tensor_value_info('attn_output_scale', TensorProto.FLOAT, [1]) - attn_output_scale = generate_input_initializer([1], np.float32, 'attn_output_scale') - Q_zo = helper.make_tensor_value_info('attn_output_zero_point', TensorProto.INT8, [1]) - attn_output_zero_point = generate_input_initializer([1], np.int8, 'attn_output_zero_point') - Output = helper.make_tensor_value_info('output', TensorProto.INT8, [13,7]) - attention_node = onnx.helper.make_node('QAttention', ['input_quantized', - 'weight_quantized', - 'bias', - 'input_scale', - 'weight_scale', - 'mask', - 'input_zero_point', - 'weight_zero_point'], - ['attn_output'], name='attention_quant') - qlinear_node = onnx.helper.make_node('QuantizeLinear', - ['attn_output', 'attn_output_scale', 'attn_output_zero_point'], - ['attn_output_quantized'], - name='attn_output_QuantizeLinear') - graph = helper.make_graph([attention_node, qlinear_node], - 'test_graph_5', - [Attention_input, - Attention_weight, - Attention_bias, - Input_scale, - Weight_scale, - Attention_mask, - Input_zo, - Weight_zo, - Q_scale, - Q_zo], - [Output]) + + Attention_input = helper.make_tensor_value_info("input_quantized", TensorProto.INT8, [7, 13]) + Attention_weight = helper.make_tensor_value_info("weight_quantized", TensorProto.INT8, [13, 7]) + weight_quantized = generate_input_initializer([13, 7], np.int8, "weight_quantized") + Attention_bias = helper.make_tensor_value_info("bias", TensorProto.FLOAT, [13, 7]) + bias = generate_input_initializer([13, 7], np.float32, "bias") + Input_scale = helper.make_tensor_value_info("input_scale", TensorProto.FLOAT, [1]) + input_scale = generate_input_initializer([1], np.float32, "input_scale") + Weight_scale = helper.make_tensor_value_info("weight_scale", TensorProto.FLOAT, [1]) + weight_scale = generate_input_initializer([1], np.float32, "weight_scale") + Attention_mask = helper.make_tensor_value_info("mask", TensorProto.INT32, [13, 7]) + mask = generate_input_initializer([13, 7], np.int32, "mask") + Input_zo = helper.make_tensor_value_info("input_zero_point", TensorProto.INT8, [1]) + input_zero_point = generate_input_initializer([1], np.int8, "input_zero_point") + Weight_zo = helper.make_tensor_value_info("weight_zero_point", TensorProto.INT8, [1]) + weight_zero_point = generate_input_initializer([1], np.int8, "weight_zero_point") + Q_scale = helper.make_tensor_value_info("attn_output_scale", TensorProto.FLOAT, [1]) + attn_output_scale = generate_input_initializer([1], np.float32, "attn_output_scale") + Q_zo = helper.make_tensor_value_info("attn_output_zero_point", TensorProto.INT8, [1]) + attn_output_zero_point = generate_input_initializer([1], np.int8, "attn_output_zero_point") + Output = helper.make_tensor_value_info("output", TensorProto.INT8, [13, 7]) + attention_node = onnx.helper.make_node( + "QAttention", + [ + "input_quantized", + "weight_quantized", + "bias", + "input_scale", + "weight_scale", + "mask", + "input_zero_point", + "weight_zero_point", + ], + ["attn_output"], + name="attention_quant", + ) + qlinear_node = onnx.helper.make_node( + "QuantizeLinear", + ["attn_output", "attn_output_scale", "attn_output_zero_point"], + ["attn_output_quantized"], + name="attn_output_QuantizeLinear", + ) + graph = helper.make_graph( + [attention_node, qlinear_node], + "test_graph_5", + [ + Attention_input, + Attention_weight, + Attention_bias, + Input_scale, + Weight_scale, + Attention_mask, + Input_zo, + Weight_zo, + Q_scale, + Q_zo, 
+ ], + [Output], + ) graph.initializer.add().CopyFrom(weight_quantized) graph.initializer.add().CopyFrom(bias) graph.initializer.add().CopyFrom(input_scale) @@ -375,13 +377,13 @@ def test_augment_graph(self): graph.initializer.add().CopyFrom(input_zero_point) graph.initializer.add().CopyFrom(weight_zero_point) graph.initializer.add().CopyFrom(attn_output_scale) - graph.initializer.add().CopyFrom(attn_output_zero_point) + graph.initializer.add().CopyFrom(attn_output_zero_point) model = helper.make_model(graph) # Augmenting graph data_reader = None - augment = ONNXRTAugment(ONNXModel(model), data_reader, [], white_nodes=['attention']) - augment.augment_nodes = ['DequantizeLinear'] + augment = ONNXRTAugment(ONNXModel(model), data_reader, [], white_nodes=["attention"]) + augment.augment_nodes = ["DequantizeLinear"] augment.already_quantized = True augment.augment_graph(activation_only=True, weight_only=False) @@ -389,8 +391,8 @@ def test_augment_graph(self): augmented_model_node_names = [node.name for node in augmented_model.graph.node] augmented_model_outputs = [output.name for output in augmented_model.graph.output] - added_node_names = ['attention_quant', 'attn_output_QuantizeLinear'] - added_outputs = ['input_quantized_output', 'output'] + added_node_names = ["attention_quant", "attn_output_QuantizeLinear"] + added_outputs = ["input_quantized_output", "output"] self.assertEqual(len(augmented_model_node_names), 3) self.assertEqual(len(augmented_model_outputs), 2) for name in added_node_names: @@ -398,35 +400,61 @@ def test_augment_graph(self): for output in added_outputs: self.assertTrue(output in augmented_model_outputs) - print('Finished TEST_CONFIG_5') + print("Finished TEST_CONFIG_5") # QuantizeLinear # | # QLinearConv # | # DequantizeLinear - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 1, 5, 5]) - A_scale = helper.make_tensor_value_info('A_scale', TensorProto.FLOAT, [1]) - a_scale = generate_input_initializer([1], np.float32, 'A_scale') - A_zo = helper.make_tensor_value_info('A_zero_point', TensorProto.INT8, [1]) - a_zero_point = generate_input_initializer([1], np.int8, 'A_zero_point') - C = helper.make_tensor_value_info('C', TensorProto.INT8, [1, 1, 5, 5]) - c = generate_input_initializer([1, 1, 5, 5], np.int8, 'C') - C_scale = helper.make_tensor_value_info('C_scale', TensorProto.FLOAT, [1]) - c_scale = generate_input_initializer([1], np.float32, 'C_scale') - C_zo = helper.make_tensor_value_info('C_zero_point', TensorProto.INT8, [1]) - c_zero_point = generate_input_initializer([1], np.int8, 'C_zero_point') - E = helper.make_tensor_value_info('E', TensorProto.INT32, [1]) - e = generate_input_initializer([1], np.int32, 'E') - D_scale = helper.make_tensor_value_info('D_scale', TensorProto.FLOAT, [1]) - d_scale = generate_input_initializer([1], np.float32, 'D_scale') - D_zo = helper.make_tensor_value_info('D_zero_point', TensorProto.INT8, [1]) - d_zero_point = generate_input_initializer([1], np.int8, 'D_zero_point') - D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [1, 1, 5, 5]) - quantize_node = onnx.helper.make_node('QuantizeLinear', ['A', 'A_scale', 'A_zero_point'], ['A_quantized'], name='A_QuantizeLinear') - conv_node = onnx.helper.make_node('QLinearConv', ['A_quantized', 'A_scale', 'A_zero_point', 'C_quantized', 'C_scale', 'C_zero_point', 'D_scale', 'D_zero_point', 'E'], ['D_quantized'], name='conv_quant', kernel_shape=[3, 3], pads=[1, 1, 1, 1]) - dequantize_node = onnx.helper.make_node('DequantizeLinear', ['D_quantized', 'D_scale', 'D_zero_point'], ['D'], 
name='D_DequantizeLinear') - graph = helper.make_graph([quantize_node, conv_node, dequantize_node], 'test_graph_5', [A, A_scale, A_zo, C, C_scale, C_zo, E, D_scale, D_zo], [D]) + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 1, 5, 5]) + A_scale = helper.make_tensor_value_info("A_scale", TensorProto.FLOAT, [1]) + a_scale = generate_input_initializer([1], np.float32, "A_scale") + A_zo = helper.make_tensor_value_info("A_zero_point", TensorProto.INT8, [1]) + a_zero_point = generate_input_initializer([1], np.int8, "A_zero_point") + C = helper.make_tensor_value_info("C", TensorProto.INT8, [1, 1, 5, 5]) + c = generate_input_initializer([1, 1, 5, 5], np.int8, "C") + C_scale = helper.make_tensor_value_info("C_scale", TensorProto.FLOAT, [1]) + c_scale = generate_input_initializer([1], np.float32, "C_scale") + C_zo = helper.make_tensor_value_info("C_zero_point", TensorProto.INT8, [1]) + c_zero_point = generate_input_initializer([1], np.int8, "C_zero_point") + E = helper.make_tensor_value_info("E", TensorProto.INT32, [1]) + e = generate_input_initializer([1], np.int32, "E") + D_scale = helper.make_tensor_value_info("D_scale", TensorProto.FLOAT, [1]) + d_scale = generate_input_initializer([1], np.float32, "D_scale") + D_zo = helper.make_tensor_value_info("D_zero_point", TensorProto.INT8, [1]) + d_zero_point = generate_input_initializer([1], np.int8, "D_zero_point") + D = helper.make_tensor_value_info("D", TensorProto.FLOAT, [1, 1, 5, 5]) + quantize_node = onnx.helper.make_node( + "QuantizeLinear", ["A", "A_scale", "A_zero_point"], ["A_quantized"], name="A_QuantizeLinear" + ) + conv_node = onnx.helper.make_node( + "QLinearConv", + [ + "A_quantized", + "A_scale", + "A_zero_point", + "C_quantized", + "C_scale", + "C_zero_point", + "D_scale", + "D_zero_point", + "E", + ], + ["D_quantized"], + name="conv_quant", + kernel_shape=[3, 3], + pads=[1, 1, 1, 1], + ) + dequantize_node = onnx.helper.make_node( + "DequantizeLinear", ["D_quantized", "D_scale", "D_zero_point"], ["D"], name="D_DequantizeLinear" + ) + graph = helper.make_graph( + [quantize_node, conv_node, dequantize_node], + "test_graph_5", + [A, A_scale, A_zo, C, C_scale, C_zo, E, D_scale, D_zo], + [D], + ) graph.initializer.add().CopyFrom(a_scale) graph.initializer.add().CopyFrom(a_zero_point) graph.initializer.add().CopyFrom(c) @@ -439,7 +467,7 @@ def test_augment_graph(self): # Augmenting graph data_reader = None - augment = ONNXRTAugment(ONNXModel(model), data_reader, [], white_nodes=['conv']) + augment = ONNXRTAugment(ONNXModel(model), data_reader, [], white_nodes=["conv"]) augment.augment_nodes = ["DequantizeLinear"] augment.already_quantized = True augment.augment_graph(activation_only=True, weight_only=False) @@ -447,77 +475,82 @@ def test_augment_graph(self): augmented_model_node_names = [node.name for node in augmented_model.graph.node] augmented_model_outputs = [output.name for output in augmented_model.graph.output] - added_node_names = ['A_QuantizeLinear', 'conv_quant', 'D_DequantizeLinear', 'A_quantized_DequantizeLinear'] - added_outputs = ['D', 'A_quantized_output'] + added_node_names = ["A_QuantizeLinear", "conv_quant", "D_DequantizeLinear", "A_quantized_DequantizeLinear"] + added_outputs = ["D", "A_quantized_output"] self.assertEqual(len(augmented_model_node_names), 4) self.assertEqual(len(augmented_model_outputs), 2) for name in added_node_names: self.assertTrue(name in augmented_model_node_names) for output in added_outputs: self.assertTrue(output in augmented_model_outputs) - + def test_quant_param_calculation(self): 
- '''TEST_CONFIG_6''' - - # Relu - # | \ + """TEST_CONFIG_6.""" + + # Relu + # | \ # Conv \ - # | \ - # Relu | - # | Conv - # Conv / - # \ / + # | \ + # Relu | + # | Conv + # Conv / + # \ / # | # Add - - input0 = helper.make_tensor_value_info('input0', TensorProto.FLOAT, [1, 3, 1, 3]) - output = helper.make_tensor_value_info('output', TensorProto.FLOAT, [1, 3, 1, 3]) - - X1_weight = generate_input_initializer([3, 3, 1, 1], np.float32, 'X1_weight') - X1_bias = generate_input_initializer([3], np.float32, 'X1_bias') - X3_weight = generate_input_initializer([3, 3, 1, 1], np.float32, 'X3_weight') - X3_bias = generate_input_initializer([3],np.float32, 'X3_bias') - X5_weight = generate_input_initializer([3, 3, 1, 1], np.float32, 'X5_weight') - X5_bias = generate_input_initializer([3],np.float32,'X5_bias') - - relu_node_1 = onnx.helper.make_node('Relu', ['input0'], ['X1'], name='Relu1') - conv_node_1 = onnx.helper.make_node('Conv', ['X1', 'X1_weight', 'X1_bias'], ['X2'], name='Conv1') - relu_node_2 = onnx.helper.make_node('Relu', ['X2'], ['X3'], name= 'Relu2') - conv_node_2 = onnx.helper.make_node('Conv', ['X3', 'X3_weight', 'X3_bias'], ['X4'], name='Conv2') - conv_node_3 = onnx.helper.make_node('Conv', ['X1', 'X5_weight', 'X5_bias'], ['X5'], name='Conv3') - add_node = onnx.helper.make_node('Add', ['X4', 'X5'], ['output'], name='Add') - - graph = helper.make_graph([relu_node_1, conv_node_1, relu_node_2, conv_node_2, conv_node_3, add_node], 'test_graph_5', [input0], [output]) + + input0 = helper.make_tensor_value_info("input0", TensorProto.FLOAT, [1, 3, 1, 3]) + output = helper.make_tensor_value_info("output", TensorProto.FLOAT, [1, 3, 1, 3]) + + X1_weight = generate_input_initializer([3, 3, 1, 1], np.float32, "X1_weight") + X1_bias = generate_input_initializer([3], np.float32, "X1_bias") + X3_weight = generate_input_initializer([3, 3, 1, 1], np.float32, "X3_weight") + X3_bias = generate_input_initializer([3], np.float32, "X3_bias") + X5_weight = generate_input_initializer([3, 3, 1, 1], np.float32, "X5_weight") + X5_bias = generate_input_initializer([3], np.float32, "X5_bias") + + relu_node_1 = onnx.helper.make_node("Relu", ["input0"], ["X1"], name="Relu1") + conv_node_1 = onnx.helper.make_node("Conv", ["X1", "X1_weight", "X1_bias"], ["X2"], name="Conv1") + relu_node_2 = onnx.helper.make_node("Relu", ["X2"], ["X3"], name="Relu2") + conv_node_2 = onnx.helper.make_node("Conv", ["X3", "X3_weight", "X3_bias"], ["X4"], name="Conv2") + conv_node_3 = onnx.helper.make_node("Conv", ["X1", "X5_weight", "X5_bias"], ["X5"], name="Conv3") + add_node = onnx.helper.make_node("Add", ["X4", "X5"], ["output"], name="Add") + + graph = helper.make_graph( + [relu_node_1, conv_node_1, relu_node_2, conv_node_2, conv_node_3, add_node], + "test_graph_5", + [input0], + [output], + ) graph.initializer.add().CopyFrom(X1_weight) graph.initializer.add().CopyFrom(X1_bias) graph.initializer.add().CopyFrom(X3_weight) graph.initializer.add().CopyFrom(X3_bias) graph.initializer.add().CopyFrom(X5_weight) graph.initializer.add().CopyFrom(X5_bias) - model = helper.make_model(graph, **{'opset_imports': [helper.make_opsetid('', 13)]}) + model = helper.make_model(graph, **{"opset_imports": [helper.make_opsetid("", 13)]}) data_reader = TestDataset() - augment = ONNXRTAugment(ONNXModel(model), data_reader,['Conv', 'MatMul']) + augment = ONNXRTAugment(ONNXModel(model), data_reader, ["Conv", "MatMul"]) - #test calculation of quantization params - #TO_DO: check rmin/rmax + # test calculation of quantization params + # TO_DO: check rmin/rmax 
quantization_params_dict = augment.dump_calibration({}) node_output_names, output_dicts_list = augment.get_intermediate_outputs({}) dict_for_quantization = augment._map_calibration(node_output_names, output_dicts_list) - #check the size of the quantization dictionary + # check the size of the quantization dictionary self.assertEqual(len(quantization_params_dict), 12) - - #check the computation of zp and scale + + # check the computation of zp and scale for key, value in quantization_params_dict.items(): - self.assertTrue(value is not None) self.assertTrue(len(value) == 2) - + thresholds = dict_for_quantization[key] rmin = min(thresholds[0], 0) rmax = max(thresholds[1], 0) - if key == 'X2': #next_node is Relu - if rmin < 0: rmin = 0 - + if key == "X2": # next_node is Relu + if rmin < 0: + rmin = 0 + scale_expected = np.float32((rmax - rmin) / 255 if rmin != rmax else 1) zp_expected = np.uint8(round(max(0, min(255, (0 - rmin) / scale_expected)))) zp_actual = value[0] @@ -525,9 +558,9 @@ def test_quant_param_calculation(self): self.assertEqual(zp_expected, zp_actual) self.assertEqual(scale_expected, scale_actual) - - print('Finished' + ' test calculation of quantization params.') + + print("Finished" + " test calculation of quantization params.") -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/onnxrt_adaptor/test_onnxrt_operators.py b/test/adaptor/onnxrt_adaptor/test_onnxrt_operators.py index 46dd63c24f0..9c15d27e96b 100644 --- a/test/adaptor/onnxrt_adaptor/test_onnxrt_operators.py +++ b/test/adaptor/onnxrt_adaptor/test_onnxrt_operators.py @@ -1,181 +1,185 @@ +import copy import os import shutil import unittest -import copy -import onnx -import numpy as np from collections import Counter -from onnx import helper, TensorProto, numpy_helper, onnx_pb + +import numpy as np +import onnx +import onnxruntime as ort +from onnx import TensorProto, helper, numpy_helper, onnx_pb + from neural_compressor.adaptor.ox_utils.operators import QOPERATORS from neural_compressor.adaptor.ox_utils.quantizer import Quantizer -from neural_compressor.adaptor.ox_utils.util import QuantizedInitializer, QuantizedValue, QuantizationMode -import onnxruntime as ort +from neural_compressor.adaptor.ox_utils.util import QuantizationMode, QuantizedInitializer, QuantizedValue from neural_compressor.config import ONNXQlinear2QDQConfig from neural_compressor.utils.utility import CpuInfo + def build_model(): initializers = [] - input = helper.make_tensor_value_info('input', TensorProto.FLOAT, [1, 3, 15, 15]) - output = helper.make_tensor_value_info('add_out_2', TensorProto.FLOAT, [88, 11]) - - add_node = onnx.helper.make_node('Add', ['input', 'add_init'], ['add_out'], name='add') + input = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 15, 15]) + output = helper.make_tensor_value_info("add_out_2", TensorProto.FLOAT, [88, 11]) + + add_node = onnx.helper.make_node("Add", ["input", "add_init"], ["add_out"], name="add") conv1_weight_initializer = numpy_helper.from_array( - np.random.randint(-1, 2, [3, 3, 3, 3]).astype(np.float32), name='conv1_weight') - conv1_node = helper.make_node('Conv', ['add_out', 'conv1_weight'], ['conv1_output'], name='conv1') + np.random.randint(-1, 2, [3, 3, 3, 3]).astype(np.float32), name="conv1_weight" + ) + conv1_node = helper.make_node("Conv", ["add_out", "conv1_weight"], ["conv1_output"], name="conv1") conv2_weight_initializer = numpy_helper.from_array( - np.random.randint(-1, 2, [5, 3, 3, 3]).astype(np.float32), name='conv2_weight') - 
conv2_node = helper.make_node('Conv', ['add_out', 'conv2_weight'], ['conv2_output'], name='conv2') + np.random.randint(-1, 2, [5, 3, 3, 3]).astype(np.float32), name="conv2_weight" + ) + conv2_node = helper.make_node("Conv", ["add_out", "conv2_weight"], ["conv2_output"], name="conv2") # 1, 8, 13, 13 - concat_node = helper.make_node('Concat', ['conv1_output', 'conv2_output'], [ - 'concat_output'], name='Concat', axis=1) + concat_node = helper.make_node("Concat", ["conv1_output", "conv2_output"], ["concat_output"], name="Concat", axis=1) # 1, 8, 11, 11 - avg_args = {'kernel_shape': [3, 3]} - avgpool_node = helper.make_node('AveragePool', ['concat_output'], ['avg_output'], name='AveragePool', **avg_args) - reshape_node = onnx.helper.make_node('Reshape', ['avg_output', 'shape'], ['reshape_output'], name='Reshape') + avg_args = {"kernel_shape": [3, 3]} + avgpool_node = helper.make_node("AveragePool", ["concat_output"], ["avg_output"], name="AveragePool", **avg_args) + reshape_node = onnx.helper.make_node("Reshape", ["avg_output", "shape"], ["reshape_output"], name="Reshape") - add_node_2 = onnx.helper.make_node('Add', ['reshape_output', 'add_init_2'], ['add_out_2'], name='add_2') + add_node_2 = onnx.helper.make_node("Add", ["reshape_output", "add_init_2"], ["add_out_2"], name="add_2") initializers = [conv1_weight_initializer, conv2_weight_initializer] - initializers.append(onnx.numpy_helper.from_array(np.array([88, 11], dtype=np.int64), name='shape')) - initializers.append(onnx.numpy_helper.from_array(np.zeros((1, 3, 15, 15)).astype('float32'), name='add_init')) - initializers.append(onnx.numpy_helper.from_array(np.zeros((88, 11)).astype('float32'), name='add_init_2')) - - graph = helper.make_graph([conv1_node, conv2_node, concat_node, avgpool_node, reshape_node, add_node, add_node_2], - 'test', [input], [output], initializer=initializers) + initializers.append(onnx.numpy_helper.from_array(np.array([88, 11], dtype=np.int64), name="shape")) + initializers.append(onnx.numpy_helper.from_array(np.zeros((1, 3, 15, 15)).astype("float32"), name="add_init")) + initializers.append(onnx.numpy_helper.from_array(np.zeros((88, 11)).astype("float32"), name="add_init_2")) + + graph = helper.make_graph( + [conv1_node, conv2_node, concat_node, avgpool_node, reshape_node, add_node, add_node_2], + "test", + [input], + [output], + initializer=initializers, + ) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) return model -class TestAdaptorONNXRT(unittest.TestCase): +class TestAdaptorONNXRT(unittest.TestCase): qlinear_backend = QuantizationMode.QLinearOps - qdq_backend = 'qdq' + qdq_backend = "qdq" integer_backend = QuantizationMode.IntegerOps - static_q_config = {"weight":{'dtype': 3, - 'algorithm': 'minmax', - 'scheme':'sym', - 'granularity': 'per_tensor'}, - 'activation':{'dtype': 2, - 'algorithm': 'minmax', - 'scheme':'asym', - 'granularity':'per_tensor', - 'quant_mode': 'static'} - } - dynamic_q_config = {"weight":{'dtype': 3, - 'algorithm': 'minmax', - 'scheme':'sym', - 'granularity': 'per_tensor'}, - 'activation':{'dtype': 2, - 'algorithm': 'minmax', - 'scheme':'asym', - 'granularity':'per_tensor', - 'quant_mode': 'dynamic'} - } + static_q_config = { + "weight": {"dtype": 3, "algorithm": "minmax", "scheme": "sym", "granularity": "per_tensor"}, + "activation": { + "dtype": 2, + "algorithm": "minmax", + "scheme": "asym", + "granularity": "per_tensor", + "quant_mode": "static", + }, + } + dynamic_q_config = { + "weight": {"dtype": 3, "algorithm": "minmax", "scheme": "sym", 
"granularity": "per_tensor"}, + "activation": { + "dtype": 2, + "algorithm": "minmax", + "scheme": "asym", + "granularity": "per_tensor", + "quant_mode": "dynamic", + }, + } @classmethod def setUpClass(cls): - os.makedirs('./onnxrt_test') + os.makedirs("./onnxrt_test") @classmethod def tearDownClass(cls): shutil.rmtree("./onnxrt_test", ignore_errors=True) def qlinear_test(self, model, q_config, quantize_params, quantizable_op_types, **kwargs): - quantizer = Quantizer(copy.deepcopy(model), - q_config, - self.qlinear_backend, - True, - quantize_params, - quantizable_op_types, - **kwargs) + quantizer = Quantizer( + copy.deepcopy(model), q_config, self.qlinear_backend, True, quantize_params, quantizable_op_types, **kwargs + ) quantizer.quantize_model() assert quantizer.model.model return quantizer.model def qdq_test(self, model, q_config, quantize_params, quantizable_op_types, **kwargs): - quantizer = Quantizer(copy.deepcopy(model), - q_config, - self.qdq_backend, - True, - quantize_params, - quantizable_op_types, - **kwargs) + quantizer = Quantizer( + copy.deepcopy(model), q_config, self.qdq_backend, True, quantize_params, quantizable_op_types, **kwargs + ) quantizer.quantize_model() assert quantizer.model.model return quantizer.model def dynamic_test(self, model, q_config, quantize_params, quantizable_op_types): - quantizer = Quantizer(copy.deepcopy(model), - q_config, - self.integer_backend, - False, - quantize_params, - quantizable_op_types) + quantizer = Quantizer( + copy.deepcopy(model), q_config, self.integer_backend, False, quantize_params, quantizable_op_types + ) quantizer.quantize_model() assert quantizer.model.model return quantizer.model def test_resize(self): - input_tensor = helper.make_tensor_value_info('input', TensorProto.FLOAT, [1, 2, 26, 42]) + input_tensor = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 2, 26, 42]) conv_weight_arr = np.random.randint(-1, 2, [3, 2, 3, 3]).astype(np.float32) - conv_weight_initializer = onnx.numpy_helper.from_array(conv_weight_arr, name='conv1_weight') - conv_node = onnx.helper.make_node('Conv', ['input', 'conv1_weight'], ['conv_output'], name='conv_node') + conv_weight_initializer = onnx.numpy_helper.from_array(conv_weight_arr, name="conv1_weight") + conv_node = onnx.helper.make_node("Conv", ["input", "conv1_weight"], ["conv_output"], name="conv_node") initializers = [conv_weight_initializer] - output_tensor = helper.make_tensor_value_info('output', TensorProto.FLOAT, [1, 3, 48, 80]) - resize_inputs = ['conv_output'] # resize_roi_name, resize_scales_name, resize_sizes_name] - resize_attrs = {'coordinate_transformation_mode': 'asymmetric', 'mode': 'nearest', 'nearest_mode': 'floor'} - resize_node = helper.make_node('Resize', resize_inputs, ['output'], name='resize_node', **resize_attrs) + output_tensor = helper.make_tensor_value_info("output", TensorProto.FLOAT, [1, 3, 48, 80]) + resize_inputs = ["conv_output"] # resize_roi_name, resize_scales_name, resize_sizes_name] + resize_attrs = {"coordinate_transformation_mode": "asymmetric", "mode": "nearest", "nearest_mode": "floor"} + resize_node = helper.make_node("Resize", resize_inputs, ["output"], name="resize_node", **resize_attrs) resize_roi = [0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0] - resize_roi_name = 'resize_roi' + resize_roi_name = "resize_roi" resize_roi_initializer = helper.make_tensor(resize_roi_name, TensorProto.FLOAT, [len(resize_roi)], resize_roi) initializers.extend([resize_roi_initializer]) resize_node.input.extend([resize_roi_name]) resize_scales = [1.0, 1.0, 2.0, 
2.0] - resize_scales_name = 'resize_scales' - resize_scales_initializer = helper.make_tensor(resize_scales_name, TensorProto.FLOAT, [ - len(resize_scales)], resize_scales) + resize_scales_name = "resize_scales" + resize_scales_initializer = helper.make_tensor( + resize_scales_name, TensorProto.FLOAT, [len(resize_scales)], resize_scales + ) initializers.extend([resize_scales_initializer]) resize_node.input.extend([resize_scales_name]) - graph = helper.make_graph([conv_node, resize_node], 'TestOpQuantizerResize_test_model', - [input_tensor], [output_tensor], initializer=initializers) + graph = helper.make_graph( + [conv_node, resize_node], + "TestOpQuantizerResize_test_model", + [input_tensor], + [output_tensor], + initializer=initializers, + ) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) - model.ir_version = 7 # use stable onnx ir version - - q_config = {'conv_node': self.static_q_config, - 'resize_node': self.static_q_config} - quantize_params = {'input': [np.uint8(0), np.float32(10.)], - 'conv1_weight': [np.uint8(0), np.float32(10.)], - 'conv_output': [np.uint8(0), np.float32(10.)], - 'output': [np.uint8(0), np.float32(10.)], - } - - q_model = self.qlinear_test(model, q_config, quantize_params, ['Resize', 'Conv']) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 1) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 1) - - q_model = self.qdq_test(model, q_config, quantize_params, ['Resize', 'Conv']) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 4) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 3) + model.ir_version = 7 # use stable onnx ir version + + q_config = {"conv_node": self.static_q_config, "resize_node": self.static_q_config} + quantize_params = { + "input": [np.uint8(0), np.float32(10.0)], + "conv1_weight": [np.uint8(0), np.float32(10.0)], + "conv_output": [np.uint8(0), np.float32(10.0)], + "output": [np.uint8(0), np.float32(10.0)], + } + + q_model = self.qlinear_test(model, q_config, quantize_params, ["Resize", "Conv"]) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1) + + q_model = self.qdq_test(model, q_config, quantize_params, ["Resize", "Conv"]) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 4) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 3) # test opset version 10 model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 10)]) - model.ir_version = 7 # use stable onnx ir version + model.ir_version = 7 # use stable onnx ir version + + q_model = self.qlinear_test(model, q_config, quantize_params, ["Resize", "Conv"]) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1) - q_model = self.qlinear_test(model, q_config, quantize_params, ['Resize', 'Conv']) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 1) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 1) + q_model = self.qdq_test(model, q_config, quantize_params, 
["Resize", "Conv"]) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 3) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 2) - q_model = self.qdq_test(model, q_config, quantize_params, ['Resize', 'Conv']) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 3) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 2) - def test_argmax(self): input_name = "input" output_name = "output" @@ -230,16 +234,16 @@ def test_argmax(self): ) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) model.ir_version = 7 # use stable onnx ir version - q_config = {'conv_node': self.static_q_config, - 'argmax_node': self.static_q_config} - quantize_params = {'input': [np.uint8(0), np.float32(10.)], - 'conv_weight': [np.uint8(0),np.float32(10.)], - 'conv_output': [np.uint8(0), np.float32(10.)], - 'output': [np.uint8(0), np.float32(10.)], - } - q_model = self.qlinear_test(model, q_config, quantize_params, ['Conv', 'ArgMax']) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 1) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 1) + q_config = {"conv_node": self.static_q_config, "argmax_node": self.static_q_config} + quantize_params = { + "input": [np.uint8(0), np.float32(10.0)], + "conv_weight": [np.uint8(0), np.float32(10.0)], + "conv_output": [np.uint8(0), np.float32(10.0)], + "output": [np.uint8(0), np.float32(10.0)], + } + q_model = self.qlinear_test(model, q_config, quantize_params, ["Conv", "ArgMax"]) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1) def test_gemm(self): input_name = "input" @@ -257,14 +261,8 @@ def test_gemm(self): bias_data = np.random.normal(0, 0.1, bias_shape).astype(np.float32) initializers.append(onnx.numpy_helper.from_array(bias_data, name=bias_name)) - gemm1_node = onnx.helper.make_node( - "Gemm", - [input_name, weight_name, bias_name], - [output_name], - alpha=1.0, - beta=1.0, - transB=1, - name=node_name + gemm1_node = onnx.helper.make_node( + "Gemm", [input_name, weight_name, bias_name], [output_name], alpha=1.0, beta=1.0, transB=1, name=node_name ) gemm1_output_name = "gemm1_output" @@ -280,29 +278,24 @@ def test_gemm(self): ) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) model.ir_version = 7 # use stable onnx ir version - q_config = {'gemm': self.static_q_config} - quantize_params = {'input': [np.uint8(0), np.float32(10.)], - 'linear1.weight': [np.uint8(0), np.float32(10.)], - 'linear1.bias': [np.uint8(0), np.float32(10.)], - 'output': [np.uint8(0), np.float32(10.)], - } - q_model = self.qlinear_test(model, q_config, quantize_params, ['Gemm']) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 1) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 1) - q_model = self.qdq_test(model, q_config, quantize_params, ['Gemm']) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 4) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 2) + q_config = {"gemm": self.static_q_config} + quantize_params = { 
+ "input": [np.uint8(0), np.float32(10.0)], + "linear1.weight": [np.uint8(0), np.float32(10.0)], + "linear1.bias": [np.uint8(0), np.float32(10.0)], + "output": [np.uint8(0), np.float32(10.0)], + } + q_model = self.qlinear_test(model, q_config, quantize_params, ["Gemm"]) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1) + q_model = self.qdq_test(model, q_config, quantize_params, ["Gemm"]) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 4) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 2) # test gemm with non-constant bias bias_tensor = helper.make_tensor_value_info(bias_name, TensorProto.FLOAT, [100]) - gemm2_node = onnx.helper.make_node( - "Gemm", - [input_name, weight_name, bias_name], - [output_name], - alpha=1.0, - beta=1.0, - transB=1, - name=node_name + gemm2_node = onnx.helper.make_node( + "Gemm", [input_name, weight_name, bias_name], [output_name], alpha=1.0, beta=1.0, transB=1, name=node_name ) initializers = [] initializers.append(onnx.numpy_helper.from_array(weight_data, name=weight_name)) @@ -316,551 +309,620 @@ def test_gemm(self): ) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) model.ir_version = 7 - q_model = self.qlinear_test(model, q_config, quantize_params, ['Gemm']) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 0) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 0) - q_model = self.qdq_test(model, q_config, quantize_params, ['Gemm']) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 3) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 2) + q_model = self.qlinear_test(model, q_config, quantize_params, ["Gemm"]) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 0) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 0) + q_model = self.qdq_test(model, q_config, quantize_params, ["Gemm"]) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 3) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 2) def test_embed(self): input_ids_shape = [1, 4] - input_ids_tensor = helper.make_tensor_value_info('input_ids', TensorProto.INT32, input_ids_shape) + input_ids_tensor = helper.make_tensor_value_info("input_ids", TensorProto.INT32, input_ids_shape) segment_ids_shape = [1, 4] - segment_ids_tensor = helper.make_tensor_value_info('segment_ids', TensorProto.INT32, segment_ids_shape) + segment_ids_tensor = helper.make_tensor_value_info("segment_ids", TensorProto.INT32, segment_ids_shape) # EmbedLayerNormalization Node Constants and Weights: word_embed_shape = [32, 4] - word_embed_weights = np.random.random_sample(word_embed_shape).astype(dtype='float32') - word_embed_initializer = onnx.numpy_helper.from_array(word_embed_weights, name='word_embed') + word_embed_weights = np.random.random_sample(word_embed_shape).astype(dtype="float32") + word_embed_initializer = onnx.numpy_helper.from_array(word_embed_weights, name="word_embed") pos_embed_shape = [16, 4] - pos_embed_weights = 
np.random.random_sample(pos_embed_shape).astype(dtype='float32') - pos_embed_initializer = onnx.numpy_helper.from_array(pos_embed_weights, name='pos_embed') + pos_embed_weights = np.random.random_sample(pos_embed_shape).astype(dtype="float32") + pos_embed_initializer = onnx.numpy_helper.from_array(pos_embed_weights, name="pos_embed") seg_embed_shape = [2, 4] - seg_embed_weights = np.random.random_sample(seg_embed_shape).astype(dtype='float32') - seg_embed_initializer = onnx.numpy_helper.from_array(seg_embed_weights, name='seg_embed') + seg_embed_weights = np.random.random_sample(seg_embed_shape).astype(dtype="float32") + seg_embed_initializer = onnx.numpy_helper.from_array(seg_embed_weights, name="seg_embed") gamma_shape = [4] - gamma = np.random.random_sample(gamma_shape).astype(dtype='float32') - gamma_initializer = onnx.numpy_helper.from_array(gamma, name='gamma') + gamma = np.random.random_sample(gamma_shape).astype(dtype="float32") + gamma_initializer = onnx.numpy_helper.from_array(gamma, name="gamma") beta_shape = [4] - beta = np.random.random_sample(beta_shape).astype(dtype='float32') - beta_initializer = onnx.numpy_helper.from_array(beta, name='beta') + beta = np.random.random_sample(beta_shape).astype(dtype="float32") + beta_initializer = onnx.numpy_helper.from_array(beta, name="beta") # EmbedLayerNormalization Outputs: layernorm_out_shape = [1, 4, 4] - layernorm_out_tensor = helper.make_tensor_value_info('layernorm_out', TensorProto.FLOAT, layernorm_out_shape) + layernorm_out_tensor = helper.make_tensor_value_info("layernorm_out", TensorProto.FLOAT, layernorm_out_shape) mask_index_out_shape = [1] - mask_index_out_tensor = helper.make_tensor_value_info('mask_index_out', TensorProto.INT32, mask_index_out_shape) + mask_index_out_tensor = helper.make_tensor_value_info("mask_index_out", TensorProto.INT32, mask_index_out_shape) # EmbedLayerNormalization Node: - embed_layer_norm_inputs = [ - 'input_ids', 'segment_ids', 'word_embed', 'pos_embed', 'seg_embed', 'gamma', 'beta' - ] - embed_layer_norm_outputs = ['layernorm_out', 'mask_index_out'] - embed_layer_norm_node = helper.make_node('EmbedLayerNormalization', - embed_layer_norm_inputs, - embed_layer_norm_outputs, - domain='com.microsoft', - name='Embed') + embed_layer_norm_inputs = ["input_ids", "segment_ids", "word_embed", "pos_embed", "seg_embed", "gamma", "beta"] + embed_layer_norm_outputs = ["layernorm_out", "mask_index_out"] + embed_layer_norm_node = helper.make_node( + "EmbedLayerNormalization", + embed_layer_norm_inputs, + embed_layer_norm_outputs, + domain="com.microsoft", + name="Embed", + ) # Construct the Graph and Model: nodes = [embed_layer_norm_node] - graph_name = 'embed_layernorm_graph' + graph_name = "embed_layernorm_graph" inputs = [input_ids_tensor, segment_ids_tensor] outputs = [layernorm_out_tensor, mask_index_out_tensor] initializers = [ - word_embed_initializer, pos_embed_initializer, seg_embed_initializer, gamma_initializer, beta_initializer + word_embed_initializer, + pos_embed_initializer, + seg_embed_initializer, + gamma_initializer, + beta_initializer, ] graph = helper.make_graph(nodes, graph_name, inputs, outputs, initializer=initializers) - model = helper.make_model(graph, - opset_imports=[helper.make_opsetid("com.microsoft", 14), helper.make_opsetid("ai.onnx", 14)]) - model.ir_version = 7 # use stable onnx ir version - - q_config = {'Embed': self.static_q_config} - quantize_params = {'word_embed': [np.uint8(10.), np.float32(0)], - 'pos_embed': [np.uint8(10.), np.float32(0)], - 'seg_embed': [np.uint8(10.), 
np.float32(0)], - 'gamma': [np.uint8(10.), np.float32(0)], - 'beta': [np.uint8(10.), np.float32(0)], - 'layernorm_out': [np.uint8(10.), np.float32(0)], - 'mask_index_out': [np.uint8(10.), np.float32(0)], - 'input_ids': [np.uint8(10.), np.float32(0)], - } - q_model = self.qlinear_test(model, q_config, quantize_params, ['EmbedLayerNormalization']) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QEmbedLayerNormalization'], 1) - converter = QOPERATORS["QEmbedLayerNormalization"]([i for i in q_model.nodes() if i.op_type == "QEmbedLayerNormalization"][0], - None, q_model.initializer()) + model = helper.make_model( + graph, opset_imports=[helper.make_opsetid("com.microsoft", 14), helper.make_opsetid("ai.onnx", 14)] + ) + model.ir_version = 7 # use stable onnx ir version + + q_config = {"Embed": self.static_q_config} + quantize_params = { + "word_embed": [np.uint8(10.0), np.float32(0)], + "pos_embed": [np.uint8(10.0), np.float32(0)], + "seg_embed": [np.uint8(10.0), np.float32(0)], + "gamma": [np.uint8(10.0), np.float32(0)], + "beta": [np.uint8(10.0), np.float32(0)], + "layernorm_out": [np.uint8(10.0), np.float32(0)], + "mask_index_out": [np.uint8(10.0), np.float32(0)], + "input_ids": [np.uint8(10.0), np.float32(0)], + } + q_model = self.qlinear_test(model, q_config, quantize_params, ["EmbedLayerNormalization"]) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QEmbedLayerNormalization"], 1) + converter = QOPERATORS["QEmbedLayerNormalization"]( + [i for i in q_model.nodes() if i.op_type == "QEmbedLayerNormalization"][0], None, q_model.initializer() + ) done, add_node, init = converter.convert() self.assertTrue("EmbedLayerNormalization" in [i.op_type for i in add_node]) - q_model = self.qdq_test(model, q_config, quantize_params, ['EmbedLayerNormalization']) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 5) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['EmbedLayerNormalization'], 1) + q_model = self.qdq_test(model, q_config, quantize_params, ["EmbedLayerNormalization"]) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 5) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["EmbedLayerNormalization"], 1) def test_LSTM(self): input_shape = [1, 1, 200] - input_tensor = helper.make_tensor_value_info('input', TensorProto.FLOAT, input_shape) + input_tensor = helper.make_tensor_value_info("input", TensorProto.FLOAT, input_shape) w_shape = [2, 400, 200] - w_weights = np.random.random_sample(w_shape).astype(dtype='float32') - w_init = onnx.numpy_helper.from_array(w_weights, name='w') + w_weights = np.random.random_sample(w_shape).astype(dtype="float32") + w_init = onnx.numpy_helper.from_array(w_weights, name="w") r_shape = [2, 400, 100] - r_weights = np.random.random_sample(r_shape).astype(dtype='float32') - r_init = onnx.numpy_helper.from_array(r_weights, name='r') + r_weights = np.random.random_sample(r_shape).astype(dtype="float32") + r_init = onnx.numpy_helper.from_array(r_weights, name="r") b_shape = [2, 800] - b_weights = np.random.random_sample(b_shape).astype(dtype='float32') - b_init = onnx.numpy_helper.from_array(b_weights, name='b') + b_weights = np.random.random_sample(b_shape).astype(dtype="float32") + b_init = onnx.numpy_helper.from_array(b_weights, name="b") out_shape = [1, 2, 1, 100] - out_tensor = helper.make_tensor_value_info('out', TensorProto.FLOAT, out_shape) + 
out_tensor = helper.make_tensor_value_info("out", TensorProto.FLOAT, out_shape) kwargs = {} - kwargs['direction'] = "bidirectional" - kwargs['activations'] = ["Sigmoid", "Tanh", "Tanh", "Sigmoid", "Tanh", "Tanh"] - kwargs['hidden_size'] = 100 - kwargs['input_forget'] = 0 - - lstm_node = helper.make_node('LSTM', - ['input', 'w', 'r', 'b'], - ['out'], - name='lstm', - domain='', - **kwargs) - graph = helper.make_graph([lstm_node], 'test', [input_tensor], [out_tensor], initializer=[w_init, r_init, b_init]) - model = helper.make_model(graph, - opset_imports=[helper.make_opsetid("", 11)]) - model.ir_version = 7 # use stable onnx ir version - - q_config = {'lstm': self.dynamic_q_config} - q_model = self.dynamic_test(model, q_config, None, ['LSTM']) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DynamicQuantizeLSTM'], 1) - + kwargs["direction"] = "bidirectional" + kwargs["activations"] = ["Sigmoid", "Tanh", "Tanh", "Sigmoid", "Tanh", "Tanh"] + kwargs["hidden_size"] = 100 + kwargs["input_forget"] = 0 + + lstm_node = helper.make_node("LSTM", ["input", "w", "r", "b"], ["out"], name="lstm", domain="", **kwargs) + graph = helper.make_graph( + [lstm_node], "test", [input_tensor], [out_tensor], initializer=[w_init, r_init, b_init] + ) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 11)]) + model.ir_version = 7 # use stable onnx ir version + + q_config = {"lstm": self.dynamic_q_config} + q_model = self.dynamic_test(model, q_config, None, ["LSTM"]) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DynamicQuantizeLSTM"], 1) + def test_concat_reshape_pooling(self): model = build_model() - - q_config = {'Reshape':self.static_q_config, 'conv1':self.static_q_config, 'conv2':self.static_q_config, \ - 'Concat':self.static_q_config, 'AveragePool':self.static_q_config, 'add':self.static_q_config} - quantize_params = {'input': [np.uint8(10.), np.float32(0)], - 'conv1_weight': [np.uint8(10.), np.float32(0)], - 'conv1_output': [np.uint8(10.), np.float32(0)], - 'conv2_weight': [np.uint8(10.), np.float32(0)], - 'conv2_output': [np.uint8(10.), np.float32(0)], - 'concat_output': [np.uint8(10.), np.float32(0)], - 'avg_output': [np.uint8(10.), np.float32(0)], - 'add_out': [np.uint8(10.), np.float32(0)], - 'add_init': [np.uint8(10.), np.float32(0)], - 'shape': [np.uint8(10.), np.float32(0)], - 'reshape_output': [np.uint8(10.), np.float32(0)], - 'add_init_2': [np.uint8(10.), np.float32(0)], - 'add_out_2': [np.uint8(10.), np.float32(0)]} - quantizable_op_types = ['Reshape', 'Conv', 'Concat', 'AveragePool', 'Add'] - q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types, **{'dedicated_qdq_pair': True}) + + q_config = { + "Reshape": self.static_q_config, + "conv1": self.static_q_config, + "conv2": self.static_q_config, + "Concat": self.static_q_config, + "AveragePool": self.static_q_config, + "add": self.static_q_config, + } + quantize_params = { + "input": [np.uint8(10.0), np.float32(0)], + "conv1_weight": [np.uint8(10.0), np.float32(0)], + "conv1_output": [np.uint8(10.0), np.float32(0)], + "conv2_weight": [np.uint8(10.0), np.float32(0)], + "conv2_output": [np.uint8(10.0), np.float32(0)], + "concat_output": [np.uint8(10.0), np.float32(0)], + "avg_output": [np.uint8(10.0), np.float32(0)], + "add_out": [np.uint8(10.0), np.float32(0)], + "add_init": [np.uint8(10.0), np.float32(0)], + "shape": [np.uint8(10.0), np.float32(0)], + "reshape_output": [np.uint8(10.0), np.float32(0)], + "add_init_2": [np.uint8(10.0), 
np.float32(0)], + "add_out_2": [np.uint8(10.0), np.float32(0)], + } + quantizable_op_types = ["Reshape", "Conv", "Concat", "AveragePool", "Add"] + q_model = self.qlinear_test( + model, q_config, quantize_params, quantizable_op_types, **{"dedicated_qdq_pair": True} + ) self.assertEqual(len(q_model.model.graph.node), 9) - q_model.export('test.onnx', ONNXQlinear2QDQConfig()) - export_model = onnx.load('test.onnx') + q_model.export("test.onnx", ONNXQlinear2QDQConfig()) + export_model = onnx.load("test.onnx") self.assertEqual(len(export_model.graph.node), 20) - os.remove('test.onnx') - - q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types, **{'dedicated_qdq_pair': True}) + os.remove("test.onnx") + + q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types, **{"dedicated_qdq_pair": True}) self.assertEqual(len(q_model.model.graph.node), 23) - q_config = {'Reshape':self.static_q_config, 'conv1':'fp32', 'conv2':self.static_q_config, \ - 'Concat':self.static_q_config, 'AveragePool':self.static_q_config} + q_config = { + "Reshape": self.static_q_config, + "conv1": "fp32", + "conv2": self.static_q_config, + "Concat": self.static_q_config, + "AveragePool": self.static_q_config, + } q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) self.assertEqual(len(q_model.model.graph.node), 9) q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types) self.assertEqual(len(q_model.model.graph.node), 12) - q_config = {'Reshape':self.static_q_config, 'conv1':'fp32', 'conv2':'fp32', \ - 'Concat':self.static_q_config, 'AveragePool':self.static_q_config} + q_config = { + "Reshape": self.static_q_config, + "conv1": "fp32", + "conv2": "fp32", + "Concat": self.static_q_config, + "AveragePool": self.static_q_config, + } q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) self.assertEqual(len(q_model.model.graph.node), 7) q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types) self.assertEqual(len(q_model.model.graph.node), 7) - q_config = {'Reshape':self.static_q_config, 'conv1':self.static_q_config, 'conv2':self.static_q_config, \ - 'Concat':self.static_q_config, 'AveragePool':'fp32'} + q_config = { + "Reshape": self.static_q_config, + "conv1": self.static_q_config, + "conv2": self.static_q_config, + "Concat": self.static_q_config, + "AveragePool": "fp32", + } q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) self.assertEqual(len(q_model.model.graph.node), 9) q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types) self.assertEqual(len(q_model.model.graph.node), 17) - - quantize_params = {'input': [np.uint8(10.), np.float32(0)], - 'conv1_weight': [np.uint8(10.), np.float32(0)], - 'conv1_output': [np.uint8(10.), np.float32(0)], - 'conv2_weight': [np.uint8(10.), np.float32(0)], - 'conv2_output': [np.uint8(10.), np.float32(0)], - 'concat_output': [np.uint8(10.), np.float32(0)], - 'avg_output': [np.uint8(10.), np.float32(0)], - 'shape': [np.uint8(10.), np.float32(0)], - 'add_out': [np.uint8(10.), np.float32(0)], - 'add_init': [np.uint8(10.), np.float32(0)], - 'reshape_output': [np.uint8(10.), np.float32(0)]} - q_config = {'Reshape':self.static_q_config, 'conv1':self.static_q_config, 'conv2':self.static_q_config, \ - 'Concat':self.static_q_config, 'AveragePool':self.static_q_config} + + quantize_params = { + "input": [np.uint8(10.0), np.float32(0)], + "conv1_weight": [np.uint8(10.0), np.float32(0)], + 
"conv1_output": [np.uint8(10.0), np.float32(0)], + "conv2_weight": [np.uint8(10.0), np.float32(0)], + "conv2_output": [np.uint8(10.0), np.float32(0)], + "concat_output": [np.uint8(10.0), np.float32(0)], + "avg_output": [np.uint8(10.0), np.float32(0)], + "shape": [np.uint8(10.0), np.float32(0)], + "add_out": [np.uint8(10.0), np.float32(0)], + "add_init": [np.uint8(10.0), np.float32(0)], + "reshape_output": [np.uint8(10.0), np.float32(0)], + } + q_config = { + "Reshape": self.static_q_config, + "conv1": self.static_q_config, + "conv2": self.static_q_config, + "Concat": self.static_q_config, + "AveragePool": self.static_q_config, + } q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) self.assertEqual(len(q_model.model.graph.node), 9) q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types) self.assertEqual(len(q_model.model.graph.node), 21) - + def test_conv(self): - for op in ['Conv', 'FusedConv']: - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 5, 5, 1]) - B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1, 3, 3, 1]) - C = helper.make_tensor('C', TensorProto.FLOAT, [1, 5, 5, 1], np.random.random((1, 5 ,5, 1)).reshape(25).tolist()) - D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [1, 1, 5, 1]) - conv_node = onnx.helper.make_node(op, ['A', 'B', 'C'], ['D'], - name=op, - kernel_shape=[3, 3], - pads=[1, 1, 1, 1]) + for op in ["Conv", "FusedConv"]: + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 5, 5, 1]) + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 3, 3, 1]) + C = helper.make_tensor( + "C", TensorProto.FLOAT, [1, 5, 5, 1], np.random.random((1, 5, 5, 1)).reshape(25).tolist() + ) + D = helper.make_tensor_value_info("D", TensorProto.FLOAT, [1, 1, 5, 1]) + conv_node = onnx.helper.make_node( + op, ["A", "B", "C"], ["D"], name=op, kernel_shape=[3, 3], pads=[1, 1, 1, 1] + ) initializers = [C] - graph = helper.make_graph([conv_node], 'test_graph_1', [A, B], [D], initializer=initializers) + graph = helper.make_graph([conv_node], "test_graph_1", [A, B], [D], initializer=initializers) model = helper.make_model(graph) q_config = {op: self.static_q_config} - quantize_params = {"A": [np.uint8(10.), np.float32(0)], - "B": [np.uint8(10.), np.float32(0)], - "C": [np.uint8(10.), np.float32(0)], - "D": [np.uint8(10.), np.float32(0)]} + quantize_params = { + "A": [np.uint8(10.0), np.float32(0)], + "B": [np.uint8(10.0), np.float32(0)], + "C": [np.uint8(10.0), np.float32(0)], + "D": [np.uint8(10.0), np.float32(0)], + } quantizable_op_types = [op] q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 1) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 2) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 2) q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 4) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 3) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 4) + self.assertEqual(Counter([node.op_type for node in 
q_model.model.graph.node])["QuantizeLinear"], 3) def test_matmul(self): - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 1, 5, 5]) - B_init = helper.make_tensor('B', TensorProto.FLOAT, [1, 1, 5, 1], np.random.random((1, 1, 5, 1)).reshape(5).tolist()) - C = helper.make_tensor_value_info('C', TensorProto.FLOAT, [1, 1, 5, 1]) - matmul_node = onnx.helper.make_node('MatMul', ['A', 'B'], ['C'], name='Matmul') - graph = helper.make_graph([matmul_node], 'test_graph_1', [A], [C], [B_init]) + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 1, 5, 5]) + B_init = helper.make_tensor( + "B", TensorProto.FLOAT, [1, 1, 5, 1], np.random.random((1, 1, 5, 1)).reshape(5).tolist() + ) + C = helper.make_tensor_value_info("C", TensorProto.FLOAT, [1, 1, 5, 1]) + matmul_node = onnx.helper.make_node("MatMul", ["A", "B"], ["C"], name="Matmul") + graph = helper.make_graph([matmul_node], "test_graph_1", [A], [C], [B_init]) model = helper.make_model(graph) q_config = {"Matmul": self.static_q_config} - quantize_params = {"A": [np.uint8(10.), np.float32(0)], - "B": [np.uint8(10.), np.float32(0)], - "C": [np.uint8(10.), np.float32(0)]} + quantize_params = { + "A": [np.uint8(10.0), np.float32(0)], + "B": [np.uint8(10.0), np.float32(0)], + "C": [np.uint8(10.0), np.float32(0)], + } quantizable_op_types = ["Matmul"] q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 1) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 1) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1) q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 3) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 2) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 3) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 2) q_config = {"Matmul": self.dynamic_q_config} q_model = self.dynamic_test(model, q_config, None, quantizable_op_types) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DynamicQuantizeLinear'], 1) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['MatMulInteger'], 1) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DynamicQuantizeLinear"], 1) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["MatMulInteger"], 1) - quantize_params = {"A": [np.float32(10.)], - "B": [np.float32(10.)], - "C": [np.float32(10.)]} + quantize_params = {"A": [np.float32(10.0)], "B": [np.float32(10.0)], "C": [np.float32(10.0)]} with self.assertRaises(ValueError): self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) with self.assertRaises(ValueError): self.qdq_test(model, q_config, quantize_params, quantizable_op_types) - - q_config = {"Matmul": {"weight":{'dtype': 3, - 'algorithm': 'minmax', - 'scheme':'sym', - 'granularity': 'per_tensor'}, - 'activation':{'dtype': 2, - 'algorithm': 'minmax', - 'scheme':'asym', - 'granularity':'per_tensor', - 'quant_mode': 'dynamic'}}} + + q_config = { + "Matmul": { + 
"weight": {"dtype": 3, "algorithm": "minmax", "scheme": "sym", "granularity": "per_tensor"}, + "activation": { + "dtype": 2, + "algorithm": "minmax", + "scheme": "asym", + "granularity": "per_tensor", + "quant_mode": "dynamic", + }, + } + } quantize_params = {} q_model = self.dynamic_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DynamicQuantizeLinear'], 1) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['MatMulInteger'], 1) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DynamicQuantizeLinear"], 1) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["MatMulInteger"], 1) def test_attention(self): - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 1, 5, 5]) - B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1, 1, 5, 5]) - C = helper.make_tensor_value_info('C', TensorProto.FLOAT, [1, 1, 5, 5]) - D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [1, 1, 5, 5]) - node = onnx.helper.make_node('Attention', ['A', 'B', 'C'], ['D'], name='Attention') - graph = helper.make_graph([node], 'test_graph_1', [A, B, C], [D]) + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 1, 5, 5]) + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 1, 5, 5]) + C = helper.make_tensor_value_info("C", TensorProto.FLOAT, [1, 1, 5, 5]) + D = helper.make_tensor_value_info("D", TensorProto.FLOAT, [1, 1, 5, 5]) + node = onnx.helper.make_node("Attention", ["A", "B", "C"], ["D"], name="Attention") + graph = helper.make_graph([node], "test_graph_1", [A, B, C], [D]) model = helper.make_model(graph) q_config = {"Attention": self.static_q_config} - quantize_params = {"A": [np.uint8(0), np.float32(0.5)], - "B": [np.uint8(0), np.float32(0.5)], - "C": [np.uint8(0), np.float32(0.5)], - "D": [np.uint8(0), np.float32(0.5)]} + quantize_params = { + "A": [np.uint8(0), np.float32(0.5)], + "B": [np.uint8(0), np.float32(0.5)], + "C": [np.uint8(0), np.float32(0.5)], + "D": [np.uint8(0), np.float32(0.5)], + } quantizable_op_types = ["Attention"] q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QAttention'], 1) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 3) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 1) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QAttention"], 1) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 3) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1) - converter = QOPERATORS["QAttention"]([i for i in q_model.nodes() if i.op_type == "QAttention"][0], - None, q_model.initializer()) + converter = QOPERATORS["QAttention"]( + [i for i in q_model.nodes() if i.op_type == "QAttention"][0], None, q_model.initializer() + ) done, add_node, init = converter.convert() self.assertTrue("Attention" in [i.op_type for i in add_node]) self.qdq_test(model, q_config, quantize_params, quantizable_op_types) q_config = {"Attention": self.dynamic_q_config} q_model = self.dynamic_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DynamicQuantizeLinear'], 3) + 
self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DynamicQuantizeLinear"], 3) - E = helper.make_tensor_value_info('E', TensorProto.INT32, [1, 1, 5, 5]) - F = helper.make_tensor_value_info('F', TensorProto.FLOAT, [1, 1, 5, 5]) - node = onnx.helper.make_node('Attention', ['A', 'B', 'C', 'F', 'E'], ['D'], name='Attention') - graph = helper.make_graph([node], 'test_graph_1', [A, B, C, F, E], [D]) + E = helper.make_tensor_value_info("E", TensorProto.INT32, [1, 1, 5, 5]) + F = helper.make_tensor_value_info("F", TensorProto.FLOAT, [1, 1, 5, 5]) + node = onnx.helper.make_node("Attention", ["A", "B", "C", "F", "E"], ["D"], name="Attention") + graph = helper.make_graph([node], "test_graph_1", [A, B, C, F, E], [D]) model = helper.make_model(graph) q_config = {"Attention": self.static_q_config} - quantize_params = {"A": [np.uint8(0), np.float32(0.5)], - "B": [np.uint8(0), np.float32(0.5)], - "C": [np.uint8(0), np.float32(0.5)], - "D": [np.uint8(0), np.float32(0.5)]} + quantize_params = { + "A": [np.uint8(0), np.float32(0.5)], + "B": [np.uint8(0), np.float32(0.5)], + "C": [np.uint8(0), np.float32(0.5)], + "D": [np.uint8(0), np.float32(0.5)], + } quantizable_op_types = ["Attention"] q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 3) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 1) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 3) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1) q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 3) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 3) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 3) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 3) q_config = {"Attention": self.dynamic_q_config} q_model = self.dynamic_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DynamicQuantizeLinear'], 3) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DynamicQuantizeLinear"], 3) def test_gather(self): - input_tensor = helper.make_tensor_value_info('input', TensorProto.FLOAT, [3, 2]) + input_tensor = helper.make_tensor_value_info("input", TensorProto.FLOAT, [3, 2]) - matmul_weight = helper.make_tensor('matmul_weight', TensorProto.FLOAT, [2, 3], np.random.random((2, 3)).reshape(6).tolist()) - matmul_output = helper.make_tensor_value_info('matmul_output', TensorProto.FLOAT, [3, 3]) - matmul_node = onnx.helper.make_node('MatMul', ['input', 'matmul_weight'], ['matmul_output'], name='MatMul') + matmul_weight = helper.make_tensor( + "matmul_weight", TensorProto.FLOAT, [2, 3], np.random.random((2, 3)).reshape(6).tolist() + ) + matmul_output = helper.make_tensor_value_info("matmul_output", TensorProto.FLOAT, [3, 3]) + matmul_node = onnx.helper.make_node("MatMul", ["input", "matmul_weight"], ["matmul_output"], name="MatMul") - gather_indices = helper.make_tensor('gather_indices', TensorProto.INT64, [1, 2], [0, 2]) - gather_output = 
helper.make_tensor_value_info('gather_output', TensorProto.FLOAT, [1, 2, 3]) - gather_node = onnx.helper.make_node('Gather', ['matmul_output', 'gather_indices'], ['gather_output'], name='Gather') + gather_indices = helper.make_tensor("gather_indices", TensorProto.INT64, [1, 2], [0, 2]) + gather_output = helper.make_tensor_value_info("gather_output", TensorProto.FLOAT, [1, 2, 3]) + gather_node = onnx.helper.make_node( + "Gather", ["matmul_output", "gather_indices"], ["gather_output"], name="Gather" + ) initializers = [matmul_weight, gather_indices] - graph = helper.make_graph([matmul_node, gather_node], - 'TestGather_test_model', - [input_tensor], [gather_output], initializer=initializers) + graph = helper.make_graph( + [matmul_node, gather_node], + "TestGather_test_model", + [input_tensor], + [gather_output], + initializer=initializers, + ) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) model.ir_version = 7 - - q_config = {'Gather': self.static_q_config, "MatMul": self.static_q_config} - quantize_params = {"input": [np.uint8(10.), np.float32(0)], - "matmul_weight": [np.uint8(10.), np.float32(0)], - "matmul_output": [np.uint8(10.), np.float32(0)], - "gather_output": [np.uint8(10.), np.float32(0)]} + + q_config = {"Gather": self.static_q_config, "MatMul": self.static_q_config} + quantize_params = { + "input": [np.uint8(10.0), np.float32(0)], + "matmul_weight": [np.uint8(10.0), np.float32(0)], + "matmul_output": [np.uint8(10.0), np.float32(0)], + "gather_output": [np.uint8(10.0), np.float32(0)], + } quantizable_op_types = ["Gather", "MatMul"] q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 1) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 1) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1) q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 3) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 4) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 3) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 4) - q_config = {'Gather': self.dynamic_q_config, "MatMul": self.dynamic_q_config} + q_config = {"Gather": self.dynamic_q_config, "MatMul": self.dynamic_q_config} q_model = self.dynamic_test(model, q_config, quantize_params, quantizable_op_types) self.assertEqual(len(q_model.model.graph.node), 6) def test_split(self): - D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [100, 2]) + D = helper.make_tensor_value_info("D", TensorProto.FLOAT, [100, 2]) e_value = np.random.randn(2, 2).astype(np.float32) - E_init = helper.make_tensor('E', TensorProto.FLOAT, [2, 2], - e_value.reshape(4).tolist()) - - matmul_node = onnx.helper.make_node('MatMul', ['D', 'E'], ['A'], name='Matmul') - - B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [50, 2]) - C = helper.make_tensor_value_info('C', TensorProto.FLOAT, [50, 2]) - node = onnx.helper.make_node('Split', ['A'], ['B', 'C'], name='Split', **{'num_outputs': 2}) - graph = helper.make_graph([matmul_node, node], 
'test_graph_1', [D], [B, C], [E_init]) + E_init = helper.make_tensor("E", TensorProto.FLOAT, [2, 2], e_value.reshape(4).tolist()) + + matmul_node = onnx.helper.make_node("MatMul", ["D", "E"], ["A"], name="Matmul") + + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [50, 2]) + C = helper.make_tensor_value_info("C", TensorProto.FLOAT, [50, 2]) + node = onnx.helper.make_node("Split", ["A"], ["B", "C"], name="Split", **{"num_outputs": 2}) + graph = helper.make_graph([matmul_node, node], "test_graph_1", [D], [B, C], [E_init]) model = helper.make_model(graph) - q_config = {'Split': {"weight":{'dtype': 3, - 'algorithm': 'minmax', - 'scheme':'sym', - 'granularity': 'per_tensor'}, - 'activation':{'dtype': 2, - 'algorithm': 'minmax', - 'scheme':'asym', - 'granularity':'per_tensor', - 'quant_mode': 'static'} - }, - 'Matmul': {"weight":{'dtype': 3, - 'algorithm': 'minmax', - 'scheme':'sym', - 'granularity': 'per_tensor'}, - 'activation':{'dtype': 2, - 'algorithm': 'minmax', - 'scheme':'asym', - 'granularity':'per_tensor', - 'quant_mode': 'static'} - }, - - } - quantize_params = {"A": [np.uint8(0), np.float32(0.5)], - "B": [np.uint8(0), np.float32(0.5)], - "C": [np.uint8(0), np.float32(0.5)], - "D": [np.uint8(0), np.float32(0.5)], - "E": [np.uint8(0), np.float32(0.5)], - } + q_config = { + "Split": { + "weight": {"dtype": 3, "algorithm": "minmax", "scheme": "sym", "granularity": "per_tensor"}, + "activation": { + "dtype": 2, + "algorithm": "minmax", + "scheme": "asym", + "granularity": "per_tensor", + "quant_mode": "static", + }, + }, + "Matmul": { + "weight": {"dtype": 3, "algorithm": "minmax", "scheme": "sym", "granularity": "per_tensor"}, + "activation": { + "dtype": 2, + "algorithm": "minmax", + "scheme": "asym", + "granularity": "per_tensor", + "quant_mode": "static", + }, + }, + } + quantize_params = { + "A": [np.uint8(0), np.float32(0.5)], + "B": [np.uint8(0), np.float32(0.5)], + "C": [np.uint8(0), np.float32(0.5)], + "D": [np.uint8(0), np.float32(0.5)], + "E": [np.uint8(0), np.float32(0.5)], + } quantizable_op_types = ["Split", "MatMul"] q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 2) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 1) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 2) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1) - q_model.export('test.onnx', ONNXQlinear2QDQConfig()) - export_model = onnx.load('test.onnx') + q_model.export("test.onnx", ONNXQlinear2QDQConfig()) + export_model = onnx.load("test.onnx") self.assertEqual(len(export_model.graph.node), 11) - os.remove('test.onnx') + os.remove("test.onnx") q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 5) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 4) - + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 5) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 4) + def test_pad(self): b_value = np.array([0, 1, 1, 0, 1, 1]).astype(np.int64) - B_init = helper.make_tensor('B', TensorProto.INT64, [6], - b_value.reshape(6).tolist()) - B = 
helper.make_tensor_value_info('B', TensorProto.INT64, [6]) - C = helper.make_tensor_value_info('C', TensorProto.FLOAT, [1, 7, 7]) + B_init = helper.make_tensor("B", TensorProto.INT64, [6], b_value.reshape(6).tolist()) + B = helper.make_tensor_value_info("B", TensorProto.INT64, [6]) + C = helper.make_tensor_value_info("C", TensorProto.FLOAT, [1, 7, 7]) d_value = np.random.randn(1).astype(np.float32) - D_init = helper.make_tensor('D', TensorProto.FLOAT, [1], - d_value.reshape(1).tolist()) - D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [1]) + D_init = helper.make_tensor("D", TensorProto.FLOAT, [1], d_value.reshape(1).tolist()) + D = helper.make_tensor_value_info("D", TensorProto.FLOAT, [1]) e_value = np.random.randn(1, 5, 5).astype(np.float32) - E_init = helper.make_tensor('E', TensorProto.FLOAT, [1, 1, 5, 5], - e_value.reshape(25).tolist()) - E = helper.make_tensor_value_info('E', TensorProto.FLOAT, [1, 1, 5, 5]) + E_init = helper.make_tensor("E", TensorProto.FLOAT, [1, 1, 5, 5], e_value.reshape(25).tolist()) + E = helper.make_tensor_value_info("E", TensorProto.FLOAT, [1, 1, 5, 5]) f_value = np.random.randn(1, 3, 3).astype(np.float32) - F_init = helper.make_tensor('F', TensorProto.FLOAT, [1, 1, 3, 3], - f_value.reshape(9).tolist()) - F = helper.make_tensor_value_info('F', TensorProto.FLOAT, [1, 1, 3, 3]) + F_init = helper.make_tensor("F", TensorProto.FLOAT, [1, 1, 3, 3], f_value.reshape(9).tolist()) + F = helper.make_tensor_value_info("F", TensorProto.FLOAT, [1, 1, 3, 3]) for mode in ["constant", "edge", "reflect", "constant_value", "constant_value_wo_init"]: - conv_node = onnx.helper.make_node('Conv', ['E', 'F'], ['A'], - name='Conv', - kernel=[3, 3], - padding=[1, 1, 1, 1]) + conv_node = onnx.helper.make_node( + "Conv", ["E", "F"], ["A"], name="Conv", kernel=[3, 3], padding=[1, 1, 1, 1] + ) if mode == "constant_value": - node = onnx.helper.make_node('Pad', ['A', 'B', 'D'], ['C'], name='Pad', mode="constant") - graph = helper.make_graph([conv_node, node], 'test_graph_1', [E, F, B, D], [C], [E_init, F_init, B_init, D_init]) + node = onnx.helper.make_node("Pad", ["A", "B", "D"], ["C"], name="Pad", mode="constant") + graph = helper.make_graph( + [conv_node, node], "test_graph_1", [E, F, B, D], [C], [E_init, F_init, B_init, D_init] + ) elif mode == "constant_value_wo_init": - node = onnx.helper.make_node('Pad', ['A', 'B', 'D'], ['C'], name='Pad', mode="constant") - graph = helper.make_graph([conv_node, node], 'test_graph_1', [E, F, B, D], [C], [E_init, F_init, B_init]) + node = onnx.helper.make_node("Pad", ["A", "B", "D"], ["C"], name="Pad", mode="constant") + graph = helper.make_graph( + [conv_node, node], "test_graph_1", [E, F, B, D], [C], [E_init, F_init, B_init] + ) else: - node = onnx.helper.make_node('Pad', ['A', 'B'], ['C'], name='Pad', mode=mode) - graph = helper.make_graph([conv_node, node], 'test_graph_1', [E, F, B], [C], [E_init, F_init, B_init]) + node = onnx.helper.make_node("Pad", ["A", "B"], ["C"], name="Pad", mode=mode) + graph = helper.make_graph([conv_node, node], "test_graph_1", [E, F, B], [C], [E_init, F_init, B_init]) model = helper.make_model(graph) - pad_config = {"weight":{'dtype': 3, - 'algorithm': 'minmax', - 'scheme':'sym', - 'granularity': 'per_tensor'}, - 'activation':{'dtype': 2, - 'algorithm': 'minmax', - 'scheme':'asym', - 'granularity':'per_tensor', - 'quant_mode': 'static'}} - conv_config = {"weight":{'dtype': 3, - 'algorithm': 'minmax', - 'scheme':'sym', - 'granularity': 'per_channel'}, - 'activation':{'dtype': 2, - 'algorithm': 'minmax', - 
'scheme':'asym', - 'granularity':'per_tensor', - 'quant_mode': 'static'}} - q_config = {'Conv': conv_config, - 'Pad': pad_config} - quantize_params = {"A": [np.uint8(10.), np.float32(1)], - "C": [np.uint8(10.), np.float32(1)], - "D": [np.uint8(10.), np.float32(1)], - "E": [np.uint8(10.), np.float32(1)], - "F": [np.uint8(10.), np.float32(1)]} + pad_config = { + "weight": {"dtype": 3, "algorithm": "minmax", "scheme": "sym", "granularity": "per_tensor"}, + "activation": { + "dtype": 2, + "algorithm": "minmax", + "scheme": "asym", + "granularity": "per_tensor", + "quant_mode": "static", + }, + } + conv_config = { + "weight": {"dtype": 3, "algorithm": "minmax", "scheme": "sym", "granularity": "per_channel"}, + "activation": { + "dtype": 2, + "algorithm": "minmax", + "scheme": "asym", + "granularity": "per_tensor", + "quant_mode": "static", + }, + } + q_config = {"Conv": conv_config, "Pad": pad_config} + quantize_params = { + "A": [np.uint8(10.0), np.float32(1)], + "C": [np.uint8(10.0), np.float32(1)], + "D": [np.uint8(10.0), np.float32(1)], + "E": [np.uint8(10.0), np.float32(1)], + "F": [np.uint8(10.0), np.float32(1)], + } quantizable_op_types = ["Conv", "Pad"] q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 1) - q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types, **{'dedicated_qdq_pair': True}) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 4) - - node = onnx.helper.make_node('Pad', ['E', 'B', 'D'], ['C'], name='Pad', mode="constant") - graph = helper.make_graph([node], 'test_graph_1', [E, B, D], [C], [E_init, B_init, D_init]) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1) + q_model = self.qdq_test( + model, q_config, quantize_params, quantizable_op_types, **{"dedicated_qdq_pair": True} + ) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 4) + + node = onnx.helper.make_node("Pad", ["E", "B", "D"], ["C"], name="Pad", mode="constant") + graph = helper.make_graph([node], "test_graph_1", [E, B, D], [C], [E_init, B_init, D_init]) model = helper.make_model(graph) - q_config = {'Pad': {'activation':{'dtype': 2, - 'algorithm': 'minmax', - 'scheme':'asym', - 'granularity':'per_tensor', - 'quant_mode': 'static'} - }} - quantize_params = {"C": [np.uint8(10.), np.float32(0)], - "E": [np.uint8(10.), np.float32(0)]} + q_config = { + "Pad": { + "activation": { + "dtype": 2, + "algorithm": "minmax", + "scheme": "asym", + "granularity": "per_tensor", + "quant_mode": "static", + } + } + } + quantize_params = {"C": [np.uint8(10.0), np.float32(0)], "E": [np.uint8(10.0), np.float32(0)]} quantizable_op_types = ["Pad"] self.qlinear_test(model, pad_config, quantize_params, quantizable_op_types) self.qdq_test(model, pad_config, quantize_params, quantizable_op_types) def test_binary(self): - for op in ['Mul', 'Add']: - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 10]) - B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1]) - C = helper.make_tensor_value_info('C', TensorProto.FLOAT, [1, 10]) - node = onnx.helper.make_node(op, ['A', 'B'], ['C'], name=op) - graph = helper.make_graph([node], 'test_graph_1', [A, B], [C]) + for op in ["Mul", "Add"]: + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 10]) + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, 
[1]) + C = helper.make_tensor_value_info("C", TensorProto.FLOAT, [1, 10]) + node = onnx.helper.make_node(op, ["A", "B"], ["C"], name=op) + graph = helper.make_graph([node], "test_graph_1", [A, B], [C]) model = helper.make_model(graph) q_config = {op: self.static_q_config} - quantize_params = {"A": [np.uint8(10.), np.float32(0)], - "B": [np.uint8(10.), np.float32(0)], - "C": [np.uint8(10.), np.float32(0)]} + quantize_params = { + "A": [np.uint8(10.0), np.float32(0)], + "B": [np.uint8(10.0), np.float32(0)], + "C": [np.uint8(10.0), np.float32(0)], + } quantizable_op_types = [op] self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) self.qlinear_test(model, q_config, {}, quantizable_op_types) self.qdq_test(model, q_config, quantize_params, quantizable_op_types) self.qdq_test(model, q_config, {}, quantizable_op_types) - + def test_relu(self): - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 1, 5, 5]) - B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1, 1, 3, 3]) - D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [1, 1, 5, 5]) - E = helper.make_tensor_value_info('E', TensorProto.FLOAT, [1, 1, 5, 5]) - F = helper.make_tensor_value_info('F', TensorProto.FLOAT, [1, 1, 5, 5]) - - conv_node = onnx.helper.make_node('Conv', ['A', 'B'], ['C'], - name='Conv', - kernel_shape=[3, 3], - pads=[1, 1, 1, 1]) - relu_node = onnx.helper.make_node('Relu', ['C'], ['D'], name='Relu') - add_node = onnx.helper.make_node('Add', ['D', 'E'], ['F'], name='Add') - graph = helper.make_graph([conv_node, relu_node], 'test_graph_1', [A, B], [D]) - model = helper.make_model(graph, **{'opset_imports': [helper.make_opsetid('', 13)]}) + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 1, 5, 5]) + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 1, 3, 3]) + D = helper.make_tensor_value_info("D", TensorProto.FLOAT, [1, 1, 5, 5]) + E = helper.make_tensor_value_info("E", TensorProto.FLOAT, [1, 1, 5, 5]) + F = helper.make_tensor_value_info("F", TensorProto.FLOAT, [1, 1, 5, 5]) + + conv_node = onnx.helper.make_node( + "Conv", ["A", "B"], ["C"], name="Conv", kernel_shape=[3, 3], pads=[1, 1, 1, 1] + ) + relu_node = onnx.helper.make_node("Relu", ["C"], ["D"], name="Relu") + add_node = onnx.helper.make_node("Add", ["D", "E"], ["F"], name="Add") + graph = helper.make_graph([conv_node, relu_node], "test_graph_1", [A, B], [D]) + model = helper.make_model(graph, **{"opset_imports": [helper.make_opsetid("", 13)]}) sess_options = ort.SessionOptions() sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED - sess_options.optimized_model_filepath = "./onnxrt_test/optimized_model.onnx" + sess_options.optimized_model_filepath = "./onnxrt_test/optimized_model.onnx" session = ort.InferenceSession(model.SerializeToString(), sess_options, providers=ort.get_available_providers()) tmp_model = onnx.load(sess_options.optimized_model_filepath) - + q_config = {"Conv": self.static_q_config, "Relu": self.static_q_config} - quantize_params = {"A": [np.uint8(10.), np.float32(0)], - "B": [np.uint8(10.), np.float32(0)], - "C": [np.uint8(10.), np.float32(0)], - "D": [np.uint8(10.), np.float32(0)]} + quantize_params = { + "A": [np.uint8(10.0), np.float32(0)], + "B": [np.uint8(10.0), np.float32(0)], + "C": [np.uint8(10.0), np.float32(0)], + "D": [np.uint8(10.0), np.float32(0)], + } quantizable_op_types = ["Conv", "Relu"] q_model = self.qlinear_test(tmp_model, q_config, quantize_params, quantizable_op_types) self.assertEqual(len(q_model.model.graph.node), 4) 
q_model = self.qdq_test(tmp_model, q_config, quantize_params, quantizable_op_types) self.assertEqual(len(q_model.model.graph.node), 7) - + sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_BASIC session = ort.InferenceSession(model.SerializeToString(), sess_options, providers=ort.get_available_providers()) tmp_model = onnx.load(sess_options.optimized_model_filepath) @@ -869,8 +931,8 @@ def test_relu(self): q_model = self.qdq_test(tmp_model, q_config, quantize_params, quantizable_op_types) self.assertEqual(len(q_model.model.graph.node), 8) - graph = helper.make_graph([conv_node, relu_node, add_node], 'test_graph_2', [A, B, E], [F]) - model = helper.make_model(graph, **{'opset_imports': [helper.make_opsetid('', 13)]}) + graph = helper.make_graph([conv_node, relu_node, add_node], "test_graph_2", [A, B, E], [F]) + model = helper.make_model(graph, **{"opset_imports": [helper.make_opsetid("", 13)]}) sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_BASIC session = ort.InferenceSession(model.SerializeToString(), sess_options, providers=ort.get_available_providers()) tmp_model = onnx.load(sess_options.optimized_model_filepath) @@ -880,16 +942,15 @@ def test_relu(self): self.assertEqual(len(q_model.model.graph.node), 8) def test_clip(self): - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 1, 5, 5]) - B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1, 1, 3, 3]) - D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [1, 1, 5, 5]) - conv_node = onnx.helper.make_node('Conv', ['A', 'B'], ['C'], - name='Conv', - kernel_shape=[3, 3], - pads=[1, 1, 1, 1]) - clip_node = onnx.helper.make_node('Clip', ['C'], ['D'], name='Clip') - graph = helper.make_graph([conv_node, clip_node], 'test_graph_1', [A, B], [D]) - model = helper.make_model(graph, **{'opset_imports': [helper.make_opsetid('', 13)]}) + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 1, 5, 5]) + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 1, 3, 3]) + D = helper.make_tensor_value_info("D", TensorProto.FLOAT, [1, 1, 5, 5]) + conv_node = onnx.helper.make_node( + "Conv", ["A", "B"], ["C"], name="Conv", kernel_shape=[3, 3], pads=[1, 1, 1, 1] + ) + clip_node = onnx.helper.make_node("Clip", ["C"], ["D"], name="Clip") + graph = helper.make_graph([conv_node, clip_node], "test_graph_1", [A, B], [D]) + model = helper.make_model(graph, **{"opset_imports": [helper.make_opsetid("", 13)]}) sess_options = ort.SessionOptions() sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED @@ -898,36 +959,36 @@ def test_clip(self): model = onnx.load(sess_options.optimized_model_filepath) q_config = {"Conv": self.static_q_config, "Clip": self.static_q_config} - quantize_params = {"A": [np.uint8(10.), np.float32(0)], - "B": [np.uint8(10.), np.float32(0)], - "C": [np.uint8(10.), np.float32(0)], - "D": [np.uint8(10.), np.float32(0)]} + quantize_params = { + "A": [np.uint8(10.0), np.float32(0)], + "B": [np.uint8(10.0), np.float32(0)], + "C": [np.uint8(10.0), np.float32(0)], + "D": [np.uint8(10.0), np.float32(0)], + } quantizable_op_types = ["Conv", "Clip"] q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 1) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 2) + self.assertEqual(Counter([node.op_type for node in 
q_model.model.graph.node])["DequantizeLinear"], 1) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 2) q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 3) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 3) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 3) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 3) def test_activation(self): for op in ["Relu", "LeakyRelu", "Sigmoid"]: - B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1, 10]) - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 10]) - node = onnx.helper.make_node(op, ['A'], ['B'], name=op) - graph = helper.make_graph([node], 'test_graph_1', [A], [B]) + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 10]) + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 10]) + node = onnx.helper.make_node(op, ["A"], ["B"], name=op) + graph = helper.make_graph([node], "test_graph_1", [A], [B]) model = helper.make_model(graph) q_config = {op: self.static_q_config} - quantize_params = {"A": [np.uint8(10.), np.float32(0)], - "B": [np.uint8(10.), np.float32(0)]} + quantize_params = {"A": [np.uint8(10.0), np.float32(0)], "B": [np.uint8(10.0), np.float32(0)]} quantizable_op_types = [op] self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) self.qdq_test(model, q_config, quantize_params, quantizable_op_types) a_value = np.random.randn(1, 10).astype(np.float32) - A_init = helper.make_tensor('A', TensorProto.FLOAT, [1, 10], - a_value.reshape(10).tolist()) - graph = helper.make_graph([node], 'test_graph_1', [A], [B], [A_init]) + A_init = helper.make_tensor("A", TensorProto.FLOAT, [1, 10], a_value.reshape(10).tolist()) + graph = helper.make_graph([node], "test_graph_1", [A], [B], [A_init]) model = helper.make_model(graph) self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) self.qdq_test(model, q_config, quantize_params, quantizable_op_types) @@ -936,16 +997,12 @@ def test_activation(self): def test_pooling(self): op = "MaxPool" - B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1, 5, 5, 1]) - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 5, 5, 1]) - node = onnx.helper.make_node(op, ['A'], ['B'], - name=op, - kernel_shape=[3, 3], - pads=[1, 1, 1, 1]) - graph = helper.make_graph([node], 'test_graph_1', [A], [B]) + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 5, 5, 1]) + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 5, 5, 1]) + node = onnx.helper.make_node(op, ["A"], ["B"], name=op, kernel_shape=[3, 3], pads=[1, 1, 1, 1]) + graph = helper.make_graph([node], "test_graph_1", [A], [B]) q_config = {op: self.static_q_config} - quantize_params = {"A": [np.uint8(10.), np.float32(0)], - "B": [np.uint8(10.), np.float32(0)]} + quantize_params = {"A": [np.uint8(10.0), np.float32(0)], "B": [np.uint8(10.0), np.float32(0)]} quantizable_op_types = [op] for opset_version in [12, 13]: opset = onnx.OperatorSetIdProto() @@ -954,388 +1011,497 @@ def test_pooling(self): self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) self.qdq_test(model, q_config, quantize_params, quantizable_op_types) - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 1, 5, 5]) - B = 
helper.make_tensor_value_info('B', TensorProto.FLOAT, [1, 1, 3, 3]) - D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [1, 1, 5, 5]) - conv_node = onnx.helper.make_node('Conv', ['A', 'B'], ['C'], - name='Conv', - kernel_shape=[3, 3], - pads=[1, 1, 1, 1]) - pool_node = onnx.helper.make_node(op, ['C'], ['D'], name=op) - graph = helper.make_graph([conv_node, pool_node], 'test_graph_1', [A, B], [D]) + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 1, 5, 5]) + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 1, 3, 3]) + D = helper.make_tensor_value_info("D", TensorProto.FLOAT, [1, 1, 5, 5]) + conv_node = onnx.helper.make_node( + "Conv", ["A", "B"], ["C"], name="Conv", kernel_shape=[3, 3], pads=[1, 1, 1, 1] + ) + pool_node = onnx.helper.make_node(op, ["C"], ["D"], name=op) + graph = helper.make_graph([conv_node, pool_node], "test_graph_1", [A, B], [D]) model = helper.make_model(graph) - + q_config = {"Conv": self.static_q_config, op: self.static_q_config} - quantize_params = {"A": [np.uint8(10.), np.float32(0)], - "B": [np.uint8(10.), np.float32(0)], - "C": [np.uint8(10.), np.float32(0)], - "D": [np.uint8(10.), np.float32(0)]} + quantize_params = { + "A": [np.uint8(10.0), np.float32(0)], + "B": [np.uint8(10.0), np.float32(0)], + "C": [np.uint8(10.0), np.float32(0)], + "D": [np.uint8(10.0), np.float32(0)], + } quantizable_op_types = ["Conv", op] q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 1) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 2) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 2) q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 4) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 4) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 4) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 4) op = "GlobalAveragePool" - B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1, 5, 1, 1]) - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 5, 5, 1]) - node = onnx.helper.make_node(op, ['A'], ['B'], - name=op, - kernel_shape=[3, 3], - pads=[1, 1, 1, 1]) - graph = helper.make_graph([node], 'test_graph_1', [A], [B]) + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 5, 1, 1]) + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 5, 5, 1]) + node = onnx.helper.make_node(op, ["A"], ["B"], name=op, kernel_shape=[3, 3], pads=[1, 1, 1, 1]) + graph = helper.make_graph([node], "test_graph_1", [A], [B]) q_config = {op: self.static_q_config} - quantize_params = {"A": [np.uint8(10.), np.float32(0)], - "B": [np.uint8(10.), np.float32(0)]} + quantize_params = {"A": [np.uint8(10.0), np.float32(0)], "B": [np.uint8(10.0), np.float32(0)]} quantizable_op_types = [op] for opset_version in [12, 13]: opset = onnx.OperatorSetIdProto() opset.version = opset_version model = helper.make_model(graph, opset_imports=[opset]) q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - 
self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 1) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 1) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1) q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 2) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 2) - - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 1, 5, 5]) - B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1, 1, 3, 3]) - D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [1, 1, 1, 1]) - conv_node = onnx.helper.make_node('Conv', ['A', 'B'], ['C'], - name='Conv', - kernel_shape=[3, 3], - pads=[1, 1, 1, 1]) - pool_node = onnx.helper.make_node(op, ['C'], ['D'], name=op) - graph = helper.make_graph([conv_node, pool_node], 'test_graph_1', [A, B], [D]) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 2) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 2) + + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 1, 5, 5]) + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 1, 3, 3]) + D = helper.make_tensor_value_info("D", TensorProto.FLOAT, [1, 1, 1, 1]) + conv_node = onnx.helper.make_node( + "Conv", ["A", "B"], ["C"], name="Conv", kernel_shape=[3, 3], pads=[1, 1, 1, 1] + ) + pool_node = onnx.helper.make_node(op, ["C"], ["D"], name=op) + graph = helper.make_graph([conv_node, pool_node], "test_graph_1", [A, B], [D]) model = helper.make_model(graph) - + q_config = {"Conv": self.static_q_config, op: self.static_q_config} - quantize_params = {"A": [np.uint8(10.), np.float32(0)], - "B": [np.uint8(10.), np.float32(0)], - "C": [np.uint8(10.), np.float32(0)], - "D": [np.uint8(10.), np.float32(0)]} + quantize_params = { + "A": [np.uint8(10.0), np.float32(0)], + "B": [np.uint8(10.0), np.float32(0)], + "C": [np.uint8(10.0), np.float32(0)], + "D": [np.uint8(10.0), np.float32(0)], + } quantizable_op_types = ["Conv", op] q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 1) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 2) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 2) q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 4) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 4) - + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 4) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 4) def test_exclude_node(self): - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 5, 5, 1]) - B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [3, 3, 1, 1]) - D 
= helper.make_tensor_value_info('D', TensorProto.FLOAT, [1, 1, 3, 3]) - conv_node = onnx.helper.make_node('Conv', ['A', 'B'], ['C'], - name='Conv', - kernel_shape=[3, 3], - pads=[1, 1, 1, 1]) - pool_node = onnx.helper.make_node("MaxPool", ['C'], ['D'], name="MaxPool") - graph = helper.make_graph([conv_node, pool_node], 'test_graph_1', [A, B], [D]) + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 5, 5, 1]) + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [3, 3, 1, 1]) + D = helper.make_tensor_value_info("D", TensorProto.FLOAT, [1, 1, 3, 3]) + conv_node = onnx.helper.make_node( + "Conv", ["A", "B"], ["C"], name="Conv", kernel_shape=[3, 3], pads=[1, 1, 1, 1] + ) + pool_node = onnx.helper.make_node("MaxPool", ["C"], ["D"], name="MaxPool") + graph = helper.make_graph([conv_node, pool_node], "test_graph_1", [A, B], [D]) model = helper.make_model(graph) q_config = {"Conv": self.static_q_config, "MaxPool": "fp32"} - quantize_params = {"A": [np.uint8(10.), np.float32(0)], - "B": [np.uint8(10.), np.float32(0)], - "C": [np.uint8(10.), np.float32(0)], - "D": [np.uint8(10.), np.float32(0)]} + quantize_params = { + "A": [np.uint8(10.0), np.float32(0)], + "B": [np.uint8(10.0), np.float32(0)], + "C": [np.uint8(10.0), np.float32(0)], + "D": [np.uint8(10.0), np.float32(0)], + } quantizable_op_types = ["Conv", "MaxPool"] self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) self.qdq_test(model, q_config, quantize_params, quantizable_op_types) - + def test_more_direct8bit_nodes(self): # test direct q8 nodes: MatMul-Flatten-Abs-Sign-ShrinK-MatMul - input_tensor = helper.make_tensor_value_info('input', TensorProto.FLOAT, [1, 32]) + input_tensor = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 32]) - matmul1_weight = helper.make_tensor('matmul1_weight', TensorProto.FLOAT, [32, 64], np.random.random((32, 64)).reshape(2048).tolist()) - matmul1_output = helper.make_tensor_value_info('matmul1_output', TensorProto.FLOAT, [1, 64]) - matmul1_node = onnx.helper.make_node('MatMul', ['input', 'matmul1_weight'], ['matmul1_output'], name='Matmul_0') + matmul1_weight = helper.make_tensor( + "matmul1_weight", TensorProto.FLOAT, [32, 64], np.random.random((32, 64)).reshape(2048).tolist() + ) + matmul1_output = helper.make_tensor_value_info("matmul1_output", TensorProto.FLOAT, [1, 64]) + matmul1_node = onnx.helper.make_node("MatMul", ["input", "matmul1_weight"], ["matmul1_output"], name="Matmul_0") - flatten_output = helper.make_tensor_value_info('flatten_output', TensorProto.FLOAT, [1, 64]) - flatten_node = onnx.helper.make_node("Flatten", inputs=["matmul1_output"], outputs=["flatten_output"], axis=1, name='Flatten_1') + flatten_output = helper.make_tensor_value_info("flatten_output", TensorProto.FLOAT, [1, 64]) + flatten_node = onnx.helper.make_node( + "Flatten", inputs=["matmul1_output"], outputs=["flatten_output"], axis=1, name="Flatten_1" + ) - abs_output = helper.make_tensor_value_info('abs_output', TensorProto.FLOAT, [1, 64]) - abs_node = onnx.helper.make_node("Abs", inputs=["flatten_output"], outputs=["abs_output"], name='Abs_2') + abs_output = helper.make_tensor_value_info("abs_output", TensorProto.FLOAT, [1, 64]) + abs_node = onnx.helper.make_node("Abs", inputs=["flatten_output"], outputs=["abs_output"], name="Abs_2") - sign_output = helper.make_tensor_value_info('sign_output', TensorProto.FLOAT, [1, 64]) + sign_output = helper.make_tensor_value_info("sign_output", TensorProto.FLOAT, [1, 64]) sign_node = onnx.helper.make_node("Sign", inputs=["abs_output"], 
outputs=["sign_output"], name="Sign_3") - shrink_output = helper.make_tensor_value_info('shrink_output', TensorProto.FLOAT, [1, 64]) - shrink_node = onnx.helper.make_node("Shrink", inputs=["sign_output"], outputs=["shrink_output"], name="Shrink_4") + shrink_output = helper.make_tensor_value_info("shrink_output", TensorProto.FLOAT, [1, 64]) + shrink_node = onnx.helper.make_node( + "Shrink", inputs=["sign_output"], outputs=["shrink_output"], name="Shrink_4" + ) - matmul2_weight = helper.make_tensor('matmul2_weight', TensorProto.FLOAT, [64, 2], np.random.random((64, 2)).reshape(128).tolist()) - matmul2_output = helper.make_tensor_value_info('matmul2_output', TensorProto.FLOAT, [1, 2]) - matmul2_node = onnx.helper.make_node('MatMul', ['shrink_output', 'matmul2_weight'], ['matmul2_output'], name='Matmul_5') + matmul2_weight = helper.make_tensor( + "matmul2_weight", TensorProto.FLOAT, [64, 2], np.random.random((64, 2)).reshape(128).tolist() + ) + matmul2_output = helper.make_tensor_value_info("matmul2_output", TensorProto.FLOAT, [1, 2]) + matmul2_node = onnx.helper.make_node( + "MatMul", ["shrink_output", "matmul2_weight"], ["matmul2_output"], name="Matmul_5" + ) initializers = [matmul1_weight, matmul2_weight] - graph = helper.make_graph([matmul1_node, flatten_node, abs_node, sign_node, shrink_node, matmul2_node], - 'TestMoreDirect8_test_model', - [input_tensor], [matmul2_output], initializer=initializers) + graph = helper.make_graph( + [matmul1_node, flatten_node, abs_node, sign_node, shrink_node, matmul2_node], + "TestMoreDirect8_test_model", + [input_tensor], + [matmul2_output], + initializer=initializers, + ) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) model.ir_version = 7 - q_config = {'Matmul_0':self.static_q_config, 'Flatten_1':self.static_q_config, 'Abs_2':self.static_q_config, \ - 'Sign_3':self.static_q_config, 'Shrink_4':self.static_q_config, 'Matmul_5':self.static_q_config} - quantize_params = {'input': [np.uint8(10.), np.float32(0)], - 'matmul1_weight': [np.uint8(10.), np.float32(0)], - 'matmul1_output': [np.uint8(10.), np.float32(0)], - 'flatten_output': [np.uint8(10.), np.float32(0)], - 'abs_output': [np.uint8(10.), np.float32(0)], - 'sign_output': [np.uint8(10.), np.float32(0)], - 'shrink_output': [np.uint8(10.), np.float32(0)], - 'matmul2_weight': [np.uint8(10.), np.float32(0)], - 'matmul2_output': [np.uint8(10.), np.float32(0)]} - quantizable_op_types = ['MatMul', 'Flatten', 'Abs', 'Sign', 'Shrink'] + q_config = { + "Matmul_0": self.static_q_config, + "Flatten_1": self.static_q_config, + "Abs_2": self.static_q_config, + "Sign_3": self.static_q_config, + "Shrink_4": self.static_q_config, + "Matmul_5": self.static_q_config, + } + quantize_params = { + "input": [np.uint8(10.0), np.float32(0)], + "matmul1_weight": [np.uint8(10.0), np.float32(0)], + "matmul1_output": [np.uint8(10.0), np.float32(0)], + "flatten_output": [np.uint8(10.0), np.float32(0)], + "abs_output": [np.uint8(10.0), np.float32(0)], + "sign_output": [np.uint8(10.0), np.float32(0)], + "shrink_output": [np.uint8(10.0), np.float32(0)], + "matmul2_weight": [np.uint8(10.0), np.float32(0)], + "matmul2_output": [np.uint8(10.0), np.float32(0)], + } + quantizable_op_types = ["MatMul", "Flatten", "Abs", "Sign", "Shrink"] q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 1) - self.assertEqual(Counter([node.op_type for node in 
q_model.model.graph.node])['QuantizeLinear'], 1) - session = ort.InferenceSession(q_model.model.SerializeToString(), providers=['CPUExecutionProvider']) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1) + session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"]) self.assertIsNotNone(session) - q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types) - q_model.save('qdq.onnx') - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 9) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 7) - session = ort.InferenceSession(q_model.model.SerializeToString(), providers=['CPUExecutionProvider']) + q_model.save("qdq.onnx") + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 9) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 7) + session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"]) self.assertIsNotNone(session) - def test_expand(self): # test expand nodes: MatMul-Expand-MatMul - input_tensor = helper.make_tensor_value_info('input', TensorProto.FLOAT, [3, 2]) + input_tensor = helper.make_tensor_value_info("input", TensorProto.FLOAT, [3, 2]) - matmul1_weight = helper.make_tensor('matmul1_weight', TensorProto.FLOAT, [2, 1], np.random.random((2, 1)).reshape(2).tolist()) - matmul1_output = helper.make_tensor_value_info('matmul1_output', TensorProto.FLOAT, [3, 1]) - matmul1_node = onnx.helper.make_node('MatMul', ['input', 'matmul1_weight'], ['matmul1_output'], name='Matmul_0') + matmul1_weight = helper.make_tensor( + "matmul1_weight", TensorProto.FLOAT, [2, 1], np.random.random((2, 1)).reshape(2).tolist() + ) + matmul1_output = helper.make_tensor_value_info("matmul1_output", TensorProto.FLOAT, [3, 1]) + matmul1_node = onnx.helper.make_node("MatMul", ["input", "matmul1_weight"], ["matmul1_output"], name="Matmul_0") - expand_new_shape = helper.make_tensor('expand_new_shape', TensorProto.INT64, [2], [3, 4]) - expand_output = helper.make_tensor_value_info('expand_output', TensorProto.FLOAT, [3, 4]) - expand_node = onnx.helper.make_node('Expand', ['matmul1_output', 'expand_new_shape'], ['expand_output'], name='Expand_1') + expand_new_shape = helper.make_tensor("expand_new_shape", TensorProto.INT64, [2], [3, 4]) + expand_output = helper.make_tensor_value_info("expand_output", TensorProto.FLOAT, [3, 4]) + expand_node = onnx.helper.make_node( + "Expand", ["matmul1_output", "expand_new_shape"], ["expand_output"], name="Expand_1" + ) - matmul2_weight = helper.make_tensor('matmul2_weight', TensorProto.FLOAT, [4, 2], np.random.random((4, 2)).reshape(8).tolist()) - matmul2_output = helper.make_tensor_value_info('matmul2_output', TensorProto.FLOAT, [3, 2]) - matmul2_node = onnx.helper.make_node('MatMul', ['expand_output', 'matmul2_weight'], ['matmul2_output'], name='Matmul_2') + matmul2_weight = helper.make_tensor( + "matmul2_weight", TensorProto.FLOAT, [4, 2], np.random.random((4, 2)).reshape(8).tolist() + ) + matmul2_output = helper.make_tensor_value_info("matmul2_output", TensorProto.FLOAT, [3, 2]) + matmul2_node = onnx.helper.make_node( + "MatMul", ["expand_output", "matmul2_weight"], ["matmul2_output"], name="Matmul_2" + ) initializers = [matmul1_weight, 
matmul2_weight, expand_new_shape] - graph = helper.make_graph([matmul1_node, expand_node, matmul2_node], - 'TestExpand_test_model', - [input_tensor], [matmul2_output], initializer=initializers) + graph = helper.make_graph( + [matmul1_node, expand_node, matmul2_node], + "TestExpand_test_model", + [input_tensor], + [matmul2_output], + initializer=initializers, + ) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) model.ir_version = 7 - - q_config = {'Matmul_0':self.static_q_config, 'Expand_1':self.static_q_config, 'Matmul_2':self.static_q_config} - quantize_params = {'input': [np.uint8(10.), np.float32(0)], - 'matmul1_weight': [np.uint8(10.), np.float32(0)], - 'matmul1_output': [np.uint8(10.), np.float32(0)], - 'matmul2_weight': [np.uint8(10.), np.float32(0)], - 'matmul2_output': [np.uint8(10.), np.float32(0)], - 'expand_output': [np.uint8(10.), np.float32(0)], - } - quantizable_op_types = ['MatMul', 'Expand'] + + q_config = { + "Matmul_0": self.static_q_config, + "Expand_1": self.static_q_config, + "Matmul_2": self.static_q_config, + } + quantize_params = { + "input": [np.uint8(10.0), np.float32(0)], + "matmul1_weight": [np.uint8(10.0), np.float32(0)], + "matmul1_output": [np.uint8(10.0), np.float32(0)], + "matmul2_weight": [np.uint8(10.0), np.float32(0)], + "matmul2_output": [np.uint8(10.0), np.float32(0)], + "expand_output": [np.uint8(10.0), np.float32(0)], + } + quantizable_op_types = ["MatMul", "Expand"] q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 1) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 1) - session = ort.InferenceSession(q_model.model.SerializeToString(), providers=['CPUExecutionProvider']) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1) + session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"]) self.assertIsNotNone(session) q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 6) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 4) - session = ort.InferenceSession(q_model.model.SerializeToString(), providers=['CPUExecutionProvider']) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 6) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 4) + session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"]) self.assertIsNotNone(session) def test_slice(self): # test slice nodes: MatMul-Slice-MatMul - input_tensor = helper.make_tensor_value_info('input', TensorProto.FLOAT, [5, 4, 1]) - - matmul1_weight = helper.make_tensor('matmul1_weight', TensorProto.FLOAT, [1, 3], np.random.random((1, 3)).reshape(3).tolist()) - matmul1_output = helper.make_tensor_value_info('matmul1_output', TensorProto.FLOAT, [5, 4, 3]) - matmul1_node = onnx.helper.make_node('MatMul', ['input', 'matmul1_weight'], ['matmul1_output'], name='Matmul_0') + input_tensor = helper.make_tensor_value_info("input", TensorProto.FLOAT, [5, 4, 1]) - slice_starts = helper.make_tensor('slice_starts', 
TensorProto.INT64, [2], [0, 0]) - slice_ends = helper.make_tensor('slice_ends', TensorProto.INT64, [2], [3, 4]) - slice_axes = helper.make_tensor('slice_axes', TensorProto.INT64, [2], [0, 1]) - slice_steps = helper.make_tensor('slice_steps', TensorProto.INT64, [2], [1, 1]) - slice_output = helper.make_tensor_value_info('slice_output', TensorProto.FLOAT, [3, 4, 3]) - slice_node = onnx.helper.make_node('Slice', - ['matmul1_output', 'slice_starts', 'slice_ends', 'slice_axes', 'slice_steps'], - ['slice_output'], name='Slice_1') + matmul1_weight = helper.make_tensor( + "matmul1_weight", TensorProto.FLOAT, [1, 3], np.random.random((1, 3)).reshape(3).tolist() + ) + matmul1_output = helper.make_tensor_value_info("matmul1_output", TensorProto.FLOAT, [5, 4, 3]) + matmul1_node = onnx.helper.make_node("MatMul", ["input", "matmul1_weight"], ["matmul1_output"], name="Matmul_0") + + slice_starts = helper.make_tensor("slice_starts", TensorProto.INT64, [2], [0, 0]) + slice_ends = helper.make_tensor("slice_ends", TensorProto.INT64, [2], [3, 4]) + slice_axes = helper.make_tensor("slice_axes", TensorProto.INT64, [2], [0, 1]) + slice_steps = helper.make_tensor("slice_steps", TensorProto.INT64, [2], [1, 1]) + slice_output = helper.make_tensor_value_info("slice_output", TensorProto.FLOAT, [3, 4, 3]) + slice_node = onnx.helper.make_node( + "Slice", + ["matmul1_output", "slice_starts", "slice_ends", "slice_axes", "slice_steps"], + ["slice_output"], + name="Slice_1", + ) - matmul2_weight = helper.make_tensor('matmul2_weight', TensorProto.FLOAT, [3, 2], np.random.random((3, 2)).reshape(6).tolist()) - matmul2_output = helper.make_tensor_value_info('matmul2_output', TensorProto.FLOAT, [3, 4, 2]) - matmul2_node = onnx.helper.make_node('MatMul', ['slice_output', 'matmul2_weight'], ['matmul2_output'], name='Matmul_2') + matmul2_weight = helper.make_tensor( + "matmul2_weight", TensorProto.FLOAT, [3, 2], np.random.random((3, 2)).reshape(6).tolist() + ) + matmul2_output = helper.make_tensor_value_info("matmul2_output", TensorProto.FLOAT, [3, 4, 2]) + matmul2_node = onnx.helper.make_node( + "MatMul", ["slice_output", "matmul2_weight"], ["matmul2_output"], name="Matmul_2" + ) initializers = [matmul1_weight, matmul2_weight, slice_starts, slice_ends, slice_axes, slice_steps] - graph = helper.make_graph([matmul1_node, slice_node, matmul2_node], - 'TestSlice_test_model', - [input_tensor], [matmul2_output], initializer=initializers) + graph = helper.make_graph( + [matmul1_node, slice_node, matmul2_node], + "TestSlice_test_model", + [input_tensor], + [matmul2_output], + initializer=initializers, + ) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) model.ir_version = 7 - q_config = {'Matmul_0':self.static_q_config, 'Slice_1':self.static_q_config, 'Matmul_2':self.static_q_config} - quantize_params = {'input': [np.uint8(10.), np.float32(0)], - 'matmul1_weight': [np.uint8(10.), np.float32(0)], - 'matmul1_output': [np.uint8(10.), np.float32(0)], - 'matmul2_weight': [np.uint8(10.), np.float32(0)], - 'matmul2_output': [np.uint8(10.), np.float32(0)], - 'slice_output': [np.uint8(10.), np.float32(0)], - } - quantizable_op_types = ['MatMul', 'Slice'] + q_config = {"Matmul_0": self.static_q_config, "Slice_1": self.static_q_config, "Matmul_2": self.static_q_config} + quantize_params = { + "input": [np.uint8(10.0), np.float32(0)], + "matmul1_weight": [np.uint8(10.0), np.float32(0)], + "matmul1_output": [np.uint8(10.0), np.float32(0)], + "matmul2_weight": [np.uint8(10.0), np.float32(0)], + "matmul2_output": 
[np.uint8(10.0), np.float32(0)],
+            "slice_output": [np.uint8(10.0), np.float32(0)],
+        }
+        quantizable_op_types = ["MatMul", "Slice"]
         q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 1)
-        self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 1)
-        session = ort.InferenceSession(q_model.model.SerializeToString(), providers=['CPUExecutionProvider'])
+        self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1)
+        self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1)
+        session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"])
         self.assertIsNotNone(session)
         q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 6)
-        self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 4)
-        session = ort.InferenceSession(q_model.model.SerializeToString(), providers=['CPUExecutionProvider'])
+        self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 6)
+        self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 4)
+        session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"])
         self.assertIsNotNone(session)
     def test_mod(self):
         # test mode nodes: MatMul-Mod-MatMul
         #                  MatMul-/
-        input_tensor = helper.make_tensor_value_info('input', TensorProto.FLOAT, [2, 3])
+        input_tensor = helper.make_tensor_value_info("input", TensorProto.FLOAT, [2, 3])
-        matmul1_weight = helper.make_tensor('matmul1_weight', TensorProto.FLOAT, [3, 4], np.random.random((3, 4)).reshape(12).tolist())
-        matmul1_output = helper.make_tensor_value_info('matmul1_output', TensorProto.FLOAT, [2, 4])
-        matmul1_node = onnx.helper.make_node('MatMul', ['input', 'matmul1_weight'], ['matmul1_output'], name='Matmul_0')
+        matmul1_weight = helper.make_tensor(
+            "matmul1_weight", TensorProto.FLOAT, [3, 4], np.random.random((3, 4)).reshape(12).tolist()
+        )
+        matmul1_output = helper.make_tensor_value_info("matmul1_output", TensorProto.FLOAT, [2, 4])
+        matmul1_node = onnx.helper.make_node("MatMul", ["input", "matmul1_weight"], ["matmul1_output"], name="Matmul_0")
-        matmul2_weight = helper.make_tensor('matmul2_weight', TensorProto.FLOAT, [3, 4], np.random.random((3, 4)).reshape(12).tolist())
-        matmul2_output = helper.make_tensor_value_info('matmul2_output', TensorProto.FLOAT, [2, 4])
-        matmul2_node = onnx.helper.make_node('MatMul', ['input', 'matmul2_weight'], ['matmul2_output'], name='Matmul_1')
+        matmul2_weight = helper.make_tensor(
+            "matmul2_weight", TensorProto.FLOAT, [3, 4], np.random.random((3, 4)).reshape(12).tolist()
+        )
+        matmul2_output = helper.make_tensor_value_info("matmul2_output", TensorProto.FLOAT, [2, 4])
+        matmul2_node = onnx.helper.make_node("MatMul", ["input", "matmul2_weight"], ["matmul2_output"], name="Matmul_1")
-        mod_output = helper.make_tensor_value_info('mod_output', TensorProto.FLOAT, [2, 4])
-        mod_node = onnx.helper.make_node('Mod', ['matmul1_output', 'matmul2_output'], ['mod_output'], name='Mod_2')
+        mod_output = helper.make_tensor_value_info("mod_output", TensorProto.FLOAT, [2, 4])
+        mod_node = onnx.helper.make_node("Mod", ["matmul1_output", "matmul2_output"], ["mod_output"], name="Mod_2")
-        matmul3_weight = helper.make_tensor('matmul3_weight', TensorProto.FLOAT, [4, 2], np.random.random((4, 2)).reshape(8).tolist())
-        matmul3_output = helper.make_tensor_value_info('matmul3_output', TensorProto.FLOAT, [2, 2])
-        matmul3_node = onnx.helper.make_node('MatMul', ['mod_output', 'matmul3_weight'], ['matmul3_output'], name='Matmul_3')
+        matmul3_weight = helper.make_tensor(
+            "matmul3_weight", TensorProto.FLOAT, [4, 2], np.random.random((4, 2)).reshape(8).tolist()
+        )
+        matmul3_output = helper.make_tensor_value_info("matmul3_output", TensorProto.FLOAT, [2, 2])
+        matmul3_node = onnx.helper.make_node(
+            "MatMul", ["mod_output", "matmul3_weight"], ["matmul3_output"], name="Matmul_3"
+        )
         initializers = [matmul1_weight, matmul2_weight, matmul3_weight]
-        graph = helper.make_graph([matmul1_node, matmul2_node, mod_node, matmul3_node],
-                                  'TestMod_test_model',
-                                  [input_tensor], [matmul3_output], initializer=initializers)
+        graph = helper.make_graph(
+            [matmul1_node, matmul2_node, mod_node, matmul3_node],
+            "TestMod_test_model",
+            [input_tensor],
+            [matmul3_output],
+            initializer=initializers,
+        )
         model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 14)])
         model.ir_version = 7
-        q_config = {'Matmul_0':self.static_q_config, 'Matmul_1':self.static_q_config, 'Mod_2':self.static_q_config,
-                    'Matmul_3':self.static_q_config}
-        quantize_params = {'input': [np.uint8(10.), np.float32(0)],
-                           'matmul1_weight': [np.uint8(10.), np.float32(0)],
-                           'matmul1_output': [np.uint8(10.), np.float32(0)],
-                           'matmul2_weight': [np.uint8(10.), np.float32(0)],
-                           'matmul2_output': [np.uint8(10.), np.float32(0)],
-                           'mod_output': [np.uint8(10.), np.float32(0)],
-                           'matmul3_weight': [np.uint8(10.), np.float32(0)],
-                           'matmul3_output': [np.uint8(10.), np.float32(0)],
-                           }
-        quantizable_op_types = ['MatMul', 'Mod']
+        q_config = {
+            "Matmul_0": self.static_q_config,
+            "Matmul_1": self.static_q_config,
+            "Mod_2": self.static_q_config,
+            "Matmul_3": self.static_q_config,
+        }
+        quantize_params = {
+            "input": [np.uint8(10.0), np.float32(0)],
+            "matmul1_weight": [np.uint8(10.0), np.float32(0)],
+            "matmul1_output": [np.uint8(10.0), np.float32(0)],
+            "matmul2_weight": [np.uint8(10.0), np.float32(0)],
+            "matmul2_output": [np.uint8(10.0), np.float32(0)],
+            "mod_output": [np.uint8(10.0), np.float32(0)],
+            "matmul3_weight": [np.uint8(10.0), np.float32(0)],
+            "matmul3_output": [np.uint8(10.0), np.float32(0)],
+        }
+        quantizable_op_types = ["MatMul", "Mod"]
         q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 1)
-        self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 1)
-        session = ort.InferenceSession(q_model.model.SerializeToString(), providers=['CPUExecutionProvider'])
+        self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1)
+        self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1)
+        session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"])
         self.assertIsNotNone(session)
-
         q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 8)
-        self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 5)
-        session = ort.InferenceSession(q_model.model.SerializeToString(), providers=['CPUExecutionProvider'])
+        self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 8)
+        self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 5)
+        session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"])
         self.assertIsNotNone(session)
     def test_reducemin_reducemax(self):
         # MatMul-ReduceMin-MatMul
-        input_tensor = helper.make_tensor_value_info('input', TensorProto.FLOAT, [3, 2, 3])
+        input_tensor = helper.make_tensor_value_info("input", TensorProto.FLOAT, [3, 2, 3])
-        matmul1_weight = helper.make_tensor('matmul1_weight', TensorProto.FLOAT, [3, 2], np.random.random((3, 2)).reshape(6).tolist())
-        matmul1_output = helper.make_tensor_value_info('matmul1_output', TensorProto.FLOAT, [3, 2, 2])
-        matmul1_node = onnx.helper.make_node('MatMul', ['input', 'matmul1_weight'], ['matmul1_output'], name='Matmul_0')
+        matmul1_weight = helper.make_tensor(
+            "matmul1_weight", TensorProto.FLOAT, [3, 2], np.random.random((3, 2)).reshape(6).tolist()
+        )
+        matmul1_output = helper.make_tensor_value_info("matmul1_output", TensorProto.FLOAT, [3, 2, 2])
+        matmul1_node = onnx.helper.make_node("MatMul", ["input", "matmul1_weight"], ["matmul1_output"], name="Matmul_0")
+
+        reducemin_output = helper.make_tensor_value_info("reducemin_output", TensorProto.FLOAT, [3, 1, 2])
+        reducemin_node = onnx.helper.make_node(
+            "ReduceMin",
+            inputs=["matmul1_output"],
+            outputs=["reducemin_output"],
+            axes=[1],
+            keepdims=1,
+            name="Reducemin_1",
+        )
-        reducemin_output = helper.make_tensor_value_info('reducemin_output', TensorProto.FLOAT, [3, 1, 2])
-        reducemin_node = onnx.helper.make_node("ReduceMin", inputs=['matmul1_output'],
-                                               outputs=['reducemin_output'], axes=[1], keepdims=1, name='Reducemin_1')
-
-        matmul2_weight = helper.make_tensor('matmul2_weight', TensorProto.FLOAT, [2, 3], np.random.random((2, 3)).reshape(6).tolist())
-        matmul2_output = helper.make_tensor_value_info('matmul2_output', TensorProto.FLOAT, [3, 1, 3])
-        matmul2_node = onnx.helper.make_node('MatMul', ['reducemin_output', 'matmul2_weight'], ['matmul2_output'], name='Matmul_2')
+        matmul2_weight = helper.make_tensor(
+            "matmul2_weight", TensorProto.FLOAT, [2, 3], np.random.random((2, 3)).reshape(6).tolist()
+        )
+        matmul2_output = helper.make_tensor_value_info("matmul2_output", TensorProto.FLOAT, [3, 1, 3])
+        matmul2_node = onnx.helper.make_node(
+            "MatMul", ["reducemin_output", "matmul2_weight"], ["matmul2_output"], name="Matmul_2"
+        )
         initializers = [matmul1_weight, matmul2_weight]
-        graph = helper.make_graph([matmul1_node, reducemin_node, matmul2_node],
-                                  'TestReduceMin_test_model',
-                                  [input_tensor], [matmul2_output], initializer=initializers)
+        graph = helper.make_graph(
+            [matmul1_node, reducemin_node, matmul2_node],
+            "TestReduceMin_test_model",
+            [input_tensor],
+            [matmul2_output],
+            initializer=initializers,
+        )
         model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
         model.ir_version = 7
-        q_config = {'Matmul_0':self.static_q_config, 'Reducemin_1':self.static_q_config, 'Matmul_2':self.static_q_config}
-        quantize_params = {'input': [np.uint8(10.), np.float32(0)],
-                           'matmul1_weight': [np.uint8(10.), np.float32(0)],
-                           'matmul1_output': [np.uint8(10.), np.float32(0)],
-                           'reducemin_output': [np.uint8(10.), np.float32(0)],
-                           'matmul2_weight': [np.uint8(10.), np.float32(0)],
-                           'matmul2_output': [np.uint8(10.), np.float32(0)]}
-        quantizable_op_types = ['MatMul', 'ReduceMin']
+        q_config = {
+            "Matmul_0": self.static_q_config,
+            "Reducemin_1": self.static_q_config,
+            "Matmul_2": self.static_q_config,
+        }
+        quantize_params = {
+            "input": [np.uint8(10.0), np.float32(0)],
+            "matmul1_weight": [np.uint8(10.0), np.float32(0)],
+            "matmul1_output": [np.uint8(10.0), np.float32(0)],
+            "reducemin_output": [np.uint8(10.0), np.float32(0)],
+            "matmul2_weight": [np.uint8(10.0), np.float32(0)],
+            "matmul2_output": [np.uint8(10.0), np.float32(0)],
+        }
+        quantizable_op_types = ["MatMul", "ReduceMin"]
         q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 1)
-        self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 1)
-        session = ort.InferenceSession(q_model.model.SerializeToString(), providers=['CPUExecutionProvider'])
+        self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1)
+        self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1)
+        session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"])
         self.assertIsNotNone(session)
         q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types)
-        self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 6)
-        self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 4)
-        session = ort.InferenceSession(q_model.model.SerializeToString(), providers=['CPUExecutionProvider'])
+        self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 6)
+        self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 4)
+        session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"])
         self.assertIsNotNone(session)
         # MatMul-ReduceMax-MatMul
-        input_tensor = helper.make_tensor_value_info('input', TensorProto.FLOAT, [3, 2, 3])
+        input_tensor = helper.make_tensor_value_info("input", TensorProto.FLOAT, [3, 2, 3])
-        matmul1_weight = helper.make_tensor('matmul1_weight', TensorProto.FLOAT, [3, 2], np.random.random((3, 2)).reshape(6).tolist())
-        matmul1_output = helper.make_tensor_value_info('matmul1_output', TensorProto.FLOAT, [3, 2, 2])
-        matmul1_node = onnx.helper.make_node('MatMul', ['input', 'matmul1_weight'], ['matmul1_output'], name='Matmul_0')
+        matmul1_weight = helper.make_tensor(
+            "matmul1_weight", TensorProto.FLOAT, [3, 2], np.random.random((3, 2)).reshape(6).tolist()
+        )
+        matmul1_output = helper.make_tensor_value_info("matmul1_output", TensorProto.FLOAT, [3, 2, 2])
+        matmul1_node = onnx.helper.make_node("MatMul", ["input", "matmul1_weight"], ["matmul1_output"], name="Matmul_0")
+
+        reducemax_output = helper.make_tensor_value_info("reducemax_output", TensorProto.FLOAT, [3, 1, 2])
+        reducemax_node = onnx.helper.make_node(
+            "ReduceMax",
+            inputs=["matmul1_output"],
+            outputs=["reducemax_output"],
+            axes=[1],
+            keepdims=1,
+            name="Reducemax_1",
+        )
-        reducemax_output = helper.make_tensor_value_info('reducemax_output', TensorProto.FLOAT, [3, 1, 2])
-        reducemax_node = onnx.helper.make_node("ReduceMax", inputs=['matmul1_output'],
-                                               outputs=['reducemax_output'], axes=[1], keepdims=1, name='Reducemax_1')
-
-        matmul2_weight = helper.make_tensor('matmul2_weight', TensorProto.FLOAT, [2, 3], np.random.random((2, 3)).reshape(6).tolist())
-
matmul2_output = helper.make_tensor_value_info('matmul2_output', TensorProto.FLOAT, [3, 1, 3]) - matmul2_node = onnx.helper.make_node('MatMul', ['reducemax_output', 'matmul2_weight'], ['matmul2_output'], name='Matmul_2') + matmul2_weight = helper.make_tensor( + "matmul2_weight", TensorProto.FLOAT, [2, 3], np.random.random((2, 3)).reshape(6).tolist() + ) + matmul2_output = helper.make_tensor_value_info("matmul2_output", TensorProto.FLOAT, [3, 1, 3]) + matmul2_node = onnx.helper.make_node( + "MatMul", ["reducemax_output", "matmul2_weight"], ["matmul2_output"], name="Matmul_2" + ) initializers = [matmul1_weight, matmul2_weight] - graph = helper.make_graph([matmul1_node, reducemax_node, matmul2_node], - 'TestReduceMax_test_model', - [input_tensor], [matmul2_output], initializer=initializers) + graph = helper.make_graph( + [matmul1_node, reducemax_node, matmul2_node], + "TestReduceMax_test_model", + [input_tensor], + [matmul2_output], + initializer=initializers, + ) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) model.ir_version = 7 - q_config = {'Matmul_0':self.static_q_config, 'Reducemax_1':self.static_q_config, 'Matmul_2':self.static_q_config} - quantize_params = {'input': [np.uint8(10.), np.float32(0)], - 'matmul1_weight': [np.uint8(10.), np.float32(0)], - 'matmul1_output': [np.uint8(10.), np.float32(0)], - 'reducemax_output': [np.uint8(10.), np.float32(0)], - 'matmul2_weight': [np.uint8(10.), np.float32(0)], - 'matmul2_output': [np.uint8(10.), np.float32(0)]} - quantizable_op_types = ['MatMul', 'ReduceMax'] + q_config = { + "Matmul_0": self.static_q_config, + "Reducemax_1": self.static_q_config, + "Matmul_2": self.static_q_config, + } + quantize_params = { + "input": [np.uint8(10.0), np.float32(0)], + "matmul1_weight": [np.uint8(10.0), np.float32(0)], + "matmul1_output": [np.uint8(10.0), np.float32(0)], + "reducemax_output": [np.uint8(10.0), np.float32(0)], + "matmul2_weight": [np.uint8(10.0), np.float32(0)], + "matmul2_output": [np.uint8(10.0), np.float32(0)], + } + quantizable_op_types = ["MatMul", "ReduceMax"] q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 1) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 1) - session = ort.InferenceSession(q_model.model.SerializeToString(), providers=['CPUExecutionProvider']) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 1) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 1) + session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"]) q_model = self.qdq_test(model, q_config, quantize_params, quantizable_op_types) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['DequantizeLinear'], 6) - self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])['QuantizeLinear'], 4) - session = ort.InferenceSession(q_model.model.SerializeToString(), providers=['CPUExecutionProvider']) - -class TestCastONNXRT(unittest.TestCase): + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["DequantizeLinear"], 6) + self.assertEqual(Counter([node.op_type for node in q_model.model.graph.node])["QuantizeLinear"], 4) + session = ort.InferenceSession(q_model.model.SerializeToString(), providers=["CPUExecutionProvider"]) + +class 
TestCastONNXRT(unittest.TestCase): @classmethod def tearDownClass(cls): shutil.rmtree("./nc_workspace", ignore_errors=True) @@ -1359,9 +1525,9 @@ def build_model(self, inps, outs, weights, node_infos): for name, in_name, out_name, type, domain in node_infos: nodes.append(onnx.helper.make_node(type, in_name, out_name, name=name, domain=domain)) - graph = helper.make_graph(nodes, 'test', inputs, outputs, inits) + graph = helper.make_graph(nodes, "test", inputs, outputs, inits) model = helper.make_model(graph) - model = helper.make_model(graph, **{'opset_imports': [helper.make_opsetid('', 15)]}) + model = helper.make_model(graph, **{"opset_imports": [helper.make_opsetid("", 15)]}) return model def build_test_data(self, names, shapes, dtypes): @@ -1373,339 +1539,401 @@ def build_test_data(self, names, shapes, dtypes): def get_fp16_mixed_precision_model(self, model): from neural_compressor import MixedPrecisionConfig from neural_compressor.mix_precision import fit - config = MixedPrecisionConfig(backend='onnxrt_cuda_ep', device='gpu', precision='fp16') + + config = MixedPrecisionConfig(backend="onnxrt_cuda_ep", device="gpu", precision="fp16") converted_model = fit(model, config) return converted_model def test_fp16(self): - optypes = ['Sum', 'Sub', 'Div', 'Pow', 'Add'] + optypes = ["Sum", "Sub", "Div", "Pow", "Add"] for optype in optypes: - inps = [['input1', TensorProto.FLOAT, (1,2)]] - outs = [['output', TensorProto.FLOAT, (1,2)]] - weights = [['input2', TensorProto.FLOAT, (1,2), np.random.random((2))]] - node_infos = [['test', ['input1', 'input2'], ['output'], optype]] + inps = [["input1", TensorProto.FLOAT, (1, 2)]] + outs = [["output", TensorProto.FLOAT, (1, 2)]] + weights = [["input2", TensorProto.FLOAT, (1, 2), np.random.random((2))]] + node_infos = [["test", ["input1", "input2"], ["output"], optype]] model = self.build_model(inps, outs, weights, node_infos) - input_data = self.build_test_data(['input1'], [(1,2)], ['float32']) + input_data = self.build_test_data(["input1"], [(1, 2)], ["float32"]) convert_model = self.get_fp16_mixed_precision_model(model) - self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()])) - self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast'])) - session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=ort.get_available_providers()) + self.assertTrue("Cast" in set([i.op_type for i in convert_model.nodes()])) + self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == "Cast"])) + session = ort.InferenceSession( + convert_model.model.SerializeToString(), providers=ort.get_available_providers() + ) outputs = session.run(None, input_data) - optypes = ['Equal', 'Greater', 'GreaterOrEqual', 'Less', 'LessOrEqual'] + optypes = ["Equal", "Greater", "GreaterOrEqual", "Less", "LessOrEqual"] for optype in optypes: - inps = [['input1', TensorProto.FLOAT, (1,2)]] - outs = [['output', TensorProto.BOOL, (1,2)]] - weights = [['input2', TensorProto.FLOAT, (1,2), np.random.random((2))]] - node_infos = [['test', ['input1', 'input2'], ['output'], optype]] + inps = [["input1", TensorProto.FLOAT, (1, 2)]] + outs = [["output", TensorProto.BOOL, (1, 2)]] + weights = [["input2", TensorProto.FLOAT, (1, 2), np.random.random((2))]] + node_infos = [["test", ["input1", "input2"], ["output"], optype]] model = self.build_model(inps, outs, weights, node_infos) - input_data = self.build_test_data(['input1'], [(1,2)], ['float32']) + input_data = self.build_test_data(["input1"], 
[(1, 2)], ["float32"]) convert_model = self.get_fp16_mixed_precision_model(model) - self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()])) - self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast'])) - session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=ort.get_available_providers()) + self.assertTrue("Cast" in set([i.op_type for i in convert_model.nodes()])) + self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == "Cast"])) + session = ort.InferenceSession( + convert_model.model.SerializeToString(), providers=ort.get_available_providers() + ) outputs = session.run(None, input_data) - optypes = ['Abs', 'Exp', 'Log', 'Round', 'Sqrt', 'Softmax', 'Exp', 'Tanh', 'Sigmoid', 'LeakyRelu', 'Round'] + optypes = ["Abs", "Exp", "Log", "Round", "Sqrt", "Softmax", "Exp", "Tanh", "Sigmoid", "LeakyRelu", "Round"] for optype in optypes: - inps = [['input1', TensorProto.FLOAT, (1,2)]] - outs = [['output', TensorProto.FLOAT, (1,2)]] - node_infos = [['test', ['input1'], ['output'], optype]] + inps = [["input1", TensorProto.FLOAT, (1, 2)]] + outs = [["output", TensorProto.FLOAT, (1, 2)]] + node_infos = [["test", ["input1"], ["output"], optype]] model = self.build_model(inps, outs, [], node_infos) - input_data = self.build_test_data(['input1'], [(1,2)], ['float32']) + input_data = self.build_test_data(["input1"], [(1, 2)], ["float32"]) convert_model = self.get_fp16_mixed_precision_model(model) - self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()])) - self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast'])) - session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=ort.get_available_providers()) + self.assertTrue("Cast" in set([i.op_type for i in convert_model.nodes()])) + self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == "Cast"])) + session = ort.InferenceSession( + convert_model.model.SerializeToString(), providers=ort.get_available_providers() + ) outputs = session.run(None, input_data) - optypes = ['ReduceMean', 'ReduceL1', 'ReduceL2', 'ReduceLogSum', 'ReduceLogSumExp', 'ReduceMax', 'ReduceProd', \ - 'ReduceSum', 'ReduceSumSquare'] + optypes = [ + "ReduceMean", + "ReduceL1", + "ReduceL2", + "ReduceLogSum", + "ReduceLogSumExp", + "ReduceMax", + "ReduceProd", + "ReduceSum", + "ReduceSumSquare", + ] for optype in optypes: - inps = [['input1', TensorProto.FLOAT, (1,2)]] - outs = [['output', TensorProto.FLOAT, (1,1)]] - node_infos = [['test', ['input1'], ['output'], optype]] + inps = [["input1", TensorProto.FLOAT, (1, 2)]] + outs = [["output", TensorProto.FLOAT, (1, 1)]] + node_infos = [["test", ["input1"], ["output"], optype]] model = self.build_model(inps, outs, [], node_infos) - input_data = self.build_test_data(['input1'], [(1,2)], ['float32']) + input_data = self.build_test_data(["input1"], [(1, 2)], ["float32"]) convert_model = self.get_fp16_mixed_precision_model(model) - self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()])) - self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast'])) - session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=ort.get_available_providers()) + self.assertTrue("Cast" in set([i.op_type for i in convert_model.nodes()])) + self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == "Cast"])) + session = 
ort.InferenceSession( + convert_model.model.SerializeToString(), providers=ort.get_available_providers() + ) outputs = session.run(None, input_data) - optypes = ['Gelu'] + optypes = ["Gelu"] for optype in optypes: - inps = [['input1', TensorProto.FLOAT, (1,2)]] - outs = [['output', TensorProto.FLOAT, (1,2)]] - node_infos = [['test', ['input1'], ['output'], optype, 'com.microsoft']] + inps = [["input1", TensorProto.FLOAT, (1, 2)]] + outs = [["output", TensorProto.FLOAT, (1, 2)]] + node_infos = [["test", ["input1"], ["output"], optype, "com.microsoft"]] model = self.build_model(inps, outs, [], node_infos) - input_data = self.build_test_data(['input1'], [(1,2)], ['float32']) + input_data = self.build_test_data(["input1"], [(1, 2)], ["float32"]) convert_model = self.get_fp16_mixed_precision_model(model) - self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()])) - self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast'])) - session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=ort.get_available_providers()) + self.assertTrue("Cast" in set([i.op_type for i in convert_model.nodes()])) + self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == "Cast"])) + session = ort.InferenceSession( + convert_model.model.SerializeToString(), providers=ort.get_available_providers() + ) outputs = session.run(None, input_data) - optypes = ['BiasGelu', 'FastGelu'] + optypes = ["BiasGelu", "FastGelu"] for optype in optypes: - inps = [['input1', TensorProto.FLOAT, [2]]] - outs = [['output', TensorProto.FLOAT, [2]]] - weights = [['input2', TensorProto.FLOAT, [2], np.random.random((2))]] - node_infos = [['test', ['input1', 'input2'], ['output'], optype, 'com.microsoft']] + inps = [["input1", TensorProto.FLOAT, [2]]] + outs = [["output", TensorProto.FLOAT, [2]]] + weights = [["input2", TensorProto.FLOAT, [2], np.random.random((2))]] + node_infos = [["test", ["input1", "input2"], ["output"], optype, "com.microsoft"]] model = self.build_model(inps, outs, weights, node_infos) - input_data = self.build_test_data(['input1'], [(2)], ['float32']) + input_data = self.build_test_data(["input1"], [(2)], ["float32"]) convert_model = self.get_fp16_mixed_precision_model(model) - self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()])) - self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast'])) - session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=ort.get_available_providers()) + self.assertTrue("Cast" in set([i.op_type for i in convert_model.nodes()])) + self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == "Cast"])) + session = ort.InferenceSession( + convert_model.model.SerializeToString(), providers=ort.get_available_providers() + ) outputs = session.run(None, input_data) - - optypes = ['MatMul'] + optypes = ["MatMul"] for optype in optypes: - inps = [['input1', TensorProto.FLOAT, (1,2)]] - outs = [['output', TensorProto.FLOAT, (1,1)]] - weights = [['input2', TensorProto.FLOAT, (2,1), np.random.random((2))]] - node_infos = [['test', ['input1', 'input2'], ['output'], optype]] + inps = [["input1", TensorProto.FLOAT, (1, 2)]] + outs = [["output", TensorProto.FLOAT, (1, 1)]] + weights = [["input2", TensorProto.FLOAT, (2, 1), np.random.random((2))]] + node_infos = [["test", ["input1", "input2"], ["output"], optype]] model = self.build_model(inps, outs, weights, node_infos) - input_data = 
self.build_test_data(['input1'], [(1,2)], ['float32']) + input_data = self.build_test_data(["input1"], [(1, 2)], ["float32"]) convert_model = self.get_fp16_mixed_precision_model(model) - self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()])) - self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast'])) - session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=ort.get_available_providers()) + self.assertTrue("Cast" in set([i.op_type for i in convert_model.nodes()])) + self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == "Cast"])) + session = ort.InferenceSession( + convert_model.model.SerializeToString(), providers=ort.get_available_providers() + ) outputs = session.run(None, input_data) - optypes = ['FusedMatMul'] + optypes = ["FusedMatMul"] for optype in optypes: - inps = [['input1', TensorProto.FLOAT, (1,2)]] - outs = [['output', TensorProto.FLOAT, (1,1)]] - weights = [['input2', TensorProto.FLOAT, (2,1), np.random.random((2))]] - node_infos = [['test', ['input1', 'input2'], ['output'], optype, 'com.microsoft']] + inps = [["input1", TensorProto.FLOAT, (1, 2)]] + outs = [["output", TensorProto.FLOAT, (1, 1)]] + weights = [["input2", TensorProto.FLOAT, (2, 1), np.random.random((2))]] + node_infos = [["test", ["input1", "input2"], ["output"], optype, "com.microsoft"]] model = self.build_model(inps, outs, weights, node_infos) ort.InferenceSession(model.SerializeToString()) - input_data = self.build_test_data(['input1'], [(1,2)], ['float32']) + input_data = self.build_test_data(["input1"], [(1, 2)], ["float32"]) convert_model = self.get_fp16_mixed_precision_model(model) - self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()])) - self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast'])) - session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=ort.get_available_providers()) + self.assertTrue("Cast" in set([i.op_type for i in convert_model.nodes()])) + self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == "Cast"])) + session = ort.InferenceSession( + convert_model.model.SerializeToString(), providers=ort.get_available_providers() + ) outputs = session.run(None, input_data) - optypes = ['Gemm'] + optypes = ["Gemm"] for optype in optypes: - inps = [['input1', TensorProto.FLOAT, (1,2)]] - outs = [['output', TensorProto.FLOAT, (1,1)]] - weights = [['input2', TensorProto.FLOAT, (2,1), np.random.random((2))], - ['input3', TensorProto.FLOAT, (1,1), np.random.random((1))]] - node_infos = [['test', ['input1', 'input2', 'input3'], ['output'], optype]] + inps = [["input1", TensorProto.FLOAT, (1, 2)]] + outs = [["output", TensorProto.FLOAT, (1, 1)]] + weights = [ + ["input2", TensorProto.FLOAT, (2, 1), np.random.random((2))], + ["input3", TensorProto.FLOAT, (1, 1), np.random.random((1))], + ] + node_infos = [["test", ["input1", "input2", "input3"], ["output"], optype]] model = self.build_model(inps, outs, weights, node_infos) - input_data = self.build_test_data(['input1'], [(1,2)], ['float32']) + input_data = self.build_test_data(["input1"], [(1, 2)], ["float32"]) convert_model = self.get_fp16_mixed_precision_model(model) - self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()])) - self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast'])) - session = 
ort.InferenceSession(convert_model.model.SerializeToString(), providers=ort.get_available_providers()) + self.assertTrue("Cast" in set([i.op_type for i in convert_model.nodes()])) + self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == "Cast"])) + session = ort.InferenceSession( + convert_model.model.SerializeToString(), providers=ort.get_available_providers() + ) outputs = session.run(None, input_data) - optypes = ['LayerNormalization'] + optypes = ["LayerNormalization"] for optype in optypes: - inps = [['input1', TensorProto.FLOAT, (1,2)]] - outs = [['output1', TensorProto.FLOAT, (1,2)], ['output2', TensorProto.FLOAT, (1,2)], ['output3', TensorProto.FLOAT, (1,2)]] - weights = [['input2', TensorProto.FLOAT, (2,1), np.random.random((2))], - ['input3', TensorProto.FLOAT, (2,1), np.random.random((2))]] - node_infos = [['test', ['input1', 'input2', 'input3'], ['output1', 'output2', 'output3'], optype]] + inps = [["input1", TensorProto.FLOAT, (1, 2)]] + outs = [ + ["output1", TensorProto.FLOAT, (1, 2)], + ["output2", TensorProto.FLOAT, (1, 2)], + ["output3", TensorProto.FLOAT, (1, 2)], + ] + weights = [ + ["input2", TensorProto.FLOAT, (2, 1), np.random.random((2))], + ["input3", TensorProto.FLOAT, (2, 1), np.random.random((2))], + ] + node_infos = [["test", ["input1", "input2", "input3"], ["output1", "output2", "output3"], optype]] model = self.build_model(inps, outs, weights, node_infos) - input_data = self.build_test_data(['input1'], [(1,2)], ['float32']) + input_data = self.build_test_data(["input1"], [(1, 2)], ["float32"]) convert_model = self.get_fp16_mixed_precision_model(model) - self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()])) - self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast'])) - session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=ort.get_available_providers()) + self.assertTrue("Cast" in set([i.op_type for i in convert_model.nodes()])) + self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == "Cast"])) + session = ort.InferenceSession( + convert_model.model.SerializeToString(), providers=ort.get_available_providers() + ) outputs = session.run(None, input_data) - optypes = ['BatchNormalization'] + optypes = ["BatchNormalization"] for optype in optypes: - inps = [['input1', TensorProto.FLOAT, [1, 2]]] - outs = [['output1', TensorProto.FLOAT, [1, 2]]] - weights = [['input2', TensorProto.FLOAT, [2], np.random.random((2))], - ['input3', TensorProto.FLOAT, [2], np.random.random((2))], - ['input4', TensorProto.FLOAT, [2], np.random.random((2))], - ['input5', TensorProto.FLOAT, [2], np.random.random((2))],] - node_infos = [['test', ['input1', 'input2', 'input3', 'input4', 'input5'], ['output1'], optype]] + inps = [["input1", TensorProto.FLOAT, [1, 2]]] + outs = [["output1", TensorProto.FLOAT, [1, 2]]] + weights = [ + ["input2", TensorProto.FLOAT, [2], np.random.random((2))], + ["input3", TensorProto.FLOAT, [2], np.random.random((2))], + ["input4", TensorProto.FLOAT, [2], np.random.random((2))], + ["input5", TensorProto.FLOAT, [2], np.random.random((2))], + ] + node_infos = [["test", ["input1", "input2", "input3", "input4", "input5"], ["output1"], optype]] model = self.build_model(inps, outs, weights, node_infos) ort.InferenceSession(model.SerializeToString()) - input_data = self.build_test_data(['input1'], [(1,2)], ['float32']) + input_data = self.build_test_data(["input1"], [(1, 2)], ["float32"]) convert_model = 
self.get_fp16_mixed_precision_model(model) - self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()])) - self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast'])) - session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=ort.get_available_providers()) + self.assertTrue("Cast" in set([i.op_type for i in convert_model.nodes()])) + self.assertTrue(10 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == "Cast"])) + session = ort.InferenceSession( + convert_model.model.SerializeToString(), providers=ort.get_available_providers() + ) outputs = session.run(None, input_data) def get_bf16_mixed_precision_model(self, model): from neural_compressor import MixedPrecisionConfig from neural_compressor.mix_precision import fit - config = MixedPrecisionConfig(backend='onnxrt_dnnl_ep', precision='bf16') + + config = MixedPrecisionConfig(backend="onnxrt_dnnl_ep", precision="bf16") converted_model = fit(model, config) return converted_model - @unittest.skipIf(not CpuInfo().bf16 or 'DnnlExecutionProvider' not in ort.get_available_providers(), - "skip since DnnlExecutionProvider is not supported") + @unittest.skipIf( + not CpuInfo().bf16 or "DnnlExecutionProvider" not in ort.get_available_providers(), + "skip since DnnlExecutionProvider is not supported", + ) def test_bf16(self): - optypes = ['Sum', 'Sub', 'Div', 'Pow', 'Add'] + optypes = ["Sum", "Sub", "Div", "Pow", "Add"] for optype in optypes: - inps = [['input1', TensorProto.FLOAT, (1,2)]] - outs = [['output', TensorProto.FLOAT, (1,2)]] - weights = [['input2', TensorProto.FLOAT, (1,2), np.random.random((2))]] - node_infos = [['test', ['input1', 'input2'], ['output'], optype]] + inps = [["input1", TensorProto.FLOAT, (1, 2)]] + outs = [["output", TensorProto.FLOAT, (1, 2)]] + weights = [["input2", TensorProto.FLOAT, (1, 2), np.random.random((2))]] + node_infos = [["test", ["input1", "input2"], ["output"], optype]] model = self.build_model(inps, outs, weights, node_infos) - input_data = self.build_test_data(['input1'], [(1,2)], ['float32']) + input_data = self.build_test_data(["input1"], [(1, 2)], ["float32"]) convert_model = self.get_bf16_mixed_precision_model(model) - self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()])) - self.assertTrue(16 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast'])) - session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=['DnnlExecutionProvider']) + self.assertTrue("Cast" in set([i.op_type for i in convert_model.nodes()])) + self.assertTrue(16 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == "Cast"])) + session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=["DnnlExecutionProvider"]) outputs = session.run(None, input_data) - optypes = ['Equal', 'Greater', 'GreaterOrEqual', 'Less', 'LessOrEqual'] + optypes = ["Equal", "Greater", "GreaterOrEqual", "Less", "LessOrEqual"] for optype in optypes: - inps = [['input1', TensorProto.FLOAT, (1,2)]] - outs = [['output', TensorProto.BOOL, (1,2)]] - weights = [['input2', TensorProto.FLOAT, (1,2), np.random.random((2))]] - node_infos = [['test', ['input1', 'input2'], ['output'], optype]] + inps = [["input1", TensorProto.FLOAT, (1, 2)]] + outs = [["output", TensorProto.BOOL, (1, 2)]] + weights = [["input2", TensorProto.FLOAT, (1, 2), np.random.random((2))]] + node_infos = [["test", ["input1", "input2"], ["output"], optype]] model = self.build_model(inps, outs, 
weights, node_infos) - input_data = self.build_test_data(['input1'], [(1,2)], ['float32']) + input_data = self.build_test_data(["input1"], [(1, 2)], ["float32"]) convert_model = self.get_bf16_mixed_precision_model(model) - self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()])) - self.assertTrue(16 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast'])) - session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=['DnnlExecutionProvider']) + self.assertTrue("Cast" in set([i.op_type for i in convert_model.nodes()])) + self.assertTrue(16 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == "Cast"])) + session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=["DnnlExecutionProvider"]) outputs = session.run(None, input_data) - optypes = ['Abs', 'Exp', 'Log', 'Round', 'Sqrt', 'Softmax', 'Exp', 'Tanh', 'Sigmoid', 'LeakyRelu', 'Round'] + optypes = ["Abs", "Exp", "Log", "Round", "Sqrt", "Softmax", "Exp", "Tanh", "Sigmoid", "LeakyRelu", "Round"] for optype in optypes: - inps = [['input1', TensorProto.FLOAT, (1,2)]] - outs = [['output', TensorProto.FLOAT, (1,2)]] - node_infos = [['test', ['input1'], ['output'], optype]] + inps = [["input1", TensorProto.FLOAT, (1, 2)]] + outs = [["output", TensorProto.FLOAT, (1, 2)]] + node_infos = [["test", ["input1"], ["output"], optype]] model = self.build_model(inps, outs, [], node_infos) - input_data = self.build_test_data(['input1'], [(1,2)], ['float32']) + input_data = self.build_test_data(["input1"], [(1, 2)], ["float32"]) convert_model = self.get_bf16_mixed_precision_model(model) - self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()])) - self.assertTrue(16 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast'])) - session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=['DnnlExecutionProvider']) + self.assertTrue("Cast" in set([i.op_type for i in convert_model.nodes()])) + self.assertTrue(16 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == "Cast"])) + session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=["DnnlExecutionProvider"]) outputs = session.run(None, input_data) - optypes = ['ReduceMean', 'ReduceL1', 'ReduceL2', 'ReduceLogSum', 'ReduceLogSumExp', 'ReduceMax', 'ReduceProd', \ - 'ReduceSum', 'ReduceSumSquare'] + optypes = [ + "ReduceMean", + "ReduceL1", + "ReduceL2", + "ReduceLogSum", + "ReduceLogSumExp", + "ReduceMax", + "ReduceProd", + "ReduceSum", + "ReduceSumSquare", + ] for optype in optypes: - inps = [['input1', TensorProto.FLOAT, (1,2)]] - outs = [['output', TensorProto.FLOAT, (1,1)]] - node_infos = [['test', ['input1'], ['output'], optype]] + inps = [["input1", TensorProto.FLOAT, (1, 2)]] + outs = [["output", TensorProto.FLOAT, (1, 1)]] + node_infos = [["test", ["input1"], ["output"], optype]] model = self.build_model(inps, outs, [], node_infos) - input_data = self.build_test_data(['input1'], [(1,2)], ['float32']) + input_data = self.build_test_data(["input1"], [(1, 2)], ["float32"]) convert_model = self.get_bf16_mixed_precision_model(model) - self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()])) - self.assertTrue(16 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast'])) - session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=['DnnlExecutionProvider']) + self.assertTrue("Cast" in set([i.op_type for i in convert_model.nodes()])) + 
self.assertTrue(16 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == "Cast"])) + session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=["DnnlExecutionProvider"]) outputs = session.run(None, input_data) - optypes = ['Gelu'] + optypes = ["Gelu"] for optype in optypes: - inps = [['input1', TensorProto.FLOAT, (1,2)]] - outs = [['output', TensorProto.FLOAT, (1,2)]] - node_infos = [['test', ['input1'], ['output'], optype, 'com.microsoft']] + inps = [["input1", TensorProto.FLOAT, (1, 2)]] + outs = [["output", TensorProto.FLOAT, (1, 2)]] + node_infos = [["test", ["input1"], ["output"], optype, "com.microsoft"]] model = self.build_model(inps, outs, [], node_infos) - input_data = self.build_test_data(['input1'], [(1,2)], ['float32']) + input_data = self.build_test_data(["input1"], [(1, 2)], ["float32"]) convert_model = self.get_bf16_mixed_precision_model(model) - self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()])) - self.assertTrue(16 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast'])) - session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=['DnnlExecutionProvider']) + self.assertTrue("Cast" in set([i.op_type for i in convert_model.nodes()])) + self.assertTrue(16 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == "Cast"])) + session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=["DnnlExecutionProvider"]) outputs = session.run(None, input_data) - optypes = ['BiasGelu', 'FastGelu'] + optypes = ["BiasGelu", "FastGelu"] for optype in optypes: - inps = [['input1', TensorProto.FLOAT, [2]]] - outs = [['output', TensorProto.FLOAT, [2]]] - weights = [['input2', TensorProto.FLOAT, [2], np.random.random((2))]] - node_infos = [['test', ['input1', 'input2'], ['output'], optype, 'com.microsoft']] + inps = [["input1", TensorProto.FLOAT, [2]]] + outs = [["output", TensorProto.FLOAT, [2]]] + weights = [["input2", TensorProto.FLOAT, [2], np.random.random((2))]] + node_infos = [["test", ["input1", "input2"], ["output"], optype, "com.microsoft"]] model = self.build_model(inps, outs, weights, node_infos) - input_data = self.build_test_data(['input1'], [(2)], ['float32']) + input_data = self.build_test_data(["input1"], [(2)], ["float32"]) convert_model = self.get_bf16_mixed_precision_model(model) - self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()])) - self.assertTrue(16 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast'])) - session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=['DnnlExecutionProvider']) + self.assertTrue("Cast" in set([i.op_type for i in convert_model.nodes()])) + self.assertTrue(16 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == "Cast"])) + session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=["DnnlExecutionProvider"]) outputs = session.run(None, input_data) - - optypes = ['MatMul'] + optypes = ["MatMul"] for optype in optypes: - inps = [['input1', TensorProto.FLOAT, (1,2)]] - outs = [['output', TensorProto.FLOAT, (1,1)]] - weights = [['input2', TensorProto.FLOAT, (2,1), np.random.random((2))]] - node_infos = [['test', ['input1', 'input2'], ['output'], optype]] + inps = [["input1", TensorProto.FLOAT, (1, 2)]] + outs = [["output", TensorProto.FLOAT, (1, 1)]] + weights = [["input2", TensorProto.FLOAT, (2, 1), np.random.random((2))]] + node_infos = [["test", ["input1", "input2"], ["output"], 
optype]] model = self.build_model(inps, outs, weights, node_infos) - input_data = self.build_test_data(['input1'], [(1,2)], ['float32']) + input_data = self.build_test_data(["input1"], [(1, 2)], ["float32"]) convert_model = self.get_bf16_mixed_precision_model(model) - self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()])) - self.assertTrue(16 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast'])) - session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=['DnnlExecutionProvider']) + self.assertTrue("Cast" in set([i.op_type for i in convert_model.nodes()])) + self.assertTrue(16 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == "Cast"])) + session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=["DnnlExecutionProvider"]) outputs = session.run(None, input_data) - optypes = ['FusedMatMul'] + optypes = ["FusedMatMul"] for optype in optypes: - inps = [['input1', TensorProto.FLOAT, (1,2)]] - outs = [['output', TensorProto.FLOAT, (1,1)]] - weights = [['input2', TensorProto.FLOAT, (2,1), np.random.random((2))]] - node_infos = [['test', ['input1', 'input2'], ['output'], optype, 'com.microsoft']] + inps = [["input1", TensorProto.FLOAT, (1, 2)]] + outs = [["output", TensorProto.FLOAT, (1, 1)]] + weights = [["input2", TensorProto.FLOAT, (2, 1), np.random.random((2))]] + node_infos = [["test", ["input1", "input2"], ["output"], optype, "com.microsoft"]] model = self.build_model(inps, outs, weights, node_infos) ort.InferenceSession(model.SerializeToString()) - input_data = self.build_test_data(['input1'], [(1,2)], ['float32']) + input_data = self.build_test_data(["input1"], [(1, 2)], ["float32"]) convert_model = self.get_bf16_mixed_precision_model(model) - self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()])) - self.assertTrue(16 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast'])) - session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=['DnnlExecutionProvider']) + self.assertTrue("Cast" in set([i.op_type for i in convert_model.nodes()])) + self.assertTrue(16 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == "Cast"])) + session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=["DnnlExecutionProvider"]) outputs = session.run(None, input_data) - optypes = ['Gemm'] + optypes = ["Gemm"] for optype in optypes: - inps = [['input1', TensorProto.FLOAT, (1,2)]] - outs = [['output', TensorProto.FLOAT, (1,1)]] - weights = [['input2', TensorProto.FLOAT, (2,1), np.random.random((2))], - ['input3', TensorProto.FLOAT, (1,1), np.random.random((1))]] - node_infos = [['test', ['input1', 'input2', 'input3'], ['output'], optype]] + inps = [["input1", TensorProto.FLOAT, (1, 2)]] + outs = [["output", TensorProto.FLOAT, (1, 1)]] + weights = [ + ["input2", TensorProto.FLOAT, (2, 1), np.random.random((2))], + ["input3", TensorProto.FLOAT, (1, 1), np.random.random((1))], + ] + node_infos = [["test", ["input1", "input2", "input3"], ["output"], optype]] model = self.build_model(inps, outs, weights, node_infos) - input_data = self.build_test_data(['input1'], [(1,2)], ['float32']) + input_data = self.build_test_data(["input1"], [(1, 2)], ["float32"]) convert_model = self.get_bf16_mixed_precision_model(model) - self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()])) - self.assertTrue(16 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast'])) - session 
= ort.InferenceSession(convert_model.model.SerializeToString(), providers=['DnnlExecutionProvider']) + self.assertTrue("Cast" in set([i.op_type for i in convert_model.nodes()])) + self.assertTrue(16 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == "Cast"])) + session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=["DnnlExecutionProvider"]) outputs = session.run(None, input_data) - optypes = ['LayerNormalization'] + optypes = ["LayerNormalization"] for optype in optypes: - inps = [['input1', TensorProto.FLOAT, (1,2)]] - outs = [['output1', TensorProto.FLOAT, (1,2)], ['output2', TensorProto.FLOAT, (1,2)], ['output3', TensorProto.FLOAT, (1,2)]] - weights = [['input2', TensorProto.FLOAT, (2,1), np.random.random((2))], - ['input3', TensorProto.FLOAT, (2,1), np.random.random((2))]] - node_infos = [['test', ['input1', 'input2', 'input3'], ['output1', 'output2', 'output3'], optype]] + inps = [["input1", TensorProto.FLOAT, (1, 2)]] + outs = [ + ["output1", TensorProto.FLOAT, (1, 2)], + ["output2", TensorProto.FLOAT, (1, 2)], + ["output3", TensorProto.FLOAT, (1, 2)], + ] + weights = [ + ["input2", TensorProto.FLOAT, (2, 1), np.random.random((2))], + ["input3", TensorProto.FLOAT, (2, 1), np.random.random((2))], + ] + node_infos = [["test", ["input1", "input2", "input3"], ["output1", "output2", "output3"], optype]] model = self.build_model(inps, outs, weights, node_infos) - input_data = self.build_test_data(['input1'], [(1,2)], ['float32']) + input_data = self.build_test_data(["input1"], [(1, 2)], ["float32"]) convert_model = self.get_bf16_mixed_precision_model(model) - self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()])) - self.assertTrue(16 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast'])) - session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=['DnnlExecutionProvider']) + self.assertTrue("Cast" in set([i.op_type for i in convert_model.nodes()])) + self.assertTrue(16 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == "Cast"])) + session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=["DnnlExecutionProvider"]) outputs = session.run(None, input_data) - optypes = ['BatchNormalization'] + optypes = ["BatchNormalization"] for optype in optypes: - inps = [['input1', TensorProto.FLOAT, [1, 2]]] - outs = [['output1', TensorProto.FLOAT, [1, 2]]] - weights = [['input2', TensorProto.FLOAT, [2], np.random.random((2))], - ['input3', TensorProto.FLOAT, [2], np.random.random((2))], - ['input4', TensorProto.FLOAT, [2], np.random.random((2))], - ['input5', TensorProto.FLOAT, [2], np.random.random((2))],] - node_infos = [['test', ['input1', 'input2', 'input3', 'input4', 'input5'], ['output1'], optype]] + inps = [["input1", TensorProto.FLOAT, [1, 2]]] + outs = [["output1", TensorProto.FLOAT, [1, 2]]] + weights = [ + ["input2", TensorProto.FLOAT, [2], np.random.random((2))], + ["input3", TensorProto.FLOAT, [2], np.random.random((2))], + ["input4", TensorProto.FLOAT, [2], np.random.random((2))], + ["input5", TensorProto.FLOAT, [2], np.random.random((2))], + ] + node_infos = [["test", ["input1", "input2", "input3", "input4", "input5"], ["output1"], optype]] model = self.build_model(inps, outs, weights, node_infos) ort.InferenceSession(model.SerializeToString()) - input_data = self.build_test_data(['input1'], [(1,2)], ['float32']) + input_data = self.build_test_data(["input1"], [(1, 2)], ["float32"]) convert_model = 
self.get_bf16_mixed_precision_model(model) - self.assertTrue('Cast' in set([i.op_type for i in convert_model.nodes()])) - self.assertTrue(16 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == 'Cast'])) - session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=['DnnlExecutionProvider']) + self.assertTrue("Cast" in set([i.op_type for i in convert_model.nodes()])) + self.assertTrue(16 in set([i.attribute[0].i for i in convert_model.nodes() if i.op_type == "Cast"])) + session = ort.InferenceSession(convert_model.model.SerializeToString(), providers=["DnnlExecutionProvider"]) outputs = session.run(None, input_data) diff --git a/test/adaptor/onnxrt_adaptor/test_weight_only_adaptor.py b/test/adaptor/onnxrt_adaptor/test_weight_only_adaptor.py index e94adabcd40..66536839b9f 100644 --- a/test/adaptor/onnxrt_adaptor/test_weight_only_adaptor.py +++ b/test/adaptor/onnxrt_adaptor/test_weight_only_adaptor.py @@ -1,41 +1,49 @@ import os -import onnx import shutil import subprocess import unittest + import numpy as np +import onnx import onnxruntime as ort from transformers import AutoTokenizer -from neural_compressor import quantization, PostTrainingQuantConfig + +from neural_compressor import PostTrainingQuantConfig, quantization + def Inference(model, data): sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_all_providers()) out = sess.run(None, data) return out + class DummyNLPDataloader(object): def __init__(self, model_name): self.tokenizer = AutoTokenizer.from_pretrained(model_name) self.sequence_a = "intel-extension-for-transformers is based in SH" self.sequence_b = "Where is intel-extension-for-transformers based? NYC or SH" - self.encoded_dict = self.tokenizer(self.sequence_a, self.sequence_b, return_tensors='pt') - self.encoded_dict['labels'] = 1 + self.encoded_dict = self.tokenizer(self.sequence_a, self.sequence_b, return_tensors="pt") + self.encoded_dict["labels"] = 1 self.batch_size = 1 def __iter__(self): - yield {'input_ids': self.encoded_dict['input_ids'].detach().cpu().numpy(), - 'attention_mask': self.encoded_dict['attention_mask'].detach().cpu().numpy()}, self.encoded_dict['labels'] + yield { + "input_ids": self.encoded_dict["input_ids"].detach().cpu().numpy(), + "attention_mask": self.encoded_dict["attention_mask"].detach().cpu().numpy(), + }, self.encoded_dict["labels"] -class TestWeightOnlyAdaptor(unittest.TestCase): +class TestWeightOnlyAdaptor(unittest.TestCase): @classmethod def setUpClass(self): - cmd = 'optimum-cli export onnx --model hf-internal-testing/tiny-random-gptj --task text-generation gptj/' - p = subprocess.Popen(cmd, preexec_fn=os.setsid, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) # nosec + cmd = "optimum-cli export onnx --model hf-internal-testing/tiny-random-gptj --task text-generation gptj/" + p = subprocess.Popen( + cmd, preexec_fn=os.setsid, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True + ) # nosec p.communicate() - self.model = onnx.load('gptj/decoder_model.onnx') - self.dataloader = DummyNLPDataloader('hf-internal-testing/tiny-random-gptj') + self.model = onnx.load("gptj/decoder_model.onnx") + self.dataloader = DummyNLPDataloader("hf-internal-testing/tiny-random-gptj") @classmethod def tearDownClass(self): @@ -43,9 +51,8 @@ def tearDownClass(self): shutil.rmtree("gptj", ignore_errors=True) def test_RTN_quant(self): - conf = PostTrainingQuantConfig( - approach='weight_only', + approach="weight_only", ) q_model = quantization.fit(self.model, conf) for data, _ in 
self.dataloader: @@ -55,14 +62,14 @@ def test_RTN_quant(self): self.assertTrue((np.abs(q_out[0] - org_out[0]) < 0.5).all()) conf = PostTrainingQuantConfig( - approach='weight_only', + approach="weight_only", op_type_dict={ - '.*':{ # re.match + ".*": { # re.match "weight": { - 'bits': 8, # 1-8 bits - 'group_size': -1, # -1 (per-channel) - 'scheme': 'sym', - 'algorithm': 'RTN', + "bits": 8, # 1-8 bits + "group_size": -1, # -1 (per-channel) + "scheme": "sym", + "algorithm": "RTN", }, }, }, @@ -75,21 +82,20 @@ def test_RTN_quant(self): self.assertTrue((np.abs(q_out[0] - org_out[0]) < 0.5).all()) def test_AWQ_quant(self): - conf = PostTrainingQuantConfig( - approach='weight_only', + approach="weight_only", op_type_dict={ - '.*':{ # re.match + ".*": { # re.match "weight": { - 'bits': 4, # 1-8 bits - 'group_size': -1, # -1 (per-channel) - 'scheme': 'sym', - 'algorithm': 'AWQ', + "bits": 4, # 1-8 bits + "group_size": -1, # -1 (per-channel) + "scheme": "sym", + "algorithm": "AWQ", }, }, }, recipes={ - 'awq_args':{'auto_scale': True, 'mse_range': True}, + "awq_args": {"auto_scale": True, "mse_range": True}, }, ) q_model = quantization.fit(self.model, conf, calib_dataloader=self.dataloader) @@ -100,19 +106,19 @@ def test_AWQ_quant(self): self.assertTrue((np.abs(q_out[0] - org_out[0]) < 0.5).all()) conf = PostTrainingQuantConfig( - approach='weight_only', + approach="weight_only", op_type_dict={ - '.*':{ # re.match + ".*": { # re.match "weight": { - 'bits': 4, # 1-8 bits - 'group_size': 32, - 'scheme': 'sym', - 'algorithm': 'AWQ', + "bits": 4, # 1-8 bits + "group_size": 32, + "scheme": "sym", + "algorithm": "AWQ", }, }, }, recipes={ - 'awq_args':{'auto_scale': False, 'mse_range': True}, + "awq_args": {"auto_scale": False, "mse_range": True}, }, ) q_model = quantization.fit(self.model, conf, calib_dataloader=self.dataloader) @@ -123,19 +129,19 @@ def test_AWQ_quant(self): self.assertTrue((np.abs(q_out[0] - org_out[0]) < 0.5).all()) conf = PostTrainingQuantConfig( - approach='weight_only', + approach="weight_only", op_type_dict={ - '.*':{ # re.match + ".*": { # re.match "weight": { - 'bits': 4, # 1-8 bits - 'group_size': 32, - 'scheme': 'asym', - 'algorithm': 'AWQ', + "bits": 4, # 1-8 bits + "group_size": 32, + "scheme": "asym", + "algorithm": "AWQ", }, }, }, recipes={ - 'awq_args':{'auto_scale': True, 'mse_range': False}, + "awq_args": {"auto_scale": True, "mse_range": False}, }, ) q_model = quantization.fit(self.model, conf, calib_dataloader=self.dataloader) @@ -146,16 +152,15 @@ def test_AWQ_quant(self): self.assertTrue((np.abs(q_out[0] - org_out[0]) < 0.5).all()) def test_GPTQ_quant(self): - conf = PostTrainingQuantConfig( - approach='weight_only', + approach="weight_only", op_type_dict={ - '.*':{ # re.match + ".*": { # re.match "weight": { - 'bits': 4, # 1-8 bits - 'group_size': -1, # -1 (per-channel) - 'scheme': 'sym', - 'algorithm': 'GPTQ', + "bits": 4, # 1-8 bits + "group_size": -1, # -1 (per-channel) + "scheme": "sym", + "algorithm": "GPTQ", }, }, }, diff --git a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_1.x.py b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_1.x.py index c53eedeb1db..571a58d3dd8 100644 --- a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_1.x.py +++ b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_1.x.py @@ -1,24 +1,27 @@ import copy -import neural_compressor.adaptor.pytorch as nc_torch -import numpy as np import os import pickle import shutil +import unittest + +import numpy as np import torch import torch.nn as nn import torch.nn.quantized as nnq -import 
unittest +from packaging.version import Version +from torch.quantization import DeQuantStub, QuantStub + +import neural_compressor.adaptor.pytorch as nc_torch from neural_compressor.adaptor import FRAMEWORKS -from neural_compressor.model import MODELS -from neural_compressor.experimental import Quantization, common from neural_compressor.conf.config import QuantConf +from neural_compressor.experimental import Quantization, common +from neural_compressor.model import MODELS from neural_compressor.utils.pytorch import load -from neural_compressor.utils.utility import recover -from neural_compressor.utils.utility import LazyImport -from torch.quantization import QuantStub, DeQuantStub -from packaging.version import Version +from neural_compressor.utils.utility import LazyImport, recover + try: import intel_extension_for_pytorch as ipex + IPEX = True except: IPEX = False @@ -34,7 +37,7 @@ FX_MODE = False -fake_dyn_yaml = ''' +fake_dyn_yaml = """ model: name: imagenet framework: pytorch @@ -63,10 +66,10 @@ random_seed: 9527 workspace: path: saved - ''' + """ -fake_ptq_yaml = ''' +fake_ptq_yaml = """ model: name: imagenet framework: pytorch @@ -111,9 +114,9 @@ random_seed: 9527 workspace: path: saved - ''' + """ -fake_auto_yaml = ''' +fake_auto_yaml = """ model: name: imagenet framework: pytorch_fx @@ -137,10 +140,10 @@ random_seed: 9527 workspace: path: saved - ''' + """ -fake_ptq_yaml_for_fx = ''' +fake_ptq_yaml_for_fx = """ model: name: imagenet framework: pytorch_fx @@ -193,10 +196,10 @@ random_seed: 9527 workspace: path: saved - ''' + """ -fake_qat_yaml = ''' +fake_qat_yaml = """ model: name: imagenet framework: pytorch @@ -247,40 +250,42 @@ random_seed: 9527 workspace: path: saved - ''' + """ def build_pytorch_yaml(): - with open('ptq_yaml.yaml', 'w', encoding="utf-8") as f: + with open("ptq_yaml.yaml", "w", encoding="utf-8") as f: f.write(fake_ptq_yaml) - with open('dynamic_yaml.yaml', 'w', encoding="utf-8") as f: + with open("dynamic_yaml.yaml", "w", encoding="utf-8") as f: f.write(fake_dyn_yaml) - with open('qat_yaml.yaml', 'w', encoding="utf-8") as f: + with open("qat_yaml.yaml", "w", encoding="utf-8") as f: f.write(fake_qat_yaml) - with open('auto_yaml.yaml', 'w', encoding="utf-8") as f: + with open("auto_yaml.yaml", "w", encoding="utf-8") as f: f.write(fake_auto_yaml) + def build_pytorch_fx_yaml(): if PT_VERSION >= Version("1.9.0").release: fake_fx_ptq_yaml = fake_ptq_yaml_for_fx else: - fake_fx_ptq_yaml = fake_ptq_yaml.replace('pytorch', 'pytorch_fx') - with open('fx_ptq_yaml.yaml', 'w', encoding="utf-8") as f: + fake_fx_ptq_yaml = fake_ptq_yaml.replace("pytorch", "pytorch_fx") + with open("fx_ptq_yaml.yaml", "w", encoding="utf-8") as f: f.write(fake_fx_ptq_yaml) - fake_fx_dyn_yaml = fake_dyn_yaml.replace('pytorch', 'pytorch_fx') - with open('fx_dynamic_yaml.yaml', 'w', encoding="utf-8") as f: + fake_fx_dyn_yaml = fake_dyn_yaml.replace("pytorch", "pytorch_fx") + with open("fx_dynamic_yaml.yaml", "w", encoding="utf-8") as f: f.write(fake_fx_dyn_yaml) - fake_fx_qat_yaml = fake_qat_yaml.replace('pytorch', 'pytorch_fx') - with open('fx_qat_yaml.yaml', 'w', encoding="utf-8") as f: + fake_fx_qat_yaml = fake_qat_yaml.replace("pytorch", "pytorch_fx") + with open("fx_qat_yaml.yaml", "w", encoding="utf-8") as f: f.write(fake_fx_qat_yaml) + def build_dump_tensors_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: imagenet framework: pytorch @@ -299,8 +304,8 @@ def build_dump_tensors_yaml(): workspace: path: saved tensorboard: true - ''' - with open('dump_yaml.yaml', 'w', 
encoding="utf-8") as f: + """ + with open("dump_yaml.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) @@ -336,6 +341,7 @@ class DynamicModel(torch.nn.Module): def __init__(self): super().__init__() self.conv = nn.Conv2d(1, 1, 1) + def forward(self, x): if x is not None: x = self.conv(x) @@ -394,6 +400,7 @@ def forward(self, x): x = self.dequant(x) return x + class DynamicControlModel(torch.nn.Module): def __init__(self): super().__init__() @@ -416,7 +423,7 @@ def forward(self, x): class LSTMModel(nn.Module): - '''Container module with an encoder, a recurrent module, and a decoder.''' + """Container module with an encoder, a recurrent module, and a decoder.""" def __init__(self, ntoken=10, ninp=512, nhid=256, nlayers=5, dropout=0.5): super(LSTMModel, self).__init__() @@ -471,15 +478,17 @@ def q_func(model): class TestPytorchAdaptor(unittest.TestCase): - framework_specific_info = {"device": "cpu", - "approach": "post_training_static_quant", - "random_seed": 1234, - "q_dataloader": None, - "workspace_path": "./"} + framework_specific_info = { + "device": "cpu", + "approach": "post_training_static_quant", + "random_seed": 1234, + "q_dataloader": None, + "workspace_path": "./", + } framework = "pytorch" adaptor = FRAMEWORKS[framework](framework_specific_info) model = q_resnet18() - nc_model = MODELS['pytorch'](model) + nc_model = MODELS["pytorch"](model) @classmethod def setUpClass(self): @@ -488,13 +497,13 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('ptq_yaml.yaml') - os.remove('dynamic_yaml.yaml') - os.remove('qat_yaml.yaml') - os.remove('dump_yaml.yaml') - os.remove('auto_yaml.yaml') - shutil.rmtree('./saved', ignore_errors=True) - shutil.rmtree('runs', ignore_errors=True) + os.remove("ptq_yaml.yaml") + os.remove("dynamic_yaml.yaml") + os.remove("qat_yaml.yaml") + os.remove("dump_yaml.yaml") + os.remove("auto_yaml.yaml") + shutil.rmtree("./saved", ignore_errors=True) + shutil.rmtree("runs", ignore_errors=True) def test_get_all_weight_name(self): assert len(list(self.nc_model.get_all_weight_names())) == 62 @@ -506,38 +515,34 @@ def test_get_weight(self): if name == "fc.bias": param.data.fill_(0.1) assert int(torch.sum(self.nc_model.get_weight("layer4.1.conv2.weight"))) == 0 - assert torch.allclose( - torch.sum( - self.nc_model.get_weight("fc.bias")), - torch.tensor(100.)) + assert torch.allclose(torch.sum(self.nc_model.get_weight("fc.bias")), torch.tensor(100.0)) def test_get_input(self): - model = MODELS['pytorch'](q_resnet18()) + model = MODELS["pytorch"](q_resnet18()) model.model.eval().fuse_model() model.register_forward_pre_hook() rand_input = torch.rand(100, 3, 224, 224).float() model.model(rand_input) - assert torch.equal(model.get_inputs('x'), rand_input) + assert torch.equal(model.get_inputs("x"), rand_input) model.remove_hooks() def test_update_weights(self): - self.nc_model.update_weights('fc.bias', torch.zeros([1000])) + self.nc_model.update_weights("fc.bias", torch.zeros([1000])) assert int(torch.sum(self.nc_model.get_weight("fc.bias"))) == 0 def test_get_gradient(self): with self.assertRaises(AssertionError): - self.nc_model.get_gradient('fc.bias') + self.nc_model.get_gradient("fc.bias") for name, tensor in self.nc_model._model.named_parameters(): - if name == 'fc.bias': + if name == "fc.bias": tensor.grad = torch.zeros_like(tensor) break - assert torch.equal(torch.Tensor(self.nc_model.get_gradient('fc.bias')), torch.zeros_like(tensor)) + assert torch.equal(torch.Tensor(self.nc_model.get_gradient("fc.bias")), torch.zeros_like(tensor)) 
rand_input = torch.rand(100, 3, 224, 224).float() rand_input.grad = torch.ones_like(rand_input) - assert torch.equal(torch.Tensor(self.nc_model.get_gradient(rand_input)), - torch.ones_like(rand_input)) + assert torch.equal(torch.Tensor(self.nc_model.get_gradient(rand_input)), torch.ones_like(rand_input)) def test_report_sparsity(self): df, total_sparsity = self.nc_model.report_sparsity() @@ -545,109 +550,111 @@ def test_report_sparsity(self): self.assertTrue(len(df) == 22) def test_quantization_saved(self): - for fake_yaml in ['dynamic_yaml.yaml', 'qat_yaml.yaml', 'ptq_yaml.yaml']: + for fake_yaml in ["dynamic_yaml.yaml", "qat_yaml.yaml", "ptq_yaml.yaml"]: model = M() quantizer = Quantization(fake_yaml) - quantizer.conf.usr_cfg.tuning.exit_policy['performance_only'] = True - dataset = quantizer.dataset('dummy', (100, 3, 224, 224), label=True) + quantizer.conf.usr_cfg.tuning.exit_policy["performance_only"] = True + dataset = quantizer.dataset("dummy", (100, 3, 224, 224), label=True) quantizer.model = model quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) q_model = quantizer.fit() eval_func(q_model) - q_model.save('./saved') + q_model.save("./saved") # Load configure and weights by neural_compressor.utils saved_model = load("./saved", model) eval_func(saved_model) # recover int8 model from history - history_file = './saved/history.snapshot' + history_file = "./saved/history.snapshot" model_recover = recover(model, history_file, 0) eval_func(model_recover) - self.assertEqual(type(saved_model.conv), \ - type(model_recover.conv)) - shutil.rmtree('./saved', ignore_errors=True) + self.assertEqual(type(saved_model.conv), type(model_recover.conv)) + shutil.rmtree("./saved", ignore_errors=True) from neural_compressor.experimental import Benchmark - evaluator = Benchmark('ptq_yaml.yaml') + + evaluator = Benchmark("ptq_yaml.yaml") # Load configure and weights by neural_compressor.model evaluator.model = model evaluator.b_dataloader = common.DataLoader(dataset) - evaluator.fit('accuracy') + evaluator.fit("accuracy") - for fake_yaml in ['qat_yaml.yaml', 'ptq_yaml.yaml']: + for fake_yaml in ["qat_yaml.yaml", "ptq_yaml.yaml"]: model = copy.deepcopy(self.model) - if fake_yaml == 'ptq_yaml.yaml': + if fake_yaml == "ptq_yaml.yaml": model.eval().fuse_model() conf = QuantConf(fake_yaml) quantizer = Quantization(conf) - dataset = quantizer.dataset('dummy', (100, 3, 224, 224)) + dataset = quantizer.dataset("dummy", (100, 3, 224, 224)) quantizer.model = model - if fake_yaml == 'qat_yaml.yaml': + if fake_yaml == "qat_yaml.yaml": quantizer.q_func = q_func else: quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_func = eval_func q_model = quantizer.fit() - q_model.save('./saved') + q_model.save("./saved") # Load configure and weights by neural_compressor.utils saved_model = load("./saved", model) eval_func(saved_model) - shutil.rmtree('./saved', ignore_errors=True) + shutil.rmtree("./saved", ignore_errors=True) def test_quantization_new_saved(self): - for fake_yaml in ['dynamic_yaml.yaml', 'qat_yaml.yaml', 'ptq_yaml.yaml']: + for fake_yaml in ["dynamic_yaml.yaml", "qat_yaml.yaml", "ptq_yaml.yaml"]: model = M() quantizer = Quantization(fake_yaml) - quantizer.conf.usr_cfg.tuning.exit_policy['performance_only'] = True - dataset = quantizer.dataset('dummy', (100, 3, 224, 224), label=True) + quantizer.conf.usr_cfg.tuning.exit_policy["performance_only"] = True + dataset = quantizer.dataset("dummy", (100, 3, 224, 224), label=True) 
quantizer.model = model quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) q_model = quantizer.fit() eval_func(q_model) - torch.save(q_model.quantized_state_dict(), './saved/model.pt') + torch.save(q_model.quantized_state_dict(), "./saved/model.pt") # Load configure and weights by neural_compressor.utils from neural_compressor.experimental.common import Model + common_model = Model(model) - common_model.load_quantized_state_dict(torch.load('./saved/model.pt')) + common_model.load_quantized_state_dict(torch.load("./saved/model.pt")) eval_func(common_model) - self.assertEqual(type(q_model._model.linear), \ - type(common_model._model.linear)) - shutil.rmtree('./saved', ignore_errors=True) + self.assertEqual(type(q_model._model.linear), type(common_model._model.linear)) + shutil.rmtree("./saved", ignore_errors=True) @unittest.skipIf(IPEX, "this function is affected by IPEX, Fixing now.") def test_non_quant_module(self): - for fake_yaml in ['qat_yaml.yaml', 'ptq_yaml.yaml']: + for fake_yaml in ["qat_yaml.yaml", "ptq_yaml.yaml"]: model = PartialQuantModel() conf = QuantConf(fake_yaml) quantizer = Quantization(conf) - dataset = quantizer.dataset('dummy', (1, 3, 224, 224)) - non_quant_dict = {'non_quant_module_name': ['conv', 'conv1', 'sub.conv'], \ - 'non_quant_module_class': ['BatchNorm2d', 'FP32Model']} + dataset = quantizer.dataset("dummy", (1, 3, 224, 224)) + non_quant_dict = { + "non_quant_module_name": ["conv", "conv1", "sub.conv"], + "non_quant_module_class": ["BatchNorm2d", "FP32Model"], + } quantizer.model = common.Model(model, **non_quant_dict) - if fake_yaml == 'qat_yaml.yaml': + if fake_yaml == "qat_yaml.yaml": quantizer.q_func = q_func else: quantizer.calib_func = eval_func quantizer.eval_func = eval_func q_model = quantizer.fit() - q_model.save('./saved') + q_model.save("./saved") saved_model = load("./saved", model, **non_quant_dict) eval_func(saved_model) - shutil.rmtree('./saved', ignore_errors=True) + shutil.rmtree("./saved", ignore_errors=True) def test_auto_quant(self): def eval_func(model): return 1 model_origin = LSTMModel( - ntoken = 10, - ninp = 512, - nhid = 256, - nlayers = 2, + ntoken=10, + ninp=512, + nhid=256, + nlayers=2, ) # run fx_quant in neural_compressor and save the quantized GraphModule - quantizer = Quantization('auto_yaml.yaml') - dataset = quantizer.dataset('dummy', (3, 10), label=True) + quantizer = Quantization("auto_yaml.yaml") + dataset = quantizer.dataset("dummy", (3, 10), label=True) quantizer.eval_func = eval_func quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = common.Model(model_origin) @@ -656,92 +663,105 @@ def eval_func(model): def test_workspace_path(self): model = M() - quantizer = Quantization('ptq_yaml.yaml') - quantizer.conf.usr_cfg.tuning.exit_policy['performance_only'] = True - dataset = quantizer.dataset('dummy', (100, 3, 224, 224), label=True) + quantizer = Quantization("ptq_yaml.yaml") + quantizer.conf.usr_cfg.tuning.exit_policy["performance_only"] = True + dataset = quantizer.dataset("dummy", (100, 3, 224, 224), label=True) quantizer.model = model quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) q_model = quantizer.fit() eval_func(q_model) - torch.save(q_model.quantized_state_dict(), './saved/best_model.pt') + torch.save(q_model.quantized_state_dict(), "./saved/best_model.pt") # Load configure and weights by workspace_path from neural_compressor.experimental.common import Model + 
common_model = Model(model) - common_model.workspace_path = './saved' + common_model.workspace_path = "./saved" eval_func(common_model) - self.assertEqual(type(q_model._model.linear), \ - type(common_model._model.linear)) - shutil.rmtree('./saved', ignore_errors=True) + self.assertEqual(type(q_model._model.linear), type(common_model._model.linear)) + shutil.rmtree("./saved", ignore_errors=True) def test_get_graph_info(self): from neural_compressor.model.torch_model import PyTorchModel + model = PyTorchModel(self.model) op_map = model.graph_info - self.assertTrue(op_map['conv1'] == 'Conv2d') + self.assertTrue(op_map["conv1"] == "Conv2d") def test_tensorboard(self): model = copy.deepcopy(self.nc_model) model.model.eval().fuse_model() - quantizer = Quantization('dump_yaml.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 224, 224), label=True) + quantizer = Quantization("dump_yaml.yaml") + dataset = quantizer.dataset("dummy", (100, 3, 224, 224), label=True) quantizer.model = model.model quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_func = eval_func quantizer.fit() - self.assertTrue(True if os.path.exists('runs/eval/baseline_acc0.0') else False) + self.assertTrue(True if os.path.exists("runs/eval/baseline_acc0.0") else False) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.eval_func = None quantizer.fit() - self.assertTrue(True if os.path.exists('runs/eval/baseline_acc0.0') else False) + self.assertTrue(True if os.path.exists("runs/eval/baseline_acc0.0") else False) def test_tensor_dump_and_set(self): model = copy.deepcopy(self.nc_model) model.model.eval().fuse_model() - quantizer = Quantization('ptq_yaml.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 224, 224), label=True) + quantizer = Quantization("ptq_yaml.yaml") + dataset = quantizer.dataset("dummy", (100, 3, 224, 224), label=True) dataloader = common.DataLoader(dataset) - dataloader = common._generate_common_dataloader(dataloader, 'pytorch') + dataloader = common._generate_common_dataloader(dataloader, "pytorch") quantizer.eval_dataloader = dataloader quantizer.calib_dataloader = dataloader quantizer.model = model.model q_model = quantizer.fit() quantizer.strategy.adaptor.inspect_tensor( - model, dataloader, op_list=['conv1.0', 'layer1.0.conv1.0'], - iteration_list=[1, 2], inspect_type='all', save_to_disk=True) - with open('saved/inspect_result.pkl', 'rb') as fp: + model, + dataloader, + op_list=["conv1.0", "layer1.0.conv1.0"], + iteration_list=[1, 2], + inspect_type="all", + save_to_disk=True, + ) + with open("saved/inspect_result.pkl", "rb") as fp: tensor_dict = pickle.load(fp) a = tensor_dict["activation"][0] w = tensor_dict["weight"] if PT_VERSION >= Version("1.8.0").release: - self.assertTrue(w['conv1.0']['conv1.0.weight'].shape[0] == - a['conv1.0']['conv1.0.output0'].shape[1]) + self.assertTrue(w["conv1.0"]["conv1.0.weight"].shape[0] == a["conv1.0"]["conv1.0.output0"].shape[1]) else: - self.assertTrue(w['conv1.0']['conv1.0.weight'].shape[0] == - a['conv1.0']['conv1.1.output0'].shape[1]) - data = np.random.random(w['conv1.0']['conv1.0.weight'].shape).astype(np.float32) - quantizer.strategy.adaptor.set_tensor(q_model, {'conv1.0.weight': data}) - changed_tensor = q_model.get_weight('conv1.weight') + self.assertTrue(w["conv1.0"]["conv1.0.weight"].shape[0] == a["conv1.0"]["conv1.1.output0"].shape[1]) + data = np.random.random(w["conv1.0"]["conv1.0.weight"].shape).astype(np.float32) + quantizer.strategy.adaptor.set_tensor(q_model, {"conv1.0.weight": data}) + changed_tensor = 
q_model.get_weight("conv1.weight") scales = changed_tensor.q_per_channel_scales() changed_tensor_fp32 = torch.dequantize(changed_tensor) self.assertTrue(np.allclose(data, changed_tensor_fp32.numpy(), atol=2 / np.min(scales.numpy()))) quantizer.strategy.adaptor.inspect_tensor( - q_model, dataloader, op_list=['conv1.0', 'layer1.0.conv1.0'], - iteration_list=[1, 2], inspect_type='all', save_to_disk=False) + q_model, + dataloader, + op_list=["conv1.0", "layer1.0.conv1.0"], + iteration_list=[1, 2], + inspect_type="all", + save_to_disk=False, + ) def test_forward_wrapper(self): vision_model = resnet18() + class dummymodel(torch.nn.Module): def __init__(self, model): super(dummymodel, self).__init__() self._model = model - def forward(self,input=None): + + def forward(self, input=None): return self._model(input) - data = [[{'input': torch.rand(3,224,224)}, torch.ones(1,1)], ] + data = [ + [{"input": torch.rand(3, 224, 224)}, torch.ones(1, 1)], + ] # dataloader.batch_size=100 dataloader = common.DataLoader(data, batch_size=1) - quantizer = Quantization('dynamic_yaml.yaml') + quantizer = Quantization("dynamic_yaml.yaml") model = dummymodel(vision_model) quantizer.model = model quantizer.calib_dataloader = dataloader @@ -777,7 +797,7 @@ def forward(self, x): return w model = ModelWithFunctionals() - model = MODELS['pytorch'](model) + model = MODELS["pytorch"](model) x = torch.rand(10, 1, dtype=torch.float) y = model.model(x) fallback_ops = [] @@ -789,8 +809,7 @@ def forward(self, x): model.model.quant.qconfig = torch.quantization.default_qconfig if PT_VERSION >= Version("1.8.0").release: model.model.dequant.qconfig = torch.quantization.default_qconfig - nc_torch._fallback_quantizable_ops_recursively( - model.model, '', fallback_ops, op_qcfgs={}) + nc_torch._fallback_quantizable_ops_recursively(model.model, "", fallback_ops, op_qcfgs={}) if PT_VERSION >= Version("2.0.0").release: from torch.quantization.quantize import _add_observer_ as add_observer_ else: @@ -799,153 +818,177 @@ def forward(self, x): model.model(x) torch.quantization.convert(model.model, self.adaptor.q_mapping, inplace=True) qy = model.model(x) - tol = {'atol': 1e-01, 'rtol': 1e-03} + tol = {"atol": 1e-01, "rtol": 1e-03} self.assertTrue(np.allclose(y, qy, **tol)) + @unittest.skipIf(not FX_MODE, "Unsupport Fx Mode with PyTorch Version Below 1.8") class TestPytorchFXAdaptor(unittest.TestCase): - framework_specific_info = {"device": "cpu", - "approach": "post_training_static_quant", - "random_seed": 1234, - "q_dataloader": None, - "workspace_path": "./"} + framework_specific_info = { + "device": "cpu", + "approach": "post_training_static_quant", + "random_seed": 1234, + "q_dataloader": None, + "workspace_path": "./", + } framework = "pytorch_fx" adaptor = FRAMEWORKS[framework](framework_specific_info) + @classmethod def setUpClass(self): build_pytorch_fx_yaml() @classmethod def tearDownClass(self): - os.remove('fx_ptq_yaml.yaml') - os.remove('fx_dynamic_yaml.yaml') - shutil.rmtree('./saved', ignore_errors=True) - shutil.rmtree('runs', ignore_errors=True) + os.remove("fx_ptq_yaml.yaml") + os.remove("fx_dynamic_yaml.yaml") + shutil.rmtree("./saved", ignore_errors=True) + shutil.rmtree("runs", ignore_errors=True) def test_fx_quant(self): - for fake_yaml in ['fx_qat_yaml.yaml', 'fx_ptq_yaml.yaml']: + for fake_yaml in ["fx_qat_yaml.yaml", "fx_ptq_yaml.yaml"]: model_origin = resnet18() # run fx_quant in neural_compressor and save the quantized GraphModule quantizer = Quantization(fake_yaml) - dataset = quantizer.dataset('dummy', (10, 3, 224, 
224), label=True) + dataset = quantizer.dataset("dummy", (10, 3, 224, 224), label=True) quantizer.eval_func = eval_func - if fake_yaml == 'fx_qat_yaml.yaml': + if fake_yaml == "fx_qat_yaml.yaml": quantizer.q_func = q_func else: quantizer.calib_func = eval_func dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = dataloader - quantizer.model = common.Model(model_origin, - **{'prepare_custom_config_dict': \ - {'non_traceable_module_name': ['a']}, - 'convert_custom_config_dict': \ - {'preserved_attributes': []} - }) + quantizer.model = common.Model( + model_origin, + **{ + "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]}, + "convert_custom_config_dict": {"preserved_attributes": []}, + } + ) q_model = quantizer.fit() - q_model.save('./saved') + q_model.save("./saved") # Load configure and weights with neural_compressor.utils - model_fx = load('./saved', model_origin, - **{'prepare_custom_config_dict': \ - {'non_traceable_module_name': ['a']}, - 'convert_custom_config_dict': \ - {'preserved_attributes': []} - }) + model_fx = load( + "./saved", + model_origin, + **{ + "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]}, + "convert_custom_config_dict": {"preserved_attributes": []}, + } + ) self.assertTrue(isinstance(model_fx, torch.fx.graph_module.GraphModule)) # recover int8 model with only tune_cfg - history_file = './saved/history.snapshot' - model_fx_recover = recover(model_origin, history_file, 0, - **{'prepare_custom_config_dict': - {'non_traceable_module_name': ['a']}, - 'convert_custom_config_dict': - {'preserved_attributes': []} - }) + history_file = "./saved/history.snapshot" + model_fx_recover = recover( + model_origin, + history_file, + 0, + **{ + "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]}, + "convert_custom_config_dict": {"preserved_attributes": []}, + } + ) self.assertEqual(model_fx.code, model_fx_recover.code) - shutil.rmtree('./saved', ignore_errors=True) + shutil.rmtree("./saved", ignore_errors=True) - for fake_yaml in ['fx_qat_yaml.yaml', 'fx_ptq_yaml.yaml']: + for fake_yaml in ["fx_qat_yaml.yaml", "fx_ptq_yaml.yaml"]: model_origin = M() # run fx_quant in neural_compressor and save the quantized GraphModule quantizer = Quantization(fake_yaml) - quantizer.conf.usr_cfg.tuning.exit_policy['performance_only'] = True - dataset = quantizer.dataset('dummy', (10, 3, 224, 224), label=True) + quantizer.conf.usr_cfg.tuning.exit_policy["performance_only"] = True + dataset = quantizer.dataset("dummy", (10, 3, 224, 224), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) - quantizer.model = common.Model(model_origin, - **{'prepare_custom_config_dict': \ - {'non_traceable_module_name': ['a']}, - 'convert_custom_config_dict': \ - {'preserved_attributes': []} - }) + quantizer.model = common.Model( + model_origin, + **{ + "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]}, + "convert_custom_config_dict": {"preserved_attributes": []}, + } + ) q_model = quantizer.fit() - q_model.save('./saved') + q_model.save("./saved") # Load configure and weights with neural_compressor.utils - model_fx = load('./saved', model_origin, - **{'prepare_custom_config_dict': \ - {'non_traceable_module_name': ['a']}, - 'convert_custom_config_dict': \ - {'preserved_attributes': []}, \ - 'dataloader': quantizer.calib_dataloader - }) + model_fx = load( + "./saved", + model_origin, + **{ + "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]}, + 
"convert_custom_config_dict": {"preserved_attributes": []}, + "dataloader": quantizer.calib_dataloader, + } + ) self.assertTrue(isinstance(model_fx, torch.fx.graph_module.GraphModule)) - shutil.rmtree('./saved', ignore_errors=True) + shutil.rmtree("./saved", ignore_errors=True) - @unittest.skipIf(PT_VERSION < Version("1.9.0").release, - "Please use PyTroch 1.9 or higher version for dynamic quantization with pytorch_fx backend") + @unittest.skipIf( + PT_VERSION < Version("1.9.0").release, + "Please use PyTroch 1.9 or higher version for dynamic quantization with pytorch_fx backend", + ) def test_fx_dynamic_quant(self): model = LSTMModel( - ntoken = 10, - ninp = 512, - nhid = 256, - nlayers = 5, + ntoken=10, + ninp=512, + nhid=256, + nlayers=5, ) # run fx_quant in neural_compressor and save the quantized GraphModule model.eval() - quantizer = Quantization('fx_dynamic_yaml.yaml') - quantizer.model = common.Model(copy.deepcopy(model), - **{'prepare_custom_config_dict': \ - {'non_traceable_module_name': ['a']}, - 'convert_custom_config_dict': \ - {'preserved_attributes': []} - }) + quantizer = Quantization("fx_dynamic_yaml.yaml") + quantizer.model = common.Model( + copy.deepcopy(model), + **{ + "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]}, + "convert_custom_config_dict": {"preserved_attributes": []}, + } + ) q_model = quantizer.fit() - q_model.save('./saved') + q_model.save("./saved") # Load configure and weights by neural_compressor.utils - model_fx = load("./saved", copy.deepcopy(model), - **{'prepare_custom_config_dict': \ - {'non_traceable_module_name': ['a']}, - 'convert_custom_config_dict': \ - {'preserved_attributes': []} - }) + model_fx = load( + "./saved", + copy.deepcopy(model), + **{ + "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]}, + "convert_custom_config_dict": {"preserved_attributes": []}, + } + ) self.assertTrue(isinstance(model_fx, torch.fx.graph_module.GraphModule)) # Test the functionality of older model saving type state_dict = torch.load("./saved/best_model.pt") - tune_cfg = state_dict.pop('best_configure') + tune_cfg = state_dict.pop("best_configure") import yaml - with open("./saved/best_configure.yaml", 'w') as f: + + with open("./saved/best_configure.yaml", "w") as f: yaml.dump(tune_cfg, f, default_flow_style=False) torch.save(state_dict, "./saved/best_model_weights.pt") - os.remove('./saved/best_model.pt') - model_fx = load("./saved", copy.deepcopy(model), - **{'prepare_custom_config_dict': \ - {'non_traceable_module_name': ['a']}, - 'convert_custom_config_dict': \ - {'preserved_attributes': []} - }) + os.remove("./saved/best_model.pt") + model_fx = load( + "./saved", + copy.deepcopy(model), + **{ + "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]}, + "convert_custom_config_dict": {"preserved_attributes": []}, + } + ) self.assertTrue(isinstance(model_fx, torch.fx.graph_module.GraphModule)) # recover int8 model with only tune_cfg - history_file = './saved/history.snapshot' - model_fx_recover = recover(model, history_file, 0, - **{'prepare_custom_config_dict': - {'non_traceable_module_name': ['a']}, - 'convert_custom_config_dict': - {'preserved_attributes': []} - }) + history_file = "./saved/history.snapshot" + model_fx_recover = recover( + model, + history_file, + 0, + **{ + "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]}, + "convert_custom_config_dict": {"preserved_attributes": []}, + } + ) self.assertEqual(model_fx.code, model_fx_recover.code) - shutil.rmtree('./saved', ignore_errors=True) + 
shutil.rmtree("./saved", ignore_errors=True) def test_default_dynamic_quant(self): def eval_func(model): @@ -955,69 +998,75 @@ def q_func(model): return model # Model Definition - for fake_yaml in ['fx_qat_yaml.yaml', 'fx_ptq_yaml.yaml']: + for fake_yaml in ["fx_qat_yaml.yaml", "fx_ptq_yaml.yaml"]: model_origin = LSTMModel( - ntoken = 10, - ninp = 512, - nhid = 256, - nlayers = 2, + ntoken=10, + ninp=512, + nhid=256, + nlayers=2, ) # run fx_quant in neural_compressor and save the quantized GraphModule quantizer = Quantization(fake_yaml) - dataset = quantizer.dataset('dummy', (3, 10), label=True) + dataset = quantizer.dataset("dummy", (3, 10), label=True) quantizer.eval_func = eval_func - if fake_yaml == 'fx_qat_yaml.yaml': + if fake_yaml == "fx_qat_yaml.yaml": quantizer.q_func = q_func quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = common.Model(model_origin) q_model = quantizer.fit() - self.assertTrue('quantize' in str(type(q_model.model.encoder))) - self.assertTrue('quantize' in str(type(q_model.model.rnn))) + self.assertTrue("quantize" in str(type(q_model.model.encoder))) + self.assertTrue("quantize" in str(type(q_model.model.rnn))) def test_fx_sub_module_quant(self): - for fake_yaml in ['fx_qat_yaml.yaml', 'fx_dynamic_yaml.yaml', 'fx_ptq_yaml.yaml']: + for fake_yaml in ["fx_qat_yaml.yaml", "fx_dynamic_yaml.yaml", "fx_ptq_yaml.yaml"]: model_origin = DynamicControlModel() # run fx_quant in neural_compressor and save the quantized GraphModule quantizer = Quantization(fake_yaml) - dataset = quantizer.dataset('dummy', (1, 3, 224, 224), label=True) + dataset = quantizer.dataset("dummy", (1, 3, 224, 224), label=True) quantizer.eval_func = eval_func - if fake_yaml == 'fx_qat_yaml.yaml': + if fake_yaml == "fx_qat_yaml.yaml": quantizer.q_func = q_func quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.model = common.Model(model_origin, - **{'prepare_custom_config_dict': \ - {'non_traceable_module_name': ['a']}, - 'convert_custom_config_dict': \ - {'preserved_attributes': []} - }) + quantizer.model = common.Model( + model_origin, + **{ + "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]}, + "convert_custom_config_dict": {"preserved_attributes": []}, + } + ) q_model = quantizer.fit() - q_model.save('./saved') + q_model.save("./saved") # Load configure and weights with neural_compressor.utils - model_fx = load('./saved/best_model.pt', model_origin, - **{'prepare_custom_config_dict': \ - {'non_traceable_module_name': ['a']}, - 'convert_custom_config_dict': \ - {'preserved_attributes': []} - }) + model_fx = load( + "./saved/best_model.pt", + model_origin, + **{ + "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]}, + "convert_custom_config_dict": {"preserved_attributes": []}, + } + ) self.assertTrue(isinstance(model_fx.sub, torch.fx.graph_module.GraphModule)) # recover int8 model with only tune_cfg - history_file = './saved/history.snapshot' - model_fx_recover = recover(model_origin, history_file, 0, - **{'prepare_custom_config_dict': \ - {'non_traceable_module_name': ['a']}, - 'convert_custom_config_dict': \ - {'preserved_attributes': []} - }) + history_file = "./saved/history.snapshot" + model_fx_recover = recover( + model_origin, + history_file, + 0, + **{ + "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]}, + "convert_custom_config_dict": {"preserved_attributes": []}, + } + ) self.assertEqual(model_fx.sub.code, model_fx_recover.sub.code) - shutil.rmtree('./saved', ignore_errors=True) + 
shutil.rmtree("./saved", ignore_errors=True) def test_deepcopy_failure(self): def eval_func(model): return 1 # To build an object t2, which will fail on deepcopy. - class T1(): + class T1: def __init__(self, t1) -> None: self.t1 = t1 self.j = 1 @@ -1030,68 +1079,73 @@ def __hash__(self): t2 = T1([t1]) t1.add(t2) - for fake_yaml in ['fx_ptq_yaml.yaml']: + for fake_yaml in ["fx_ptq_yaml.yaml"]: model_origin = M() model_origin.tmp = t2 # run fx_quant in neural_compressor and save the quantized GraphModule quantizer = Quantization(fake_yaml) - dataset = quantizer.dataset('dummy', (1, 3, 224, 224), label=True) + dataset = quantizer.dataset("dummy", (1, 3, 224, 224), label=True) quantizer.eval_func = eval_func quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = common.Model(model_origin) q_model = quantizer.fit() self.assertTrue(isinstance(q_model.model, torch.fx.graph_module.GraphModule)) - @unittest.skipIf(PT_VERSION < Version("1.11.0").release, - "Please use PyTroch 1.11 or higher version for mixed precision with pytorch_fx or pytorch backend") + @unittest.skipIf( + PT_VERSION < Version("1.11.0").release, + "Please use PyTroch 1.11 or higher version for mixed precision with pytorch_fx or pytorch backend", + ) def test_bf16_capability(self): model_origin = DynamicControlModel() - os.environ['FORCE_BF16'] = '1' + os.environ["FORCE_BF16"] = "1" q_capability = self.adaptor._get_quantizable_ops(model_origin) - del os.environ['FORCE_BF16'] + del os.environ["FORCE_BF16"] + self.assertEqual([elem["weight"]["dtype"] for elem in q_capability["optypewise"]["Conv2d"]], [["int8"], "fp32"]) self.assertEqual( - [elem['weight']['dtype'] for elem in q_capability['optypewise']['Conv2d']], - [['int8'], 'fp32']) - self.assertEqual( - [elem['activation']['dtype'] for elem in q_capability['optypewise']['Conv2d']], - [['uint8'], 'fp32']) + [elem["activation"]["dtype"] for elem in q_capability["optypewise"]["Conv2d"]], [["uint8"], "fp32"] + ) self.assertEqual( - [elem['weight']['dtype'] for elem in q_capability['opwise'][('conv', 'Conv2d')]], - [['int8'], 'fp32']) + [elem["weight"]["dtype"] for elem in q_capability["opwise"][("conv", "Conv2d")]], [["int8"], "fp32"] + ) self.assertEqual( - [elem['activation']['dtype'] for elem in q_capability['opwise'][('conv', 'Conv2d')]], - [['uint8'], 'fp32']) + [elem["activation"]["dtype"] for elem in q_capability["opwise"][("conv", "Conv2d")]], [["uint8"], "fp32"] + ) self.assertEqual( - [elem['weight']['dtype'] for elem in q_capability['opwise'][('linear', 'Linear')]], - [['int8'], 'fp32', 'bf16']) + [elem["weight"]["dtype"] for elem in q_capability["opwise"][("linear", "Linear")]], + [["int8"], "fp32", "bf16"], + ) self.assertEqual( - [elem['activation']['dtype'] for elem in q_capability['opwise'][('linear', 'Linear')]], - [['uint8'], 'fp32', 'bf16']) + [elem["activation"]["dtype"] for elem in q_capability["opwise"][("linear", "Linear")]], + [["uint8"], "fp32", "bf16"], + ) - @unittest.skipIf(PT_VERSION < Version("1.11.0").release, - "Please use PyTroch 1.11 or higher version for mixed precision with pytorch_fx or pytorch backend") + @unittest.skipIf( + PT_VERSION < Version("1.11.0").release, + "Please use PyTroch 1.11 or higher version for mixed precision with pytorch_fx or pytorch backend", + ) def test_mix_precision(self): - fake_yaml = 'fx_ptq_yaml.yaml' + fake_yaml = "fx_ptq_yaml.yaml" model_origin = DynamicControlModel() # run fx_quant in neural_compressor and save the quantized GraphModule quantizer = Quantization(fake_yaml) - dataset = 
quantizer.dataset('dummy', (1, 3, 224, 224), label=True) + dataset = quantizer.dataset("dummy", (1, 3, 224, 224), label=True) quantizer.eval_func = eval_func quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.model = common.Model(model_origin, - **{'prepare_custom_config_dict': \ - {'non_traceable_module_name': ['a']}, - 'convert_custom_config_dict': \ - {'preserved_attributes': []} - }) + quantizer.model = common.Model( + model_origin, + **{ + "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]}, + "convert_custom_config_dict": {"preserved_attributes": []}, + } + ) q_model = quantizer.fit() tune_cfg = q_model.q_config - tune_cfg['op'][('conv.module', 'Conv2d')].clear() - tune_cfg['op'][('conv.module', 'Conv2d')] = \ - {'weight': {'dtype': 'bf16'}, 'activation': {'dtype': 'bf16'}} - tune_cfg["bf16_ops_list"].append(('conv.module', 'Conv2d')) + tune_cfg["op"][("conv.module", "Conv2d")].clear() + tune_cfg["op"][("conv.module", "Conv2d")] = {"weight": {"dtype": "bf16"}, "activation": {"dtype": "bf16"}} + tune_cfg["bf16_ops_list"].append(("conv.module", "Conv2d")) from neural_compressor.adaptor.torch_utils.bf16_convert import Convert + q_model._model = Convert(q_model._model, tune_cfg) self.assertEqual(q_model._model.conv.module.module.weight.dtype, torch.bfloat16) @@ -1099,6 +1153,7 @@ def test_mix_precision(self): def test_symbolic_trace(self): from neural_compressor.adaptor.torch_utils.symbolic_trace import symbolic_trace + model_origin = DynamicControlModel() traced_model = symbolic_trace(model_origin, is_qat=False) if PT_VERSION >= Version("1.11.0").release: @@ -1111,34 +1166,40 @@ def test_symbolic_trace(self): def test_tensor_dump(self): model = resnet18() - model = MODELS['pytorch'](model) - quantizer = Quantization('fx_ptq_yaml.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 224, 224), label=True) + model = MODELS["pytorch"](model) + quantizer = Quantization("fx_ptq_yaml.yaml") + dataset = quantizer.dataset("dummy", (100, 3, 224, 224), label=True) dataloader = common.DataLoader(dataset) - dataloader = common._generate_common_dataloader(dataloader, 'pytorch') + dataloader = common._generate_common_dataloader(dataloader, "pytorch") quantizer.eval_dataloader = dataloader quantizer.calib_dataloader = dataloader quantizer.model = model.model q_model = quantizer.fit() op_list, _ = quantizer.strategy.adaptor.diagnosis_helper(model, q_model, None) quantizer.strategy.adaptor.inspect_tensor( - model, dataloader, op_list=op_list, - iteration_list=[1], inspect_type='all', save_to_disk=True) - with open('saved/inspect_result.pkl', 'rb') as fp: + model, dataloader, op_list=op_list, iteration_list=[1], inspect_type="all", save_to_disk=True + ) + with open("saved/inspect_result.pkl", "rb") as fp: tensor_dict = pickle.load(fp) a = tensor_dict["activation"][0] w = tensor_dict["weight"] - self.assertTrue(w['conv1']['conv1.weight'].shape[0] == - a['conv1']['conv1.output0'].shape[1]) + self.assertTrue(w["conv1"]["conv1.weight"].shape[0] == a["conv1"]["conv1.output0"].shape[1]) quantizer.strategy.adaptor.inspect_tensor( - q_model, dataloader, op_list=['conv1', 'layer2.0.downsample.0'], - iteration_list=[1, 2], inspect_type='all', save_to_disk=True) - with open('saved/inspect_result.pkl', 'rb') as fp: + q_model, + dataloader, + op_list=["conv1", "layer2.0.downsample.0"], + iteration_list=[1, 2], + inspect_type="all", + save_to_disk=True, + ) + with open("saved/inspect_result.pkl", "rb") as fp: tensor_dict = pickle.load(fp) a = tensor_dict["activation"][0] w = 
tensor_dict["weight"] - self.assertTrue(w['layer2.0.downsample.0']['layer2.0.downsample.0.weight'].shape[0] == - a['layer2.0.downsample.0']['layer2.0.downsample.0.output0'].shape[1]) + self.assertTrue( + w["layer2.0.downsample.0"]["layer2.0.downsample.0.weight"].shape[0] + == a["layer2.0.downsample.0"]["layer2.0.downsample.0.output0"].shape[1] + ) if __name__ == "__main__": diff --git a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2.x.py b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2.x.py index 19e38eb2adf..7bffe22192f 100644 --- a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2.x.py +++ b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2.x.py @@ -1,20 +1,25 @@ import copy -import neural_compressor.adaptor.pytorch as nc_torch import os import shutil +import unittest + import torch import torch.nn as nn -import unittest -from neural_compressor import PostTrainingQuantConfig, QuantizationAwareTrainingConfig, set_workspace, Metric -from neural_compressor.data import Datasets, DATALOADERS, DataLoader -from neural_compressor import quantization -from neural_compressor.training import prepare_compression, fit -from neural_compressor.utils.pytorch import load -from neural_compressor.utils.utility import recover -from neural_compressor.utils.utility import LazyImport -from torch.quantization import QuantStub, DeQuantStub from packaging.version import Version +from torch.quantization import DeQuantStub, QuantStub +import neural_compressor.adaptor.pytorch as nc_torch +from neural_compressor import ( + Metric, + PostTrainingQuantConfig, + QuantizationAwareTrainingConfig, + quantization, + set_workspace, +) +from neural_compressor.data import DATALOADERS, DataLoader, Datasets +from neural_compressor.training import fit, prepare_compression +from neural_compressor.utils.pytorch import load +from neural_compressor.utils.utility import LazyImport, recover # improve lazy import UT coverage resnet18 = LazyImport("torchvision.models.resnet18") @@ -27,134 +32,36 @@ ptq_fx_op_name_list = { - "layer1.0.conv1": { - "activation": { - "dtype": ["fp32"] - }, - "weight": { - "dtype": ["fp32"] - } - }, - "layer1.0.conv2": { - "activation": { - "dtype": ["fp32"] - }, - "weight": { - "dtype": ["fp32"] - } - }, + "layer1.0.conv1": {"activation": {"dtype": ["fp32"]}, "weight": {"dtype": ["fp32"]}}, + "layer1.0.conv2": {"activation": {"dtype": ["fp32"]}, "weight": {"dtype": ["fp32"]}}, "layer2.0.conv1": { - "activation": { - "dtype": ["uint8"], - "algorithm": ["minmax"], - "granularity": ["per_tensor"], - "scheme": ["sym"] - }, - "weight": { - "dtype": ["int8"], - "algorithm": ["minmax"], - "granularity": ["per_channel"], - "scheme": ["sym"] - } + "activation": {"dtype": ["uint8"], "algorithm": ["minmax"], "granularity": ["per_tensor"], "scheme": ["sym"]}, + "weight": {"dtype": ["int8"], "algorithm": ["minmax"], "granularity": ["per_channel"], "scheme": ["sym"]}, }, "layer3.0.conv1": { - "activation": { - "dtype": ["uint8"], - "algorithm": ["kl"], - "granularity": ["per_tensor"], - "scheme": ["sym"] - }, - "weight": { - "dtype": ["int8"], - "algorithm": ["minmax"], - "granularity": ["per_channel"], - "scheme": ["sym"] - } - }, - "layer1.0.add_relu": { - "activation": { - "dtype": ["fp32"] - }, - "weight": { - "dtype": ["fp32"] - } + "activation": {"dtype": ["uint8"], "algorithm": ["kl"], "granularity": ["per_tensor"], "scheme": ["sym"]}, + "weight": {"dtype": ["int8"], "algorithm": ["minmax"], "granularity": ["per_channel"], "scheme": ["sym"]}, }, - "conv.module": { - "weight": { - "dtype": ["fp32"] - 
}, - "activation": { - "dtype": ["fp32"] - } - }, - "default_qconfig": { - "activation": { - "dtype": ["fp32"] - }, - "weight": { - "dtype": ["fp32"] - } - } + "layer1.0.add_relu": {"activation": {"dtype": ["fp32"]}, "weight": {"dtype": ["fp32"]}}, + "conv.module": {"weight": {"dtype": ["fp32"]}, "activation": {"dtype": ["fp32"]}}, + "default_qconfig": {"activation": {"dtype": ["fp32"]}, "weight": {"dtype": ["fp32"]}}, } qat_op_name_list = { - "layer1.0.conv1": { - "activation": { - "dtype": ["fp32"] - }, - "weight": { - "dtype": ["fp32"] - } - }, - "layer1.0.conv2": { - "activation": { - "dtype": ["fp32"] - }, - "weight": { - "dtype": ["fp32"] - } - }, + "layer1.0.conv1": {"activation": {"dtype": ["fp32"]}, "weight": {"dtype": ["fp32"]}}, + "layer1.0.conv2": {"activation": {"dtype": ["fp32"]}, "weight": {"dtype": ["fp32"]}}, "layer2.0.conv1": { - "activation": { - "dtype": ["uint8"], - "algorithm": ["minmax"], - "granularity": ["per_tensor"], - "scheme": ["sym"] - }, - "weight": { - "dtype": ["int8"], - "algorithm": ["minmax"], - "granularity": ["per_channel"], - "scheme": ["sym"] - } + "activation": {"dtype": ["uint8"], "algorithm": ["minmax"], "granularity": ["per_tensor"], "scheme": ["sym"]}, + "weight": {"dtype": ["int8"], "algorithm": ["minmax"], "granularity": ["per_channel"], "scheme": ["sym"]}, }, "layer3.0.conv1": { - "activation": { - "dtype": ["uint8"], - "algorithm": ["kl"], - "granularity": ["per_tensor"], - "scheme": ["sym"] - }, - "weight": { - "dtype": ["int8"], - "algorithm": ["minmax"], - "granularity": ["per_channel"], - "scheme": ["sym"] - } + "activation": {"dtype": ["uint8"], "algorithm": ["kl"], "granularity": ["per_tensor"], "scheme": ["sym"]}, + "weight": {"dtype": ["int8"], "algorithm": ["minmax"], "granularity": ["per_channel"], "scheme": ["sym"]}, }, - "layer1.0.add_relu": { - "activation": { - "dtype": ["fp32"] - }, - "weight": { - "dtype": ["fp32"] - } - } + "layer1.0.add_relu": {"activation": {"dtype": ["fp32"]}, "weight": {"dtype": ["fp32"]}}, } - - class M(torch.nn.Module): def __init__(self): super().__init__() @@ -187,6 +94,7 @@ class DynamicModel(torch.nn.Module): def __init__(self): super().__init__() self.conv = nn.Conv2d(1, 1, 1) + def forward(self, x): if x is not None: x = self.conv(x) @@ -309,8 +217,7 @@ def test_fx_quant(self): dataloader = DATALOADERS["pytorch"](dataset) if approach == "qat": model = copy.deepcopy(model_origin) - conf = QuantizationAwareTrainingConfig( - op_name_dict=qat_op_name_list) + conf = QuantizationAwareTrainingConfig(op_name_dict=qat_op_name_list) compression_manager = prepare_compression(model, conf) compression_manager.callbacks.on_train_begin() model = compression_manager.model @@ -318,14 +225,10 @@ def test_fx_quant(self): compression_manager.callbacks.on_train_end() compression_manager.save("./saved") else: - conf = PostTrainingQuantConfig( - op_name_dict=ptq_fx_op_name_list) + conf = PostTrainingQuantConfig(op_name_dict=ptq_fx_op_name_list) conf.example_inputs = torch.randn([1, 3, 224, 224]) set_workspace("./saved") - q_model = quantization.fit(model_origin, - conf, - calib_dataloader=dataloader, - eval_func=eval_func) + q_model = quantization.fit(model_origin, conf, calib_dataloader=dataloader, eval_func=eval_func) q_model.save("./saved") # Load configure and weights with neural_compressor.utils model_fx = load("./saved", model_origin) @@ -341,19 +244,13 @@ def test_fx_quant(self): dataloader = DATALOADERS["pytorch"](dataset) if approach == "qat": model = copy.deepcopy(model_origin) - conf = 
QuantizationAwareTrainingConfig( - op_name_dict=qat_op_name_list - ) + conf = QuantizationAwareTrainingConfig(op_name_dict=qat_op_name_list) compression_manager = prepare_compression(model, conf) q_model = fit(compression_manager=compression_manager, train_func=train_func, eval_func=eval_func) compression_manager.save("./saved") else: - conf = PostTrainingQuantConfig( - op_name_dict=ptq_fx_op_name_list - ) - q_model = quantization.fit(model_origin, - conf, - calib_dataloader=dataloader) + conf = PostTrainingQuantConfig(op_name_dict=ptq_fx_op_name_list) + q_model = quantization.fit(model_origin, conf, calib_dataloader=dataloader) q_model.save("./saved") # Load configure and weights with neural_compressor.utils model_fx = load("./saved", model_origin) @@ -367,31 +264,32 @@ def test_quantize_with_metric(self): dataloader = DATALOADERS["pytorch"](dataset) # run fx_quant in neural_compressor and save the quantized GraphModule conf = PostTrainingQuantConfig() - q_model = quantization.fit(model_origin, - conf, - calib_dataloader=dataloader, - eval_dataloader=dataloader, - eval_metric=Metric(name="topk", k=1)) + q_model = quantization.fit( + model_origin, + conf, + calib_dataloader=dataloader, + eval_dataloader=dataloader, + eval_metric=Metric(name="topk", k=1), + ) self.assertTrue("quantize" in str(type(q_model.model.fc))) def test_quantize_with_calib_func(self): model_origin = resnet18() # run fx_quant in neural_compressor and save the quantized GraphModule conf = PostTrainingQuantConfig() - q_model = quantization.fit(model_origin, - conf, - calib_func=eval_func, - eval_func=eval_func) + q_model = quantization.fit(model_origin, conf, calib_func=eval_func, eval_func=eval_func) self.assertTrue("quantize" in str(type(q_model.model.fc))) - @unittest.skipIf(PT_VERSION < Version("1.9.0").release, - "Please use PyTroch 1.9 or higher version for dynamic quantization with pytorch_fx backend") + @unittest.skipIf( + PT_VERSION < Version("1.9.0").release, + "Please use PyTroch 1.9 or higher version for dynamic quantization with pytorch_fx backend", + ) def test_fx_dynamic_quant(self): origin_model = LSTMModel( - ntoken = 10, - ninp = 512, - nhid = 256, - nlayers = 5, + ntoken=10, + ninp=512, + nhid=256, + nlayers=5, ) # run fx_quant in neural_compressor and save the quantized GraphModule origin_model.eval() @@ -408,6 +306,7 @@ def test_fx_dynamic_quant(self): state_dict = torch.load("./saved/best_model.pt") tune_cfg = state_dict.pop("best_configure") import yaml + with open("./saved/best_configure.yaml", "w") as f: yaml.dump(tune_cfg, f, default_flow_style=False) torch.save(state_dict, "./saved/best_model_weights.pt") @@ -428,19 +327,17 @@ def eval_func(model): # Model Definition for approach in ["qat", "auto"]: model_origin = LSTMModel( - ntoken = 10, - ninp = 512, - nhid = 256, - nlayers = 2, + ntoken=10, + ninp=512, + nhid=256, + nlayers=2, ) dataset = Datasets("pytorch")["dummy"]((3, 10)) dataloader = DATALOADERS["pytorch"](dataset) # run fx_quant in neural_compressor and save the quantized GraphModule if approach == "qat": model = copy.deepcopy(model_origin) - conf = QuantizationAwareTrainingConfig( - op_name_dict=qat_op_name_list - ) + conf = QuantizationAwareTrainingConfig(op_name_dict=qat_op_name_list) compression_manager = prepare_compression(model, conf) compression_manager.callbacks.on_train_begin() model = compression_manager.model.model @@ -450,9 +347,7 @@ def eval_func(model): self.assertTrue("quantize" in str(type(model.rnn))) else: conf = PostTrainingQuantConfig(approach="auto") - q_model = 
quantization.fit(model_origin, - conf, - calib_dataloader=dataloader) + q_model = quantization.fit(model_origin, conf, calib_dataloader=dataloader) self.assertTrue("quantize" in str(type(q_model.model.encoder))) self.assertTrue("quantize" in str(type(q_model.model.rnn))) @@ -474,26 +369,27 @@ def test_fx_sub_module_quant(self): else: set_workspace("./saved") conf = PostTrainingQuantConfig() - q_model = quantization.fit(model_origin, - conf, - calib_dataloader=dataloader) + q_model = quantization.fit(model_origin, conf, calib_dataloader=dataloader) q_model.save("./saved") # Load configure and weights with neural_compressor.utils - model_fx = load("./saved/best_model.pt", model_origin, - **{"dataloader": torch.utils.data.DataLoader(dataset) - }) + model_fx = load( + "./saved/best_model.pt", model_origin, **{"dataloader": torch.utils.data.DataLoader(dataset)} + ) self.assertTrue(isinstance(model_fx.sub, torch.fx.graph_module.GraphModule)) if approach != "qat": # recover int8 model with only tune_cfg history_file = "./saved/history.snapshot" - model_fx_recover = recover(model_origin, history_file, 0, - **{"dataloader": torch.utils.data.DataLoader(dataset)}) + model_fx_recover = recover( + model_origin, history_file, 0, **{"dataloader": torch.utils.data.DataLoader(dataset)} + ) self.assertEqual(model_fx.sub.code, model_fx_recover.sub.code) shutil.rmtree("./saved", ignore_errors=True) - @unittest.skipIf(PT_VERSION < Version("1.11.0").release, - "Please use PyTroch 1.11 or higher version for mixed precision with pytorch_fx or pytorch backend") + @unittest.skipIf( + PT_VERSION < Version("1.11.0").release, + "Please use PyTroch 1.11 or higher version for mixed precision with pytorch_fx or pytorch backend", + ) def test_mix_precision(self): model_origin = DynamicControlModel() # run fx_quant in neural_compressor and save the quantized GraphModule @@ -501,16 +397,13 @@ def test_mix_precision(self): dataloader = DataLoader("pytorch", dataset) set_workspace("./saved") conf = PostTrainingQuantConfig(op_name_dict=ptq_fx_op_name_list) - q_model = quantization.fit(model_origin, - conf, - calib_dataloader=dataloader, - calib_func=eval_func) + q_model = quantization.fit(model_origin, conf, calib_dataloader=dataloader, calib_func=eval_func) tune_cfg = q_model.q_config tune_cfg["op"][("conv.module", "Conv2d")].clear() - tune_cfg["op"][("conv.module", "Conv2d")] = \ - {"weight": {"dtype": "bf16"}, "activation": {"dtype": "bf16"}} + tune_cfg["op"][("conv.module", "Conv2d")] = {"weight": {"dtype": "bf16"}, "activation": {"dtype": "bf16"}} tune_cfg["bf16_ops_list"].append(("conv.module", "Conv2d")) from neural_compressor.adaptor.torch_utils.bf16_convert import Convert + q_model._model = Convert(q_model._model, tune_cfg) self.assertEqual(q_model._model.conv.module.module.weight.dtype, torch.bfloat16) @@ -519,53 +412,56 @@ def test_mix_precision(self): def test_hawq_metric(self): # Test for hawq metric import torchvision - from neural_compressor.data import Datasets, DATALOADERS - from neural_compressor.quantization import fit + + from neural_compressor.adaptor.torch_utils.hawq_metric import hawq_top from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.data import DATALOADERS, Datasets from neural_compressor.model.torch_model import PyTorchFXModel - from neural_compressor.adaptor.torch_utils.hawq_metric import hawq_top + from neural_compressor.quantization import fit ori_model = torchvision.models.resnet18() pt_model = PyTorchFXModel(ori_model) dataset = 
Datasets("pytorch")["dummy"](((16, 3, 224, 224))) dataloader = DATALOADERS["pytorch"](dataset) q_model = fit(ori_model, conf=PostTrainingQuantConfig(), calib_dataloader=dataloader) - op_to_traces = hawq_top(fp32_model=pt_model, - q_model=q_model, - dataloader=dataloader, - criterion=None, - enable_act=True) + op_to_traces = hawq_top( + fp32_model=pt_model, q_model=q_model, dataloader=dataloader, criterion=None, enable_act=True + ) self.assertIsNotNone(op_to_traces) + @unittest.skipIf(not FX_MODE, "Unsupport Fx Mode with PyTorch Version Below 1.8") class TestPyTorchBlockDetector(unittest.TestCase): def test_block_detector(self): - from neural_compressor.adaptor.torch_utils.pattern_detector import ( - TransformerBasedModelBlockPatternDetector, - BLOCK_PATTERNS) from transformers import BertModel + from neural_compressor.adaptor.torch_utils.pattern_detector import ( + BLOCK_PATTERNS, + TransformerBasedModelBlockPatternDetector, + ) + model = BertModel.from_pretrained("bert-base-uncased") detector = TransformerBasedModelBlockPatternDetector(model, BLOCK_PATTERNS) result = detector.detect_block() - assert len(result['attention_blocks']), 12 - assert len(result['ffn_blocks']), 12 + assert len(result["attention_blocks"]), 12 + assert len(result["ffn_blocks"]), 12 found_attention_op = False found_dense_op = False - for block in ['attention_blocks']: + for block in ["attention_blocks"]: for op in block: - if 'dense' in op: + if "dense" in op: found_dense_op = True break - for block in ['ffn_blocks']: + for block in ["ffn_blocks"]: for op in block: - if 'attention' in op: + if "attention" in op: found_attention_op = True break assert not found_attention_op assert not found_dense_op + if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/pytorch_adaptor/test_weight_only_adaptor.py b/test/adaptor/pytorch_adaptor/test_weight_only_adaptor.py index 51f25ed0ade..33e6c1e41fd 100644 --- a/test/adaptor/pytorch_adaptor/test_weight_only_adaptor.py +++ b/test/adaptor/pytorch_adaptor/test_weight_only_adaptor.py @@ -1,13 +1,15 @@ -import sys import copy +import sys + sys.path.append("./") import os import shutil -import torch import unittest + +import torch import transformers -from neural_compressor import quantization, PostTrainingQuantConfig +from neural_compressor import PostTrainingQuantConfig, quantization from neural_compressor.adaptor.torch_utils.model_wrapper import MulLinear, WeightOnlyLinear @@ -29,12 +31,13 @@ def eval_func(model): # switch to evaluate mode model.eval() with torch.no_grad(): - input = torch.randn(3,30) + input = torch.randn(3, 30) # compute output output = model(input) return 0.0 -class SimpleDataLoader(): + +class SimpleDataLoader: def __init__(self): self.batch_size = 1 @@ -43,7 +46,7 @@ def __iter__(self): yield torch.randn([1, 30]) -class LLMDataLoader(): +class LLMDataLoader: def __init__(self): self.batch_size = 1 @@ -53,17 +56,17 @@ def __iter__(self): class TestPytorchWeightOnlyAdaptor(unittest.TestCase): - approach = 'weight_only' + approach = "weight_only" @classmethod def setUpClass(self): self.dataloader = SimpleDataLoader() self.gptj = transformers.AutoModelForCausalLM.from_pretrained( - 'hf-internal-testing/tiny-random-GPTJForCausalLM', + "hf-internal-testing/tiny-random-GPTJForCausalLM", torchscript=True, ) self.gptj_no_jit = transformers.AutoModelForCausalLM.from_pretrained( - 'hf-internal-testing/tiny-random-GPTJForCausalLM', + "hf-internal-testing/tiny-random-GPTJForCausalLM", ) self.gptj.seqlen = 512 self.llm_dataloader = LLMDataLoader() @@ -75,12 
+78,12 @@ def tearDownClass(self): shutil.rmtree("runs", ignore_errors=True) def test_RTN_int_quant(self): - input = torch.randn(3,30) + input = torch.randn(3, 30) model = Model() out1 = model(input) conf = PostTrainingQuantConfig( - approach='weight_only', + approach="weight_only", ) q_model = quantization.fit(model, conf) out2 = q_model(input) @@ -88,16 +91,16 @@ def test_RTN_int_quant(self): self.assertFalse(torch.all(out1 == out2)) compressed_model = q_model.export_compressed_model() out3 = compressed_model(input) - self.assertTrue(torch.all(out3==out2)) + self.assertTrue(torch.all(out3 == out2)) model = Model() out1 = model(input) conf = PostTrainingQuantConfig( - approach='weight_only', + approach="weight_only", recipes={ # By default, sym_full_range is False and 4 bit sym will only use range [-7,7]. - 'rtn_args': {'sym_full_range': True} - } + "rtn_args": {"sym_full_range": True} + }, ) q_model = quantization.fit(model, conf) out2 = q_model(input) @@ -105,24 +108,24 @@ def test_RTN_int_quant(self): self.assertFalse(torch.all(out1 == out2)) compressed_model = q_model.export_compressed_model(sym_full_range=True) out3 = compressed_model(input) - self.assertTrue(torch.all(out3==out2)) + self.assertTrue(torch.all(out3 == out2)) model = Model() out1 = model(input) conf = PostTrainingQuantConfig( - approach='weight_only', + approach="weight_only", op_type_dict={ - '.*':{ # re.match + ".*": { # re.match "weight": { - 'dtype': 'int4', # 1-8 bits + "dtype": "int4", # 1-8 bits }, }, }, recipes={ # By default, sym_full_range is False and 4 bit sym will only use range [-7,7]. # When mse_range is set to True, enable clip for weight by checking mse. - 'rtn_args': {'sym_full_range': True, 'mse_range': True} - } + "rtn_args": {"sym_full_range": True, "mse_range": True} + }, ) q_model = quantization.fit(model, conf) out2 = q_model(input) @@ -132,21 +135,21 @@ def test_RTN_int_quant(self): model = Model() out1 = model(input) conf = PostTrainingQuantConfig( - approach='weight_only', + approach="weight_only", op_type_dict={ - '.*':{ # re.match + ".*": { # re.match "weight": { - 'bits': 8, # 1-8 bits - 'group_size': -1, # -1 (per-channel) - 'scheme': 'sym', - 'algorithm': 'RTN', + "bits": 8, # 1-8 bits + "group_size": -1, # -1 (per-channel) + "scheme": "sym", + "algorithm": "RTN", }, }, }, recipes={ # By default, sym_full_range is False and 4 bit sym will only use range [-7,7]. 
- 'rtn_args': {'return_int': True} - } + "rtn_args": {"return_int": True} + }, ) q_model = quantization.fit(model, conf, eval_func=eval_func) out2 = q_model(input) @@ -156,14 +159,14 @@ def test_RTN_int_quant(self): model = Model() out1 = model(input) conf = PostTrainingQuantConfig( - approach='weight_only', + approach="weight_only", op_type_dict={ - '.*':{ # re.match + ".*": { # re.match "weight": { - 'bits': 4, # 1-8 bits - 'group_size': 32, # 1 - 1024 or higher - 'scheme': 'asym', - 'algorithm': 'RTN', + "bits": 4, # 1-8 bits + "group_size": 32, # 1 - 1024 or higher + "scheme": "asym", + "algorithm": "RTN", }, }, }, @@ -176,27 +179,27 @@ def test_RTN_int_quant(self): model = Model() out1 = model(input) conf = PostTrainingQuantConfig( - approach='weight_only', + approach="weight_only", op_name_dict={ - 'fc1':{ # re.match + "fc1": { # re.match "weight": { - 'bits': 4, # 1-8 bits - 'group_size': 32, # 1 - 1024 or higher - 'scheme': 'sym', - 'algorithm': 'RTN', + "bits": 4, # 1-8 bits + "group_size": 32, # 1 - 1024 or higher + "scheme": "sym", + "algorithm": "RTN", }, }, - 'fc2':{ # re.match + "fc2": { # re.match "weight": { - 'bits': 3, # 1-8 bits - 'group_size': 16, # 1 - 1024 or higher - 'scheme': 'asym', - 'algorithm': 'RTN', + "bits": 3, # 1-8 bits + "group_size": 16, # 1 - 1024 or higher + "scheme": "asym", + "algorithm": "RTN", }, }, - 'fc3':{ # re.match + "fc3": { # re.match "weight": { - 'dtype': 'fp32', + "dtype": "fp32", }, }, }, @@ -205,38 +208,39 @@ def test_RTN_int_quant(self): out2 = q_model(input) self.assertTrue(torch.all(torch.isclose(out1, out2, atol=5e-1))) self.assertFalse(torch.all(out1 == out2)) - q_model.save('saved') + q_model.save("saved") from neural_compressor.utils.pytorch import load - new_model = load('saved', model, weight_only=True) + + new_model = load("saved", model, weight_only=True) out1 = new_model(input) self.assertTrue(torch.all(out1 == out2)) - - model_size1 = os.path.getsize('saved/best_model.pt')/1024 + model_size1 = os.path.getsize("saved/best_model.pt") / 1024 print("FP32 Model size:{:.3f}M".format(model_size1)) from neural_compressor.model import Model as INCModel + inc_model = INCModel(new_model) - inc_model.export_compressed_model(qweight_config_path = 'saved/qconfig.json') - torch.save(inc_model.state_dict(), 'saved/tmp.pt') - model_size2 = os.path.getsize('saved/tmp.pt')/1024 + inc_model.export_compressed_model(qweight_config_path="saved/qconfig.json") + torch.save(inc_model.state_dict(), "saved/tmp.pt") + model_size2 = os.path.getsize("saved/tmp.pt") / 1024 print("WeightOnlyLinear Model size:{:.3f}M".format(model_size2)) self.assertTrue(isinstance(inc_model.model.fc1, WeightOnlyLinear)) self.assertTrue(model_size1 / model_size2 > 2) def test_RTN_fp4_quant(self): - for dtype in ['nf4', 'fp4', 'fp4_e2m1_bnb', 'fp4_e2m1']: - input = torch.randn(3,30) + for dtype in ["nf4", "fp4", "fp4_e2m1_bnb", "fp4_e2m1"]: + input = torch.randn(3, 30) model = Model() out1 = model(input) conf = PostTrainingQuantConfig( - approach='weight_only', + approach="weight_only", op_type_dict={ - '.*':{ # re.match + ".*": { # re.match "weight": { - 'dtype': dtype, # select from int, nf4, or fp4 + "dtype": dtype, # select from int, nf4, or fp4 # nf4/fp4 have fixed bits and scheme. 
- 'group_size': 32, # -1 (per-channel) - 'algorithm': 'RTN', + "group_size": 32, # -1 (per-channel) + "algorithm": "RTN", }, }, }, @@ -247,57 +251,54 @@ def test_RTN_fp4_quant(self): self.assertFalse(torch.all(out1 == out2)) compressed_model = q_model.export_compressed_model() out3 = compressed_model(input) - self.assertTrue(torch.all(out3==out2)) + self.assertTrue(torch.all(out3 == out2)) def test_AWQ_quant(self): conf = PostTrainingQuantConfig( - approach='weight_only', + approach="weight_only", op_type_dict={ - '.*':{ # re.match + ".*": { # re.match "weight": { - 'bits': 4, # 1-8 bits - 'group_size': 32, # -1 (per-channel) - 'scheme': 'asym', - 'algorithm': 'AWQ', + "bits": 4, # 1-8 bits + "group_size": 32, # -1 (per-channel) + "scheme": "asym", + "algorithm": "AWQ", }, }, }, op_name_dict={ - '.*3.*':{ # re.match - "weight": { - 'dtype': 'fp32' - }, + ".*3.*": { # re.match + "weight": {"dtype": "fp32"}, }, - '.*4.*':{ # re.match + ".*4.*": { # re.match "weight": { - 'bits': 4, # 1-8 bits - 'group_size': 32, # -1 (per-channel) - 'scheme': 'asym', - 'algorithm': 'RTN', + "bits": 4, # 1-8 bits + "group_size": 32, # -1 (per-channel) + "scheme": "asym", + "algorithm": "RTN", }, }, - '.*lm_head':{ # re.match - "weight": { - 'dtype': 'fp32' - }, + ".*lm_head": { # re.match + "weight": {"dtype": "fp32"}, }, }, recipes={ - 'awq_args':{'auto_scale': True, 'mse_range': True, 'folding': False}, + "awq_args": {"auto_scale": True, "mse_range": True, "folding": False}, }, ) fp32_model = copy.deepcopy(self.gptj) q_model = quantization.fit( - fp32_model, - conf, + fp32_model, + conf, calib_dataloader=self.llm_dataloader, ) - q_model.save('saved') + q_model.save("saved") input = torch.ones([1, 10], dtype=torch.long) out1 = q_model(input) from neural_compressor.utils.pytorch import load + fp32_model = copy.deepcopy(self.gptj) - reload_model = load('saved', fp32_model, weight_only=True) + reload_model = load("saved", fp32_model, weight_only=True) out2 = reload_model(input) q_model.export_compressed_model() out3 = q_model(input) @@ -308,70 +309,65 @@ def test_AWQ_quant(self): self.assertTrue(isinstance(q_model.model.lm_head, torch.nn.Linear)) conf = PostTrainingQuantConfig( - approach='weight_only', + approach="weight_only", op_type_dict={ - '.*':{ # re.match + ".*": { # re.match "weight": { - 'bits': 4, # 1-8 bits - 'group_size': 32, # -1 (per-channel) - 'scheme': 'asym', - 'algorithm': 'AWQ', + "bits": 4, # 1-8 bits + "group_size": 32, # -1 (per-channel) + "scheme": "asym", + "algorithm": "AWQ", }, }, }, op_name_dict={ - '.*3.*':{ # re.match - "weight": { - 'dtype': 'fp32' - }, + ".*3.*": { # re.match + "weight": {"dtype": "fp32"}, }, - '.*4.*':{ # re.match + ".*4.*": { # re.match "weight": { - 'bits': 4, # 1-8 bits - 'group_size': 32, # -1 (per-channel) - 'scheme': 'asym', - 'algorithm': 'RTN', + "bits": 4, # 1-8 bits + "group_size": 32, # -1 (per-channel) + "scheme": "asym", + "algorithm": "RTN", }, }, - '.*lm_head':{ # re.match - "weight": { - 'dtype': 'fp32' - }, + ".*lm_head": { # re.match + "weight": {"dtype": "fp32"}, }, }, recipes={ - 'rtn_args': {'return_int': True}, - 'awq_args':{'auto_scale': True, 'mse_range': True, 'folding': False}, + "rtn_args": {"return_int": True}, + "awq_args": {"auto_scale": True, "mse_range": True, "folding": False}, }, ) fp32_model = copy.deepcopy(self.gptj) q_model = quantization.fit( - fp32_model, - conf, + fp32_model, + conf, calib_dataloader=self.llm_dataloader, ) self.assertTrue(isinstance(q_model.model.transformer.h[0].mlp.fc_out, MulLinear)) 
self.assertTrue(isinstance(q_model.model.transformer.h[3].mlp.fc_out, torch.nn.Linear)) self.assertTrue(isinstance(q_model.model.transformer.h[4].mlp.fc_out, WeightOnlyLinear)) - conf = PostTrainingQuantConfig( - approach='weight_only', + approach="weight_only", op_type_dict={ - '.*':{ # re.match + ".*": { # re.match "weight": { - 'bits': 4, # 1-8 bits - 'group_size': 32, # -1 (per-channel) - 'scheme': 'asym', - 'algorithm': 'AWQ', + "bits": 4, # 1-8 bits + "group_size": 32, # -1 (per-channel) + "scheme": "asym", + "algorithm": "AWQ", }, }, }, ) fp32_model = copy.deepcopy(self.gptj_no_jit) q_model = quantization.fit( - fp32_model, - conf, + fp32_model, + conf, calib_dataloader=self.llm_dataloader, ) self.assertTrue(isinstance(q_model.model.transformer.h[0].mlp.fc_in, MulLinear)) @@ -382,27 +378,28 @@ def test_AWQ_nf4_quant(self): fp32_model = copy.deepcopy(self.gptj) out1 = fp32_model(input) conf = PostTrainingQuantConfig( - approach='weight_only', + approach="weight_only", op_type_dict={ - '.*':{ # re.match + ".*": { # re.match "weight": { - 'dtype': 'nf4', # select from int, nf4, or fp4 + "dtype": "nf4", # select from int, nf4, or fp4 # nf4/fp4 have fixed bits and scheme. - 'group_size': 32, # -1 (per-channel) - 'algorithm': 'RTN', + "group_size": 32, # -1 (per-channel) + "algorithm": "RTN", }, }, }, op_name_dict={ - 'lm_head':{ # re.match + "lm_head": { # re.match "weight": { - 'dtype': 'fp32', + "dtype": "fp32", }, }, }, ) q_model = quantization.fit( - fp32_model, conf, + fp32_model, + conf, calib_dataloader=self.llm_dataloader, ) out2 = q_model(input) @@ -413,6 +410,7 @@ def test_AWQ_nf4_quant(self): def test_AWQ_util(self): from neural_compressor.adaptor.torch_utils.util import get_module_input_output + class DemoModel(torch.nn.Module): def __init__(self): super(DemoModel, self).__init__() @@ -425,25 +423,24 @@ def forward(self, x): return out tmp = torch.randn([3, 3]) + class DemoCalibDataloader: def __init__(self): self.batch_size = 1 + def __iter__(self): for i in range(3): yield tmp - module_hook_config = { - 'fc1': ['output'], - 'fc2': ['input', 'output'] - } + module_hook_config = {"fc1": ["output"], "fc2": ["input", "output"]} model = DemoModel() out = model(tmp) values = get_module_input_output(model, module_hook_config, DemoCalibDataloader()) - self.assertTrue(torch.allclose(values['fc1']['output'][0], values['fc2']['input'][0])) - self.assertTrue(torch.allclose(values['fc2']['output'][0], out)) + self.assertTrue(torch.allclose(values["fc1"]["output"][0], values["fc2"]["input"][0])) + self.assertTrue(torch.allclose(values["fc2"]["output"][0], out)) def test_GPTQ_fixed_length_quant(self): - class GPTQLLMDataLoader(): + class GPTQLLMDataLoader: def __init__(self): self.batch_size = 1 @@ -451,88 +448,102 @@ def __iter__(self): for i in range(10): yield torch.ones([1, 512], dtype=torch.long) - class GPTQLLMDataLoaderList(): + class GPTQLLMDataLoaderList: def __init__(self): self.batch_size = 1 def __iter__(self): for i in range(10): yield (torch.ones([1, 512], dtype=torch.long), torch.ones([1, 512], dtype=torch.long)) - - class GPTQLLMDataLoaderDict(): + + class GPTQLLMDataLoaderDict: def __init__(self): self.batch_size = 1 def __iter__(self): for i in range(10): - yield {'input_ids': torch.ones([1, 512], dtype=torch.long), 'attention_mask': torch.ones([1, 512], dtype=torch.long)} + yield { + "input_ids": torch.ones([1, 512], dtype=torch.long), + "attention_mask": torch.ones([1, 512], dtype=torch.long), + } dataloader = GPTQLLMDataLoader() dataloader_list = 
GPTQLLMDataLoaderList() dataloader_dict = GPTQLLMDataLoaderDict() conf = PostTrainingQuantConfig( - approach='weight_only', + approach="weight_only", op_type_dict={ - '.*':{ # re.match + ".*": { # re.match "weight": { - 'bits': 4, # 1-8 bits - 'group_size': 8, # -1 (per-channel) - 'scheme': 'sym', - 'algorithm': 'GPTQ', + "bits": 4, # 1-8 bits + "group_size": 8, # -1 (per-channel) + "scheme": "sym", + "algorithm": "GPTQ", }, }, }, op_name_dict={ - '.*lm_head':{ # re.match - "weight": { - 'dtype': 'fp32' - }, + ".*lm_head": { # re.match + "weight": {"dtype": "fp32"}, }, }, recipes={ - 'gptq_args':{'percdamp': 0.01, 'act_order': False}, + "gptq_args": {"percdamp": 0.01, "act_order": False}, }, ) - + # case 1: tensor model_1 = copy.deepcopy(self.gptj) input = torch.ones([1, 512], dtype=torch.long) - q_model = quantization.fit(model_1, conf, calib_dataloader=dataloader,) - q_model.save('saved') + q_model = quantization.fit( + model_1, + conf, + calib_dataloader=dataloader, + ) + q_model.save("saved") out1 = q_model.model(input) compressed_model = q_model.export_compressed_model() out2 = compressed_model(input) - torch.save(compressed_model.state_dict(), 'saved/compressed_model.pt') + torch.save(compressed_model.state_dict(), "saved/compressed_model.pt") self.assertTrue(torch.allclose(out1[0], out2[0], atol=1e-05)) # # case 2: list or tuple model_2 = copy.deepcopy(self.gptj) input = torch.ones([1, 512], dtype=torch.long) - q_model = quantization.fit(model_2, conf, calib_dataloader=dataloader_list,) - q_model.save('saved') + q_model = quantization.fit( + model_2, + conf, + calib_dataloader=dataloader_list, + ) + q_model.save("saved") out1 = q_model.model(input) compressed_model = q_model.export_compressed_model() out2 = compressed_model(input) - torch.save(compressed_model.state_dict(), 'saved/compressed_model.pt') + torch.save(compressed_model.state_dict(), "saved/compressed_model.pt") self.assertTrue(torch.allclose(out1[0], out2[0], atol=1e-05)) - + # # case 2: list or tuple model_3 = copy.deepcopy(self.gptj) input = torch.ones([1, 512], dtype=torch.long) - q_model = quantization.fit(model_3, conf, calib_dataloader=dataloader_dict,) - q_model.save('saved') + q_model = quantization.fit( + model_3, + conf, + calib_dataloader=dataloader_dict, + ) + q_model.save("saved") out1 = q_model.model(input) compressed_model = q_model.export_compressed_model() out2 = compressed_model(input) - torch.save(compressed_model.state_dict(), 'saved/compressed_model.pt') + torch.save(compressed_model.state_dict(), "saved/compressed_model.pt") self.assertTrue(torch.allclose(out1[0], out2[0], atol=1e-05)) print("GPTQ with fixed length Done") def test_GPTQ_unfixed_length_quant(self): import random - class GPTQLLMDataLoader(): + + class GPTQLLMDataLoader: def __init__(self): self.batch_size = 1 @@ -541,7 +552,7 @@ def __iter__(self): length = random.randint(1, 1024) yield torch.ones([1, length], dtype=torch.long) - class GPTQLLMDataLoaderList(): + class GPTQLLMDataLoaderList: def __init__(self): self.batch_size = 1 @@ -549,75 +560,88 @@ def __iter__(self): for i in range(10): length = random.randint(1, 1024) yield (torch.ones([1, length], dtype=torch.long), torch.ones([1, length], dtype=torch.long)) - - class GPTQLLMDataLoaderDict(): + + class GPTQLLMDataLoaderDict: def __init__(self): self.batch_size = 1 def __iter__(self): for i in range(10): length = random.randint(1, 1024) - yield {'input_ids': torch.ones([1, length], dtype=torch.long), 'attention_mask': torch.ones([1, length], dtype=torch.long)} + yield { + 
"input_ids": torch.ones([1, length], dtype=torch.long), + "attention_mask": torch.ones([1, length], dtype=torch.long), + } dataloader = GPTQLLMDataLoader() dataloader_list = GPTQLLMDataLoaderList() dataloader_dict = GPTQLLMDataLoaderDict() conf = PostTrainingQuantConfig( - approach='weight_only', + approach="weight_only", op_type_dict={ - '.*':{ # re.match + ".*": { # re.match "weight": { - 'bits': 4, # 1-8 bits - 'group_size': 8, # -1 (per-channel) - 'scheme': 'sym', - 'algorithm': 'GPTQ', + "bits": 4, # 1-8 bits + "group_size": 8, # -1 (per-channel) + "scheme": "sym", + "algorithm": "GPTQ", }, }, }, op_name_dict={ - '.*lm_head':{ # re.match - "weight": { - 'dtype': 'fp32' - }, + ".*lm_head": { # re.match + "weight": {"dtype": "fp32"}, }, }, recipes={ - 'gptq_args':{'percdamp': 0.01, 'act_order': False, 'use_max_length': True}, + "gptq_args": {"percdamp": 0.01, "act_order": False, "use_max_length": True}, }, ) - + # case 1: tensor model_1 = copy.deepcopy(self.gptj) input = torch.ones([1, 512], dtype=torch.long) - q_model = quantization.fit(model_1, conf, calib_dataloader=dataloader,) - q_model.save('saved') + q_model = quantization.fit( + model_1, + conf, + calib_dataloader=dataloader, + ) + q_model.save("saved") out1 = q_model.model(input) compressed_model = q_model.export_compressed_model() out2 = compressed_model(input) - torch.save(compressed_model.state_dict(), 'saved/compressed_model.pt') + torch.save(compressed_model.state_dict(), "saved/compressed_model.pt") self.assertTrue(torch.allclose(out1[0], out2[0], atol=1e-05)) # # case 2: list or tuple model_2 = copy.deepcopy(self.gptj) input = torch.ones([1, 512], dtype=torch.long) - q_model = quantization.fit(model_2, conf, calib_dataloader=dataloader_list,) - q_model.save('saved') + q_model = quantization.fit( + model_2, + conf, + calib_dataloader=dataloader_list, + ) + q_model.save("saved") out1 = q_model.model(input) compressed_model = q_model.export_compressed_model() out2 = compressed_model(input) - torch.save(compressed_model.state_dict(), 'saved/compressed_model.pt') + torch.save(compressed_model.state_dict(), "saved/compressed_model.pt") self.assertTrue(torch.allclose(out1[0], out2[0], atol=1e-05)) - + # # case 2: list or tuple model_3 = copy.deepcopy(self.gptj) input = torch.ones([1, 512], dtype=torch.long) - q_model = quantization.fit(model_3, conf, calib_dataloader=dataloader_dict,) - q_model.save('saved') + q_model = quantization.fit( + model_3, + conf, + calib_dataloader=dataloader_dict, + ) + q_model.save("saved") out1 = q_model.model(input) compressed_model = q_model.export_compressed_model() out2 = compressed_model(input) - torch.save(compressed_model.state_dict(), 'saved/compressed_model.pt') + torch.save(compressed_model.state_dict(), "saved/compressed_model.pt") self.assertTrue(torch.allclose(out1[0], out2[0], atol=1e-05)) print("GPTQ with fixed length Done") @@ -636,32 +660,35 @@ def __iter__(self): yield (torch.ones([1, 512], dtype=torch.long), torch.ones([1, 512], dtype=torch.long)) conf = PostTrainingQuantConfig( - approach='weight_only', + approach="weight_only", op_type_dict={ - '.*':{ # re.match + ".*": { # re.match "weight": { - 'bits': 4, # 1-8 bits - 'group_size': 32, # -1 (per-channel) - 'scheme': 'sym', - 'algorithm': 'TEQ', + "bits": 4, # 1-8 bits + "group_size": 32, # -1 (per-channel) + "scheme": "sym", + "algorithm": "TEQ", }, }, }, op_name_dict={ - '.*lm_head':{ # re.match - "weight": { - 'dtype': 'fp32' - }, + ".*lm_head": { # re.match + "weight": {"dtype": "fp32"}, }, }, recipes={ - 
'teq_args':{"folding": True}, + "teq_args": {"folding": True}, }, ) dataloader = teq_inc_loader() model_1 = copy.deepcopy(self.gptj) - q_model = quantization.fit(model_1, conf, calib_dataloader=dataloader,) + q_model = quantization.fit( + model_1, + conf, + calib_dataloader=dataloader, + ) + if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_bf16_convert.py b/test/adaptor/tensorflow_adaptor/test_bf16_convert.py index b1f6219dfa1..23e09fad335 100644 --- a/test/adaptor/tensorflow_adaptor/test_bf16_convert.py +++ b/test/adaptor/tensorflow_adaptor/test_bf16_convert.py @@ -3,19 +3,17 @@ import shutil import unittest from unittest import result -import numpy as np -from neural_compressor.adaptor.tf_utils.graph_rewriter.bf16.bf16_convert import BF16Convert +import numpy as np import tensorflow as tf -from tensorflow.core.framework import attr_value_pb2 -from tensorflow.core.framework import graph_pb2 -from tensorflow.core.framework import node_def_pb2 -from tensorflow.python.framework import tensor_util -from tensorflow.python.framework import dtypes +from tensorflow.core.framework import attr_value_pb2, graph_pb2, node_def_pb2 +from tensorflow.python.framework import dtypes, tensor_util + +from neural_compressor.adaptor.tf_utils.graph_rewriter.bf16.bf16_convert import BF16Convert def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -36,13 +34,14 @@ def build_fake_yaml(): relative: 0.01 workspace: path: saved - ''' - with open('fake_yaml.yaml',"w",encoding="utf-8") as f: + """ + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) f.close() + def build_newapi_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -63,13 +62,14 @@ def build_newapi_fake_yaml(): relative: 0.01 workspace: path: saved - ''' - with open('newapi_fake_yaml.yaml',"w",encoding="utf-8") as f: + """ + with open("newapi_fake_yaml.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) f.close() + def build_fake_bf16_rnn_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -113,209 +113,246 @@ def build_fake_bf16_rnn_yaml(): relative: 0.05 exit_policy: performance_only: True - ''' - with open('fake_bf16_rnn.yaml',"w",encoding="utf-8") as f: + """ + with open("fake_bf16_rnn.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) f.close() + def create_test_graph(bf16_graph=True): input_node = node_def_pb2.NodeDef() input_node.name = "input" input_node.op = "Placeholder" - input_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + input_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv1_weight_node = node_def_pb2.NodeDef() conv1_weight_node.name = "conv1_weights" conv1_weight_node.op = "Const" - conv1_weight_value = np.float32(np.abs(np.random.randn(3,3,3,32))) - conv1_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - conv1_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - conv1_weight_value, conv1_weight_value.dtype.type, conv1_weight_value.shape))) + conv1_weight_value = np.float32(np.abs(np.random.randn(3, 3, 3, 32))) + conv1_weight_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv1_weight_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + 
conv1_weight_value, conv1_weight_value.dtype.type, conv1_weight_value.shape + ) + ) + ) conv1_node = node_def_pb2.NodeDef() conv1_node.name = "conv1" conv1_node.op = "Conv2D" - conv1_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + conv1_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv1_node.input.extend([input_node.name, conv1_weight_node.name]) - conv1_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv1_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv1_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) - conv1_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + conv1_node.attr["strides"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv1_node.attr["dilations"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv1_node.attr["padding"].CopyFrom(attr_value_pb2.AttrValue(s=b"SAME")) + conv1_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) bias_node = node_def_pb2.NodeDef() bias_node.name = "conv1_bias" bias_node.op = "Const" bias_value = np.float32(np.abs(np.random.randn(32))) - bias_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - bias_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - bias_value, bias_value.dtype.type, bias_value.shape))) + bias_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(bias_value, bias_value.dtype.type, bias_value.shape) + ) + ) bias_add_node = node_def_pb2.NodeDef() bias_add_node.name = "conv1_bias_add" bias_add_node.op = "BiasAdd" - bias_add_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_add_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) bias_add_node.input.extend([conv1_node.name, bias_node.name]) - bias_add_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + bias_add_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) if bf16_graph: cast_node = node_def_pb2.NodeDef() cast_node.op = "Cast" cast_node.name = "cast" - cast_node.attr['SrcT'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - cast_node.attr['DstT'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.bfloat16.as_datatype_enum)) + cast_node.attr["SrcT"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + cast_node.attr["DstT"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.bfloat16.as_datatype_enum)) cast_node.input.extend([bias_add_node.name]) relu_node = node_def_pb2.NodeDef() relu_node.op = "Relu" relu_node.name = "relu" - relu_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.bfloat16.as_datatype_enum if bf16_graph else dtypes.float32.as_datatype_enum)) + relu_node.attr["T"].CopyFrom( + attr_value_pb2.AttrValue( + type=dtypes.bfloat16.as_datatype_enum if bf16_graph else dtypes.float32.as_datatype_enum + ) + ) relu_node.input.extend([cast_node.name if bf16_graph else bias_add_node.name]) if bf16_graph: cast2_node = node_def_pb2.NodeDef() cast2_node.op = "Cast" cast2_node.name = "cast2" - 
cast2_node.attr['SrcT'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.bfloat16.as_datatype_enum)) - cast2_node.attr['DstT'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + cast2_node.attr["SrcT"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.bfloat16.as_datatype_enum)) + cast2_node.attr["DstT"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) cast2_node.input.extend([relu_node.name]) conv2_weight_node = node_def_pb2.NodeDef() conv2_weight_node.name = "conv2_weights" conv2_weight_node.op = "Const" - conv2_weight_value = np.float32(np.abs(np.random.randn(3,3,32,32))) - conv2_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - conv2_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - conv2_weight_value, conv2_weight_value.dtype.type, conv2_weight_value.shape))) + conv2_weight_value = np.float32(np.abs(np.random.randn(3, 3, 32, 32))) + conv2_weight_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv2_weight_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv2_weight_value, conv2_weight_value.dtype.type, conv2_weight_value.shape + ) + ) + ) conv2_node = node_def_pb2.NodeDef() conv2_node.name = "conv2" conv2_node.op = "Conv2D" - conv2_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + conv2_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv2_node.input.extend([cast2_node.name if bf16_graph else relu_node.name, conv2_weight_node.name]) - conv2_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv2_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv2_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) - conv2_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + conv2_node.attr["strides"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv2_node.attr["dilations"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv2_node.attr["padding"].CopyFrom(attr_value_pb2.AttrValue(s=b"SAME")) + conv2_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) bias_node2 = node_def_pb2.NodeDef() bias_node2.name = "conv2_bias" bias_node2.op = "Const" bias_value2 = np.float32(np.abs(np.random.randn(32))) - bias_node2.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - bias_node2.attr['value'].CopyFrom(attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - bias_value2, bias_value2.dtype.type, bias_value2.shape))) + bias_node2.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_node2.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(bias_value2, bias_value2.dtype.type, bias_value2.shape) + ) + ) bias_add_node2 = node_def_pb2.NodeDef() bias_add_node2.name = "conv2_bias_add" bias_add_node2.op = "BiasAdd" - bias_add_node2.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_add_node2.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) bias_add_node2.input.extend([conv2_node.name, bias_node2.name]) - 
bias_add_node2.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + bias_add_node2.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) relu_node2 = node_def_pb2.NodeDef() relu_node2.op = "Relu" relu_node2.name = "relu2" - relu_node2.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + relu_node2.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) relu_node2.input.extend([bias_add_node2.name]) log_node = node_def_pb2.NodeDef() log_node.name = "log1" log_node.op = "Log" - log_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + log_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) log_node.input.extend([relu_node2.name]) conv3_weight_node = node_def_pb2.NodeDef() conv3_weight_node.name = "conv3_weights" conv3_weight_node.op = "Const" - conv3_weight_value = np.float32(np.abs(np.random.randn(3,3,32,32))) - conv3_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - conv3_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - conv3_weight_value, conv3_weight_value.dtype.type, conv3_weight_value.shape))) + conv3_weight_value = np.float32(np.abs(np.random.randn(3, 3, 32, 32))) + conv3_weight_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv3_weight_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv3_weight_value, conv3_weight_value.dtype.type, conv3_weight_value.shape + ) + ) + ) conv3_node = node_def_pb2.NodeDef() conv3_node.name = "conv3" conv3_node.op = "Conv2D" - conv3_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + conv3_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv3_node.input.extend([log_node.name, conv3_weight_node.name]) - conv3_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv3_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv3_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) - conv3_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + conv3_node.attr["strides"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv3_node.attr["dilations"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv3_node.attr["padding"].CopyFrom(attr_value_pb2.AttrValue(s=b"SAME")) + conv3_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) identity_node = node_def_pb2.NodeDef() identity_node.name = "final" identity_node.op = "Identity" - identity_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + identity_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) identity_node.input.extend([conv3_node.name]) test_graph = graph_pb2.GraphDef() if bf16_graph: - test_graph.node.extend([input_node, - conv1_weight_node, - conv1_node, - bias_node, - bias_add_node, - cast_node, - relu_node, - cast2_node, - conv2_weight_node, - conv2_node, - bias_node2, - bias_add_node2, - log_node, - relu_node2, - conv3_weight_node, - conv3_node, - identity_node - ]) + test_graph.node.extend( + [ + input_node, + 
conv1_weight_node, + conv1_node, + bias_node, + bias_add_node, + cast_node, + relu_node, + cast2_node, + conv2_weight_node, + conv2_node, + bias_node2, + bias_add_node2, + log_node, + relu_node2, + conv3_weight_node, + conv3_node, + identity_node, + ] + ) else: - test_graph.node.extend([input_node, - conv1_weight_node, - conv1_node, - bias_node, - bias_add_node, - relu_node, - conv2_weight_node, - conv2_node, - bias_node2, - bias_add_node2, - log_node, - relu_node2, - conv3_weight_node, - conv3_node, - identity_node - ]) + test_graph.node.extend( + [ + input_node, + conv1_weight_node, + conv1_node, + bias_node, + bias_add_node, + relu_node, + conv2_weight_node, + conv2_node, + bias_node2, + bias_add_node2, + log_node, + relu_node2, + conv3_weight_node, + conv3_node, + identity_node, + ] + ) return test_graph + class TestBF16Convert(unittest.TestCase): - rn50_fp32_pb_url = 'https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/resnet50_fp32_pretrained_model.pb' - pb_path = '/tmp/.neural_compressor/resnet50_fp32_pretrained_model.pb' + rn50_fp32_pb_url = ( + "https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/resnet50_fp32_pretrained_model.pb" + ) + pb_path = "/tmp/.neural_compressor/resnet50_fp32_pretrained_model.pb" platform = platform.system().lower() if platform == "windows": - pb_path = 'C:\\tmp\.neural_compressor\\resnet50_fp32_pretrained_model.pb' + pb_path = "C:\\tmp\.neural_compressor\\resnet50_fp32_pretrained_model.pb" + @classmethod def setUpClass(self): if not os.path.exists(self.pb_path): if self.platform == "linux": - os.system('mkdir -p /tmp/.neural_compressor && wget {} -O {} '.format(self.rn50_fp32_pb_url, self.pb_path)) + os.system( + "mkdir -p /tmp/.neural_compressor && wget {} -O {} ".format(self.rn50_fp32_pb_url, self.pb_path) + ) elif self.platform == "windows": - os.system('md C:\\tmp\.neural_compressor && cd C:\\tmp\.neural_compressor') + os.system("md C:\\tmp\.neural_compressor && cd C:\\tmp\.neural_compressor") from urllib import request + request.urlretrieve(self.rn50_fp32_pb_url) self.input_graph = tf.compat.v1.GraphDef() @@ -329,38 +366,39 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') - os.remove('newapi_fake_yaml.yaml') - os.remove('fake_bf16_rnn.yaml') + os.remove("fake_yaml.yaml") + os.remove("newapi_fake_yaml.yaml") + os.remove("fake_bf16_rnn.yaml") shutil.rmtree("saved", ignore_errors=True) def test_bf16_transpose_b_matmul(self): from tensorflow.core.framework import attr_value_pb2 - os.environ['FORCE_BF16'] = '1' + + os.environ["FORCE_BF16"] = "1" DT_BFLOAT16 = attr_value_pb2.AttrValue(type=dtypes.bfloat16.as_datatype_enum) g = tf.Graph() with g.as_default(): - x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=float) - x = tf.compat.v1.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.compat.v1.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) - z = tf.matmul(x, y, name='no_quant_matmul', transpose_b=True) - z = tf.nn.relu6(z, name='op_to_store') + z = tf.matmul(x, y, name="no_quant_matmul", transpose_b=True) + z = tf.nn.relu6(z, name="op_to_store") is_bf16 = False with tf.compat.v1.Session() as sess: sess.run(z, feed_dict={x: x_data, y: y_data}) float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = 
Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == 'MatMul' and i.attr["T"] == DT_BFLOAT16: + if i.op == "MatMul" and i.attr["T"] == DT_BFLOAT16: is_bf16 = True break self.assertEqual(is_bf16, True) @@ -389,10 +427,11 @@ def test_do_transform(self): self.assertEqual(new_conv3.attr["T"].type, dtypes.float32) def test_bf16_fallback(self): - os.environ['FORCE_BF16'] = '1' + os.environ["FORCE_BF16"] = "1" from neural_compressor.experimental import Quantization, common - quantizer = Quantization('newapi_fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(1, 224, 224, 3), label=True) + + quantizer = Quantization("newapi_fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(1, 224, 224, 3), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = self.test_fp32_graph @@ -406,22 +445,19 @@ def test_bf16_fallback(self): # self.assertEqual(node.attr["T"].type, dtypes.bfloat16.as_datatype_enum) # self.assertTrue(cast_op_count == 0) - @unittest.skipIf(tf.version.VERSION.find('up') == -1, "Only supports tf 1.x") + @unittest.skipIf(tf.version.VERSION.find("up") == -1, "Only supports tf 1.x") def test_bf16_rnn(self): - os.environ['FORCE_BF16'] = '1' + os.environ["FORCE_BF16"] = "1" try: inp = tf.keras.layers.Input(shape=(None, 4)) - lstm_1 = tf.keras.layers.LSTM(units=10, - return_sequences=True)(inp) + lstm_1 = tf.keras.layers.LSTM(units=10, return_sequences=True)(inp) dropout_1 = tf.keras.layers.Dropout(0.2)(lstm_1) - lstm_2 = tf.keras.layers.LSTM(units=10, - return_sequences=False)(dropout_1) + lstm_2 = tf.keras.layers.LSTM(units=10, return_sequences=False)(dropout_1) dropout_2 = tf.keras.layers.Dropout(0.2)(lstm_2) out = tf.keras.layers.Dense(1)(dropout_2) model = tf.keras.models.Model(inputs=inp, outputs=out) - model.compile(loss="mse", - optimizer=tf.keras.optimizers.RMSprop()) + model.compile(loss="mse", optimizer=tf.keras.optimizers.RMSprop()) # input_names = [t.name.split(":")[0] for t in model.inputs] output_names = [t.name.split(":")[0] for t in model.outputs] @@ -435,6 +471,7 @@ def test_bf16_rnn(self): graph = sess.graph from tensorflow.compat.v1 import graph_util + graph_def = graph_util.convert_variables_to_constants( sess, graph.as_graph_def(), @@ -445,24 +482,22 @@ def test_bf16_rnn(self): from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_bf16_rnn.yaml') - quantizer.calib_dataloader = common.DataLoader( - dataset=list(zip(quant_data[0], quant_data[1]))) - quantizer.eval_dataloader = common.DataLoader( - dataset=list(zip(evl_data[0], evl_data[1]))) + quantizer = Quantization("fake_bf16_rnn.yaml") + quantizer.calib_dataloader = common.DataLoader(dataset=list(zip(quant_data[0], quant_data[1]))) + quantizer.eval_dataloader = common.DataLoader(dataset=list(zip(evl_data[0], evl_data[1]))) quantizer.model = graph_def quantized_model = quantizer.fit() convert_to_bf16_flag = False for i in quantized_model.graph_def.node: - if i.name == 'lstm/while/MatMul_3' and \ - i.attr['T'].type == dtypes.bfloat16.as_datatype_enum: + if i.name == "lstm/while/MatMul_3" and i.attr["T"].type == dtypes.bfloat16.as_datatype_enum: convert_to_bf16_flag = True 
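            # If FORCE_BF16 took effect, the tuned graph should contain the LSTM's inner matmul
            # ("lstm/while/MatMul_3") with a bfloat16 "T" attribute; the flag set above only records
            # whether such a node was found before the assertion below checks it.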
self.assertEqual(convert_to_bf16_flag, True) - except (NotImplementedError): + except NotImplementedError: # Kernel bug, happens when the version of python is 3.7 and the version of numpy is >= 1.20.0 pass + if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_smooth_quant_tf.py b/test/adaptor/tensorflow_adaptor/test_smooth_quant_tf.py index c179108ba8b..3b24e5804f8 100644 --- a/test/adaptor/tensorflow_adaptor/test_smooth_quant_tf.py +++ b/test/adaptor/tensorflow_adaptor/test_smooth_quant_tf.py @@ -1,12 +1,14 @@ import unittest -import tensorflow as tf + import numpy as np +import tensorflow as tf +from tensorflow.compat.v1 import graph_util + from neural_compressor.adaptor.tf_utils.util import disable_random +from neural_compressor.config import PostTrainingQuantConfig from neural_compressor.data.dataloaders.dataloader import DataLoader from neural_compressor.quantization import fit -from neural_compressor.config import PostTrainingQuantConfig from neural_compressor.utils.utility import set_random_seed -from tensorflow.compat.v1 import graph_util class TestSmoothQuantTF(unittest.TestCase): @@ -24,48 +26,53 @@ def test_conv_sq(self): top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) - conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(top_relu, conv_weights2, strides=[1, 2, 2, 1], padding="SAME") normed2 = tf.compat.v1.layers.batch_normalization(conv2) - add = tf.raw_ops.Add(x=normed, y=normed2, name='addv2') + add = tf.raw_ops.Add(x=normed, y=normed2, name="addv2") relu = tf.nn.relu(add) - relu6 = tf.nn.relu6(relu, name='op_to_store') + relu6 = tf.nn.relu6(relu, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) set_random_seed(9527) config = PostTrainingQuantConfig( quant_level=1, - recipes={"smooth_quant": True, "smooth_quant_args": {'alpha': 0.5}}, - calibration_sampling_size=[500]) + recipes={"smooth_quant": True, "smooth_quant_args": {"alpha": 0.5}}, + calibration_sampling_size=[500], + ) from neural_compressor.data import Datasets - dataset = Datasets('tensorflow')['dummy'](shape=(100, 56, 56, 16), label=True) - dataloader = DataLoader(framework='tensorflow', dataset=dataset, batch_size=1) + + dataset = Datasets("tensorflow")["dummy"](shape=(100, 56, 56, 16), label=True) + dataloader = DataLoader(framework="tensorflow", dataset=dataset, batch_size=1) from neural_compressor import Metric + top1 = Metric(name="topk", k=1) output_graph = fit( model=output_graph_def, conf=config, calib_dataloader=dataloader, 
eval_dataloader=dataloader, - eval_metric=top1) + eval_metric=top1, + ) mul_count = 0 for i in output_graph.graph_def.node: - if i.op == 'Mul': + if i.op == "Mul": mul_count += 1 self.assertEqual(mul_count, 2) @@ -75,12 +82,13 @@ def test_sq_matmul(self): x_data = np.random.rand(1024, 1024).astype(np.float32) y_data = np.random.rand(1024, 1024).astype(np.float32) import tensorflow.compat.v1 as tf - x = tf.placeholder(tf.float32, shape=[1024, 1024], name='x') + + x = tf.placeholder(tf.float32, shape=[1024, 1024], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[1024, 1024]) z = tf.matmul(x, y) bias = np.random.rand(1024).astype(np.float32) z = tf.nn.bias_add(z, bias) - z = tf.nn.relu(z, name='op_to_store') + z = tf.nn.relu(z, name="op_to_store") with tf.Session() as sess: sess.run(z, feed_dict={x: x_data, y: y_data}) @@ -89,24 +97,28 @@ def test_sq_matmul(self): set_random_seed(9527) config = PostTrainingQuantConfig( quant_level=1, - recipes={"smooth_quant": True, "smooth_quant_args": {'alpha': 0.5}}, - calibration_sampling_size=[1024]) + recipes={"smooth_quant": True, "smooth_quant_args": {"alpha": 0.5}}, + calibration_sampling_size=[1024], + ) from neural_compressor.data import Datasets - dataset = Datasets('tensorflow')['dummy'](shape=(1024, 1024), label=True) - dataloader = DataLoader(framework='tensorflow', dataset=dataset, batch_size=1024) + + dataset = Datasets("tensorflow")["dummy"](shape=(1024, 1024), label=True) + dataloader = DataLoader(framework="tensorflow", dataset=dataset, batch_size=1024) from neural_compressor import Metric + top1 = Metric(name="topk", k=1) output_graph = fit( model=output_graph_def, conf=config, calib_dataloader=dataloader, eval_dataloader=dataloader, - eval_metric=top1) + eval_metric=top1, + ) mul_count = 0 for i in output_graph.graph_def.node: - if i.op == 'Mul': + if i.op == "Mul": mul_count += 1 self.assertEqual(mul_count, 1) @@ -117,51 +129,58 @@ def test_sq_conv_matmul(self): top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv1_weights = tf.compat.v1.get_variable("weight_conv1", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv1_weights = tf.compat.v1.get_variable( + "weight_conv1", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv1 = tf.nn.conv2d(x_pad, conv1_weights, strides=[1, 2, 2, 1], padding="VALID") - matmul_weights = tf.compat.v1.get_variable("weight_matmul", [28*28*16, 7*7*32], - initializer=tf.compat.v1.random_normal_initializer()) - conv1_reshaped = tf.reshape(conv1, shape=[-1, 28*28*16]) + matmul_weights = tf.compat.v1.get_variable( + "weight_matmul", [28 * 28 * 16, 7 * 7 * 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv1_reshaped = tf.reshape(conv1, shape=[-1, 28 * 28 * 16]) matmul = tf.matmul(conv1_reshaped, matmul_weights) reshape = tf.reshape(matmul, (1, 7, 7, 32)) - conv2_weights = tf.compat.v1.get_variable("weight_conv2", [7, 7, 32, 1], - initializer=tf.compat.v1.random_normal_initializer()) + conv2_weights = tf.compat.v1.get_variable( + "weight_conv2", [7, 7, 32, 1], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(reshape, conv2_weights, strides=[1, 2, 2, 1], padding="VALID") - leaky_relu = tf.nn.leaky_relu(conv2, name='op_to_store') + leaky_relu = tf.nn.leaky_relu(conv2, name="op_to_store") - out_name = leaky_relu.name.split(':')[0] + out_name = leaky_relu.name.split(":")[0] with tf.compat.v1.Session() as sess: 
sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) set_random_seed(9527) config = PostTrainingQuantConfig( quant_level=1, - recipes={"smooth_quant": True, "smooth_quant_args": {'alpha': 0.6}}, - calibration_sampling_size=[500]) + recipes={"smooth_quant": True, "smooth_quant_args": {"alpha": 0.6}}, + calibration_sampling_size=[500], + ) from neural_compressor.data import Datasets - dataset = Datasets('tensorflow')['dummy'](shape=(100, 56, 56, 16), label=True) - dataloader = DataLoader(framework='tensorflow', dataset=dataset) + + dataset = Datasets("tensorflow")["dummy"](shape=(100, 56, 56, 16), label=True) + dataloader = DataLoader(framework="tensorflow", dataset=dataset) from neural_compressor import Metric + top1 = Metric(name="topk", k=1) output_graph = fit( model=output_graph_def, conf=config, calib_dataloader=dataloader, eval_dataloader=dataloader, - eval_metric=top1) + eval_metric=top1, + ) mul_count = 0 for i in output_graph.graph_def.node: - if i.op == 'Mul': + if i.op == "Mul": mul_count += 1 self.assertEqual(mul_count, 3) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorboard.py b/test/adaptor/tensorflow_adaptor/test_tensorboard.py index b8d3042a540..53ca82016c5 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorboard.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorboard.py @@ -1,21 +1,21 @@ -"""Tests for quantization""" -import numpy as np -import unittest +"""Tests for quantization.""" import os -import yaml import shutil +import unittest +import numpy as np import tensorflow as tf -from tensorflow.core.framework import attr_value_pb2 -from tensorflow.core.framework import graph_pb2 -from tensorflow.core.framework import node_def_pb2 -from tensorflow.python.framework import tensor_util -from tensorflow.python.framework import dtypes +import yaml +from tensorflow.core.framework import attr_value_pb2, graph_pb2, node_def_pb2 +from tensorflow.python.framework import dtypes, tensor_util + from neural_compressor.adaptor.tf_utils.util import version1_gt_version2 + tf.compat.v1.disable_eager_execution() + def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -36,155 +36,178 @@ def build_fake_yaml(): relative: 0.01 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: + yaml.dump(y, f) f.close() + def build_fake_model(): input_node = node_def_pb2.NodeDef() input_node.name = "input" input_node.op = "Placeholder" - input_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + input_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv1_weight_node = node_def_pb2.NodeDef() conv1_weight_node.name = "conv1_weights" conv1_weight_node.op = "Const" - conv1_weight_value = np.float32(np.abs(np.random.randn(3,3,3,32))) - conv1_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - conv1_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - conv1_weight_value, conv1_weight_value.dtype.type, 
conv1_weight_value.shape))) + conv1_weight_value = np.float32(np.abs(np.random.randn(3, 3, 3, 32))) + conv1_weight_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv1_weight_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv1_weight_value, conv1_weight_value.dtype.type, conv1_weight_value.shape + ) + ) + ) conv1_node = node_def_pb2.NodeDef() conv1_node.name = "conv1" conv1_node.op = "Conv2D" - conv1_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + conv1_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv1_node.input.extend([input_node.name, conv1_weight_node.name]) - conv1_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv1_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv1_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) - conv1_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + conv1_node.attr["strides"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv1_node.attr["dilations"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv1_node.attr["padding"].CopyFrom(attr_value_pb2.AttrValue(s=b"SAME")) + conv1_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) bias_node = node_def_pb2.NodeDef() bias_node.name = "conv1_bias" bias_node.op = "Const" bias_value = np.float32(np.abs(np.random.randn(32))) - bias_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - bias_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - bias_value, bias_value.dtype.type, bias_value.shape))) + bias_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(bias_value, bias_value.dtype.type, bias_value.shape) + ) + ) bias_add_node = node_def_pb2.NodeDef() bias_add_node.name = "conv1_bias_add" bias_add_node.op = "BiasAdd" - bias_add_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_add_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) bias_add_node.input.extend([conv1_node.name, bias_node.name]) - bias_add_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + bias_add_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) relu_node = node_def_pb2.NodeDef() relu_node.op = "Relu" relu_node.name = "relu" - relu_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + relu_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) relu_node.input.extend([bias_add_node.name]) conv2_weight_node = node_def_pb2.NodeDef() conv2_weight_node.name = "conv2_weights" conv2_weight_node.op = "Const" - conv2_weight_value = np.float32(np.abs(np.random.randn(3,3,32,32))) - conv2_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - conv2_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - conv2_weight_value, conv2_weight_value.dtype.type, conv2_weight_value.shape))) + 
conv2_weight_value = np.float32(np.abs(np.random.randn(3, 3, 32, 32))) + conv2_weight_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv2_weight_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv2_weight_value, conv2_weight_value.dtype.type, conv2_weight_value.shape + ) + ) + ) conv2_node = node_def_pb2.NodeDef() conv2_node.name = "conv2" conv2_node.op = "Conv2D" - conv2_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + conv2_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv2_node.input.extend([relu_node.name, conv2_weight_node.name]) - conv2_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv2_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv2_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) - conv2_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + conv2_node.attr["strides"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv2_node.attr["dilations"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv2_node.attr["padding"].CopyFrom(attr_value_pb2.AttrValue(s=b"SAME")) + conv2_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) bias_node2 = node_def_pb2.NodeDef() bias_node2.name = "conv2_bias" bias_node2.op = "Const" bias_value2 = np.float32(np.abs(np.random.randn(32))) - bias_node2.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - bias_node2.attr['value'].CopyFrom(attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - bias_value2, bias_value2.dtype.type, bias_value2.shape))) + bias_node2.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_node2.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(bias_value2, bias_value2.dtype.type, bias_value2.shape) + ) + ) bias_add_node2 = node_def_pb2.NodeDef() bias_add_node2.name = "conv2_bias_add" bias_add_node2.op = "BiasAdd" - bias_add_node2.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_add_node2.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) bias_add_node2.input.extend([conv2_node.name, bias_node2.name]) - bias_add_node2.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + bias_add_node2.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) relu_node2 = node_def_pb2.NodeDef() relu_node2.op = "Relu" relu_node2.name = "relu2" - relu_node2.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + relu_node2.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) relu_node2.input.extend([bias_add_node2.name]) conv3_weight_node = node_def_pb2.NodeDef() conv3_weight_node.name = "conv3_weights" conv3_weight_node.op = "Const" - conv3_weight_value = np.float32(np.abs(np.random.randn(3,3,32,32))) - conv3_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - conv3_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - conv3_weight_value, conv3_weight_value.dtype.type, conv3_weight_value.shape))) + 
conv3_weight_value = np.float32(np.abs(np.random.randn(3, 3, 32, 32))) + conv3_weight_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv3_weight_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv3_weight_value, conv3_weight_value.dtype.type, conv3_weight_value.shape + ) + ) + ) conv3_node = node_def_pb2.NodeDef() conv3_node.name = "conv3" conv3_node.op = "Conv2D" - conv3_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + conv3_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv3_node.input.extend([relu_node2.name, conv3_weight_node.name]) - conv3_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv3_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv3_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) - conv3_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + conv3_node.attr["strides"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv3_node.attr["dilations"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv3_node.attr["padding"].CopyFrom(attr_value_pb2.AttrValue(s=b"SAME")) + conv3_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) identity_node = node_def_pb2.NodeDef() identity_node.name = "final" identity_node.op = "Identity" - identity_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + identity_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) identity_node.input.extend([conv3_node.name]) graph = graph_pb2.GraphDef() - graph.node.extend([input_node, - conv1_weight_node, - conv1_node, - bias_node, - bias_add_node, - relu_node, - conv2_weight_node, - conv2_node, - bias_node2, - bias_add_node2, - relu_node2, - conv3_weight_node, - conv3_node, - identity_node - ]) + graph.node.extend( + [ + input_node, + conv1_weight_node, + conv1_node, + bias_node, + bias_add_node, + relu_node, + conv2_weight_node, + conv2_node, + bias_node2, + bias_add_node2, + relu_node2, + conv3_weight_node, + conv3_node, + identity_node, + ] + ) return graph -class TestTensorboard(unittest.TestCase): +class TestTensorboard(unittest.TestCase): @classmethod def setUpClass(self): self.constant_graph = build_fake_model() @@ -192,18 +215,19 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") shutil.rmtree("saved", ignore_errors=True) shutil.rmtree("runs/", ignore_errors=True) - - @unittest.skipIf(version1_gt_version2(tf.version.VERSION, '2.5.0'), \ - "Skip test_bf16_fallback case for tf 2.6.0 and above.") + + @unittest.skipIf( + version1_gt_version2(tf.version.VERSION, "2.5.0"), "Skip test_bf16_fallback case for tf 2.6.0 and above." 
+ ) def test_run_basic_one_trial(self): from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', (1, 224, 224, 3), label=True) + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", (1, 224, 224, 3), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph @@ -211,5 +235,6 @@ def test_run_basic_one_trial(self): self.assertTrue(True if len(os.listdir("./runs/eval")) > 2 else False) + if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_bias_correction.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_bias_correction.py index a037b8c0be9..d62a32e318c 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_bias_correction.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_bias_correction.py @@ -1,50 +1,53 @@ import os import unittest + +import tensorflow as tf +from tensorflow.compat.v1 import graph_util + import neural_compressor -from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper +from neural_compressor.adaptor.tensorflow import TensorflowQuery from neural_compressor.adaptor.tf_utils.quantize_graph.quantize_graph_for_intel_cpu import QuantizeGraphForIntel +from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper from neural_compressor.adaptor.tf_utils.transform_graph.bias_correction import BiasCorrection -from neural_compressor.adaptor.tensorflow import TensorflowQuery -import tensorflow as tf -from tensorflow.compat.v1 import graph_util class TestBiasCorrection(unittest.TestCase): def test_bias_correction(self): tf.compat.v1.disable_eager_execution() x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - if tf.version.VERSION <= '2.1.0': + if tf.version.VERSION <= "2.1.0": x = tf.nn.relu(x) - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [32], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [32], initializer=tf.compat.v1.random_normal_initializer()) conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME") conv_bias = tf.nn.bias_add(conv1, conv_bias) - relu = tf.nn.relu(conv_bias, name='Relu_1') - op_wise_sequences = TensorflowQuery(local_config_file=os.path.join( - os.path.dirname(neural_compressor.__file__), "adaptor/tensorflow.yaml")).get_eightbit_patterns() + relu = tf.nn.relu(conv_bias, name="Relu_1") + op_wise_sequences = TensorflowQuery( + local_config_file=os.path.join(os.path.dirname(neural_compressor.__file__), "adaptor/tensorflow.yaml") + ).get_eightbit_patterns() with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[relu.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[relu.name.split(":")[0]] + ) output_graph_def = QuantizeGraphHelper.remove_training_nodes( - output_graph_def, protected_nodes=[relu.name.split(':')[0]]) - inputs = [x.name.split(':')[0]] - outputs = 
[relu.name.split(':')[0]] + output_graph_def, protected_nodes=[relu.name.split(":")[0]] + ) + inputs = [x.name.split(":")[0]] + outputs = [relu.name.split(":")[0]] op_wise_config = { - "Conv2D": (False, 'minmax', False, 7.0), + "Conv2D": (False, "minmax", False, 7.0), } - int8_graph_def, _, _ = QuantizeGraphForIntel(output_graph_def, inputs, outputs, - op_wise_config, op_wise_sequences, - 'cpu').do_transform() - correct_graph_def = BiasCorrection( - int8_graph_def, output_graph_def).do_transformation() - self.assertEqual(len(correct_graph_def.node), len(int8_graph_def.node)) - + int8_graph_def, _, _ = QuantizeGraphForIntel( + output_graph_def, inputs, outputs, op_wise_config, op_wise_sequences, "cpu" + ).do_transform() + correct_graph_def = BiasCorrection(int8_graph_def, output_graph_def).do_transformation() + self.assertEqual(len(correct_graph_def.node), len(int8_graph_def.node)) + + if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_calculate_op_sensitivity.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_calculate_op_sensitivity.py index 5a9c5af6c0e..076fb27c666 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_calculate_op_sensitivity.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_calculate_op_sensitivity.py @@ -1,11 +1,13 @@ import os import shutil import unittest -import tensorflow as tf + import numpy as np +import tensorflow as tf + def build_msev2_yaml(): - mse_yaml = ''' + mse_yaml = """ model: name: fake_yaml framework: tensorflow @@ -24,79 +26,87 @@ def build_msev2_yaml(): exit_policy: max_trials: 10 timeout: 3600 - ''' - with open('mse_yaml.yaml', 'w', encoding="utf-8") as f: + """ + with open("mse_yaml.yaml", "w", encoding="utf-8") as f: f.write(mse_yaml) + def build_fake_model(): try: graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') - y = tf.constant(np.random.random((2,2,1,1)).astype(np.float32), name='y') - z = tf.constant(np.random.random((1,1,1,1)).astype(np.float32), name='z') - op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') - op2 = tf.nn.conv2d(input=op, filters=z, strides=[1,1,1,1], padding='VALID', ) - last_identity = tf.identity(op2, name='op2_to_store') + x = tf.compat.v1.placeholder(tf.float32, shape=(1, 3, 3, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)).astype(np.float32), name="y") + z = tf.constant(np.random.random((1, 1, 1, 1)).astype(np.float32), name="z") + op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") + op2 = tf.nn.conv2d( + input=op, + filters=z, + strides=[1, 1, 1, 1], + padding="VALID", + ) + last_identity = tf.identity(op2, name="op2_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op2_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op2_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") except: graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') - y = tf.constant(np.random.random((2,2,1,1)).astype(np.float32), name='y') - z = 
tf.constant(np.random.random((1,1,1,1)).astype(np.float32), name='z') - op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') - op2 = tf.nn.conv2d(input=op, filters=z, strides=[1,1,1,1], padding='VALID') - last_identity = tf.identity(op2, name='op2_to_store') + x = tf.compat.v1.placeholder(tf.float32, shape=(1, 3, 3, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)).astype(np.float32), name="y") + z = tf.constant(np.random.random((1, 1, 1, 1)).astype(np.float32), name="z") + op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") + op2 = tf.nn.conv2d(input=op, filters=z, strides=[1, 1, 1, 1], padding="VALID") + last_identity = tf.identity(op2, name="op2_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op2_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op2_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") return graph + class TestGetOutputTensor(unittest.TestCase): @classmethod def setUpClass(self): build_msev2_yaml() self.model = build_fake_model() - + @classmethod def tearDownClass(self): - os.remove('mse_yaml.yaml') - shutil.rmtree('./saved', ignore_errors=True) - shutil.rmtree('runs', ignore_errors=True) - + os.remove("mse_yaml.yaml") + shutil.rmtree("./saved", ignore_errors=True) + shutil.rmtree("runs", ignore_errors=True) + def test_get_output_op_names(self): from neural_compressor.experimental import Quantization, common - quantizer = Quantization('mse_yaml.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + quantizer = Quantization("mse_yaml.yaml") + dataset = quantizer.dataset("dummy", (100, 3, 3, 1), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = self.model qmodel = quantizer.fit() - - self.assertEqual( - quantizer.strategy.adaptor.get_output_op_names(qmodel), - ["Conv2D_dummy_biasadd"]) + self.assertEqual(quantizer.strategy.adaptor.get_output_op_names(qmodel), ["Conv2D_dummy_biasadd"]) def test_calculate_op_sensitivity(self): from neural_compressor.experimental import Quantization, common - + quantizer = Quantization("mse_yaml.yaml") quantizer.model = self.model - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + dataset = quantizer.dataset("dummy", (100, 3, 3, 1), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.pre_process() @@ -114,23 +124,27 @@ def test_calculate_op_sensitivity(self): tune_cfg=tune_cfg, output_op_names=output_op_names, confidence_batches=1, - fallback=True) - self.assertIn(('op_to_store', 'conv2d'), op_sensitivity) - self.assertIn(('Conv2D', 'conv2d'), op_sensitivity) - - tune_cfg['op'][('op_to_store', 'conv2d')] = { - 'activation': {'dtype': 'fp32', 'quant_mode': 'fp32'}, - 'weight': {'dtype': 'fp32'}} - + fallback=True, + ) + self.assertIn(("op_to_store", "conv2d"), op_sensitivity) + self.assertIn(("Conv2D", "conv2d"), op_sensitivity) + + tune_cfg["op"][("op_to_store", "conv2d")] = { + "activation": {"dtype": "fp32", "quant_mode": "fp32"}, + "weight": {"dtype": "fp32"}, + } + op_sensitivity = adaptor.calculate_op_sensitivity( 
model=quantizer.model, dataloader=dataloader, tune_cfg=tune_cfg, output_op_names=output_op_names, confidence_batches=1, - fallback=True) - self.assertNotIn(('op_to_store', 'conv2d'), op_sensitivity) - self.assertIn(('Conv2D', 'conv2d'), op_sensitivity) + fallback=True, + ) + self.assertNotIn(("op_to_store", "conv2d"), op_sensitivity) + self.assertIn(("Conv2D", "conv2d"), op_sensitivity) + if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_convert_layout.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_convert_layout.py index 4ebd856a358..c3ac386d5af 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_convert_layout.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_convert_layout.py @@ -1,35 +1,38 @@ import unittest -from neural_compressor.adaptor.tf_utils.graph_rewriter.generic import convert_layout + import tensorflow as tf from tensorflow.compat.v1 import graph_util + +from neural_compressor.adaptor.tf_utils.graph_rewriter.generic import convert_layout from neural_compressor.adaptor.tf_utils.util import version1_gte_version2 + class TestConvertLayout(unittest.TestCase): def test_convert_layout(self): tf.compat.v1.disable_eager_execution() - with tf.device('/CPU:0'): + with tf.device("/CPU:0"): x = tf.compat.v1.placeholder(tf.float32, [1, 10, 10, 3], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 10, 3], - initializer=tf.compat.v1.random_normal_initializer()) - conv = tf.nn.conv2d(x, conv_weights, strides=[1, 2, 2, 1], padding="VALID", - data_format='NCHW') - relu = tf.nn.relu(conv, name='relu') - out_name = relu.name.split(':')[0] + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 10, 3], initializer=tf.compat.v1.random_normal_initializer() + ) + conv = tf.nn.conv2d(x, conv_weights, strides=[1, 2, 2, 1], padding="VALID", data_format="NCHW") + relu = tf.nn.relu(conv, name="relu") + out_name = relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) convert = convert_layout.ConvertLayoutOptimizer(output_graph_def, [out_name]) convert_graph = convert.do_transformation() for node in convert_graph.node: - if node.op == 'Conv2D' and 'data_format' in node.attr: - if version1_gte_version2(tf.version.VERSION, '2.4.0'): - self.assertEqual(node.attr['data_format'].s, b'NHWC') + if node.op == "Conv2D" and "data_format" in node.attr: + if version1_gte_version2(tf.version.VERSION, "2.4.0"): + self.assertEqual(node.attr["data_format"].s, b"NHWC") else: - self.assertEqual(node.attr['data_format'].s, b'NCHW') + self.assertEqual(node.attr["data_format"].s, b"NCHW") + if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_data_pipline.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_data_pipline.py index e0e8216d043..4597dc9f220 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_data_pipline.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_data_pipline.py @@ -2,56 +2,53 @@ # -*- coding: utf-8 -*- # import unittest + import numpy as np -from neural_compressor.objective import Performance +import tensorflow as tf +from tensorflow.compat.v1 import graph_util + from 
neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper from neural_compressor.adaptor.tf_utils.util import get_tensor_by_name, iterator_sess_run +from neural_compressor.objective import Performance -import tensorflow as tf -from tensorflow.compat.v1 import graph_util -class TestDataPipelineConvert(unittest.TestCase): +class TestDataPipelineConvert(unittest.TestCase): def test_data_pipeline(self): tf.compat.v1.disable_eager_execution() - raw_dataset = np.ones([100,224, 224, 3], dtype=np.float32) + raw_dataset = np.ones([100, 224, 224, 3], dtype=np.float32) tf_dataset = tf.compat.v1.data.Dataset.from_tensor_slices(raw_dataset) tf_dataset = tf_dataset.batch(1) ds_iterator = tf_dataset.make_initializable_iterator() iter_tensors = ds_iterator.get_next() - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [32], - initializer=tf.compat.v1.random_normal_initializer()) - conv1 = tf.nn.conv2d(iter_tensors, conv_weights, - strides=[1, 1, 1, 1], padding="SAME") + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [32], initializer=tf.compat.v1.random_normal_initializer()) + conv1 = tf.nn.conv2d(iter_tensors, conv_weights, strides=[1, 1, 1, 1], padding="SAME") conv_bias = tf.math.add(conv1, conv_bias) - relu = tf.nn.relu(conv_bias, name='Relu_1') + relu = tf.nn.relu(conv_bias, name="Relu_1") - output_names=[relu.name.split(':')[0], 'MakeIterator'] + output_names = [relu.name.split(":")[0], "MakeIterator"] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=output_names) - output_graph_def = QuantizeGraphHelper.remove_training_nodes( - output_graph_def, protected_nodes=output_names) + sess=sess, input_graph_def=sess.graph_def, output_node_names=output_names + ) + output_graph_def = QuantizeGraphHelper.remove_training_nodes(output_graph_def, protected_nodes=output_names) graph = tf.Graph() with graph.as_default(): - tf.import_graph_def(output_graph_def, name='') - print('graph has been generated....') - - iter_op = graph.get_operation_by_name('MakeIterator') - output_tensor = get_tensor_by_name(graph, output_names[0]) + tf.import_graph_def(output_graph_def, name="") + print("graph has been generated....") + + iter_op = graph.get_operation_by_name("MakeIterator") + output_tensor = get_tensor_by_name(graph, output_names[0]) sess = tf.compat.v1.Session(graph=graph) - iterator_sess_run(sess, iter_op, \ - feed_dict={}, output_tensor=output_tensor) + iterator_sess_run(sess, iter_op, feed_dict={}, output_tensor=output_tensor) measurer = Performance() - iterator_sess_run(sess, iter_op, feed_dict={}, \ - output_tensor=output_tensor, measurer=measurer) + iterator_sess_run(sess, iter_op, feed_dict={}, output_tensor=output_tensor, measurer=measurer) if __name__ == "__main__": diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_fold_batch_norm.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_fold_batch_norm.py index 80e73958816..d857c6a4ad8 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_fold_batch_norm.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_fold_batch_norm.py @@ -1,35 +1,34 @@ -import unittest -import numpy as np import copy -from 
neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper -from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.fold_batch_norm import FoldBatchNormNodesOptimizer +import unittest +import numpy as np import tensorflow as tf from tensorflow.compat.v1 import graph_util + +from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.fold_batch_norm import FoldBatchNormNodesOptimizer +from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper + + class TestFoldBatchnorm(unittest.TestCase): tf.compat.v1.disable_eager_execution() x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [32], - initializer=tf.compat.v1.random_normal_initializer()) - beta = tf.compat.v1.get_variable(name='beta', - shape=[32], - initializer=tf.compat.v1.random_normal_initializer()) - gamma = tf.compat.v1.get_variable(name='gamma', - shape=[32], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [32], initializer=tf.compat.v1.random_normal_initializer()) + beta = tf.compat.v1.get_variable(name="beta", shape=[32], initializer=tf.compat.v1.random_normal_initializer()) + gamma = tf.compat.v1.get_variable(name="gamma", shape=[32], initializer=tf.compat.v1.random_normal_initializer()) conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME") conv_bias = tf.nn.bias_add(conv1, conv_bias) normed = tf.compat.v1.layers.batch_normalization(conv_bias) with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[normed.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[normed.name.split(":")[0]] + ) output_graph_def = QuantizeGraphHelper.remove_training_nodes( - output_graph_def, protected_nodes=[normed.name.split(':')[0]]) + output_graph_def, protected_nodes=[normed.name.split(":")[0]] + ) graph_def = copy.deepcopy(output_graph_def) fold_graph_def = FoldBatchNormNodesOptimizer(output_graph_def).do_transformation() @@ -38,20 +37,20 @@ def test_fold_output_values(self): graph = tf.compat.v1.Graph() fold_graph = tf.compat.v1.Graph() with graph.as_default(): - tf.compat.v1.import_graph_def(self.graph_def, name='') + tf.compat.v1.import_graph_def(self.graph_def, name="") with tf.compat.v1.Session(graph=graph) as sess: sess.run(tf.compat.v1.global_variables_initializer()) - x = graph.get_tensor_by_name('input:0') - normed = graph.get_tensor_by_name('batch_normalization/FusedBatchNormV3:0') + x = graph.get_tensor_by_name("input:0") + normed = graph.get_tensor_by_name("batch_normalization/FusedBatchNormV3:0") y = sess.run(normed, feed_dict={x: input_data}) with fold_graph.as_default(): - tf.compat.v1.import_graph_def(self.fold_graph_def, name='') + tf.compat.v1.import_graph_def(self.fold_graph_def, name="") with tf.compat.v1.Session(graph=fold_graph) as sess: sess.run(tf.compat.v1.global_variables_initializer()) - x = fold_graph.get_tensor_by_name('input:0') - normed = fold_graph.get_tensor_by_name('batch_normalization/FusedBatchNormV3:0') + x = fold_graph.get_tensor_by_name("input:0") + normed = 
fold_graph.get_tensor_by_name("batch_normalization/FusedBatchNormV3:0") y_fold = sess.run(normed, feed_dict={x: input_data}) assert np.allclose(y, y_fold, rtol=1e-05, atol=1e-05) diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_fold_const.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_fold_const.py index b98be56670b..3d8bea52e49 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_fold_const.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_fold_const.py @@ -1,13 +1,11 @@ import unittest -import numpy as np -from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.fold_constant import GraphFoldConstantOptimizer +import numpy as np import tensorflow as tf -from tensorflow.core.framework import attr_value_pb2 -from tensorflow.core.framework import graph_pb2 -from tensorflow.core.framework import node_def_pb2 +from tensorflow.core.framework import attr_value_pb2, graph_pb2, node_def_pb2 from tensorflow.python.framework import tensor_util +from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.fold_constant import GraphFoldConstantOptimizer class TestFoldConstant(unittest.TestCase): @@ -20,16 +18,20 @@ class TestFoldConstant(unittest.TestCase): input0_node.op = "Const" input0_value = np.float32(np.abs(np.random.randn(4, 3, 2))) input0_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - input0_value, input0_value.dtype.type, input0_value.shape))) + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(input0_value, input0_value.dtype.type, input0_value.shape) + ) + ) input1_node = node_def_pb2.NodeDef() input1_node.name = "input1" input1_node.op = "Const" input1_value = np.float32(np.abs(np.random.randn(4, 1, 1))) input1_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - input1_value, input1_value.dtype.type, input1_value.shape))) + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(input1_value, input1_value.dtype.type, input1_value.shape) + ) + ) add_node = node_def_pb2.NodeDef() add_node.op = "Add" @@ -41,16 +43,20 @@ class TestFoldConstant(unittest.TestCase): input2_node.op = "Const" input2_value = np.float32(np.abs(np.random.randn(1))) input2_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - input2_value, input2_value.dtype.type, input2_value.shape))) + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(input2_value, input2_value.dtype.type, input2_value.shape) + ) + ) input3_node = node_def_pb2.NodeDef() input3_node.name = "input3" input3_node.op = "Const" input3_value = np.float32(np.abs(np.random.randn(1))) input3_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - input3_value, input3_value.dtype.type, input3_value.shape))) + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(input3_value, input3_value.dtype.type, input3_value.shape) + ) + ) switch_node = node_def_pb2.NodeDef() switch_node.name = "switch" @@ -61,8 +67,10 @@ class TestFoldConstant(unittest.TestCase): input4_node.op = "Const" input4_value = np.float32(np.abs(np.random.randn(1))) input4_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - input4_value, input4_value.dtype.type, input4_value.shape))) + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(input4_value, input4_value.dtype.type, input4_value.shape) + ) + ) input4_node.input.extend([switch_node.name]) input5_node = 
node_def_pb2.NodeDef() @@ -70,8 +78,10 @@ class TestFoldConstant(unittest.TestCase): input5_node.op = "Const" input5_value = np.float32(np.abs(np.random.randn(1))) input5_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - input5_value, input5_value.dtype.type, input5_value.shape))) + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(input5_value, input5_value.dtype.type, input5_value.shape) + ) + ) input5_node.input.extend([switch_node.name]) cond_end = node_def_pb2.NodeDef() @@ -110,26 +120,34 @@ class TestFoldConstant(unittest.TestCase): end_node.input.extend([block_node.name, res_node.name]) graph_def = graph_pb2.GraphDef() - graph_def.node.extend([ - x_node, input0_node, input1_node, input2_node, input3_node, add_node, mul_node, sqrt_node, - relu_node, block_node, res_node, end_node - ]) + graph_def.node.extend( + [ + x_node, + input0_node, + input1_node, + input2_node, + input3_node, + add_node, + mul_node, + sqrt_node, + relu_node, + block_node, + res_node, + end_node, + ] + ) def test_fold_constant(self): - graph = self.graph_def rewriter = GraphFoldConstantOptimizer(graph) new_graph = rewriter.do_transformation() for node in new_graph.node: - assert node.name in [ - "placeholder", "block_output", "rsqrt_const", "relu", "res_add_const", "end" - ] + assert node.name in ["placeholder", "block_output", "rsqrt_const", "relu", "res_add_const", "end"] def test_condition_fold_constant(self): graph_def = graph_pb2.GraphDef() - graph_def.node.extend([self.cond_end, self.input4_node, - self.input5_node, self.switch_node]) + graph_def.node.extend([self.cond_end, self.input4_node, self.input5_node, self.switch_node]) rewriter = GraphFoldConstantOptimizer(graph_def) new_graph = rewriter.do_transformation() for node in new_graph.node: @@ -142,16 +160,20 @@ def test_slice_int_input(self): index0_node.op = "Const" index0_value = np.array(3).astype(np.int32).reshape(()) index0_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - index0_value, index0_value.dtype.type, index0_value.shape))) + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(index0_value, index0_value.dtype.type, index0_value.shape) + ) + ) index1_node = node_def_pb2.NodeDef() index1_node.name = "index1" index1_node.op = "Const" index1_value = np.array(1).astype(np.int32).reshape(()) index1_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - index1_value, index1_value.dtype.type, index1_value.shape))) + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(index1_value, index1_value.dtype.type, index1_value.shape) + ) + ) minus_node = node_def_pb2.NodeDef() minus_node.name = "sub" @@ -164,5 +186,6 @@ def test_slice_int_input(self): with tf.compat.v1.Session() as sess: tf.compat.v1.import_graph_def(new_graph) + if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_get_estimator_graph.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_get_estimator_graph.py index b812904e0f0..3e2cd0afb4e 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_get_estimator_graph.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_get_estimator_graph.py @@ -1,46 +1,42 @@ # # -*- coding: utf-8 -*- # -import unittest import os import platform -from neural_compressor.adaptor.tf_utils.util import get_estimator_graph +import unittest import tensorflow as tf -class TestEstimatorGraphConvert(unittest.TestCase): +from 
neural_compressor.adaptor.tf_utils.util import get_estimator_graph + + +class TestEstimatorGraphConvert(unittest.TestCase): @classmethod def setUpClass(self): - self.dst_path = '/tmp/.neural_compressor/train.csv' - self.titanic_file = tf.keras.utils.get_file(self.dst_path, \ - "https://storage.googleapis.com/tf-datasets/titanic/train.csv") + self.dst_path = "/tmp/.neural_compressor/train.csv" + self.titanic_file = tf.keras.utils.get_file( + self.dst_path, "https://storage.googleapis.com/tf-datasets/titanic/train.csv" + ) def test_get_estimator_graph(self): def train_input_fn(): - titanic = tf.data.experimental.make_csv_dataset( - self.titanic_file, batch_size=32, - label_name="survived") - titanic_batches = ( - titanic.cache().repeat().shuffle(500) - .prefetch(tf.data.experimental.AUTOTUNE)) - return titanic_batches - age = tf.feature_column.numeric_column('age') - cls = tf.feature_column.categorical_column_with_vocabulary_list('class', \ - ['First', 'Second', 'Third']) - embark = tf.feature_column.categorical_column_with_hash_bucket('embark_town', 32) + titanic = tf.data.experimental.make_csv_dataset(self.titanic_file, batch_size=32, label_name="survived") + titanic_batches = titanic.cache().repeat().shuffle(500).prefetch(tf.data.experimental.AUTOTUNE) + return titanic_batches + + age = tf.feature_column.numeric_column("age") + cls = tf.feature_column.categorical_column_with_vocabulary_list("class", ["First", "Second", "Third"]) + embark = tf.feature_column.categorical_column_with_hash_bucket("embark_town", 32) import tempfile + model_dir = tempfile.mkdtemp() - model = tf.estimator.LinearClassifier( - model_dir=model_dir, - feature_columns=[embark, cls, age], - n_classes=2 - ) + model = tf.estimator.LinearClassifier(model_dir=model_dir, feature_columns=[embark, cls, age], n_classes=2) model = model.train(input_fn=train_input_fn, steps=100) result = model.evaluate(train_input_fn, steps=10) graph = get_estimator_graph(model, train_input_fn) - self.assertTrue(isinstance(graph, tf.Graph)) + self.assertTrue(isinstance(graph, tf.Graph)) graph_def = graph.as_graph_def() self.assertGreater(len(graph_def.node), 1) diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_gpu.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_gpu.py index 289295fd57b..eaf85b49d20 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_gpu.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_gpu.py @@ -1,14 +1,16 @@ # # -*- coding: utf-8 -*- # -import platform -import unittest +import logging import os +import platform import sys +import unittest +from importlib.abc import MetaPathFinder + import cpuinfo -import logging import tensorflow as tf -from importlib.abc import MetaPathFinder + class ForbiddenModules(MetaPathFinder): def __init__(self, modules): @@ -19,12 +21,16 @@ def find_spec(self, fullname, path, target=None): if fullname in self.modules: raise ImportError(fullname) + class TestTensorflowGpu(unittest.TestCase): - mb_model_url = 'https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/mobilenet_v1_1.0_224_frozen.pb' - pb_path = '/tmp/.neural_compressor/mobilenet_fp32.pb' + mb_model_url = ( + "https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/mobilenet_v1_1.0_224_frozen.pb" + ) + pb_path = "/tmp/.neural_compressor/mobilenet_fp32.pb" platforms = platform.system().lower() if platforms == "windows": - pb_path = 'C:\\tmp\\.neural_compressor\\mobilenet_fp32.pb' + pb_path = "C:\\tmp\\.neural_compressor\\mobilenet_fp32.pb" + @classmethod def setUpClass(cls): 
sys.meta_path.insert(0, ForbiddenModules({"intel_extension_for_pytorch"})) @@ -32,8 +38,9 @@ def setUpClass(cls): if cls.platforms == "linux": os.system("mkdir -p /tmp/.neural_compressor && wget {} -O {} ".format(cls.mb_model_url, cls.pb_path)) elif cls.platforms == "windows": - os.system('md C:\\tmp\.neural_compressor && cd C:\\tmp\.neural_compressor') + os.system("md C:\\tmp\.neural_compressor && cd C:\\tmp\.neural_compressor") from urllib import request + request.urlretrieve(cls.mb_model_url) cls.log_env = os.environ.get("LOGLEVEL") cls.logger_root = logging.getLogger() @@ -43,35 +50,48 @@ def setUpClass(cls): cls.logger_root_level = cls.logger_root.level cls.logger_nc.warning(f"CPU: {cpuinfo.get_cpu_info()['brand_raw']}") cls.logger_nc.warning(f"Environment variable: LOGLEVEL = {cls.log_env}") - cls.logger_nc.warning(f"Before importing neural_compressor: {sys.modules[__name__].__file__}-{cls.__name__}, " \ - f"Root_Logger_Level = {cls.logger_root.level}") - cls.logger_nc.warning(f"Before importing neural_compressor: {sys.modules[__name__].__file__}-{cls.__name__}, " \ - f"NC_Logger_Level = {cls.logger_nc.level}") + cls.logger_nc.warning( + f"Before importing neural_compressor: {sys.modules[__name__].__file__}-{cls.__name__}, " + f"Root_Logger_Level = {cls.logger_root.level}" + ) + cls.logger_nc.warning( + f"Before importing neural_compressor: {sys.modules[__name__].__file__}-{cls.__name__}, " + f"NC_Logger_Level = {cls.logger_nc.level}" + ) import neural_compressor from neural_compressor.adaptor.tensorflow import TensorflowQuery - cls.op_wise_sequences = TensorflowQuery(local_config_file=os.path.join( - os.path.dirname(neural_compressor.__file__), "adaptor/tensorflow.yaml")).get_eightbit_patterns() - cls.logger_nc.warning(f"After importing neural_compressor: {sys.modules[__name__].__file__}-{cls.__name__}, " \ - f"Root_Logger_Level = {cls.logger_root.level}") - cls.logger_nc.warning(f"After importing neural_compressor: {sys.modules[__name__].__file__}-{cls.__name__}, " \ - f"NC_Logger_Level = {cls.logger_nc.level}") + + cls.op_wise_sequences = TensorflowQuery( + local_config_file=os.path.join(os.path.dirname(neural_compressor.__file__), "adaptor/tensorflow.yaml") + ).get_eightbit_patterns() + cls.logger_nc.warning( + f"After importing neural_compressor: {sys.modules[__name__].__file__}-{cls.__name__}, " + f"Root_Logger_Level = {cls.logger_root.level}" + ) + cls.logger_nc.warning( + f"After importing neural_compressor: {sys.modules[__name__].__file__}-{cls.__name__}, " + f"NC_Logger_Level = {cls.logger_nc.level}" + ) def test_tensorflow_gpu_conversion(self): - from neural_compressor.adaptor.tf_utils.util import read_graph + from neural_compressor.adaptor.tf_utils.graph_rewriter.int8.post_hostconst_converter import ( + PostHostConstConverter, + ) from neural_compressor.adaptor.tf_utils.quantize_graph.quantize_graph_for_intel_cpu import QuantizeGraphForIntel - from neural_compressor.adaptor.tf_utils.graph_rewriter.int8.post_hostconst_converter import PostHostConstConverter + from neural_compressor.adaptor.tf_utils.util import read_graph + input_graph_def = read_graph(self.pb_path) - input_node_names = ['Placeholder'] - output_node_names = ['MobilenetV1/Predictions/Reshape_1'] - op_wise_config = { - 'MobilenetV1/MobilenetV1/Conv2d_1_pointwise/Conv2D': (False, 'minmax', False, 7.0)} + input_node_names = ["Placeholder"] + output_node_names = ["MobilenetV1/Predictions/Reshape_1"] + op_wise_config = {"MobilenetV1/MobilenetV1/Conv2d_1_pointwise/Conv2D": (False, "minmax", False, 7.0)} 
tf.compat.v1.disable_eager_execution() converter = QuantizeGraphForIntel( - input_graph_def, input_node_names, output_node_names, op_wise_config, self.op_wise_sequences, 'gpu') + input_graph_def, input_node_names, output_node_names, op_wise_config, self.op_wise_sequences, "gpu" + ) converted_pb, _, _ = converter.do_transform() hostconst_pb = PostHostConstConverter(converted_pb).do_transformation() - target_node_name = 'MobilenetV1/MobilenetV1/Conv2d_1_pointwise/Conv2D_eightbit_quantized_conv' + target_node_name = "MobilenetV1/MobilenetV1/Conv2d_1_pointwise/Conv2D_eightbit_quantized_conv" node_details = {} for i in hostconst_pb.node: @@ -85,7 +105,7 @@ def test_tensorflow_gpu_conversion(self): weights_max_node = node_details[target_node.input[-1]] self.assertEqual(weights_max_node.op, "HostConst") self.assertEqual(weights_min_node.op, "HostConst") - + self.assertEqual(self.logger_root.level, self.logger_root_level) if self.log_env: self.assertEqual(logging.getLevelName(self.logger_nc.level), self.log_env) diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_cac.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_cac.py index 7c0f9a369db..544cc99a59d 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_cac.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_cac.py @@ -2,31 +2,27 @@ # -*- coding: utf-8 -*- # import unittest -from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper -from neural_compressor.adaptor.tf_utils.graph_rewriter.int8.scale_propagation import \ - ScaleProPagationTransformer import tensorflow as tf from tensorflow.core.framework import graph_pb2 from tensorflow.python.framework import dtypes +from neural_compressor.adaptor.tf_utils.graph_rewriter.int8.scale_propagation import ScaleProPagationTransformer +from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper class TestGraphScaleProPagation(unittest.TestCase): def test_scale_propagation(self): """Test scale propagation for below pattern - requantize + quantizedavgpool+ quantized conv2d + requantize. 
- """ + requantize + quantizedavgpool+ quantized conv2d + requantize.""" tf.compat.v1.disable_eager_execution() input_constant_name = "input_constant" relu_name = "relu" float_graph_def = graph_pb2.GraphDef() input_constant = QuantizeGraphHelper.create_constant_node( - input_constant_name, - value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], - dtype=dtypes.float32, - shape=[1, 2, 6, 1]) + input_constant_name, value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=dtypes.float32, shape=[1, 2, 6, 1] + ) float_graph_def.node.extend([input_constant]) requantize_min_name = "requantize_min_const" requantize_min = QuantizeGraphHelper.create_constant_node( @@ -42,22 +38,31 @@ def test_scale_propagation(self): dtype=dtypes.float32, ) float_graph_def.node.extend([requantize_max]) - relu_node = QuantizeGraphHelper.create_node("Requantize", relu_name, [ - input_constant_name, input_constant_name + ':1', - input_constant_name + ':2', requantize_min_name, - requantize_max_name - ]) + relu_node = QuantizeGraphHelper.create_node( + "Requantize", + relu_name, + [ + input_constant_name, + input_constant_name + ":1", + input_constant_name + ":2", + requantize_min_name, + requantize_max_name, + ], + ) QuantizeGraphHelper.set_attr_dtype(relu_node, "Tinput", dtypes.qint32) - QuantizeGraphHelper.set_attr_dtype(relu_node, "out_type", - dtypes.quint8) + QuantizeGraphHelper.set_attr_dtype(relu_node, "out_type", dtypes.quint8) float_graph_def.node.extend([relu_node]) b_constant_name = "b_constant" mat_mul_name = "mat_mul" b_constant = QuantizeGraphHelper.create_constant_node( - b_constant_name, value=[0], dtype=dtypes.float32, shape=[ + b_constant_name, + value=[0], + dtype=dtypes.float32, + shape=[ 1, - ]) + ], + ) float_graph_def.node.extend([b_constant]) avgpool_max_constant_name = "avgpool_max_constant" @@ -68,11 +73,12 @@ def test_scale_propagation(self): dtype=dtypes.float32, shape=[ 1, - ]) + ], + ) float_graph_def.node.extend([avgpool_max]) quantized_avgpool = QuantizeGraphHelper.create_node( - "QuantizedAvgPool", mat_mul_name, - [relu_name, b_constant_name, avgpool_max_constant_name]) + "QuantizedAvgPool", mat_mul_name, [relu_name, b_constant_name, avgpool_max_constant_name] + ) QuantizeGraphHelper.set_attr_dtype(quantized_avgpool, "T", dtypes.float32) float_graph_def.node.extend([quantized_avgpool]) @@ -81,14 +87,12 @@ def test_scale_propagation(self): offset_constant_name = "offset_constant" offset_constant = QuantizeGraphHelper.create_constant_node( - offset_constant_name, - value=[1, 2, 3, 4, 5, 6], - dtype=dtypes.float32, - shape=[6]) + offset_constant_name, value=[1, 2, 3, 4, 5, 6], dtype=dtypes.float32, shape=[6] + ) float_graph_def.node.extend([offset_constant]) bias_add_node = QuantizeGraphHelper.create_node( - "QuantizedConv2DWithBiasAndRelu", bias_add_name, - [mat_mul_name, offset_constant_name]) + "QuantizedConv2DWithBiasAndRelu", bias_add_name, [mat_mul_name, offset_constant_name] + ) QuantizeGraphHelper.set_attr_dtype(bias_add_node, "T", dtypes.float32) float_graph_def.node.extend([bias_add_node]) post_min_value = -1 @@ -110,26 +114,31 @@ def test_scale_propagation(self): post_requantize_name = "post_requantize" post_requantize_node = QuantizeGraphHelper.create_node( - "Requantize", post_requantize_name, [ - bias_add_name, bias_add_name + ':1', bias_add_name + ':2', - post_requantize_min_name, post_requantize_max_name - ]) + "Requantize", + post_requantize_name, + [ + bias_add_name, + bias_add_name + ":1", + bias_add_name + ":2", + post_requantize_min_name, + post_requantize_max_name, + ], + ) 
float_graph_def.node.extend([post_requantize_node]) - optimized_graph = ScaleProPagationTransformer( - float_graph_def).do_transformation() + optimized_graph = ScaleProPagationTransformer(float_graph_def).do_transformation() update_min_value = None update_max_value = None for node in optimized_graph.node: - if node.name == 'relu_cac_requantize_min_value': - update_min_value = node.attr['value'].tensor.float_val[0] + if node.name == "relu_cac_requantize_min_value": + update_min_value = node.attr["value"].tensor.float_val[0] - if node.name == 'relu_cac_requantize_max_value': - update_max_value = node.attr['value'].tensor.float_val[0] + if node.name == "relu_cac_requantize_max_value": + update_max_value = node.attr["value"].tensor.float_val[0] self.assertEqual(update_min_value, post_min_value) self.assertEqual(update_max_value, post_max_value) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_column_wise_mul.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_column_wise_mul.py index 1dec712a735..ff657c942f1 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_column_wise_mul.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_column_wise_mul.py @@ -2,15 +2,16 @@ # -*- coding: utf-8 -*- # import unittest + import numpy as np +import tensorflow as tf +from tensorflow.compat.v1 import graph_util + from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.fuse_column_wise_mul import FuseColumnWiseMulOptimizer from neural_compressor.adaptor.tf_utils.util import disable_random -import tensorflow as tf -from tensorflow.compat.v1 import graph_util class TestColumnWiseMulFusion(unittest.TestCase): - @disable_random() def test_conv_mul_fusion(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") @@ -22,26 +23,25 @@ def test_conv_mul_fusion(self): mul_tensor = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7], dtype=tf.float32) mul = tf.math.multiply(conv, mul_tensor) relu = tf.nn.relu(mul) - relu6 = tf.nn.relu6(relu, name='op_to_store') + relu6 = tf.nn.relu6(relu, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) output_graph_def = FuseColumnWiseMulOptimizer(output_graph_def).do_transformation() found_mul = False for i in output_graph_def.node: - if i.op == 'Mul': + if i.op == "Mul": found_mul = True break self.assertEqual(found_mul, False) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_concat.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_concat.py index f5999d5a0dd..da161bd2737 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_concat.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_concat.py @@ -1,22 +1,22 @@ # # # -*- coding: utf-8 -*- -import unittest import os import platform -import yaml -import neural_compressor -from neural_compressor.adaptor.tf_utils.util import read_graph -from neural_compressor.adaptor.tf_utils.quantize_graph.quantize_graph_for_intel_cpu import QuantizeGraphForIntel -from neural_compressor.adaptor.tensorflow import TensorflowQuery 
-from neural_compressor.adaptor.tf_utils.util import disable_random +import unittest import tensorflow as tf +import yaml from tensorflow.compat.v1 import graph_util +import neural_compressor +from neural_compressor.adaptor.tensorflow import TensorflowQuery +from neural_compressor.adaptor.tf_utils.quantize_graph.quantize_graph_for_intel_cpu import QuantizeGraphForIntel +from neural_compressor.adaptor.tf_utils.util import disable_random, read_graph + def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -43,48 +43,52 @@ def build_fake_yaml(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() class TestTensorflowConcat(unittest.TestCase): - mb_model_url = 'https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/inceptionv3_fp32_pretrained_model.pb' - pb_path = '/tmp/.neural_compressor/inceptionv3_fp32.pb' + mb_model_url = ( + "https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/inceptionv3_fp32_pretrained_model.pb" + ) + pb_path = "/tmp/.neural_compressor/inceptionv3_fp32.pb" platform = platform.system().lower() if platform == "windows": - pb_path = 'C:\\tmp\\.neural_compressor\\inceptionv3_fp32.pb' + pb_path = "C:\\tmp\\.neural_compressor\\inceptionv3_fp32.pb" + @classmethod def setUpClass(self): if not os.path.exists(self.pb_path) and self.platform == "linux": - os.system( - "mkdir -p /tmp/.neural_compressor && wget {} -O {} ".format(self.mb_model_url, self.pb_path)) - self.op_wise_sequences = TensorflowQuery(local_config_file=os.path.join( - os.path.dirname(neural_compressor.__file__), "adaptor/tensorflow.yaml")).get_eightbit_patterns() + os.system("mkdir -p /tmp/.neural_compressor && wget {} -O {} ".format(self.mb_model_url, self.pb_path)) + self.op_wise_sequences = TensorflowQuery( + local_config_file=os.path.join(os.path.dirname(neural_compressor.__file__), "adaptor/tensorflow.yaml") + ).get_eightbit_patterns() build_fake_yaml() @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") + @unittest.skipIf(tf.__version__ < "2.0", "does not support on 1.15up3") def test_tensorflow_concat_quantization(self): - output_graph_def = read_graph(self.pb_path) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset( - 'dummy', shape=(100, 299, 299, 3), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 299, 299, 3), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def output_graph = quantizer.fit() found_quantized_concat_node = False - target_concat_node_name = 'v0/cg/incept_v3_a0/concat_eightbit_quantized_concatv2' + target_concat_node_name = "v0/cg/incept_v3_a0/concat_eightbit_quantized_concatv2" from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer + cur_graph = GraphAnalyzer() cur_graph.graph = output_graph.graph_def graph_info = cur_graph.parse_graph() @@ -96,10 +100,8 @@ def test_tensorflow_concat_quantization(self): # print (input_conv_name, graph_info[input_conv_name].node.input) min_freezed_out_name = graph_info[input_conv_name].node.input[-2] max_freezed_out_name = graph_info[input_conv_name].node.input[-1] 
- min_freezed_out_value = ( - graph_info[min_freezed_out_name].node.attr['value'].tensor.float_val)[0] - max_freezed_out_value = ( - graph_info[max_freezed_out_name].node.attr['value'].tensor.float_val)[0] + min_freezed_out_value = (graph_info[min_freezed_out_name].node.attr["value"].tensor.float_val)[0] + max_freezed_out_value = (graph_info[max_freezed_out_name].node.attr["value"].tensor.float_val)[0] min_out.append(min_freezed_out_value) max_out.append(max_freezed_out_value) @@ -108,38 +110,33 @@ def test_tensorflow_concat_quantization(self): @disable_random() def test_concat_with_different_input_type(self): - x = tf.compat.v1.placeholder( - tf.float32, [1, 128, 128, 16], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [2, 2, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [16], - initializer=tf.compat.v1.random_normal_initializer()) + x = tf.compat.v1.placeholder(tf.float32, [1, 128, 128, 16], name="input") + conv_weights = tf.compat.v1.get_variable( + "weight", [2, 2, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [16], initializer=tf.compat.v1.random_normal_initializer()) x = tf.nn.relu(x) sqrt = tf.math.sqrt(x) relu_sqrt = tf.nn.relu(sqrt) - conv = tf.nn.conv2d(relu_sqrt, conv_weights, strides=[ - 1, 2, 2, 1], padding="SAME", name='last') + conv = tf.nn.conv2d(relu_sqrt, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name="last") normed = tf.compat.v1.layers.batch_normalization(conv) relu = tf.nn.relu(normed) - conv1 = tf.nn.conv2d(x, conv_weights, strides=[ - 1, 2, 2, 1], padding="SAME", name='last') + conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name="last") conv_bias = tf.nn.bias_add(conv1, conv_bias) concat = tf.concat([relu, conv_bias], 1) - final_node = tf.nn.relu(concat, name='op_to_store') - out_name = final_node.name.split(':')[0] + final_node = tf.nn.relu(concat, name="op_to_store") + out_name = final_node.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset( - 'dummy', shape=(100, 128, 128, 16), label=True) + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 128, 128, 16), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -147,8 +144,8 @@ def test_concat_with_different_input_type(self): quantized_concat = False for i in output_graph.graph_def.node: - if i.op == 'QuantizedConcatV2': - quantized_concat = True + if i.op == "QuantizedConcatV2": + quantized_concat = True self.assertEqual(quantized_concat, False) diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_conv_add_relu_fusion.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_conv_add_relu_fusion.py index a8b0933c796..d7f5f9a5ec5 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_conv_add_relu_fusion.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_conv_add_relu_fusion.py @@ -1,50 +1,54 @@ import os import unittest + +import 
tensorflow as tf +from tensorflow.compat.v1 import graph_util + import neural_compressor -from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper -from neural_compressor.adaptor.tf_utils.quantize_graph.quantize_graph_for_intel_cpu import QuantizeGraphForIntel from neural_compressor.adaptor.tensorflow import TensorflowQuery +from neural_compressor.adaptor.tf_utils.quantize_graph.quantize_graph_for_intel_cpu import QuantizeGraphForIntel +from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper from neural_compressor.adaptor.tf_utils.util import disable_random -import tensorflow as tf -from tensorflow.compat.v1 import graph_util + class TestConvAddRelu(unittest.TestCase): @disable_random() def test_conv_add_relu(self): x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - if tf.version.VERSION <= '2.1.0': + if tf.version.VERSION <= "2.1.0": x = tf.nn.relu(x) - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [32], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [32], initializer=tf.compat.v1.random_normal_initializer()) conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME") conv_bias = tf.nn.bias_add(conv1, conv_bias) - relu = tf.nn.relu(conv_bias, name='Relu_1') - op_wise_sequences = TensorflowQuery(local_config_file=os.path.join( - os.path.dirname(neural_compressor.__file__), "adaptor/tensorflow.yaml")).get_eightbit_patterns() + relu = tf.nn.relu(conv_bias, name="Relu_1") + op_wise_sequences = TensorflowQuery( + local_config_file=os.path.join(os.path.dirname(neural_compressor.__file__), "adaptor/tensorflow.yaml") + ).get_eightbit_patterns() with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[relu.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[relu.name.split(":")[0]] + ) output_graph_def = QuantizeGraphHelper.remove_training_nodes( - output_graph_def, protected_nodes=[relu.name.split(':')[0]]) - inputs = [x.name.split(':')[0]] - outputs = [relu.name.split(':')[0]] + output_graph_def, protected_nodes=[relu.name.split(":")[0]] + ) + inputs = [x.name.split(":")[0]] + outputs = [relu.name.split(":")[0]] op_wise_config = { - "Conv2D": (False, 'minmax', False, 7.0), + "Conv2D": (False, "minmax", False, 7.0), } - fold_graph_def, _, _ = QuantizeGraphForIntel(output_graph_def, inputs, outputs, - op_wise_config, op_wise_sequences, - 'cpu').do_transform() + fold_graph_def, _, _ = QuantizeGraphForIntel( + output_graph_def, inputs, outputs, op_wise_config, op_wise_sequences, "cpu" + ).do_transform() found_QuantizedConv2DWithBiasAndRelu = False for i in fold_graph_def.node: - if i.op == 'QuantizedConv2DWithBiasAndRelu': + if i.op == "QuantizedConv2DWithBiasAndRelu": found_QuantizedConv2DWithBiasAndRelu = True break self.assertEqual(found_QuantizedConv2DWithBiasAndRelu, True) diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_conv_as_output.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_conv_as_output.py index a1c0a9ab256..1cd9fd0f5f6 100644 --- 
a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_conv_as_output.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_conv_as_output.py @@ -1,18 +1,17 @@ import os import shutil import unittest + import numpy as np +import tensorflow as tf +from tensorflow.core.framework import attr_value_pb2, graph_pb2, node_def_pb2 +from tensorflow.python.framework import dtypes, tensor_util + from neural_compressor.experimental import Quantization, common -import tensorflow as tf -from tensorflow.core.framework import attr_value_pb2 -from tensorflow.core.framework import graph_pb2 -from tensorflow.core.framework import node_def_pb2 -from tensorflow.python.framework import tensor_util -from tensorflow.python.framework import dtypes def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -32,151 +31,177 @@ def build_fake_yaml(): relative: 0.01 workspace: path: saved - ''' - with open('fake_yaml.yaml',"w",encoding="utf-8") as f: + """ + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) f.close() + def create_test_graph(): input_node = node_def_pb2.NodeDef() input_node.name = "input" input_node.op = "Placeholder" - input_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + input_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv1_weight_node = node_def_pb2.NodeDef() conv1_weight_node.name = "conv1_weights" conv1_weight_node.op = "Const" - conv1_weight_value = np.float32(np.abs(np.random.randn(3,3,3,32))) - conv1_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - conv1_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - conv1_weight_value, conv1_weight_value.dtype.type, conv1_weight_value.shape))) + conv1_weight_value = np.float32(np.abs(np.random.randn(3, 3, 3, 32))) + conv1_weight_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv1_weight_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv1_weight_value, conv1_weight_value.dtype.type, conv1_weight_value.shape + ) + ) + ) conv1_node = node_def_pb2.NodeDef() conv1_node.name = "conv1" conv1_node.op = "Conv2D" - conv1_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + conv1_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv1_node.input.extend([input_node.name, conv1_weight_node.name]) - conv1_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv1_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv1_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) - conv1_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + conv1_node.attr["strides"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv1_node.attr["dilations"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv1_node.attr["padding"].CopyFrom(attr_value_pb2.AttrValue(s=b"SAME")) + conv1_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) bias_node = node_def_pb2.NodeDef() bias_node.name = "conv1_bias" bias_node.op = "Const" bias_value = 
np.float32(np.abs(np.random.randn(32))) - bias_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - bias_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - bias_value, bias_value.dtype.type, bias_value.shape))) + bias_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(bias_value, bias_value.dtype.type, bias_value.shape) + ) + ) bias_add_node = node_def_pb2.NodeDef() bias_add_node.name = "conv1_bias_add" bias_add_node.op = "BiasAdd" - bias_add_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_add_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) bias_add_node.input.extend([conv1_node.name, bias_node.name]) - bias_add_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + bias_add_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) relu_node = node_def_pb2.NodeDef() relu_node.op = "Relu" relu_node.name = "relu" - relu_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + relu_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) relu_node.input.extend([bias_add_node.name]) conv2_weight_node = node_def_pb2.NodeDef() conv2_weight_node.name = "conv2_weights" conv2_weight_node.op = "Const" - conv2_weight_value = np.float32(np.abs(np.random.randn(3,3,32,32))) - conv2_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - conv2_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - conv2_weight_value, conv2_weight_value.dtype.type, conv2_weight_value.shape))) + conv2_weight_value = np.float32(np.abs(np.random.randn(3, 3, 32, 32))) + conv2_weight_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv2_weight_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv2_weight_value, conv2_weight_value.dtype.type, conv2_weight_value.shape + ) + ) + ) conv2_node = node_def_pb2.NodeDef() conv2_node.name = "conv2" conv2_node.op = "Conv2D" - conv2_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + conv2_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv2_node.input.extend([relu_node.name, conv2_weight_node.name]) - conv2_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv2_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv2_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) - conv2_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + conv2_node.attr["strides"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv2_node.attr["dilations"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv2_node.attr["padding"].CopyFrom(attr_value_pb2.AttrValue(s=b"SAME")) + conv2_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) bias_node2 = node_def_pb2.NodeDef() bias_node2.name = "conv2_bias" bias_node2.op = "Const" bias_value2 = 
np.float32(np.abs(np.random.randn(32))) - bias_node2.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - bias_node2.attr['value'].CopyFrom(attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - bias_value2, bias_value2.dtype.type, bias_value2.shape))) + bias_node2.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_node2.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(bias_value2, bias_value2.dtype.type, bias_value2.shape) + ) + ) bias_add_node2 = node_def_pb2.NodeDef() bias_add_node2.name = "conv2_bias_add" bias_add_node2.op = "BiasAdd" - bias_add_node2.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_add_node2.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) bias_add_node2.input.extend([conv2_node.name, bias_node2.name]) - bias_add_node2.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + bias_add_node2.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) relu_node2 = node_def_pb2.NodeDef() relu_node2.op = "Relu" relu_node2.name = "relu2" - relu_node2.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + relu_node2.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) relu_node2.input.extend([bias_add_node2.name]) log_node = node_def_pb2.NodeDef() log_node.name = "log1" log_node.op = "Log" - log_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + log_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) log_node.input.extend([relu_node2.name]) conv3_weight_node = node_def_pb2.NodeDef() conv3_weight_node.name = "conv3_weights" conv3_weight_node.op = "Const" - conv3_weight_value = np.float32(np.abs(np.random.randn(3,3,32,32))) - conv3_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - conv3_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - conv3_weight_value, conv3_weight_value.dtype.type, conv3_weight_value.shape))) + conv3_weight_value = np.float32(np.abs(np.random.randn(3, 3, 32, 32))) + conv3_weight_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv3_weight_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv3_weight_value, conv3_weight_value.dtype.type, conv3_weight_value.shape + ) + ) + ) conv3_node = node_def_pb2.NodeDef() conv3_node.name = "conv3" conv3_node.op = "Conv2D" - conv3_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + conv3_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv3_node.input.extend([log_node.name, conv3_weight_node.name]) - conv3_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv3_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv3_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) - conv3_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + conv3_node.attr["strides"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv3_node.attr["dilations"].CopyFrom( + 
attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv3_node.attr["padding"].CopyFrom(attr_value_pb2.AttrValue(s=b"SAME")) + conv3_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) test_graph = graph_pb2.GraphDef() - test_graph.node.extend([input_node, - conv1_weight_node, - conv1_node, - bias_node, - bias_add_node, - relu_node, - conv2_weight_node, - conv2_node, - bias_node2, - bias_add_node2, - log_node, - relu_node2, - conv3_weight_node, - conv3_node - ]) + test_graph.node.extend( + [ + input_node, + conv1_weight_node, + conv1_node, + bias_node, + bias_add_node, + relu_node, + conv2_weight_node, + conv2_node, + bias_node2, + bias_add_node2, + log_node, + relu_node2, + conv3_weight_node, + conv3_node, + ] + ) return test_graph -@unittest.skipIf(tf.__version__ < '2.8.0', "only support spr-base TF") + + +@unittest.skipIf(tf.__version__ < "2.8.0", "only support spr-base TF") class TestConvAsOutput(unittest.TestCase): @classmethod def setUpClass(self): @@ -185,21 +210,22 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") shutil.rmtree("saved", ignore_errors=True) def test_do_transform(self): - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(1, 224, 224, 3), label=True) + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(1, 224, 224, 3), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = create_test_graph() output_graph = quantizer.fit() - f = tf.io.gfile.GFile('ut.pb', 'wb') + f = tf.io.gfile.GFile("ut.pb", "wb") f.write(output_graph.graph_def.SerializeToString()) for node in output_graph.graph_def.node: - if node.name == 'conv3_eightbit_requantize': + if node.name == "conv3_eightbit_requantize": self.assertTrue("Quantized" in node.op) + if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_conv_fusion.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_conv_fusion.py index 28314b87b71..1d10d5287d8 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_conv_fusion.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_conv_fusion.py @@ -1,23 +1,25 @@ # # -*- coding: utf-8 -*- # -import unittest import os import platform -import yaml +import unittest + import numpy as np +import tensorflow as tf +import yaml +from tensorflow.compat.v1 import graph_util + import neural_compressor -from neural_compressor.adaptor.tf_utils.quantize_graph.quantize_graph_for_intel_cpu import QuantizeGraphForIntel -from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.strip_unused_nodes import StripUnusedNodesOptimizer -from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.fold_batch_norm import FoldBatchNormNodesOptimizer from neural_compressor.adaptor.tensorflow import TensorflowQuery +from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.fold_batch_norm import FoldBatchNormNodesOptimizer +from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.strip_unused_nodes import StripUnusedNodesOptimizer +from neural_compressor.adaptor.tf_utils.quantize_graph.quantize_graph_for_intel_cpu import QuantizeGraphForIntel from neural_compressor.adaptor.tf_utils.util import disable_random -import tensorflow as tf -from tensorflow.compat.v1 import graph_util def 
build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -43,9 +45,9 @@ def build_fake_yaml(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() @@ -57,7 +59,7 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") @disable_random() def test_conv_relu_fusion(self): @@ -65,23 +67,24 @@ def test_conv_relu_fusion(self): top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") relu = tf.nn.relu(conv) - relu6 = tf.nn.relu6(relu, name='op_to_store') + relu6 = tf.nn.relu6(relu, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -89,7 +92,7 @@ def test_conv_relu_fusion(self): found_conv_fusion = True for i in output_graph.graph_def.node: - if i.op == 'Relu': + if i.op == "Relu": found_conv_fusion = False break @@ -102,23 +105,24 @@ def test_depthwiseconv_biasadd_fusion(self): top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.depthwise_conv2d(x_pad, conv_weights, strides=[1, 1, 1, 1], padding="VALID") - normed = tf.compat.v1.layers.batch_normalization(conv, name='op_to_store') - out_name = normed.name.split(':')[0] + normed = tf.compat.v1.layers.batch_normalization(conv, name="op_to_store") + out_name = normed.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = 
Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -126,7 +130,7 @@ def test_depthwiseconv_biasadd_fusion(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == 'QuantizedDepthwiseConv2DWithBias': + if i.op == "QuantizedDepthwiseConv2DWithBias": found_conv_fusion = True break @@ -137,23 +141,24 @@ def test_depthwiseconv_biasadd_fusion_with_negative_input(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.depthwise_conv2d(x_pad, conv_weights, strides=[1, 1, 1, 1], padding="VALID") - normed = tf.compat.v1.layers.batch_normalization(conv, name='op_to_store') - out_name = normed.name.split(':')[0] + normed = tf.compat.v1.layers.batch_normalization(conv, name="op_to_store") + out_name = normed.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -161,36 +166,39 @@ def test_depthwiseconv_biasadd_fusion_with_negative_input(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == 'QuantizedDepthwiseConv2DWithBias': + if i.op == "QuantizedDepthwiseConv2DWithBias": found_conv_fusion = True break self.assertEqual(found_conv_fusion, False) - @unittest.skipUnless(bool( - tf.version.VERSION.find('1.15.0-up') != -1 or tf.version.VERSION >= '2.1.0'), 'not supported the current tf version.') + @unittest.skipUnless( + bool(tf.version.VERSION.find("1.15.0-up") != -1 or tf.version.VERSION >= "2.1.0"), + "not supported the current tf version.", + ) @disable_random() def test_conv_biasadd_relu6_fusion(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) - relu6 = tf.nn.relu6(normed, name='op_to_store') + relu6 = tf.nn.relu6(normed, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with 
tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -198,7 +206,7 @@ def test_conv_biasadd_relu6_fusion(self): found_conv_fusion = True for i in output_graph.graph_def.node: - if i.op == 'Relu6': + if i.op == "Relu6": found_conv_fusion = False break self.assertEqual(found_conv_fusion, True) @@ -208,24 +216,25 @@ def test_conv_biasadd_add_relu_fusion(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") top_relu = tf.nn.relu(x) - conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(top_relu, conv_weights2, strides=[1, 2, 2, 1], padding="SAME") - normed2 = tf.nn.bias_add(conv2, tf.constant([3.0, 1.2,1,2,3,4,5,6,7,8,9,0,12,2,3,4])) + normed2 = tf.nn.bias_add(conv2, tf.constant([3.0, 1.2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 12, 2, 3, 4])) relu = tf.nn.relu(normed2 + tf.constant([3.0])) - relu6 = tf.nn.relu6(relu, name='op_to_store') + relu6 = tf.nn.relu6(relu, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -234,7 +243,7 @@ def test_conv_biasadd_add_relu_fusion(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op.find('QuantizedConv2D') != -1: + if i.op.find("QuantizedConv2D") != -1: found_conv_fusion = True break @@ -245,25 +254,26 @@ def test_conv_squeeze_biasadd_relu_fusion(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") top_relu = tf.nn.relu(x) - conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(top_relu, conv_weights2, strides=[1, 2, 2, 1], padding="SAME") squeeze = tf.squeeze(conv2) - normed2 = tf.nn.bias_add(conv2, tf.constant([3.0, 1.2,1,2,3,4,5,6,7,8,9,0,12,2,3,4])) + 
normed2 = tf.nn.bias_add(conv2, tf.constant([3.0, 1.2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 12, 2, 3, 4])) relu = tf.nn.relu(normed2) - identity = tf.identity(relu, name='op_to_store') + identity = tf.identity(relu, name="op_to_store") - out_name = identity.name.split(':')[0] + out_name = identity.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -272,7 +282,7 @@ def test_conv_squeeze_biasadd_relu_fusion(self): correct_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == 'QuantizedConv2DWithBiasAndReluAndRequantize': + if i.op == "QuantizedConv2DWithBiasAndReluAndRequantize": correct_conv_fusion = True break @@ -284,32 +294,34 @@ def test_conv_biasadd_addv2_relu_fallback_fusion_1(self): top_relu = tf.nn.leaky_relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) # relu = tf.nn.relu(normed) - conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(top_relu, conv_weights2, strides=[1, 2, 2, 1], padding="SAME") normed2 = tf.compat.v1.layers.batch_normalization(conv2) # relu2 = tf.nn.relu(normed2) - add = tf.raw_ops.AddV2(x=normed, y=normed2, name='addv2') + add = tf.raw_ops.AddV2(x=normed, y=normed2, name="addv2") relu = tf.nn.relu(add) - relu6 = tf.nn.relu6(relu, name='op_to_store') + relu6 = tf.nn.relu6(relu, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -318,7 +330,7 @@ def 
test_conv_biasadd_addv2_relu_fallback_fusion_1(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == 'QuantizedConv2DWithBiasAndRequantize': + if i.op == "QuantizedConv2DWithBiasAndRequantize": found_conv_fusion = True break @@ -330,32 +342,34 @@ def test_conv_biasadd_addv2_relu_fallback_fusion_2(self): top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) # relu = tf.nn.relu(normed) - conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(top_relu, conv_weights2, strides=[1, 2, 2, 1], padding="SAME") normed2 = tf.compat.v1.layers.batch_normalization(conv2) # relu2 = tf.nn.relu(normed2) - add = tf.raw_ops.AddV2(x=normed, y=normed2, name='addv2') + add = tf.raw_ops.AddV2(x=normed, y=normed2, name="addv2") relu = tf.nn.relu(add) - relu6 = tf.nn.relu6(relu, name='op_to_store') + relu6 = tf.nn.relu6(relu, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -364,7 +378,7 @@ def test_conv_biasadd_addv2_relu_fallback_fusion_2(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == 'QuantizedConv2DWithBiasSignedSumAndReluAndRequantize': + if i.op == "QuantizedConv2DWithBiasSignedSumAndReluAndRequantize": found_conv_fusion = True break @@ -376,8 +390,9 @@ def test_conv_fusion_with_last_matmul(self): top_relu = tf.nn.relu(x) # paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) # x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(top_relu, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) @@ -394,19 +409,19 @@ def test_conv_fusion_with_last_matmul(self): y_1 = tf.constant(y_data_1, dtype=tf.float32, shape=[1, 1]) z_2nd_matmul = tf.matmul(relu1, y_1) - relu6 = tf.nn.relu6(z_2nd_matmul, name='op_to_store') + relu6 = tf.nn.relu6(z_2nd_matmul, 
name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -414,7 +429,7 @@ def test_conv_fusion_with_last_matmul(self): quantize_v2_count = 0 for i in output_graph.graph_def.node: - if i.op == 'QuantizeV2': + if i.op == "QuantizeV2": quantize_v2_count += 1 break @@ -424,35 +439,38 @@ def test_conv_fusion_with_last_matmul(self): def test_conv_fusion_with_last_conv(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") top_relu = tf.nn.relu(x) - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(top_relu, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) relu = tf.nn.relu(normed) pooling = tf.nn.max_pool(relu, ksize=1, strides=[1, 2, 2, 1], padding="SAME") - conv_weights_2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights_2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(pooling, conv_weights_2, strides=[1, 2, 2, 1], padding="VALID") - conv_weights_3 = tf.compat.v1.get_variable("weight3", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights_3 = tf.compat.v1.get_variable( + "weight3", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) relu2 = tf.nn.relu(conv2) conv3 = tf.nn.conv2d(relu2, conv_weights_3, strides=[1, 2, 2, 1], padding="VALID") relu3 = tf.nn.relu(conv3) - relu6 = tf.nn.relu6(relu3, name='op_to_store') + relu6 = tf.nn.relu6(relu3, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -460,7 +478,7 @@ def test_conv_fusion_with_last_conv(self): quantize_v2_count = 0 for i in 
output_graph.graph_def.node: - if i.op == 'QuantizeV2': + if i.op == "QuantizeV2": quantize_v2_count += 1 break @@ -472,21 +490,22 @@ def test_conv_fusion_with_max_pooling(self): relu = tf.nn.relu(x) pooling = tf.nn.max_pool(relu, ksize=1, strides=[1, 2, 2, 1], padding="SAME") - conv_weights = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(pooling, conv_weights, strides=[1, 2, 2, 1], padding="VALID") - biasadd = tf.compat.v1.layers.batch_normalization(conv, name='op_to_store') - out_name = biasadd.name.split(':')[0] + biasadd = tf.compat.v1.layers.batch_normalization(conv, name="op_to_store") + out_name = biasadd.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -496,35 +515,42 @@ def test_conv_fusion_with_max_pooling(self): quantized_conv_data_type = None for i in output_graph.graph_def.node: if i.op.find("QuantizedMaxPool") != -1: - quantized_pool_data_type = i.attr['T'].type + quantized_pool_data_type = i.attr["T"].type if i.op.find("QuantizedConv2D") != -1: - quantized_conv_data_type = i.attr['Tinput'].type + quantized_conv_data_type = i.attr["Tinput"].type self.assertNotEqual(quantized_pool_data_type, None) self.assertEqual(quantized_pool_data_type, quantized_conv_data_type) + + class TestGraphConvFusion(unittest.TestCase): - rn50_fp32_pb_url = 'https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/resnet50_fp32_pretrained_model.pb' - pb_path = '/tmp/.neural_compressor/resnet50_fp32_pretrained_model.pb' + rn50_fp32_pb_url = ( + "https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/resnet50_fp32_pretrained_model.pb" + ) + pb_path = "/tmp/.neural_compressor/resnet50_fp32_pretrained_model.pb" platform = platform.system().lower() if platform == "windows": - pb_path = 'C:\\tmp\.neural_compressor\\resnet50_fp32_pretrained_model.pb' - inputs = ['input'] - outputs = ['predict'] + pb_path = "C:\\tmp\.neural_compressor\\resnet50_fp32_pretrained_model.pb" + inputs = ["input"] + outputs = ["predict"] op_wise_config = { - "v0/resnet_v13/conv14/conv2d/Conv2D": (False, 'minmax', False, 7.0), - "v0/resnet_v13/conv11/conv2d/Conv2D": (False, 'minmax', False, 7.0), - "v0/resnet_v17/conv27/conv2d/Conv2D": (False, 'minmax', False, 7.0) + "v0/resnet_v13/conv14/conv2d/Conv2D": (False, "minmax", False, 7.0), + "v0/resnet_v13/conv11/conv2d/Conv2D": (False, "minmax", False, 7.0), + "v0/resnet_v17/conv27/conv2d/Conv2D": (False, "minmax", False, 7.0), } @classmethod def setUpClass(self): if not os.path.exists(self.pb_path): if self.platform == "linux": - os.system('mkdir -p /tmp/.neural_compressor && wget {} -O {} '.format(self.rn50_fp32_pb_url, 
self.pb_path)) + os.system( + "mkdir -p /tmp/.neural_compressor && wget {} -O {} ".format(self.rn50_fp32_pb_url, self.pb_path) + ) elif self.platform == "windows": - os.system('md C:\\tmp\.neural_compressor && cd C:\\tmp\.neural_compressor') + os.system("md C:\\tmp\.neural_compressor && cd C:\\tmp\.neural_compressor") from urllib import request + request.urlretrieve(self.rn50_fp32_pb_url) self.input_graph = tf.compat.v1.GraphDef() with open(self.pb_path, "rb") as f: @@ -535,30 +561,36 @@ def test_conv_biasadd_relu_fusion(self): self._tmp_graph_def = graph_util.remove_training_nodes(self.input_graph, self.outputs) - self._tmp_graph_def = StripUnusedNodesOptimizer(self._tmp_graph_def, - self.inputs, self.outputs).do_transformation() + self._tmp_graph_def = StripUnusedNodesOptimizer( + self._tmp_graph_def, self.inputs, self.outputs + ).do_transformation() self._tmp_graph_def = FoldBatchNormNodesOptimizer(self._tmp_graph_def).do_transformation() - op_wise_sequences = TensorflowQuery(local_config_file=os.path.join( - os.path.dirname(neural_compressor.__file__), "adaptor/tensorflow.yaml")).get_eightbit_patterns() + op_wise_sequences = TensorflowQuery( + local_config_file=os.path.join(os.path.dirname(neural_compressor.__file__), "adaptor/tensorflow.yaml") + ).get_eightbit_patterns() - output_graph, _, _ = QuantizeGraphForIntel(self._tmp_graph_def, self.inputs, self.outputs, - self.op_wise_config, op_wise_sequences, - 'cpu').do_transform() + output_graph, _, _ = QuantizeGraphForIntel( + self._tmp_graph_def, self.inputs, self.outputs, self.op_wise_config, op_wise_sequences, "cpu" + ).do_transform() node_name_type_mapping = {} for i in output_graph.node: node_name_type_mapping[i.name] = i.op - should_disable_sum_node_name = 'v0/resnet_v17/conv27/conv2d/Conv2D_eightbit_quantized_conv' - should_enable_sum_node_name = 'v0/resnet_v13/conv11/conv2d/Conv2D_eightbit_quantized_conv' - should_disable_sum_flag = should_disable_sum_node_name in node_name_type_mapping and node_name_type_mapping[ - should_disable_sum_node_name] == 'QuantizedConv2DWithBias' - should_enable_sum_flag = should_enable_sum_node_name in node_name_type_mapping and node_name_type_mapping[ - should_enable_sum_node_name] == 'QuantizedConv2DWithBiasSumAndRelu' + should_disable_sum_node_name = "v0/resnet_v17/conv27/conv2d/Conv2D_eightbit_quantized_conv" + should_enable_sum_node_name = "v0/resnet_v13/conv11/conv2d/Conv2D_eightbit_quantized_conv" + should_disable_sum_flag = ( + should_disable_sum_node_name in node_name_type_mapping + and node_name_type_mapping[should_disable_sum_node_name] == "QuantizedConv2DWithBias" + ) + should_enable_sum_flag = ( + should_enable_sum_node_name in node_name_type_mapping + and node_name_type_mapping[should_enable_sum_node_name] == "QuantizedConv2DWithBiasSumAndRelu" + ) self.assertEqual(should_enable_sum_flag, True) self.assertEqual(should_disable_sum_flag, True) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_conv_math.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_conv_math.py index ba855b18a57..a5ca1ac4cd1 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_conv_math.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_conv_math.py @@ -3,48 +3,51 @@ # import unittest + import numpy as np +import tensorflow as tf +from tensorflow.compat.v1 import graph_util + from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.fuse_conv_with_math import FuseConvWithMathOptimizer from 
neural_compressor.adaptor.tf_utils.util import disable_random -import tensorflow as tf -from tensorflow.compat.v1 import graph_util + class TestConvWithMath(unittest.TestCase): @disable_random() def test_convert_conv_with_math(self): tf.compat.v1.disable_eager_execution() - x = tf.compat.v1.placeholder(tf.float32, [1,224, 224, 3], name="input") + x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - conv = tf.nn.conv2d(x, tf.constant(np.random.random([3, 3, 3, 16]), dtype=tf.float32), strides=[1, 1, 1, 1], - padding="SAME") + conv = tf.nn.conv2d( + x, tf.constant(np.random.random([3, 3, 3, 16]), dtype=tf.float32), strides=[1, 1, 1, 1], padding="SAME" + ) sub = tf.math.subtract(conv, tf.constant(np.random.random(16), dtype=tf.float32)) realdiv = tf.realdiv(sub, tf.constant(np.random.random(16), dtype=tf.float32)) mul = tf.math.multiply(realdiv, tf.constant(np.random.random(16), dtype=tf.float32)) - conv_add = tf.nn.bias_add(mul, tf.constant(np.random.random(16), dtype=tf.float32), name='bias_add') + conv_add = tf.nn.bias_add(mul, tf.constant(np.random.random(16), dtype=tf.float32), name="bias_add") relu = tf.nn.relu(conv_add) identity = tf.identity(relu) with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[identity.name.split(':')[0]]) - fixed_input = np.random.random([1,224, 224, 3]) - + sess=sess, input_graph_def=sess.graph_def, output_node_names=[identity.name.split(":")[0]] + ) + fixed_input = np.random.random([1, 224, 224, 3]) + default_g = tf.Graph() with default_g.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") with tf.compat.v1.Session(graph=default_g) as sess: - output = sess.run(['Identity:0'], {'input:0': fixed_input}) + output = sess.run(["Identity:0"], {"input:0": fixed_input}) post_graph = FuseConvWithMathOptimizer(graph_def).do_transformation() g = tf.Graph() with g.as_default(): - tf.import_graph_def(post_graph, name='optimized') + tf.import_graph_def(post_graph, name="optimized") with tf.compat.v1.Session(graph=g) as sess: - optimized_output = sess.run(['optimized/Identity:0'], {'optimized/input:0': fixed_input}) + optimized_output = sess.run(["optimized/Identity:0"], {"optimized/input:0": fixed_input}) converted = True for node in post_graph.node: @@ -52,7 +55,8 @@ def test_convert_conv_with_math(self): converted = False self.assertEqual(converted, True) - self.assertEqual(np.allclose (output[0], optimized_output[0]), True) + self.assertEqual(np.allclose(output[0], optimized_output[0]), True) + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_convert_layout.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_convert_layout.py index a29978dd83e..d071ec02b0a 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_convert_layout.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_convert_layout.py @@ -3,38 +3,40 @@ # import unittest -from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.convert_layout import ConvertLayoutOptimizer import tensorflow as tf from tensorflow.compat.v1 import graph_util + +from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.convert_layout import ConvertLayoutOptimizer from neural_compressor.adaptor.tf_utils.util import version1_lt_version2 + class TestConvertLayout(unittest.TestCase): 
def test_convert_layout(self): - if version1_lt_version2(tf.version.VERSION, '2.4.0'): + if version1_lt_version2(tf.version.VERSION, "2.4.0"): return tf.compat.v1.disable_eager_execution() - with tf.device('/CPU:0'): + with tf.device("/CPU:0"): x = tf.compat.v1.placeholder(tf.float32, [1, 3, 224, 224], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], - padding="SAME", data_format='NCHW') + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME", data_format="NCHW") relu = tf.nn.relu(conv) with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[relu.name.split(':')[0]]) - outputs = [relu.name.split(':')[0]] + sess=sess, input_graph_def=sess.graph_def, output_node_names=[relu.name.split(":")[0]] + ) + outputs = [relu.name.split(":")[0]] post_graph = ConvertLayoutOptimizer(graph_def, outputs).do_transformation() converted = False for node in post_graph.node: - if 'data_format' in node.attr and node.attr['data_format'].s == b'NHWC': + if "data_format" in node.attr and node.attr["data_format"].s == b"NHWC": converted = True self.assertEqual(converted, True) - -if __name__ == '__main__': + + +if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_convert_leakyrelu.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_convert_leakyrelu.py index c4fcdaefa24..bf48bedc853 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_convert_leakyrelu.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_convert_leakyrelu.py @@ -3,22 +3,24 @@ # import unittest + import numpy as np +import tensorflow as tf +from tensorflow.compat.v1 import graph_util + from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.convert_leakyrelu import ConvertLeakyReluOptimizer from neural_compressor.adaptor.tf_utils.util import disable_random -import tensorflow as tf -from tensorflow.compat.v1 import graph_util class TestConvertLeaklyRelu(unittest.TestCase): @disable_random() def test_convert_leakyrelu(self): tf.compat.v1.disable_eager_execution() x = tf.compat.v1.placeholder(tf.float32, [1, 3, 224, 224], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], - padding="SAME", data_format='NCHW') + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME", data_format="NCHW") relu = tf.nn.relu(conv) mul = tf.math.multiply(relu, tf.constant([0.1])) maximum = tf.math.maximum(relu, mul) @@ -26,13 +28,12 @@ def test_convert_leakyrelu(self): with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[identity.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[identity.name.split(":")[0]] + ) post_graph = 
ConvertLeakyReluOptimizer(graph_def).do_transformation() converted = False for node in post_graph.node: - if node.op == 'LeakyRelu': + if node.op == "LeakyRelu": converted = True self.assertEqual(converted, True) @@ -40,10 +41,10 @@ def test_convert_leakyrelu(self): def test_convert_leakyrelu_with_alpha_large_than_one(self): tf.compat.v1.disable_eager_execution() x = tf.compat.v1.placeholder(tf.float32, [1, 3, 224, 224], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], - padding="SAME", data_format='NCHW') + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME", data_format="NCHW") relu = tf.nn.relu(conv) mul = tf.math.multiply(relu, tf.constant([3.1])) maximum = tf.math.maximum(relu, mul) @@ -51,13 +52,12 @@ def test_convert_leakyrelu_with_alpha_large_than_one(self): with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[identity.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[identity.name.split(":")[0]] + ) post_graph = ConvertLeakyReluOptimizer(graph_def).do_transformation() converted = False for node in post_graph.node: - if node.op == 'LeakyRelu': + if node.op == "LeakyRelu": converted = True self.assertEqual(converted, False) @@ -65,10 +65,10 @@ def test_convert_leakyrelu_with_alpha_large_than_one(self): def test_convert_leakyrelu_with_invalid_maximum(self): tf.compat.v1.disable_eager_execution() x = tf.compat.v1.placeholder(tf.float32, [1, 3, 224, 224], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], - padding="SAME", data_format='NCHW') + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME", data_format="NCHW") relu = tf.nn.relu(conv) mul = tf.math.multiply(relu, tf.constant([2.1])) @@ -77,13 +77,12 @@ def test_convert_leakyrelu_with_invalid_maximum(self): with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[identity.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[identity.name.split(":")[0]] + ) post_graph = ConvertLeakyReluOptimizer(graph_def).do_transformation() converted = False for node in post_graph.node: - if node.op == 'LeakyRelu': + if node.op == "LeakyRelu": converted = True self.assertEqual(converted, False) @@ -91,25 +90,24 @@ def test_convert_leakyrelu_with_invalid_maximum(self): def test_convert_leakyrelu_with_invalid_maximum_ndim(self): tf.compat.v1.disable_eager_execution() x = tf.compat.v1.placeholder(tf.float32, [1, 3, 12, 12], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], - padding="SAME", data_format='NCHW') - mul = tf.math.multiply(conv, 
tf.constant(np.random.random((1,32,12,12)).astype(np.float32))) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME", data_format="NCHW") + mul = tf.math.multiply(conv, tf.constant(np.random.random((1, 32, 12, 12)).astype(np.float32))) maximum = tf.math.maximum(conv, mul) identity = tf.identity(maximum) with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[identity.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[identity.name.split(":")[0]] + ) post_graph = ConvertLeakyReluOptimizer(graph_def).do_transformation() converted = False for node in post_graph.node: - if node.op == 'LeakyRelu': + if node.op == "LeakyRelu": converted = True self.assertEqual(converted, False) @@ -117,10 +115,10 @@ def test_convert_leakyrelu_with_invalid_maximum_ndim(self): def test_convert_leakyrelu_with_invalid_mul(self): tf.compat.v1.disable_eager_execution() x = tf.compat.v1.placeholder(tf.float32, [1, 3, 224, 224], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], - padding="SAME", data_format='NCHW') + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME", data_format="NCHW") relu = tf.nn.relu(conv) const_identity = tf.identity(tf.constant([0.1])) mul = tf.math.multiply(relu, const_identity) @@ -130,15 +128,15 @@ def test_convert_leakyrelu_with_invalid_mul(self): with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[identity.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[identity.name.split(":")[0]] + ) post_graph = ConvertLeakyReluOptimizer(graph_def).do_transformation() converted = False for node in post_graph.node: - if node.op == 'LeakyRelu': + if node.op == "LeakyRelu": converted = True self.assertEqual(converted, False) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_convert_nan.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_convert_nan.py index 91831a4c3fb..73481c75292 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_convert_nan.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_convert_nan.py @@ -3,48 +3,50 @@ # import unittest -import numpy as np -from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.convert_nan_to_random import ConvertNanToRandom -from neural_compressor.adaptor.tf_utils.util import disable_random +import numpy as np import tensorflow as tf from tensorflow.compat.v1 import graph_util from tensorflow.python.framework import tensor_util +from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.convert_nan_to_random import ConvertNanToRandom +from neural_compressor.adaptor.tf_utils.util import disable_random + + class TestNanConvert(unittest.TestCase): @disable_random() def test_convert_nan_to_float(self): 
tf.compat.v1.disable_eager_execution() - x = tf.compat.v1.placeholder(tf.float32, [1,224, 224, 3], name="input") + x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - conv = tf.nn.conv2d(x, tf.constant(np.random.random([3, 3, 3, 16]), dtype=tf.float32), strides=[1, 1, 1, 1], - padding="SAME") + conv = tf.nn.conv2d( + x, tf.constant(np.random.random([3, 3, 3, 16]), dtype=tf.float32), strides=[1, 1, 1, 1], padding="SAME" + ) sub = tf.math.subtract(conv, tf.constant(np.random.random(16), dtype=tf.float32)) realdiv = tf.realdiv(sub, tf.constant(np.random.random(16), dtype=tf.float32)) mul = tf.math.multiply(realdiv, tf.constant(np.random.random(16), dtype=tf.float32)) - conv_add = tf.nn.bias_add(mul, tf.constant(np.full((16,), np.nan), dtype=tf.float32), name='bias_add') + conv_add = tf.nn.bias_add(mul, tf.constant(np.full((16,), np.nan), dtype=tf.float32), name="bias_add") relu = tf.nn.relu(conv_add) identity = tf.identity(relu) with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[identity.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[identity.name.split(":")[0]] + ) post_graph = ConvertNanToRandom(graph_def).do_transformation() converted = True for node in post_graph.node: if node.op.find("Const") != -1: - const_content = tensor_util.MakeNdarray(node.attr['value'].tensor) + const_content = tensor_util.MakeNdarray(node.attr["value"].tensor) if np.any(np.isnan(const_content)): converted = False self.assertEqual(converted, True) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_cse_optimization.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_cse_optimization.py index 6d0a0355203..c7bb0d7b945 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_cse_optimization.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_cse_optimization.py @@ -2,29 +2,27 @@ # -*- coding: utf-8 -*- # import unittest -from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper -from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.graph_cse_optimizer import GraphCseOptimizer -from neural_compressor.adaptor.tf_utils.util import disable_random import tensorflow as tf from tensorflow.core.framework import graph_pb2 from tensorflow.python.framework import dtypes +from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.graph_cse_optimizer import GraphCseOptimizer +from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper +from neural_compressor.adaptor.tf_utils.util import disable_random + + class TestGraphCommonSequenceElimated(unittest.TestCase): @disable_random() def test_graph_cse(self): - input_constant_name = "input_constant" relu_name = "relu" float_graph_def = graph_pb2.GraphDef() input_constant = QuantizeGraphHelper.create_constant_node( - input_constant_name, - value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], - dtype=dtypes.float32, - shape=[1, 2, 6, 1]) + input_constant_name, value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=dtypes.float32, shape=[1, 2, 6, 1] + ) float_graph_def.node.extend([input_constant]) - relu_node = QuantizeGraphHelper.create_node("Relu", relu_name, - [input_constant_name]) + relu_node = QuantizeGraphHelper.create_node("Relu", relu_name, [input_constant_name]) 
QuantizeGraphHelper.set_attr_dtype(relu_node, "T", dtypes.float32) float_graph_def.node.extend([relu_node]) @@ -32,61 +30,51 @@ def test_graph_cse(self): mat_mul_name = "mat_mul" identity_name = "identity" b_constant = QuantizeGraphHelper.create_constant_node( - b_constant_name, value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=dtypes.float32, shape=[2, 6]) + b_constant_name, value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=dtypes.float32, shape=[2, 6] + ) float_graph_def.node.extend([b_constant]) - mat_mul_node = QuantizeGraphHelper.create_node("MatMul", mat_mul_name, - [relu_name, b_constant_name]) + mat_mul_node = QuantizeGraphHelper.create_node("MatMul", mat_mul_name, [relu_name, b_constant_name]) QuantizeGraphHelper.set_attr_dtype(mat_mul_node, "T", dtypes.float32) QuantizeGraphHelper.set_attr_bool(mat_mul_node, "transpose_a", False) QuantizeGraphHelper.set_attr_bool(mat_mul_node, "transpose_b", False) float_graph_def.node.extend([mat_mul_node]) - identity_node = QuantizeGraphHelper.create_node("Identity", identity_name, - [mat_mul_name]) + identity_node = QuantizeGraphHelper.create_node("Identity", identity_name, [mat_mul_name]) float_graph_def.node.extend([identity_node]) bias_add_name = "bias_add" offset_constant_name = "offset_constant" offset_constant = QuantizeGraphHelper.create_constant_node( - offset_constant_name, - value=[1, 2, 3, 4, 5, 6], - dtype=dtypes.float32, - shape=[6]) + offset_constant_name, value=[1, 2, 3, 4, 5, 6], dtype=dtypes.float32, shape=[6] + ) float_graph_def.node.extend([offset_constant]) - bias_add_node = QuantizeGraphHelper.create_node( - "BiasAdd", bias_add_name, [identity_name, offset_constant_name]) + bias_add_node = QuantizeGraphHelper.create_node("BiasAdd", bias_add_name, [identity_name, offset_constant_name]) QuantizeGraphHelper.set_attr_dtype(bias_add_node, "T", dtypes.float32) float_graph_def.node.extend([bias_add_node]) post_relu_name = "post_relu" - post_relu_node = QuantizeGraphHelper.create_node("Relu", post_relu_name, - [bias_add_name]) + post_relu_node = QuantizeGraphHelper.create_node("Relu", post_relu_name, [bias_add_name]) float_graph_def.node.extend([post_relu_node]) - last_identity_node_name = 'last_identity' - last_identity_node = QuantizeGraphHelper.create_node("Identity", last_identity_node_name, - [post_relu_name]) + last_identity_node_name = "last_identity" + last_identity_node = QuantizeGraphHelper.create_node("Identity", last_identity_node_name, [post_relu_name]) float_graph_def.node.extend([last_identity_node]) left_relu_name = "final_left_relu" - left_relu_node = QuantizeGraphHelper.create_node("Relu", left_relu_name, - [last_identity_node_name]) + left_relu_node = QuantizeGraphHelper.create_node("Relu", left_relu_name, [last_identity_node_name]) float_graph_def.node.extend([left_relu_node]) right_relu_name = "final_right_relu" - right_relu_node = QuantizeGraphHelper.create_node("Relu", right_relu_name, - [last_identity_node_name]) + right_relu_node = QuantizeGraphHelper.create_node("Relu", right_relu_name, [last_identity_node_name]) float_graph_def.node.extend([right_relu_node]) cse_left_node_name = "cse_left_node" - cse_left_node = QuantizeGraphHelper.create_node("Identity", cse_left_node_name, - [left_relu_name]) + cse_left_node = QuantizeGraphHelper.create_node("Identity", cse_left_node_name, [left_relu_name]) float_graph_def.node.extend([cse_left_node]) cse_right_node_name = "cse_right_node" - cse_right_node = QuantizeGraphHelper.create_node("Identity", cse_right_node_name, - [right_relu_name]) + cse_right_node = 
QuantizeGraphHelper.create_node("Identity", cse_right_node_name, [right_relu_name]) float_graph_def.node.extend([cse_right_node]) # post_graph = QuantizeGraphHelper().graph_cse_optimization ( @@ -102,5 +90,5 @@ def test_graph_cse(self): self.assertEqual(right_relu_optimized_flag, True) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_debug_mode.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_debug_mode.py index 62683441f23..e522f47c68e 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_debug_mode.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_debug_mode.py @@ -1,18 +1,19 @@ - -import unittest +import logging import os -import yaml -from neural_compressor.adaptor.tf_utils.util import disable_random +import unittest import tensorflow as tf +import yaml from tensorflow.compat.v1 import graph_util -import logging +from neural_compressor.adaptor.tf_utils.util import disable_random + logger = logging.getLogger("neural_compressor") logger.setLevel(logging.DEBUG) + def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -39,12 +40,13 @@ def build_fake_yaml(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() + class TestTensorflowGraphAdaptorDebugMode(unittest.TestCase): @classmethod def setUpClass(self): @@ -52,31 +54,32 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") @disable_random() def test_graph_adaptor_debug_mode(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") top_relu = tf.nn.relu(x) - conv_weights = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(top_relu, conv_weights, strides=[1, 2, 2, 1], padding="SAME") - normed = tf.nn.bias_add(conv, tf.constant([3.0, 1.2,1,2,3,4,5,6,7,8,9,0,12,2,3,4])) + normed = tf.nn.bias_add(conv, tf.constant([3.0, 1.2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 12, 2, 3, 4])) relu = tf.nn.relu(normed + tf.constant([3.0])) - relu6 = tf.nn.relu6(relu, name='op_to_store') + relu6 = tf.nn.relu6(relu, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -85,11 +88,12 @@ def test_graph_adaptor_debug_mode(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if 
i.op.find('QuantizedConv2D') != -1: + if i.op.find("QuantizedConv2D") != -1: found_conv_fusion = True break self.assertEqual(found_conv_fusion, True) + if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_dequantize_cast_optimizer.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_dequantize_cast_optimizer.py index 281bf3a638f..69cc08b2ccc 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_dequantize_cast_optimizer.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_dequantize_cast_optimizer.py @@ -1,73 +1,68 @@ -import unittest import os -import yaml +import unittest + import numpy as np import tensorflow as tf +import yaml from tensorflow.python.framework import dtypes -from neural_compressor.adaptor.tf_utils.util import disable_random -from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper + from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.dequantize_cast_optimizer import DequantizeCastOptimizer +from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper +from neural_compressor.adaptor.tf_utils.util import disable_random + def build_fake_graphdef(set_min_first=False, dq_multi_outputs=False): tf.compat.v1.disable_eager_execution() - input = tf.compat.v1.placeholder(tf.float32, shape=(32, 224, 224, 3), name='input') + input = tf.compat.v1.placeholder(tf.float32, shape=(32, 224, 224, 3), name="input") graph_def = tf.compat.v1.get_default_graph().as_graph_def(add_shapes=True) - min_input = Helper.create_constant_node( - 'test_min', - value=0., - dtype=dtypes.float32) + min_input = Helper.create_constant_node("test_min", value=0.0, dtype=dtypes.float32) - max_input = Helper.create_constant_node( - 'test_max', - value=[1], - dtype=dtypes.float32) + max_input = Helper.create_constant_node("test_max", value=[1], dtype=dtypes.float32) - quant_v2_node = Helper.create_node("QuantizeV2", 'test_quantize', - [input.name, min_input.name, max_input.name]) + quant_v2_node = Helper.create_node("QuantizeV2", "test_quantize", [input.name, min_input.name, max_input.name]) dequantize_node = Helper.create_node( - "Dequantize", 'test_dequantize', - [quant_v2_node.name, quant_v2_node.name + ':1', quant_v2_node.name + ':2']) + "Dequantize", "test_dequantize", [quant_v2_node.name, quant_v2_node.name + ":1", quant_v2_node.name + ":2"] + ) if set_min_first: - Helper.set_attr_string(dequantize_node, "mode", b'MIN_FIRST') + Helper.set_attr_string(dequantize_node, "mode", b"MIN_FIRST") - cast_node = Helper.create_node( - "Cast", 'test_cast', [dequantize_node.name]) + cast_node = Helper.create_node("Cast", "test_cast", [dequantize_node.name]) Helper.set_attr_dtype(cast_node, "DstT", dtypes.bfloat16) Helper.set_attr_dtype(cast_node, "SrcT", dtypes.float32) Helper.set_attr_bool(cast_node, "Truncate", False) - dentity_node = Helper.create_node( - "Identity", 'output', [cast_node.name]) + dentity_node = Helper.create_node("Identity", "output", [cast_node.name]) Helper.set_attr_dtype(dentity_node, "T", dtypes.bfloat16) - graph_def.node.extend([ - min_input, - max_input, - quant_v2_node, - dequantize_node, - cast_node, - dentity_node, - ]) + graph_def.node.extend( + [ + min_input, + max_input, + quant_v2_node, + dequantize_node, + cast_node, + dentity_node, + ] + ) if dq_multi_outputs: - dentity_node_2 = Helper.create_node( - "Identity", 'id_1', [dequantize_node.name]) + dentity_node_2 = Helper.create_node("Identity", "id_1", [dequantize_node.name]) 
Helper.set_attr_dtype(dentity_node_2, "T", dtypes.float32) graph_def.node.extend([dentity_node_2]) return graph_def -class TestDequantizeCastOptimizer(unittest.TestCase): +class TestDequantizeCastOptimizer(unittest.TestCase): @disable_random() def test_dequantize_cast_normal(self): graph_def = build_fake_graphdef() converted_graph_def = DequantizeCastOptimizer(graph_def).do_transformation() for i in converted_graph_def.node: - if i.op == 'Cast': + if i.op == "Cast": hasCast = True break self.assertEqual(hasCast, True) @@ -78,7 +73,7 @@ def test_dequantize_cast_min_first(self): converted_graph_def = DequantizeCastOptimizer(graph_def).do_transformation() hasCast = False for i in converted_graph_def.node: - if i.op == 'Cast': + if i.op == "Cast": hasCast = True break self.assertEqual(hasCast, True) @@ -89,7 +84,7 @@ def test_dequantize_cast_multiple_outputs(self): converted_graph_def = DequantizeCastOptimizer(graph_def).do_transformation() hasCast = False for i in converted_graph_def.node: - if i.op == 'Cast': + if i.op == "Cast": hasCast = True break self.assertEqual(hasCast, True) diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_dump_tensor.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_dump_tensor.py index 96753ee13b8..6dd17807807 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_dump_tensor.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_dump_tensor.py @@ -3,13 +3,15 @@ # import os import unittest -import yaml + import numpy as np +import yaml + np.random.seed(0) def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -34,15 +36,15 @@ def build_fake_yaml(): relative: -0.01 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() def build_fake_yaml_kl(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -71,74 +73,72 @@ def build_fake_yaml_kl(): relative: 0.99 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml_kl.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml_kl.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() def build_fake_model(): import tensorflow as tf + graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: tf.compat.v1.set_random_seed(0) x = tf.compat.v1.placeholder(tf.float32, [1, 30, 30, 1], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [2, 2, 1, 1], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [2, 2, 1, 1], initializer=tf.compat.v1.random_normal_initializer() + ) - beta = tf.compat.v1.get_variable(name='beta', - shape=[1], - initializer=tf.compat.v1.random_normal_initializer()) - gamma = tf.compat.v1.get_variable(name='gamma', - shape=[1], - initializer=tf.compat.v1.random_normal_initializer()) + beta = tf.compat.v1.get_variable(name="beta", shape=[1], initializer=tf.compat.v1.random_normal_initializer()) + gamma = tf.compat.v1.get_variable(name="gamma", shape=[1], initializer=tf.compat.v1.random_normal_initializer()) x = tf.nn.relu(x) - conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name='last') + conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name="last") conv_bias = 
tf.compat.v1.layers.batch_normalization(conv1) x = tf.nn.relu(conv_bias) pool = tf.nn.max_pool(x, ksize=1, strides=[1, 2, 2, 1], padding="SAME") - final_node = tf.nn.relu(pool, name='op_to_store') + final_node = tf.nn.relu(pool, name="op_to_store") sess.run(tf.compat.v1.global_variables_initializer()) constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[final_node.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[final_node.name.split(":")[0]] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") return graph class TestGraphDumpToDisk(unittest.TestCase): - @classmethod def setUpClass(self): self.constant_graph = build_fake_model() build_fake_yaml() build_fake_yaml_kl() - self.kl_log_path = os.path.join(os.getcwd(), 'saved/kl.log') - self.calibration_log_path = os.path.join(os.getcwd(), 'saved/requant_min_max.log') + self.kl_log_path = os.path.join(os.getcwd(), "saved/kl.log") + self.calibration_log_path = os.path.join(os.getcwd(), "saved/requant_min_max.log") @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') - os.remove('fake_yaml_kl.yaml') + os.remove("fake_yaml.yaml") + os.remove("fake_yaml_kl.yaml") os.remove(self.calibration_log_path) def test_dump_tensor_to_disk(self): - from neural_compressor.experimental import Quantization, common import tensorflow.compat.v1 as tf + + from neural_compressor.experimental import Quantization, common + tf.disable_v2_behavior() - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 30, 30, 1), label=True) + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 30, 30, 1), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph @@ -149,7 +149,7 @@ def test_dump_tensor_to_disk(self): found_kl = False for i in data: - if i.find('Relu_1__print__;__KL:') != -1: + if i.find("Relu_1__print__;__KL:") != -1: found_kl = True self.assertEqual(os.path.exists(self.calibration_log_path), True) @@ -157,5 +157,5 @@ def test_dump_tensor_to_disk(self): self.assertEqual(found_kl, True) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_expanddims_optimizer.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_expanddims_optimizer.py index c1d7cf8a2af..528ba3605fe 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_expanddims_optimizer.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_expanddims_optimizer.py @@ -1,14 +1,16 @@ -import unittest import os -import yaml -import numpy as np -from neural_compressor.adaptor.tf_utils.util import disable_random +import unittest +import numpy as np import tensorflow as tf +import yaml from tensorflow.compat.v1 import graph_util +from neural_compressor.adaptor.tf_utils.util import disable_random + + def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -35,9 +37,9 @@ def build_fake_yaml(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, 
f) f.close() @@ -49,36 +51,34 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") @disable_random() def test_expanddims_optimizer(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - - conv_weights = tf.constant(np.random.random((3,16,16)).astype(np.float32), name='y') - conv_weights_expand = tf.expand_dims(conv_weights, axis=0, name='expanddims') + + conv_weights = tf.constant(np.random.random((3, 16, 16)).astype(np.float32), name="y") + conv_weights_expand = tf.expand_dims(conv_weights, axis=0, name="expanddims") conv = tf.nn.conv2d(x_pad, conv_weights_expand, strides=[1, 2, 2, 1], padding="VALID") - out_name = conv.name.split(':')[0] + out_name = conv.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.expanddims_optimizer import ExpandDimsOptimizer convert_graph = ExpandDimsOptimizer(output_graph_def).do_transformation() handle_expanddims = True for node in convert_graph.node: - if node.op == 'Conv2D' and node.input[1] == 'ExpandDims': + if node.op == "Conv2D" and node.input[1] == "ExpandDims": handle_expanddims = False break self.assertEqual(handle_expanddims, True) - if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_fetch_weight_from_reshape.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_fetch_weight_from_reshape.py index cdae055a82e..8182ffce0be 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_fetch_weight_from_reshape.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_fetch_weight_from_reshape.py @@ -1,19 +1,18 @@ -import unittest import os -import yaml -import numpy as np -from neural_compressor.adaptor.tf_utils.util import disable_random +import unittest +import numpy as np import tensorflow as tf +import yaml from tensorflow.compat.v1 import graph_util -from tensorflow.core.framework import attr_value_pb2 -from tensorflow.core.framework import graph_pb2 -from tensorflow.core.framework import node_def_pb2 -from tensorflow.python.framework import tensor_util -from tensorflow.python.framework import dtypes +from tensorflow.core.framework import attr_value_pb2, graph_pb2, node_def_pb2 +from tensorflow.python.framework import dtypes, tensor_util + +from neural_compressor.adaptor.tf_utils.util import disable_random + def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -40,103 +39,115 @@ def build_fake_yaml(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() + def create_graph(): input_node = node_def_pb2.NodeDef() input_node.name = "input" input_node.op = "Placeholder" - input_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + input_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) const_node_1 = node_def_pb2.NodeDef() 
const_node_1.name = "const_1" const_node_1.op = "Const" const_value_1 = np.float32(np.random.randn(128)) - const_node_1.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - const_node_1.attr['value'].CopyFrom(attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - const_value_1, const_value_1.dtype.type, const_value_1.shape))) + const_node_1.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + const_node_1.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(const_value_1, const_value_1.dtype.type, const_value_1.shape) + ) + ) const_node_2 = node_def_pb2.NodeDef() const_node_2.name = "const_2" const_node_2.op = "Const" const_value_2 = np.float32(np.random.randn(128)) - const_node_2.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - const_node_2.attr['value'].CopyFrom(attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - const_value_2, const_value_2.dtype.type, const_value_2.shape))) + const_node_2.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + const_node_2.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(const_value_2, const_value_2.dtype.type, const_value_2.shape) + ) + ) const_node_3 = node_def_pb2.NodeDef() const_node_3.name = "const_3" const_node_3.op = "Const" const_value_3 = np.float32(np.random.randn(128)) - const_node_3.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - const_node_3.attr['value'].CopyFrom(attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - const_value_3, const_value_3.dtype.type, const_value_3.shape))) + const_node_3.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + const_node_3.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(const_value_3, const_value_3.dtype.type, const_value_3.shape) + ) + ) const_node_4 = node_def_pb2.NodeDef() const_node_4.name = "const_4" const_node_4.op = "Const" const_value_4 = np.float32(np.random.randn(128)) - const_node_4.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - const_node_4.attr['value'].CopyFrom(attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - const_value_4, const_value_4.dtype.type, const_value_4.shape))) + const_node_4.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + const_node_4.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(const_value_4, const_value_4.dtype.type, const_value_4.shape) + ) + ) pack_node = node_def_pb2.NodeDef() pack_node.name = "pack" pack_node.op = "Pack" - pack_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - pack_node.attr['axis'].CopyFrom(attr_value_pb2.AttrValue(i=1)) - pack_node.attr['N'].CopyFrom(attr_value_pb2.AttrValue(i=4)) - pack_node.input.extend([const_node_1.name, const_node_2.name, const_node_3.name,\ - const_node_4.name]) + pack_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + pack_node.attr["axis"].CopyFrom(attr_value_pb2.AttrValue(i=1)) + pack_node.attr["N"].CopyFrom(attr_value_pb2.AttrValue(i=4)) + pack_node.input.extend([const_node_1.name, const_node_2.name, const_node_3.name, const_node_4.name]) shape_node = node_def_pb2.NodeDef() shape_node.name = "const_5" shape_node.op = 
"Const" - value_4 = np.int32([1,1,128,4]) - shape_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.qint32.as_datatype_enum)) - shape_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - value_4, value_4.dtype.type, value_4.shape))) + value_4 = np.int32([1, 1, 128, 4]) + shape_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.qint32.as_datatype_enum)) + shape_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(value_4, value_4.dtype.type, value_4.shape)) + ) reshape_node = node_def_pb2.NodeDef() reshape_node.name = "reshape" reshape_node.op = "Reshape" - reshape_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - reshape_node.attr['Tshape'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.int32.as_datatype_enum)) + reshape_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + reshape_node.attr["Tshape"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.int32.as_datatype_enum)) reshape_node.input.extend([pack_node.name, shape_node.name]) conv2_node = node_def_pb2.NodeDef() conv2_node.name = "conv" conv2_node.op = "Conv2D" - conv2_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + conv2_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv2_node.input.extend([input_node.name, reshape_node.name]) - conv2_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv2_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv2_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) - conv2_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + conv2_node.attr["strides"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv2_node.attr["dilations"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv2_node.attr["padding"].CopyFrom(attr_value_pb2.AttrValue(s=b"SAME")) + conv2_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) test_graph = graph_pb2.GraphDef() - test_graph.node.extend([input_node, - const_node_1, - const_node_2, - const_node_3, - const_node_4, - pack_node, - shape_node, - reshape_node, - conv2_node]) + test_graph.node.extend( + [ + input_node, + const_node_1, + const_node_2, + const_node_3, + const_node_4, + pack_node, + shape_node, + reshape_node, + conv2_node, + ] + ) return test_graph @@ -147,25 +158,25 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") @disable_random() def test_FetchWeightFromReshape_Optimizer(self): self.test_graph = create_graph() - from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.fetch_weight_from_reshape \ - import FetchWeightFromReshapeOptimizer + from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.fetch_weight_from_reshape import ( + FetchWeightFromReshapeOptimizer, + ) convert_graph = FetchWeightFromReshapeOptimizer(self.test_graph).do_transformation() handled = False for node in convert_graph.node: - if node.op == 'Conv2D' and node.input[1] == 'reshape/weight_0': + if node.op == "Conv2D" and node.input[1] == "reshape/weight_0": handled = True break self.assertEqual(handled, True) - if __name__ == "__main__": unittest.main() diff 
--git a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_fold_bn.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_fold_bn.py index 1fbafb49a1d..5b58adc3224 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_fold_bn.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_fold_bn.py @@ -2,44 +2,38 @@ # -*- coding: utf-8 -*- # import unittest -from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper -from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.fold_batch_norm import \ - FoldBatchNormNodesOptimizer -from neural_compressor.adaptor.tf_utils.util import disable_random import tensorflow as tf from tensorflow.core.framework import graph_pb2 from tensorflow.python.framework import dtypes +from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.fold_batch_norm import FoldBatchNormNodesOptimizer +from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper +from neural_compressor.adaptor.tf_utils.util import disable_random + + class TestGraphFoldBNWithInvalidParameter(unittest.TestCase): @disable_random() def test_graph_fold_bn(self): - input_constant_name = "input_constant" relu_name = "relu" float_graph_def = graph_pb2.GraphDef() input_constant = QuantizeGraphHelper.create_constant_node( - input_constant_name, - value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], - dtype=dtypes.float32, - shape=[1, 2, 6, 1]) + input_constant_name, value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=dtypes.float32, shape=[1, 2, 6, 1] + ) float_graph_def.node.extend([input_constant]) - relu_node = QuantizeGraphHelper.create_node("Relu", relu_name, - [input_constant_name]) + relu_node = QuantizeGraphHelper.create_node("Relu", relu_name, [input_constant_name]) QuantizeGraphHelper.set_attr_dtype(relu_node, "T", dtypes.float32) float_graph_def.node.extend([relu_node]) b_constant_name = "b_constant" conv2d_name = "conv2d_1" b_constant = QuantizeGraphHelper.create_constant_node( - b_constant_name, - value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], - dtype=dtypes.float32, - shape=[1, 2, 3, 4]) + b_constant_name, value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=dtypes.float32, shape=[1, 2, 3, 4] + ) float_graph_def.node.extend([b_constant]) - conv2d_node = QuantizeGraphHelper.create_node( - "Conv2D", conv2d_name, [relu_name, b_constant_name]) + conv2d_node = QuantizeGraphHelper.create_node("Conv2D", conv2d_name, [relu_name, b_constant_name]) QuantizeGraphHelper.set_attr_dtype(conv2d_node, "T", dtypes.float32) float_graph_def.node.extend([conv2d_node]) @@ -48,68 +42,60 @@ def test_graph_fold_bn(self): offset_constant_name = "offset_constant" offset_constant = QuantizeGraphHelper.create_constant_node( - offset_constant_name, - value=[1, 2, 3, 4, 5, 6], - dtype=dtypes.float32, - shape=[6]) + offset_constant_name, value=[1, 2, 3, 4, 5, 6], dtype=dtypes.float32, shape=[6] + ) float_graph_def.node.extend([offset_constant]) - bias_add_node = QuantizeGraphHelper.create_node( - "BiasAdd", bias_add_name, [conv2d_name, offset_constant_name]) + bias_add_node = QuantizeGraphHelper.create_node("BiasAdd", bias_add_name, [conv2d_name, offset_constant_name]) QuantizeGraphHelper.set_attr_dtype(bias_add_node, "T", dtypes.float32) float_graph_def.node.extend([bias_add_node]) - bn_scale_name = 'bn_scale' + bn_scale_name = "bn_scale" bn_scale_node = QuantizeGraphHelper.create_constant_node( - bn_scale_name, - value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], - dtype=dtypes.float32, - shape=[12, 1]) - bn_offset_name = 'bn_offset' + 
bn_scale_name, value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=dtypes.float32, shape=[12, 1] + ) + bn_offset_name = "bn_offset" bn_offset_node = QuantizeGraphHelper.create_constant_node( - bn_offset_name, - value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], - dtype=dtypes.float32, - shape=[12, 1]) - bn_mean_name = 'bn_mean' + bn_offset_name, value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=dtypes.float32, shape=[12, 1] + ) + bn_mean_name = "bn_mean" bn_mean_node = QuantizeGraphHelper.create_constant_node( - bn_mean_name, value=[ + bn_mean_name, + value=[ 1, 2, - ], dtype=dtypes.float32, shape=[ + ], + dtype=dtypes.float32, + shape=[ 2, - ]) - bn_var_name = 'bn_var' - bn_var_node = QuantizeGraphHelper.create_constant_node( - bn_var_name, value=[], dtype=dtypes.float32, shape=[0]) - fused_bn_node_name = 'bn' + ], + ) + bn_var_name = "bn_var" + bn_var_node = QuantizeGraphHelper.create_constant_node(bn_var_name, value=[], dtype=dtypes.float32, shape=[0]) + fused_bn_node_name = "bn" fused_bn_node = QuantizeGraphHelper.create_node( - "FusedBatchNormV3", fused_bn_node_name, [ - bias_add_name, bn_scale_name, bn_offset_name, bn_mean_name, - bn_var_name - ]) + "FusedBatchNormV3", + fused_bn_node_name, + [bias_add_name, bn_scale_name, bn_offset_name, bn_mean_name, bn_var_name], + ) QuantizeGraphHelper.set_attr_dtype(fused_bn_node, "T", dtypes.float32) QuantizeGraphHelper.set_attr_dtype(fused_bn_node, "U", dtypes.float32) - float_graph_def.node.extend([ - fused_bn_node, bn_scale_node, bn_offset_node, bn_mean_node, - bn_var_node - ]) + float_graph_def.node.extend([fused_bn_node, bn_scale_node, bn_offset_node, bn_mean_node, bn_var_node]) post_relu_name = "post_relu" - post_relu_node = QuantizeGraphHelper.create_node( - "Relu", post_relu_name, [fused_bn_node_name]) + post_relu_node = QuantizeGraphHelper.create_node("Relu", post_relu_name, [fused_bn_node_name]) float_graph_def.node.extend([post_relu_node]) post_graph = FoldBatchNormNodesOptimizer(float_graph_def).do_transformation() bn_not_fused = False for i in post_graph.node: - if i.op == 'FusedBatchNormV3': + if i.op == "FusedBatchNormV3": bn_not_fused = True break self.assertEqual(bn_not_fused, True) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_fuse_decomposed_bn.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_fuse_decomposed_bn.py index 050ca569a44..7cb080e6a9c 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_fuse_decomposed_bn.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_fuse_decomposed_bn.py @@ -1,211 +1,179 @@ -import numpy as np import unittest -from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.fuse_decomposed_bn import \ - FuseDecomposedBNOptimizer +import numpy as np import tensorflow.compat.v1 as tf -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import importer -from tensorflow.python.framework import ops -from tensorflow.python.framework import test_util -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gen_math_ops +from tensorflow.python.framework import constant_op, dtypes, importer, ops, test_util from tensorflow.python.ops import math_ops # pylint: disable=unused-import -from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import array_ops, gen_math_ops, nn_ops from tensorflow.python.platform import test from tensorflow.python.tools import 
optimize_for_inference_lib +from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.fuse_decomposed_bn import FuseDecomposedBNOptimizer + + class OptimizeForInferenceTest(unittest.TestCase): + def count_batchnorm_relavant_ops(self, graph_def): + """Return the count of FusedBatchNorm op and the count of primitive + ops which may make up batchnorm computation in a given graph.""" + batchnorm_count = 0 + decompose_count = 0 + for node in graph_def.node: + if node.op == "FusedBatchNorm": + batchnorm_count += 1 + if node.op in ["Add", "Rsqrt", "Mul", "Sub"]: + decompose_count += 1 + return batchnorm_count, decompose_count + + @test_util.run_deprecated_v1 + def create_base_for_fuse_batchnorm(self, pattern_match_mode="MATCH_ALL", use_reshape=True): + """Create testing graph and compute the result from original graph. + Args: + pattern_match_mode: A label string to indicate which batchnorm composition + pattern to create in the resulting graph. + "MATCH_ALL" - Create a graph matching the decomposed batchnorm pattern + with full set of primitive ops. + "MATCH_NO_GAMMA" - Create a graph matching the decomposed batchnorm + pattern when gamma factor is 1 and multiplication + with gamma is omitted. + "NO_MATCH" - Create a graph with same set of primitive ops which makes + up the decomposed batchnorm, but not matching the pattern. + Returns: + A GraphDef as original graph to run the decomposed batchnorm test cases. + Computation result from executing the original graph defined by GraphDef. + """ + ops.reset_default_graph() + with tf.Session() as sess: + inputs = [1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6] + input_op = constant_op.constant(np.array(inputs), shape=[1, 1, 6, 2], dtype=dtypes.float32) + weights = [1, 2, 3, 4, 0.1, 0.2, 0.3, 0.4] + weights_op = constant_op.constant(np.array(weights), shape=[1, 2, 2, 2], dtype=dtypes.float32) + conv_op = nn_ops.conv2d(input_op, weights_op, [1, 1, 1, 1], padding="SAME", name="conv_op") + + const_op_1 = constant_op.constant(np.array([0.25, 0.5]), shape=[2], dtype=dtypes.float32) + if use_reshape: + const_op_1 = array_ops.reshape(const_op_1, shape=[1, 1, 1, 2]) + const_op_2 = constant_op.constant(0.00001, dtype=dtypes.float32) + const_op_3 = constant_op.constant(np.array([10, 20]), shape=[2], dtype=dtypes.float32) + if use_reshape: + const_op_3 = array_ops.reshape(const_op_3, shape=[1, 1, 1, 2]) + const_op_4 = constant_op.constant(np.array([0.1, 0.6]), shape=[2], dtype=dtypes.float32) + if use_reshape: + const_op_4 = array_ops.reshape(const_op_4, shape=[1, 1, 1, 2]) + + add_op_1 = gen_math_ops.add(const_op_1, const_op_2) + rsqrt_op = math_ops.rsqrt(add_op_1) + + variable_op = None + if pattern_match_mode == "MATCH_NO_GAMMA": + variable_op = rsqrt_op + else: + const_op_5 = constant_op.constant(np.array([1.0, 2.0]), shape=[2], dtype=dtypes.float32) + if use_reshape: + const_op_5 = array_ops.reshape(const_op_5, shape=[1, 1, 1, 2]) + variable_op = math_ops.multiply(rsqrt_op, const_op_5) + + mul_op_1 = math_ops.multiply(conv_op, variable_op) + + mul_op_2 = None + if pattern_match_mode == "NO_MATCH": + const_op_6 = constant_op.constant(np.array([0.2, 0.5]), shape=[2], dtype=dtypes.float32) + mul_op_2 = math_ops.multiply(const_op_3, const_op_6) + else: + mul_op_2 = math_ops.multiply(const_op_3, variable_op) + + sub_op = math_ops.subtract(const_op_4, mul_op_2) + gen_math_ops.add(mul_op_1, sub_op, name="output") + + test_util.set_producer_version(ops.get_default_graph(), 8) + + original_graph = sess.graph_def + original_result = sess.run(["output:0"]) + + return 
original_graph, original_result + + def assertAllClose(self, first, second, rtol=1e-7, atol=0): + first_array = np.array(first) + second_array = np.array(second) + np.testing.assert_allclose(first_array, second_array, rtol, atol) + + @test_util.run_deprecated_v1 + def testFuseDecomposedBatchNorm_MatchAll(self): + for test_rehape in [False, True]: + original_graph_def, original_result = self.create_base_for_fuse_batchnorm("MATCH_ALL", test_rehape) + + # Test correctness of fusing individual ops to FusedBatchNorm + optimized_graph_def = FuseDecomposedBNOptimizer(original_graph_def).do_transformation() + + batchnorm_count, decompose_count = self.count_batchnorm_relavant_ops(optimized_graph_def) + self.assertEqual(batchnorm_count, 1) + self.assertEqual(decompose_count, 0) + + with tf.Session() as sess: + _ = importer.import_graph_def(optimized_graph_def, input_map={}, name="optimized") + optimized_result = sess.run(["optimized/output:0"]) + + self.assertAllClose(original_result, optimized_result) + + # Test correctness of fusing individual ops to FusedBatchNorm followed by + # folding FusedBatchNorm + optimized_graph_def = optimize_for_inference_lib.fold_batch_norms(optimized_graph_def) + for node in optimized_graph_def.node: + self.assertNotEqual("FusedBatchNorm", node.op) + + with tf.Session() as sess: + _ = importer.import_graph_def(optimized_graph_def, input_map={}, name="optimized2") + optimized_result = sess.run(["optimized2/output:0"]) + + self.assertAllClose(original_result, optimized_result, rtol=1e-04, atol=1e-06) + + @test_util.run_deprecated_v1 + def testFuseDecomposedBatchNorm_MatchNoGamma(self): + for test_rehape in [False, True]: + original_graph_def, original_result = self.create_base_for_fuse_batchnorm("MATCH_NO_GAMMA", test_rehape) + + # Test correctness of fusing individual ops to FusedBatchNorm + optimized_graph_def = FuseDecomposedBNOptimizer(original_graph_def).do_transformation() + + batchnorm_count, decompose_count = self.count_batchnorm_relavant_ops(optimized_graph_def) + self.assertEqual(batchnorm_count, 1) + self.assertEqual(decompose_count, 0) + + with tf.Session() as sess: + _ = importer.import_graph_def(optimized_graph_def, input_map={}, name="optimized") + optimized_result = sess.run(["optimized/output:0"]) + + self.assertAllClose(original_result, optimized_result) + + # Test correctness of fusing individual ops to FusedBatchNorm followed by + # folding FusedBatchNorm + optimized_graph_def = optimize_for_inference_lib.fold_batch_norms(optimized_graph_def) + for node in optimized_graph_def.node: + self.assertNotEqual("FusedBatchNorm", node.op) + + with tf.Session() as sess: + _ = importer.import_graph_def(optimized_graph_def, input_map={}, name="optimized2") + optimized_result = sess.run(["optimized2/output:0"]) + + self.assertAllClose(original_result, optimized_result, rtol=1e-04, atol=1e-06) + + @test_util.run_deprecated_v1 + def testFuseDecomposedBatchNorm_NonMatchCase(self): + for test_rehape in [False, True]: + original_graph_def, original_result = self.create_base_for_fuse_batchnorm("NO_MATCH", test_rehape) + + # Test for not to fuse ops if graph has same types of ops but pattern mismatch + optimized_graph_def = FuseDecomposedBNOptimizer(original_graph_def).do_transformation() + + batchnorm_count, math_op_count = self.count_batchnorm_relavant_ops(optimized_graph_def) + self.assertEqual(batchnorm_count, 0) + self.assertEqual(math_op_count, 7) + + with tf.Session() as sess: + _ = importer.import_graph_def(optimized_graph_def, input_map={}, name="optimized") + 
optimized_result = sess.run(["optimized/output:0"]) + + self.assertAllClose(original_result, optimized_result) - def count_batchnorm_relavant_ops(self, graph_def): - """Return the count of FusedBatchNorm op and the count of primitive - ops which may make up batchnorm computation in a given graph. - """ - batchnorm_count = 0 - decompose_count = 0 - for node in graph_def.node: - if node.op == "FusedBatchNorm": - batchnorm_count += 1 - if node.op in ["Add", "Rsqrt", "Mul", "Sub"]: - decompose_count += 1 - return batchnorm_count, decompose_count - - @test_util.run_deprecated_v1 - def create_base_for_fuse_batchnorm(self, pattern_match_mode="MATCH_ALL", - use_reshape=True): - """Create testing graph and compute the result from original graph. - Args: - pattern_match_mode: A label string to indicate which batchnorm composition - pattern to create in the resulting graph. - "MATCH_ALL" - Create a graph matching the decomposed batchnorm pattern - with full set of primitive ops. - "MATCH_NO_GAMMA" - Create a graph matching the decomposed batchnorm - pattern when gamma factor is 1 and multiplication - with gamma is omitted. - "NO_MATCH" - Create a graph with same set of primitive ops which makes - up the decomposed batchnorm, but not matching the pattern. - Returns: - A GraphDef as original graph to run the decomposed batchnorm test cases. - Computation result from executing the original graph defined by GraphDef. - """ - ops.reset_default_graph() - with tf.Session() as sess: - inputs = [1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6] - input_op = constant_op.constant( - np.array(inputs), shape=[1, 1, 6, 2], dtype=dtypes.float32) - weights = [1, 2, 3, 4, 0.1, 0.2, 0.3, 0.4] - weights_op = constant_op.constant( - np.array(weights), shape=[1, 2, 2, 2], dtype=dtypes.float32) - conv_op = nn_ops.conv2d( - input_op, weights_op, [1, 1, 1, 1], padding="SAME", name="conv_op") - - const_op_1 = constant_op.constant( - np.array([0.25, 0.5]), shape=[2], dtype=dtypes.float32) - if use_reshape: - const_op_1 = array_ops.reshape(const_op_1, shape=[1, 1, 1, 2]) - const_op_2 = constant_op.constant(0.00001, dtype=dtypes.float32) - const_op_3 = constant_op.constant( - np.array([10, 20]), shape=[2], dtype=dtypes.float32) - if use_reshape: - const_op_3 = array_ops.reshape(const_op_3, shape=[1, 1, 1, 2]) - const_op_4 = constant_op.constant( - np.array([0.1, 0.6]), shape=[2], dtype=dtypes.float32) - if use_reshape: - const_op_4 = array_ops.reshape(const_op_4, shape=[1, 1, 1, 2]) - - add_op_1 = gen_math_ops.add(const_op_1, const_op_2) - rsqrt_op = math_ops.rsqrt(add_op_1) - - variable_op = None - if pattern_match_mode == "MATCH_NO_GAMMA": - variable_op = rsqrt_op - else: - const_op_5 = constant_op.constant( - np.array([1.0, 2.0]), shape=[2], dtype=dtypes.float32) - if use_reshape: - const_op_5 = array_ops.reshape(const_op_5, shape=[1, 1, 1, 2]) - variable_op = math_ops.multiply(rsqrt_op, const_op_5) - - mul_op_1 = math_ops.multiply(conv_op, variable_op) - - mul_op_2 = None - if pattern_match_mode == "NO_MATCH": - const_op_6 = constant_op.constant( - np.array([0.2, 0.5]), shape=[2], dtype=dtypes.float32) - mul_op_2 = math_ops.multiply(const_op_3, const_op_6) - else: - mul_op_2 = math_ops.multiply(const_op_3, variable_op) - - sub_op = math_ops.subtract(const_op_4, mul_op_2) - gen_math_ops.add(mul_op_1, sub_op, name="output") - - test_util.set_producer_version(ops.get_default_graph(), 8) - - original_graph = sess.graph_def - original_result = sess.run(["output:0"]) - - return original_graph, original_result - def assertAllClose(self, 
first, second, rtol=1e-7, atol=0): - first_array = np.array(first) - second_array = np.array(second) - np.testing.assert_allclose(first_array, second_array, rtol, atol) - - @test_util.run_deprecated_v1 - def testFuseDecomposedBatchNorm_MatchAll(self): - for test_rehape in [False, True]: - original_graph_def, original_result = self.create_base_for_fuse_batchnorm( - "MATCH_ALL", test_rehape) - - # Test correctness of fusing individual ops to FusedBatchNorm - optimized_graph_def = \ - FuseDecomposedBNOptimizer(original_graph_def).do_transformation() - - batchnorm_count, decompose_count = self.count_batchnorm_relavant_ops( - optimized_graph_def) - self.assertEqual(batchnorm_count, 1) - self.assertEqual(decompose_count, 0) - - with tf.Session() as sess: - _ = importer.import_graph_def( - optimized_graph_def, input_map={}, name="optimized") - optimized_result = sess.run(["optimized/output:0"]) - - self.assertAllClose(original_result, optimized_result) - - # Test correctness of fusing individual ops to FusedBatchNorm followed by - # folding FusedBatchNorm - optimized_graph_def = optimize_for_inference_lib.fold_batch_norms( - optimized_graph_def) - for node in optimized_graph_def.node: - self.assertNotEqual("FusedBatchNorm", node.op) - - with tf.Session() as sess: - _ = importer.import_graph_def( - optimized_graph_def, input_map={}, name="optimized2") - optimized_result = sess.run(["optimized2/output:0"]) - - self.assertAllClose(original_result, optimized_result, - rtol=1e-04, atol=1e-06) - - @test_util.run_deprecated_v1 - def testFuseDecomposedBatchNorm_MatchNoGamma(self): - for test_rehape in [False, True]: - original_graph_def, original_result = self.create_base_for_fuse_batchnorm( - "MATCH_NO_GAMMA", test_rehape) - - # Test correctness of fusing individual ops to FusedBatchNorm - optimized_graph_def = \ - FuseDecomposedBNOptimizer(original_graph_def).do_transformation() - - batchnorm_count, decompose_count = self.count_batchnorm_relavant_ops( - optimized_graph_def) - self.assertEqual(batchnorm_count, 1) - self.assertEqual(decompose_count, 0) - - with tf.Session() as sess: - _ = importer.import_graph_def( - optimized_graph_def, input_map={}, name="optimized") - optimized_result = sess.run(["optimized/output:0"]) - - self.assertAllClose(original_result, optimized_result) - - # Test correctness of fusing individual ops to FusedBatchNorm followed by - # folding FusedBatchNorm - optimized_graph_def = optimize_for_inference_lib.fold_batch_norms( - optimized_graph_def) - for node in optimized_graph_def.node: - self.assertNotEqual("FusedBatchNorm", node.op) - - with tf.Session() as sess: - _ = importer.import_graph_def( - optimized_graph_def, input_map={}, name="optimized2") - optimized_result = sess.run(["optimized2/output:0"]) - - self.assertAllClose(original_result, optimized_result, - rtol=1e-04, atol=1e-06) - - @test_util.run_deprecated_v1 - def testFuseDecomposedBatchNorm_NonMatchCase(self): - for test_rehape in [False, True]: - original_graph_def, original_result = self.create_base_for_fuse_batchnorm( - "NO_MATCH", test_rehape) - - # Test for not to fuse ops if graph has same types of ops but pattern mismatch - optimized_graph_def = \ - FuseDecomposedBNOptimizer(original_graph_def).do_transformation() - - batchnorm_count, math_op_count = self.count_batchnorm_relavant_ops( - optimized_graph_def) - self.assertEqual(batchnorm_count, 0) - self.assertEqual(math_op_count, 7) - - with tf.Session() as sess: - _ = importer.import_graph_def( - optimized_graph_def, input_map={}, name="optimized") - 
optimized_result = sess.run(["optimized/output:0"]) - - self.assertAllClose(original_result, optimized_result) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_fuse_gelu.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_fuse_gelu.py index c8f81efb9d0..4a0f5d6eba1 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_fuse_gelu.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_fuse_gelu.py @@ -2,64 +2,74 @@ # -*- coding: utf-8 -*- # import unittest -from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.fuse_gelu import FuseGeluOptimizer -from neural_compressor.adaptor.tf_utils.util import disable_random -from neural_compressor.adaptor.tf_utils.util import version1_lt_version2 import tensorflow as tf from tensorflow.compat.v1 import graph_util -@unittest.skipIf(tf.version.VERSION.find('up') == -1, - "Only supports tf 1.15.up2 and 1.15.up3 and SprBase") +from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.fuse_gelu import FuseGeluOptimizer +from neural_compressor.adaptor.tf_utils.util import disable_random, version1_lt_version2 + + +@unittest.skipIf(tf.version.VERSION.find("up") == -1, "Only supports tf 1.15.up2 and 1.15.up3 and SprBase") class TestGeluFusion(unittest.TestCase): def gelu(self, input_tensor, mul_value=0.5, addv2_value=1.0, sqrt_value=2.0): cdf = mul_value * (addv2_value + tf.math.erf(input_tensor / tf.sqrt(sqrt_value))) return input_tensor * cdf - def gelu_enable_approximation(self, input_tensor, - another_mul_value=0.5, - mul1_value=0.044715, - addv2_value=1.0, - mul2_value=0.7978845608028654, - pow_value=3): + def gelu_enable_approximation( + self, + input_tensor, + another_mul_value=0.5, + mul1_value=0.044715, + addv2_value=1.0, + mul2_value=0.7978845608028654, + pow_value=3, + ): coeff = tf.cast(mul1_value, input_tensor.dtype) - return another_mul_value * input_tensor * ( - addv2_value + tf.tanh(mul2_value * - (input_tensor + coeff * tf.pow(input_tensor, pow_value)))) - - - def gelu_enable_approximation_varaint(self, input_tensor, - another_mul_value=0.5, - mul1_value=0.044715, - addv2_value=1.0, - mul2_value=0.7978845608028654, - pow_value=3): + return ( + another_mul_value + * input_tensor + * (addv2_value + tf.tanh(mul2_value * (input_tensor + coeff * tf.pow(input_tensor, pow_value)))) + ) + + def gelu_enable_approximation_varaint( + self, + input_tensor, + another_mul_value=0.5, + mul1_value=0.044715, + addv2_value=1.0, + mul2_value=0.7978845608028654, + pow_value=3, + ): coeff = tf.cast(mul1_value, input_tensor.dtype) cdf = another_mul_value * ( - addv2_value + tf.tanh(mul2_value * - (input_tensor + coeff * tf.pow(input_tensor, pow_value)))) + addv2_value + tf.tanh(mul2_value * (input_tensor + coeff * tf.pow(input_tensor, pow_value))) + ) return input_tensor * cdf - def gelu_disable_approximation(self, input_tensor, - another_add_value=0.5, - mul1_value=0.044715, - addv2_value=1.0, - mul2_value=0.7978845608028654, - pow_value=3): + def gelu_disable_approximation( + self, + input_tensor, + another_add_value=0.5, + mul1_value=0.044715, + addv2_value=1.0, + mul2_value=0.7978845608028654, + pow_value=3, + ): coeff = tf.cast(mul1_value, input_tensor.dtype) return (another_add_value + input_tensor) * ( - addv2_value + tf.tanh(mul2_value * - (input_tensor + coeff * tf.pow(input_tensor, pow_value)))) + addv2_value + tf.tanh(mul2_value * (input_tensor + coeff * tf.pow(input_tensor, pow_value))) + ) @disable_random() 
def test_gelu_disable_approximation_fusion(self): x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [32], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [32], initializer=tf.compat.v1.random_normal_initializer()) conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME") conv_bias = tf.math.add(conv1, conv_bias) @@ -68,15 +78,14 @@ def test_gelu_disable_approximation_fusion(self): with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[relu.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[relu.name.split(":")[0]] + ) output_graph_def = FuseGeluOptimizer(output_graph_def).do_transformation() found_gelu = False for i in output_graph_def.node: - if i.op == 'Gelu': + if i.op == "Gelu": found_gelu = True break @@ -84,13 +93,12 @@ def test_gelu_disable_approximation_fusion(self): @disable_random() def test_gelu_approximation_fusion(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [32], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [32], initializer=tf.compat.v1.random_normal_initializer()) conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME") conv_bias = tf.math.add(conv1, conv_bias) @@ -99,15 +107,14 @@ def test_gelu_approximation_fusion(self): with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[relu.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[relu.name.split(":")[0]] + ) output_graph_def = FuseGeluOptimizer(output_graph_def).do_transformation() found_gelu = False for i in output_graph_def.node: - if i.op == 'Gelu': + if i.op == "Gelu": found_gelu = True break @@ -115,13 +122,12 @@ def test_gelu_approximation_fusion(self): @disable_random() def test_gelu_approximation_fusion_varaint(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [32], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [32], initializer=tf.compat.v1.random_normal_initializer()) conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME") conv_bias = tf.math.add(conv1, conv_bias) @@ -130,28 +136,27 @@ def 
test_gelu_approximation_fusion_varaint(self): with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[relu.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[relu.name.split(":")[0]] + ) output_graph_def = FuseGeluOptimizer(output_graph_def).do_transformation() found_gelu = False for i in output_graph_def.node: - if i.op == 'Gelu': + if i.op == "Gelu": found_gelu = True break self.assertEqual(found_gelu, True) + @disable_random() def test_gelu_approximation_fusion_with_invalid_pow_value(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [32], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [32], initializer=tf.compat.v1.random_normal_initializer()) conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME") conv_bias = tf.math.add(conv1, conv_bias) @@ -160,15 +165,14 @@ def test_gelu_approximation_fusion_with_invalid_pow_value(self): with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[relu.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[relu.name.split(":")[0]] + ) output_graph_def = FuseGeluOptimizer(output_graph_def).do_transformation() found_gelu = False for i in output_graph_def.node: - if i.op == 'Gelu': + if i.op == "Gelu": found_gelu = True break @@ -176,13 +180,12 @@ def test_gelu_approximation_fusion_with_invalid_pow_value(self): @disable_random() def test_gelu_approximation_fusion_with_invalid_mul2_value(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [32], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [32], initializer=tf.compat.v1.random_normal_initializer()) conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME") conv_bias = tf.math.add(conv1, conv_bias) @@ -191,15 +194,14 @@ def test_gelu_approximation_fusion_with_invalid_mul2_value(self): with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[relu.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[relu.name.split(":")[0]] + ) output_graph_def = FuseGeluOptimizer(output_graph_def).do_transformation() found_gelu = False for i in output_graph_def.node: - if i.op == 'Gelu': + if i.op == "Gelu": found_gelu = True break @@ -207,13 +209,12 @@ def test_gelu_approximation_fusion_with_invalid_mul2_value(self): @disable_random() def 
test_gelu_approximation_fusion_with_invalid_addv2_value(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [32], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [32], initializer=tf.compat.v1.random_normal_initializer()) conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME") conv_bias = tf.math.add(conv1, conv_bias) @@ -222,15 +223,14 @@ def test_gelu_approximation_fusion_with_invalid_addv2_value(self): with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[relu.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[relu.name.split(":")[0]] + ) output_graph_def = FuseGeluOptimizer(output_graph_def).do_transformation() found_gelu = False for i in output_graph_def.node: - if i.op == 'Gelu': + if i.op == "Gelu": found_gelu = True break @@ -238,13 +238,12 @@ def test_gelu_approximation_fusion_with_invalid_addv2_value(self): @disable_random() def test_gelu_approximation_fusion_with_invalid_mul1_value(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [32], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [32], initializer=tf.compat.v1.random_normal_initializer()) conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME") conv_bias = tf.math.add(conv1, conv_bias) @@ -254,15 +253,14 @@ def test_gelu_approximation_fusion_with_invalid_mul1_value(self): with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[relu.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[relu.name.split(":")[0]] + ) output_graph_def = FuseGeluOptimizer(output_graph_def).do_transformation() found_gelu = False for i in output_graph_def.node: - if i.op == 'Gelu': + if i.op == "Gelu": found_gelu = True break @@ -270,13 +268,12 @@ def test_gelu_approximation_fusion_with_invalid_mul1_value(self): @disable_random() def test_gelu_approximation_fusion_with_invalid_another_mul(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [32], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [32], initializer=tf.compat.v1.random_normal_initializer()) conv1 = tf.nn.conv2d(x, 
conv_weights, strides=[1, 1, 1, 1], padding="SAME") conv_bias = tf.math.add(conv1, conv_bias) @@ -286,15 +283,14 @@ def test_gelu_approximation_fusion_with_invalid_another_mul(self): with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[relu.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[relu.name.split(":")[0]] + ) output_graph_def = FuseGeluOptimizer(output_graph_def).do_transformation() found_gelu = False for i in output_graph_def.node: - if i.op == 'Gelu': + if i.op == "Gelu": found_gelu = True break @@ -302,13 +298,12 @@ def test_gelu_approximation_fusion_with_invalid_another_mul(self): @disable_random() def test_gelu_fusion_with_invalid_sqrt(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [32], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [32], initializer=tf.compat.v1.random_normal_initializer()) conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME") conv_bias = tf.math.add(conv1, conv_bias) @@ -316,15 +311,14 @@ def test_gelu_fusion_with_invalid_sqrt(self): with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[gelu.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[gelu.name.split(":")[0]] + ) output_graph_def = FuseGeluOptimizer(output_graph_def).do_transformation() found_gelu = False for i in output_graph_def.node: - if i.op == 'Gelu': + if i.op == "Gelu": found_gelu = True break @@ -332,13 +326,12 @@ def test_gelu_fusion_with_invalid_sqrt(self): @disable_random() def test_gelu_fusion_with_invalid_addv2(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [32], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [32], initializer=tf.compat.v1.random_normal_initializer()) conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME") conv_bias = tf.math.add(conv1, conv_bias) @@ -346,15 +339,14 @@ def test_gelu_fusion_with_invalid_addv2(self): with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[gelu.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[gelu.name.split(":")[0]] + ) output_graph_def = FuseGeluOptimizer(output_graph_def).do_transformation() found_gelu = False for i in output_graph_def.node: - if i.op == 'Gelu': + if i.op == "Gelu": found_gelu = True break @@ -362,13 +354,12 @@ def 
test_gelu_fusion_with_invalid_addv2(self): @disable_random() def test_gelu_fusion_with_invalid_mul(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [32], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [32], initializer=tf.compat.v1.random_normal_initializer()) conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME") conv_bias = tf.math.add(conv1, conv_bias) @@ -376,15 +367,14 @@ def test_gelu_fusion_with_invalid_mul(self): with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[gelu.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[gelu.name.split(":")[0]] + ) output_graph_def = FuseGeluOptimizer(output_graph_def).do_transformation() found_gelu = False for i in output_graph_def.node: - if i.op == 'Gelu': + if i.op == "Gelu": found_gelu = True break @@ -392,13 +382,12 @@ def test_gelu_fusion_with_invalid_mul(self): @disable_random() def test_gelu_fusion(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [32], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [32], initializer=tf.compat.v1.random_normal_initializer()) conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME") conv_bias = tf.math.add(conv1, conv_bias) @@ -407,20 +396,19 @@ def test_gelu_fusion(self): with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[relu.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[relu.name.split(":")[0]] + ) output_graph_def = FuseGeluOptimizer(output_graph_def).do_transformation() found_gelu = False for i in output_graph_def.node: - if i.op == 'Gelu': + if i.op == "Gelu": found_gelu = True break self.assertEqual(found_gelu, True) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_input_output.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_input_output.py index 2605a445c0c..1d320201488 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_input_output.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_input_output.py @@ -1,17 +1,19 @@ # # -*- coding: utf-8 -*- # -import unittest import os import platform +import unittest + +import tensorflow as tf import yaml + from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer from neural_compressor.adaptor.tf_utils.util import get_input_output_node_names -import tensorflow as tf def build_fake_yaml(): - fake_yaml = ''' + fake_yaml 
= """ model: name: fake_yaml framework: tensorflow @@ -34,14 +36,15 @@ def build_fake_yaml(): relative: 0.1 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() + def build_fake_yaml_2(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -59,62 +62,65 @@ def build_fake_yaml_2(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml_2.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml_2.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() + def build_fake_model_1(): - with tf.compat.v1.Session(graph=tf.Graph()) as sess: - dataset = tf.data.Dataset.range(10) - ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) - iter_tensors = ds_iterator.get_next() - iter_tensors -= tf.compat.v1.constant([5], dtype=tf.int64) - final_node = tf.nn.relu(iter_tensors, name='op_to_store') - sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[final_node.name.split(':')[0]]) - - with tf.io.gfile.GFile('model_1.pb', mode='wb') as f: - f.write(constant_graph.SerializeToString()) + with tf.compat.v1.Session(graph=tf.Graph()) as sess: + dataset = tf.data.Dataset.range(10) + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) + iter_tensors = ds_iterator.get_next() + iter_tensors -= tf.compat.v1.constant([5], dtype=tf.int64) + final_node = tf.nn.relu(iter_tensors, name="op_to_store") + sess.run(tf.compat.v1.global_variables_initializer()) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess=sess, input_graph_def=sess.graph_def, output_node_names=[final_node.name.split(":")[0]] + ) + + with tf.io.gfile.GFile("model_1.pb", mode="wb") as f: + f.write(constant_graph.SerializeToString()) + def build_fake_model_2(): - with tf.compat.v1.Session(graph=tf.Graph()) as sess: - final_node = tf.no_op() - sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[final_node.name.split(':')[0]]) - - with tf.io.gfile.GFile('model_2.pb', mode='wb') as f: - f.write(constant_graph.SerializeToString()) - + with tf.compat.v1.Session(graph=tf.Graph()) as sess: + final_node = tf.no_op() + sess.run(tf.compat.v1.global_variables_initializer()) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess=sess, input_graph_def=sess.graph_def, output_node_names=[final_node.name.split(":")[0]] + ) + + with tf.io.gfile.GFile("model_2.pb", mode="wb") as f: + f.write(constant_graph.SerializeToString()) + + def build_fake_model_3(): - with tf.compat.v1.Session(graph=tf.Graph()) as sess: - x = [1, 2, 3] - final_node = tf.Assert(tf.less_equal(tf.reduce_max(x), 3), x) - sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[final_node.name.split(':')[0]]) - - with tf.io.gfile.GFile('model_3.pb', mode='wb') as f: - f.write(constant_graph.SerializeToString()) + with tf.compat.v1.Session(graph=tf.Graph()) as sess: + x = [1, 2, 3] + final_node = 
tf.Assert(tf.less_equal(tf.reduce_max(x), 3), x) + sess.run(tf.compat.v1.global_variables_initializer()) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess=sess, input_graph_def=sess.graph_def, output_node_names=[final_node.name.split(":")[0]] + ) + + with tf.io.gfile.GFile("model_3.pb", mode="wb") as f: + f.write(constant_graph.SerializeToString()) + class TestGraphInputOutputDetection(unittest.TestCase): tf.compat.v1.disable_v2_behavior() - mb_fp32_pb_url = 'https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/mobilenet_v1_1.0_224_frozen.pb' - pb_path = '/tmp/.neural_compressor/mobilenet_fp32.pb' + mb_fp32_pb_url = ( + "https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/mobilenet_v1_1.0_224_frozen.pb" + ) + pb_path = "/tmp/.neural_compressor/mobilenet_fp32.pb" platform = platform.system().lower() if platform == "windows": - pb_path = 'C:\\tmp\\.neural_compressor\\mobilenet_fp32.pb' - inputs = ['input'] - outputs = ['MobilenetV1/Predictions/Reshape_1'] + pb_path = "C:\\tmp\\.neural_compressor\\mobilenet_fp32.pb" + inputs = ["input"] + outputs = ["MobilenetV1/Predictions/Reshape_1"] @classmethod def setUpClass(self): @@ -122,10 +128,13 @@ def setUpClass(self): build_fake_yaml_2() if not os.path.exists(self.pb_path): if self.platform == "linux": - os.system("mkdir -p /tmp/.neural_compressor && wget {} -O {} ".format(self.mb_fp32_pb_url, self.pb_path)) + os.system( + "mkdir -p /tmp/.neural_compressor && wget {} -O {} ".format(self.mb_fp32_pb_url, self.pb_path) + ) elif self.platform == "windows": - os.system('md C:\\tmp\.neural_compressor && cd C:\\tmp\.neural_compressor') + os.system("md C:\\tmp\.neural_compressor && cd C:\\tmp\.neural_compressor") from urllib import request + request.urlretrieve(self.mb_fp32_pb_url) self.input_graph = tf.compat.v1.GraphDef() with open(self.pb_path, "rb") as f: @@ -137,11 +146,11 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') - os.remove('fake_yaml_2.yaml') - os.remove('model_1.pb') - os.remove('model_2.pb') - os.remove('model_3.pb') + os.remove("fake_yaml.yaml") + os.remove("fake_yaml_2.yaml") + os.remove("model_1.pb") + os.remove("model_2.pb") + os.remove("model_3.pb") def test_identify_input_output(self): g = GraphAnalyzer() @@ -155,20 +164,20 @@ def test_identify_input_output(self): self.assertEqual(outputs, self.outputs) input_graph = tf.compat.v1.GraphDef() - with open('model_1.pb', "rb") as f: + with open("model_1.pb", "rb") as f: input_graph.ParseFromString(f.read()) g = GraphAnalyzer() g.graph = input_graph g.parse_graph() inputs, outputs = g.get_graph_input_output() - self.assertEqual(inputs, ['sub']) - self.assertEqual(outputs, ['op_to_store']) + self.assertEqual(inputs, ["sub"]) + self.assertEqual(outputs, ["op_to_store"]) inputs, outputs = get_input_output_node_names(input_graph) - self.assertEqual(inputs, ['sub']) - self.assertEqual(outputs, ['op_to_store']) + self.assertEqual(inputs, ["sub"]) + self.assertEqual(outputs, ["op_to_store"]) input_graph = tf.compat.v1.GraphDef() - with open('model_2.pb', "rb") as f: + with open("model_2.pb", "rb") as f: input_graph.ParseFromString(f.read()) g = GraphAnalyzer() g.graph = input_graph @@ -181,7 +190,7 @@ def test_identify_input_output(self): self.assertEqual(outputs, []) input_graph = tf.compat.v1.GraphDef() - with open('model_3.pb', "rb") as f: + with open("model_3.pb", "rb") as f: input_graph.ParseFromString(f.read()) g = GraphAnalyzer() g.graph = input_graph @@ -201,8 +210,8 @@ def 
test_no_input_output_config(self): float_graph_def = g.dump_graph() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(20, 224, 224, 3), label=True) + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(20, 224, 224, 3), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def @@ -217,15 +226,16 @@ def test_invalid_input_output_config(self): float_graph_def = g.dump_graph() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml_2.yaml') - dataset = quantizer.dataset('dummy', shape=(20, 224, 224, 3), label=True) + quantizer = Quantization("fake_yaml_2.yaml") + dataset = quantizer.dataset("dummy", shape=(20, 224, 224, 3), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def model = quantizer.fit() # will detect the right inputs/outputs - self.assertNotEqual(model.input_node_names, ['x']) - self.assertNotEqual(model.output_node_names, ['op_to_store']) + self.assertNotEqual(model.input_node_names, ["x"]) + self.assertNotEqual(model.output_node_names, ["op_to_store"]) + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_insert_logging.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_insert_logging.py index 32dfd5a0c33..6d2e45ae0c6 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_insert_logging.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_insert_logging.py @@ -1,17 +1,18 @@ - -import unittest import os +import unittest + +import tensorflow as tf import yaml +from tensorflow.compat.v1 import graph_util + import neural_compressor -from neural_compressor.adaptor.tf_utils.util import disable_random -from neural_compressor.adaptor.tf_utils.quantize_graph.quantize_graph_for_intel_cpu import QuantizeGraphForIntel from neural_compressor.adaptor.tensorflow import TensorflowQuery +from neural_compressor.adaptor.tf_utils.quantize_graph.quantize_graph_for_intel_cpu import QuantizeGraphForIntel +from neural_compressor.adaptor.tf_utils.util import disable_random -import tensorflow as tf -from tensorflow.compat.v1 import graph_util def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -40,12 +41,13 @@ def build_fake_yaml(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() + class TestTensorflowGraphInsertLogging(unittest.TestCase): @classmethod def setUpClass(self): @@ -53,35 +55,39 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") @disable_random() def test_graph_insert_logging(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") top_relu = tf.nn.relu(x) - conv_weights = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], 
initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(top_relu, conv_weights, strides=[1, 2, 2, 1], padding="SAME") - normed = tf.nn.bias_add(conv, tf.constant([3.0, 1.2,1,2,3,4,5,6,7,8,9,0,12,2,3,4]), name='op_to_store') + normed = tf.nn.bias_add( + conv, tf.constant([3.0, 1.2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 12, 2, 3, 4]), name="op_to_store" + ) - out_name = normed.name.split(':')[0] + out_name = normed.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) - inputs = [x.name.split(':')[0]] + inputs = [x.name.split(":")[0]] outputs = [out_name] op_wise_config = { - "Conv2D": (False, 'minmax', False, 7.0), + "Conv2D": (False, "minmax", False, 7.0), } - op_wise_sequences = TensorflowQuery(local_config_file=os.path.join( - os.path.dirname(neural_compressor.__file__), "adaptor/tensorflow.yaml")).get_eightbit_patterns() - output_graph, _, _ = QuantizeGraphForIntel(output_graph_def, inputs, outputs, - op_wise_config, op_wise_sequences, 'cpu').do_transform() + op_wise_sequences = TensorflowQuery( + local_config_file=os.path.join(os.path.dirname(neural_compressor.__file__), "adaptor/tensorflow.yaml") + ).get_eightbit_patterns() + output_graph, _, _ = QuantizeGraphForIntel( + output_graph_def, inputs, outputs, op_wise_config, op_wise_sequences, "cpu" + ).do_transform() offset_map = { "QuantizedConv2DWithBiasSumAndRelu": 3, @@ -91,37 +97,35 @@ def test_graph_insert_logging(self): target_conv_op = [] _print_node_mapping = {} from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper + sorted_graph = QuantizeGraphHelper().get_sorted_graph(output_graph, inputs, outputs) for node in output_graph.node: if node.op in offset_map: - target_conv_op.append(node.name.split('_eightbit_')[0]) + target_conv_op.append(node.name.split("_eightbit_")[0]) - node_name_mapping = { - node.name: node for node in output_graph.node if node.op != "Const" - } + node_name_mapping = {node.name: node for node in output_graph.node if node.op != "Const"} output_node_names = [] for i in target_conv_op: - if node_name_mapping[i + "_eightbit_quantized_conv"].op == \ - 'QuantizedConv2DWithBias': + if node_name_mapping[i + "_eightbit_quantized_conv"].op == "QuantizedConv2DWithBias": output_node_names.append(node_name_mapping[i + "_eightbit_quantized_conv"].name) from neural_compressor.adaptor.tf_utils.transform_graph.insert_logging import InsertLogging - graph_def = InsertLogging(output_graph, - node_name_list=output_node_names, - message="__KL:", - summarize=-1, - dump_fp32=False).do_transformation() + + graph_def = InsertLogging( + output_graph, node_name_list=output_node_names, message="__KL:", summarize=-1, dump_fp32=False + ).do_transformation() found_conv_fusion = False for i in output_graph.node: - if i.op.find('QuantizedConv2D') != -1: + if i.op.find("QuantizedConv2D") != -1: found_conv_fusion = True break self.assertEqual(found_conv_fusion, True) + if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_library_detection.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_library_detection.py index 6b3296d017a..b915ed80a31 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_library_detection.py +++ 
b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_library_detection.py @@ -1,48 +1,54 @@ # # -*- coding: utf-8 -*- # -import unittest import os import platform +import unittest + +import tensorflow as tf + import neural_compressor -from neural_compressor.adaptor.tf_utils.util import read_graph -from neural_compressor.adaptor.tf_utils.graph_converter import GraphConverter from neural_compressor.adaptor.tensorflow import TensorflowQuery +from neural_compressor.adaptor.tf_utils.graph_converter import GraphConverter +from neural_compressor.adaptor.tf_utils.util import read_graph from neural_compressor.model import Model -import tensorflow as tf + class TestGraphLibraryDetection(unittest.TestCase): - efficientnet_b0_model_url = 'https://raw.githubusercontent.com/SkyAI/inference_benchmark/435c7ca2577830025ca5f6cbce8480db16f76a61/efficientnet-b0.pb' - pb_path = '/tmp/.neural_compressor/efficientnet-b0.pb' + efficientnet_b0_model_url = "https://raw.githubusercontent.com/SkyAI/inference_benchmark/435c7ca2577830025ca5f6cbce8480db16f76a61/efficientnet-b0.pb" + pb_path = "/tmp/.neural_compressor/efficientnet-b0.pb" if platform.system().lower() == "windows": - pb_path = 'C:\\tmp\\.neural_compressor\\efficientnet-b0.pb' + pb_path = "C:\\tmp\\.neural_compressor\\efficientnet-b0.pb" + @classmethod def setUpClass(self): if not os.path.exists(self.pb_path) and platform.system().lower() == "linux": - os.system("mkdir -p /tmp/.neural_compressor && wget {} -O {} ".format(self.efficientnet_b0_model_url, self.pb_path)) + os.system( + "mkdir -p /tmp/.neural_compressor && wget {} -O {} ".format( + self.efficientnet_b0_model_url, self.pb_path + ) + ) def test_tensorflow_graph_library_detection(self): - tf.compat.v1.disable_eager_execution() - op_wise_sequences = TensorflowQuery(local_config_file=os.path.join( - os.path.dirname(neural_compressor.__file__), "adaptor/tensorflow.yaml")).get_eightbit_patterns() + op_wise_sequences = TensorflowQuery( + local_config_file=os.path.join(os.path.dirname(neural_compressor.__file__), "adaptor/tensorflow.yaml") + ).get_eightbit_patterns() - qt_config = {'calib_iteration':1, 'op_wise_config':{}} + qt_config = {"calib_iteration": 1, "op_wise_config": {}} original_graphdef = read_graph(self.pb_path) model = Model(self.pb_path) - model.name = 'test' - model.input_tensor_names = ['input_tensor'] - model.output_tensor_names = ['softmax_tensor'] - model.workspace_path = '/tmp/test.pb' - - converter = GraphConverter(model, - int8_sequences=op_wise_sequences, - qt_config=qt_config - ) + model.name = "test" + model.input_tensor_names = ["input_tensor"] + model.output_tensor_names = ["softmax_tensor"] + model.workspace_path = "/tmp/test.pb" + + converter = GraphConverter(model, int8_sequences=op_wise_sequences, qt_config=qt_config) converted_graph = converter.convert() self.assertEqual(converted_graph.graph_def.library, original_graphdef.library) + if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_matmul_fusion.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_matmul_fusion.py index 658bc22ba16..c17d0facaa6 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_matmul_fusion.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_matmul_fusion.py @@ -3,17 +3,19 @@ # import os import unittest -import yaml + import numpy as np +import tensorflow.compat.v1 as tf +import yaml +from tensorflow.python.framework import dtypes + import neural_compressor from neural_compressor.adaptor.tensorflow import 
TensorflowQuery from neural_compressor.adaptor.tf_utils.util import disable_random -import tensorflow.compat.v1 as tf -from tensorflow.python.framework import dtypes def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -33,9 +35,9 @@ def build_fake_yaml(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() @@ -44,40 +46,41 @@ class TestGraphMatMulFusion(unittest.TestCase): @classmethod def setUpClass(self): build_fake_yaml() - self.op_wise_sequences = TensorflowQuery(local_config_file=os.path.join( - os.path.dirname(neural_compressor.__file__), "adaptor/tensorflow.yaml")).get_eightbit_patterns() + self.op_wise_sequences = TensorflowQuery( + local_config_file=os.path.join(os.path.dirname(neural_compressor.__file__), "adaptor/tensorflow.yaml") + ).get_eightbit_patterns() @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") @disable_random() def test_matmul_biasadd_relu_requantize_fusion(self): g = tf.Graph() with g.as_default(): - x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) z = tf.matmul(x, y) z = tf.nn.bias_add(z, [1, 2]) - z = tf.nn.relu(z, name='op_to_store') + z = tf.nn.relu(z, name="op_to_store") found_quantized_matmul = False with tf.Session() as sess: sess.run(z, feed_dict={x: x_data, y: y_data}) float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == 'QuantizedMatMulWithBiasAndReluAndRequantize': + if i.op == "QuantizedMatMulWithBiasAndReluAndRequantize": found_quantized_matmul = True break self.assertEqual(found_quantized_matmul, True) @@ -86,20 +89,20 @@ def test_matmul_biasadd_relu_requantize_fusion(self): def test_first_matmul_biasadd_relu_fusion(self): x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) z = tf.matmul(x, y) z = tf.nn.bias_add(z, [1, 2]) - z = tf.nn.relu(z, name='op_to_store') + z = tf.nn.relu(z, name="op_to_store") with tf.Session() as sess: - sess.run(z, feed_dict={x: x_data, y: y_data}) float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = 
common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def @@ -107,7 +110,11 @@ def test_first_matmul_biasadd_relu_fusion(self): found_quantized_matmul = False for i in output_graph.graph_def.node: - if i.op == 'QuantizeV2' and i.name == 'MatMul_eightbit_quantize_x' and i.attr["T"].type == dtypes.quint8: + if ( + i.op == "QuantizeV2" + and i.name == "MatMul_eightbit_quantize_x" + and i.attr["T"].type == dtypes.quint8 + ): found_quantized_matmul = True break @@ -117,14 +124,13 @@ def test_first_matmul_biasadd_relu_fusion(self): def test_matmul_biasadd_requantize_dequantize_fusion(self): g = tf.Graph() with g.as_default(): - x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) z = tf.matmul(x, y) z = tf.nn.bias_add(z, [1, 2]) - z = tf.identity(z, name='op_to_store') + z = tf.identity(z, name="op_to_store") found_quantized_matmul = False if tf.version.VERSION < "2.2.0": found_quantized_matmul = True @@ -134,15 +140,16 @@ def test_matmul_biasadd_requantize_dequantize_fusion(self): float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == 'QuantizedMatMulWithBiasAndDequantize': + if i.op == "QuantizedMatMulWithBiasAndDequantize": found_quantized_matmul = True break self.assertEqual(found_quantized_matmul, True) @@ -151,13 +158,12 @@ def test_matmul_biasadd_requantize_dequantize_fusion(self): def test_matmul_biasadd_requantize_dequantize_last_fusion(self): g = tf.Graph() with g.as_default(): - x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) z = tf.matmul(x, y) - z = tf.nn.bias_add(z, [1, 2], name='op_to_store') + z = tf.nn.bias_add(z, [1, 2], name="op_to_store") found_quantized_matmul = False if tf.version.VERSION < "2.2.0": found_quantized_matmul = True @@ -167,15 +173,16 @@ def test_matmul_biasadd_requantize_dequantize_last_fusion(self): float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == 'QuantizedMatMulWithBiasAndDequantize' and i.name == 'op_to_store': + if i.op == "QuantizedMatMulWithBiasAndDequantize" and i.name == "op_to_store": found_quantized_matmul = True break self.assertEqual(found_quantized_matmul, 
True) @@ -184,13 +191,12 @@ def test_matmul_biasadd_requantize_dequantize_last_fusion(self): def test_disable_matmul_fusion(self): g = tf.Graph() with g.as_default(): - x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) - z = tf.matmul(x, y, name='no_quant_matmul') - z = tf.nn.relu6(z, name='op_to_store') + z = tf.matmul(x, y, name="no_quant_matmul") + z = tf.nn.relu6(z, name="op_to_store") found_quantized_matmul = False with tf.Session() as sess: @@ -198,15 +204,16 @@ def test_disable_matmul_fusion(self): float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == 'QuantizedMatMulWithBiasAndDequantize' and i.name == 'op_to_store': + if i.op == "QuantizedMatMulWithBiasAndDequantize" and i.name == "op_to_store": found_quantized_matmul = True break self.assertEqual(found_quantized_matmul, False) @@ -215,13 +222,12 @@ def test_disable_matmul_fusion(self): def test_disable_matmul_fusion_with_transpose_b_true(self): g = tf.Graph() with g.as_default(): - x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) - z = tf.matmul(x, y, name='no_quant_matmul', transpose_b=True) - z = tf.nn.relu6(z, name='op_to_store') + z = tf.matmul(x, y, name="no_quant_matmul", transpose_b=True) + z = tf.nn.relu6(z, name="op_to_store") found_quantized_matmul = False with tf.Session() as sess: @@ -229,15 +235,16 @@ def test_disable_matmul_fusion_with_transpose_b_true(self): float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == 'QuantizedMatMulWithBiasAndDequantize' and i.name == 'op_to_store': + if i.op == "QuantizedMatMulWithBiasAndDequantize" and i.name == "op_to_store": found_quantized_matmul = True break self.assertEqual(found_quantized_matmul, False) @@ -247,13 +254,12 @@ def test_disable_matmul_fusion_with_transpose_b_true(self): def test_matmul_with_dummy_biasadd(self): g = tf.Graph() with g.as_default(): - x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, 
dtype=tf.float32, shape=[2, 2]) - z = tf.matmul(x, y, name='no_quant_matmul') - z = tf.identity(z, name='op_to_store') + z = tf.matmul(x, y, name="no_quant_matmul") + z = tf.identity(z, name="op_to_store") found_quantized_matmul = True with tf.Session() as sess: @@ -261,15 +267,16 @@ def test_matmul_with_dummy_biasadd(self): float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == 'MatMul': + if i.op == "MatMul": found_quantized_matmul = False break self.assertEqual(found_quantized_matmul, True) @@ -279,13 +286,12 @@ def test_matmul_with_dummy_biasadd(self): def test_matmul_with_nan(self): g = tf.Graph() with g.as_default(): - x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) - nan_array = np.empty((2,2), dtype=np.float32) + nan_array = np.empty((2, 2), dtype=np.float32) nan_array[:] = np.NaN - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') - z = tf.matmul(x, nan_array, name='no_quant_matmul') - z = tf.identity(z, name='op_to_store') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") + z = tf.matmul(x, nan_array, name="no_quant_matmul") + z = tf.identity(z, name="op_to_store") found_quantized_matmul = True with tf.Session() as sess: @@ -293,15 +299,16 @@ def test_matmul_with_nan(self): float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == 'MatMul': + if i.op == "MatMul": found_quantized_matmul = False break self.assertEqual(found_quantized_matmul, True) @@ -310,15 +317,14 @@ def test_matmul_with_nan(self): def test_matmul_with_reshape_transpose(self): g = tf.Graph() with g.as_default(): - x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) transpose = tf.transpose(y, perm=[1, 0]) reshape = tf.reshape(transpose, [2, 2]) - z = tf.matmul(x, reshape, name='no_quant_matmul') - z = tf.nn.bias_add(z, [1, 2], name='op_to_store') + z = tf.matmul(x, reshape, name="no_quant_matmul") + z = tf.nn.bias_add(z, [1, 2], name="op_to_store") found_quantized_matmul = True with tf.Session() as sess: @@ -326,31 +332,31 @@ def test_matmul_with_reshape_transpose(self): float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = 
quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == 'MatMul': + if i.op == "MatMul": found_quantized_matmul = False break self.assertEqual(found_quantized_matmul, True) - @disable_random() def test_matmul_with_add(self): g = tf.Graph() with g.as_default(): x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) transpose = tf.transpose(y, perm=[1, 0]) reshape = tf.reshape(transpose, [2, 2]) - z = tf.matmul(x, reshape, name='no_quant_matmul') - z = tf.math.add(z, [1, 2], name='op_to_store') + z = tf.matmul(x, reshape, name="no_quant_matmul") + z = tf.math.add(z, [1, 2], name="op_to_store") found_quantized_matmul = True with tf.Session() as sess: @@ -358,14 +364,15 @@ def test_matmul_with_add(self): float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == 'MatMul': + if i.op == "MatMul": found_quantized_matmul = False break self.assertEqual(found_quantized_matmul, True) @@ -374,10 +381,9 @@ def test_matmul_with_add(self): def test_matmul_biasadd_requantize_dequantize_fusion_with_softmax(self): g = tf.Graph() with g.as_default(): - x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) z = tf.matmul(x, y) biasadd = tf.nn.bias_add(z, [1, 2]) @@ -388,7 +394,7 @@ def test_matmul_biasadd_requantize_dequantize_fusion_with_softmax(self): biasadd2 = tf.nn.bias_add(matmul1, [1, 1]) - z = tf.nn.softmax(biasadd2, name='op_to_store') + z = tf.nn.softmax(biasadd2, name="op_to_store") found_quantized_matmul = False if tf.version.VERSION < "2.2.0": found_quantized_matmul = False @@ -398,19 +404,20 @@ def test_matmul_biasadd_requantize_dequantize_fusion_with_softmax(self): float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() - count=0 + count = 0 for i in output_graph.model.as_graph_def().node: - if i.op == 'QuantizedMatMulWithBiasAndDequantize': + if i.op == "QuantizedMatMulWithBiasAndDequantize": count += 1 found_quantized_matmul = bool(count > 1) 
# TF2.6 has enabled matmul_biasadd_requantize_dequantize_fusion_with_softmax - if tf.__version__ < '2.6.0': + if tf.__version__ < "2.6.0": self.assertEqual(found_quantized_matmul, False) else: self.assertEqual(found_quantized_matmul, True) @@ -418,10 +425,9 @@ def test_matmul_biasadd_requantize_dequantize_fusion_with_softmax(self): def test_matmul_biasadd_relu_non_const_weight(self): g = tf.Graph() with g.as_default(): - x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') - y = tf.matmul(x, x, name='no_quant_matmul') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") + y = tf.matmul(x, x, name="no_quant_matmul") biasadd = tf.nn.bias_add(y, [1, 2]) z = tf.nn.relu(biasadd) found_quantized_matmul = True @@ -431,15 +437,16 @@ def test_matmul_biasadd_relu_non_const_weight(self): float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == 'MatMul': + if i.op == "MatMul": found_quantized_matmul = False break self.assertEqual(found_quantized_matmul, False) @@ -447,10 +454,9 @@ def test_matmul_biasadd_relu_non_const_weight(self): def test_matmul_biasadd_non_const_weight(self): g = tf.Graph() with g.as_default(): - x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') - y = tf.matmul(x, x, name='no_quant_matmul') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") + y = tf.matmul(x, x, name="no_quant_matmul") z = tf.nn.bias_add(y, [1, 2]) found_quantized_matmul = True @@ -459,18 +465,20 @@ def test_matmul_biasadd_non_const_weight(self): float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == 'MatMul': + if i.op == "MatMul": found_quantized_matmul = False break self.assertEqual(found_quantized_matmul, False) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_meta_pass.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_meta_pass.py index a1e2eb31df3..e676371f7fe 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_meta_pass.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_meta_pass.py @@ -1,16 +1,18 @@ # # -*- coding: utf-8 -*- # -import unittest import os -import yaml -from neural_compressor.adaptor.tf_utils.util import disable_random +import unittest import tensorflow as tf +import yaml from tensorflow.compat.v1 import graph_util +from neural_compressor.adaptor.tf_utils.util import disable_random + + def build_fake_yaml(): - fake_yaml = 
''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -35,9 +37,9 @@ def build_fake_yaml(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() @@ -49,45 +51,44 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") @disable_random() def test_tensorflow_graph_meta_pass_with_different_mode(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") top_relu = tf.nn.relu(x) - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(top_relu, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) relu = tf.nn.relu(normed) sq = tf.squeeze(relu, [0]) reshape = tf.reshape(sq, [729, 16]) - conv_weights2 = tf.compat.v1.get_variable("weight2", [16, 729], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [16, 729], initializer=tf.compat.v1.random_normal_initializer() + ) matmul = tf.matmul(reshape, conv_weights2) # normed2 = tf.compat.v1.layers.batch_normalization(matmul) - bias = tf.compat.v1.get_variable("bias", [729], - initializer=tf.compat.v1.random_normal_initializer()) - normed2 = tf.nn.bias_add(matmul, bias, name='bias_add') + bias = tf.compat.v1.get_variable("bias", [729], initializer=tf.compat.v1.random_normal_initializer()) + normed2 = tf.nn.bias_add(matmul, bias, name="bias_add") relu6 = tf.nn.relu6(normed2) - reshape2 = tf.reshape(relu6, [1, 729, 729, 1], name='op_to_store') + reshape2 = tf.reshape(relu6, [1, 729, 729, 1], name="op_to_store") - out_name = reshape2.name.split(':')[0] + out_name = reshape2.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -95,7 +96,7 @@ def test_tensorflow_graph_meta_pass_with_different_mode(self): found_reshape = False for i in output_graph.graph_def.node: - if i.op == 'Reshape': + if i.op == "Reshape": found_reshape = True break @@ -103,36 +104,36 @@ def test_tensorflow_graph_meta_pass_with_different_mode(self): @disable_random() def test_tensorflow_graph_meta_pass_with_same_mode(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") top_relu = tf.nn.relu(x) - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], 
initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(top_relu, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) relu = tf.nn.relu(normed) sq = tf.squeeze(relu, [0]) reshape = tf.reshape(sq, [1, 27, 27, 16]) - conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(reshape, conv_weights2, strides=[1, 2, 2, 1], padding="VALID") normed2 = tf.compat.v1.layers.batch_normalization(conv2) - relu6 = tf.nn.relu6(normed2, name='op_to_store') + relu6 = tf.nn.relu6(normed2, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -141,9 +142,9 @@ def test_tensorflow_graph_meta_pass_with_same_mode(self): dequantize_count = 0 for i in output_graph.graph_def.node: - if i.op == 'QuantizeV2': + if i.op == "QuantizeV2": quantize_count += 1 - if i.op == 'Dequantize': + if i.op == "Dequantize": dequantize_count += 1 self.assertEqual(quantize_count, 1) @@ -151,35 +152,35 @@ def test_tensorflow_graph_meta_pass_with_same_mode(self): @disable_random() def test_tensorflow_graph_meta_with_reshape_only(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") top_relu = tf.nn.relu(x) - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(top_relu, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) relu = tf.nn.relu(normed) reshape = tf.reshape(relu, [1, 27, 27, 16]) - conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(reshape, conv_weights2, strides=[1, 2, 2, 1], padding="VALID") normed2 = tf.compat.v1.layers.batch_normalization(conv2) - relu6 = tf.nn.relu6(normed2, name='op_to_store') + relu6 = tf.nn.relu6(normed2, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) 
from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -188,13 +189,14 @@ def test_tensorflow_graph_meta_with_reshape_only(self): dequantize_count = 0 for i in output_graph.graph_def.node: - if i.op == 'QuantizeV2': + if i.op == "QuantizeV2": quantize_count += 1 - if i.op == 'Dequantize': + if i.op == "Dequantize": dequantize_count += 1 self.assertEqual(quantize_count, 1) self.assertEqual(dequantize_count, 1) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_pad_conv.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_pad_conv.py index 86a4d4c3b56..2be01967846 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_pad_conv.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_pad_conv.py @@ -1,14 +1,15 @@ - -import unittest import os -import yaml -from neural_compressor.adaptor.tf_utils.util import disable_random +import unittest import tensorflow as tf +import yaml from tensorflow.compat.v1 import graph_util +from neural_compressor.adaptor.tf_utils.util import disable_random + + def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -35,9 +36,9 @@ def build_fake_yaml(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() @@ -49,29 +50,30 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") @disable_random() def test_fold_pad_conv(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) - relu = tf.nn.relu(normed, name='op_to_store') - out_name = relu.name.split(':')[0] + relu = tf.nn.relu(normed, name="op_to_store") + out_name = relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) 
quantizer.model = output_graph_def @@ -80,7 +82,7 @@ def test_fold_pad_conv(self): if tf.__version__ >= "2.0.0": for i in output_graph.graph_def.node: - if i.op == 'Pad': + if i.op == "Pad": found_pad = True break self.assertEqual(found_pad, True) @@ -90,30 +92,32 @@ def test_fold_pad_conv2(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) relu = tf.nn.relu(normed) paddings2 = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad2 = tf.pad(x, paddings2, "CONSTANT") - conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(x_pad2, conv_weights2, strides=[1, 2, 2, 1], padding="VALID") normed2 = tf.compat.v1.layers.batch_normalization(conv2) relu2 = tf.nn.relu(normed2) - add = tf.math.add(relu, relu2, name='op_to_store') - out_name = add.name.split(':')[0] + add = tf.math.add(relu, relu2, name="op_to_store") + out_name = add.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -122,7 +126,7 @@ def test_fold_pad_conv2(self): if tf.__version__ >= "2.0.0": for i in output_graph.graph_def.node: - if i.op == 'Pad': + if i.op == "Pad": found_pad = True break self.assertEqual(found_pad, True) @@ -132,28 +136,30 @@ def test_fold_pad_conv3(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) relu = tf.nn.relu(normed) - conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(x, conv_weights2, strides=[1, 2, 2, 1], padding="SAME") normed2 = 
tf.compat.v1.layers.batch_normalization(conv2) relu2 = tf.nn.relu(normed2) - add = tf.math.add(relu, relu2, name='op_to_store') - out_name = add.name.split(':')[0] + add = tf.math.add(relu, relu2, name="op_to_store") + out_name = add.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -162,7 +168,7 @@ def test_fold_pad_conv3(self): if tf.__version__ >= "2.0.0": for i in output_graph.graph_def.node: - if i.op == 'Pad': + if i.op == "Pad": found_pad = True break diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_post_cse_optimize.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_post_cse_optimize.py index ffe8374dcc6..2732bf2ca6d 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_post_cse_optimize.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_post_cse_optimize.py @@ -1,15 +1,16 @@ - -import unittest import os -import yaml -import numpy as np -from neural_compressor.adaptor.tf_utils.util import disable_random +import unittest +import numpy as np import tensorflow as tf +import yaml from tensorflow.compat.v1 import graph_util +from neural_compressor.adaptor.tf_utils.util import disable_random + + def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -36,9 +37,9 @@ def build_fake_yaml(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() @@ -49,53 +50,48 @@ def setUpClass(self): build_fake_yaml() import tensorflow as tf - self.enable_s8 = bool( - tf.version.VERSION.find('1.15.0-up') != -1 or tf.version.VERSION >= '2.1.0') + + self.enable_s8 = bool(tf.version.VERSION.find("1.15.0-up") != -1 or tf.version.VERSION >= "2.1.0") @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") @disable_random() def test_post_cse(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") x = tf.nn.relu(x) - xw = tf.constant(np.random.random((2, 2, 16, 16)), - dtype=tf.float32, name='y') - x = tf.nn.conv2d(input=x, filters=xw, strides=[ - 1, 1, 1, 1], padding='VALID') + xw = tf.constant(np.random.random((2, 2, 16, 16)), dtype=tf.float32, name="y") + x = tf.nn.conv2d(input=x, filters=xw, strides=[1, 1, 1, 1], padding="VALID") - y = tf.constant(np.random.random((1, 55, 55, 16)), - dtype=tf.float32, name='y') + y = tf.constant(np.random.random((1, 55, 55, 16)), dtype=tf.float32, name="y") - z = tf.math.add(x, y, name='add') + z = tf.math.add(x, y, name="add") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) - conv = tf.nn.conv2d(z, conv_weights, strides=[ 
- 1, 2, 2, 1], padding="VALID") + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) + conv = tf.nn.conv2d(z, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) relu = tf.nn.relu(normed) - conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) - conv2 = tf.nn.conv2d(z, conv_weights2, strides=[ - 1, 2, 2, 1], padding="VALID") + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) + conv2 = tf.nn.conv2d(z, conv_weights2, strides=[1, 2, 2, 1], padding="VALID") normed2 = tf.compat.v1.layers.batch_normalization(conv2) relu2 = tf.nn.relu(normed2) - add = tf.math.add(relu, relu2, name='op_to_store') - out_name = add.name.split(':')[0] + add = tf.math.add(relu, relu2, name="op_to_store") + out_name = add.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset( - 'dummy', shape=(100, 56, 56, 16), label=True) + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -103,58 +99,54 @@ def test_post_cse(self): quantize_v2_count = 0 for i in output_graph.graph_def.node: - if i.op == 'QuantizeV2': - quantize_v2_count += 1 + if i.op == "QuantizeV2": + quantize_v2_count += 1 if self.enable_s8: - self.assertEqual(quantize_v2_count, 2) + self.assertEqual(quantize_v2_count, 2) else: - self.assertEqual(quantize_v2_count, 1) + self.assertEqual(quantize_v2_count, 1) @disable_random() def test_post_cse2(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") x = tf.nn.relu(x) - xw = tf.constant(np.random.random((2, 2, 16, 16)), - dtype=tf.float32, name='y') - x = tf.nn.conv2d(input=x, filters=xw, strides=[ - 1, 1, 1, 1], padding='VALID') + xw = tf.constant(np.random.random((2, 2, 16, 16)), dtype=tf.float32, name="y") + x = tf.nn.conv2d(input=x, filters=xw, strides=[1, 1, 1, 1], padding="VALID") - y = tf.constant(np.random.random((1, 55, 55, 16)), - dtype=tf.float32, name='y') + y = tf.constant(np.random.random((1, 55, 55, 16)), dtype=tf.float32, name="y") - z = tf.math.add(x, y, name='add') + z = tf.math.add(x, y, name="add") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(z, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) relu = tf.nn.relu(normed) - conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) - conv2 = tf.nn.conv2d(z, conv_weights2, strides=[ - 1, 2, 2, 1], padding="VALID") + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 
16], initializer=tf.compat.v1.random_normal_initializer() + ) + conv2 = tf.nn.conv2d(z, conv_weights2, strides=[1, 2, 2, 1], padding="VALID") normed2 = tf.compat.v1.layers.batch_normalization(conv2) relu2 = tf.nn.relu(normed2) add = tf.math.add(relu, relu2) ones_const = tf.constant(1, dtype=tf.float32) ones_const2 = tf.constant(1, dtype=tf.float32) - mul1 =tf.math.multiply(add, ones_const) - mul2 =tf.math.multiply(mul1, ones_const) - mul3 =tf.math.multiply(mul2, ones_const2, name='op_to_store') - out_name = mul3.name.split(':')[0] + mul1 = tf.math.multiply(add, ones_const) + mul2 = tf.math.multiply(mul1, ones_const) + mul3 = tf.math.multiply(mul2, ones_const2, name="op_to_store") + out_name = mul3.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset( - 'dummy', shape=(100, 56, 56, 16), label=True) + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -162,13 +154,13 @@ def test_post_cse2(self): quantize_v2_count = 0 for i in output_graph.graph_def.node: - if i.op == 'QuantizeV2': - quantize_v2_count += 1 + if i.op == "QuantizeV2": + quantize_v2_count += 1 if self.enable_s8: - self.assertEqual(quantize_v2_count, 2) + self.assertEqual(quantize_v2_count, 2) else: - self.assertEqual(quantize_v2_count, 1) + self.assertEqual(quantize_v2_count, 1) if __name__ == "__main__": diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_search_patterns.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_search_patterns.py index 225194f2235..43955572441 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_search_patterns.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_search_patterns.py @@ -2,15 +2,16 @@ # -*- coding: utf-8 -*- # import unittest -from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper -from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer import tensorflow as tf from tensorflow.core.framework import graph_pb2 from tensorflow.python.framework import dtypes -class TestGraphSearchPatterns(unittest.TestCase): +from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer +from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper + +class TestGraphSearchPatterns(unittest.TestCase): def test_graph_search_partten_post_branch(self): tf.compat.v1.disable_eager_execution() tf.compat.v1.reset_default_graph() @@ -19,10 +20,8 @@ def test_graph_search_partten_post_branch(self): relu_name = "relu" float_graph_def = graph_pb2.GraphDef() input_constant = QuantizeGraphHelper.create_constant_node( - input_constant_name, - value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], - dtype=dtypes.float32, - shape=[1, 2, 6, 1]) + input_constant_name, value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=dtypes.float32, shape=[1, 2, 6, 1] + ) float_graph_def.node.extend([input_constant]) relu_node = QuantizeGraphHelper.create_node("Relu", relu_name, 
[input_constant_name]) QuantizeGraphHelper.set_attr_dtype(relu_node, "T", dtypes.float32) @@ -31,14 +30,11 @@ def test_graph_search_partten_post_branch(self): b_constant_name = "b_constant" mat_mul_name = "mat_mul" b_constant = QuantizeGraphHelper.create_constant_node( - b_constant_name, - value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], - dtype=dtypes.float32, - shape=[2, 6]) + b_constant_name, value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=dtypes.float32, shape=[2, 6] + ) float_graph_def.node.extend([b_constant]) - mat_mul_node = QuantizeGraphHelper.create_node("MatMul", mat_mul_name, - [relu_name, b_constant_name]) + mat_mul_node = QuantizeGraphHelper.create_node("MatMul", mat_mul_name, [relu_name, b_constant_name]) QuantizeGraphHelper.set_attr_dtype(mat_mul_node, "T", dtypes.float32) QuantizeGraphHelper.set_attr_bool(mat_mul_node, "transpose_a", False) QuantizeGraphHelper.set_attr_bool(mat_mul_node, "transpose_b", False) @@ -47,13 +43,11 @@ def test_graph_search_partten_post_branch(self): bias_add_name = "bias_add" offset_constant_name = "offset_constant" - offset_constant = QuantizeGraphHelper.create_constant_node(offset_constant_name, - value=[1, 2, 3, 4, 5, 6], - dtype=dtypes.float32, - shape=[6]) + offset_constant = QuantizeGraphHelper.create_constant_node( + offset_constant_name, value=[1, 2, 3, 4, 5, 6], dtype=dtypes.float32, shape=[6] + ) float_graph_def.node.extend([offset_constant]) - bias_add_node = QuantizeGraphHelper.create_node("BiasAdd", bias_add_name, - [mat_mul_name, offset_constant_name]) + bias_add_node = QuantizeGraphHelper.create_node("BiasAdd", bias_add_name, [mat_mul_name, offset_constant_name]) QuantizeGraphHelper.set_attr_dtype(bias_add_node, "T", dtypes.float32) float_graph_def.node.extend([bias_add_node]) @@ -61,19 +55,19 @@ def test_graph_search_partten_post_branch(self): post_relu_node = QuantizeGraphHelper.create_node("Relu", post_relu_name, [bias_add_name]) float_graph_def.node.extend([post_relu_node]) - last_identity_node_name = 'last_identity' - last_identity_node = QuantizeGraphHelper.create_node("Identity", last_identity_node_name, - [post_relu_name]) + last_identity_node_name = "last_identity" + last_identity_node = QuantizeGraphHelper.create_node("Identity", last_identity_node_name, [post_relu_name]) float_graph_def.node.extend([last_identity_node]) - last_identity_right_node_name = 'last_identity_right' - last_identity_node_right = QuantizeGraphHelper.create_node("Identity", last_identity_right_node_name, - [post_relu_name]) + last_identity_right_node_name = "last_identity_right" + last_identity_node_right = QuantizeGraphHelper.create_node( + "Identity", last_identity_right_node_name, [post_relu_name] + ) float_graph_def.node.extend([last_identity_node_right]) analyzer = GraphAnalyzer() analyzer.graph = float_graph_def analyzer.parse_graph() - res = analyzer.query_fusion_pattern_nodes([['BiasAdd'], ("Relu"), ("Identity")]) + res = analyzer.query_fusion_pattern_nodes([["BiasAdd"], ("Relu"), ("Identity")]) self.assertEqual(2, len(res)) def test_graph_search_pattern_straight(self): @@ -84,10 +78,8 @@ def test_graph_search_pattern_straight(self): relu_name = "relu" float_graph_def = graph_pb2.GraphDef() input_constant = QuantizeGraphHelper.create_constant_node( - input_constant_name, - value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], - dtype=dtypes.float32, - shape=[1, 2, 6, 1]) + input_constant_name, value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=dtypes.float32, shape=[1, 2, 6, 1] + ) float_graph_def.node.extend([input_constant]) relu_node = 
QuantizeGraphHelper.create_node("Relu", relu_name, [input_constant_name]) QuantizeGraphHelper.set_attr_dtype(relu_node, "T", dtypes.float32) @@ -96,14 +88,11 @@ def test_graph_search_pattern_straight(self): b_constant_name = "b_constant" mat_mul_name = "mat_mul" b_constant = QuantizeGraphHelper.create_constant_node( - b_constant_name, - value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], - dtype=dtypes.float32, - shape=[2, 6]) + b_constant_name, value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=dtypes.float32, shape=[2, 6] + ) float_graph_def.node.extend([b_constant]) - mat_mul_node = QuantizeGraphHelper.create_node("MatMul", mat_mul_name, - [relu_name, b_constant_name]) + mat_mul_node = QuantizeGraphHelper.create_node("MatMul", mat_mul_name, [relu_name, b_constant_name]) QuantizeGraphHelper.set_attr_dtype(mat_mul_node, "T", dtypes.float32) QuantizeGraphHelper.set_attr_bool(mat_mul_node, "transpose_a", False) QuantizeGraphHelper.set_attr_bool(mat_mul_node, "transpose_b", False) @@ -112,13 +101,11 @@ def test_graph_search_pattern_straight(self): bias_add_name = "bias_add" offset_constant_name = "offset_constant" - offset_constant = QuantizeGraphHelper.create_constant_node(offset_constant_name, - value=[1, 2, 3, 4, 5, 6], - dtype=dtypes.float32, - shape=[6]) + offset_constant = QuantizeGraphHelper.create_constant_node( + offset_constant_name, value=[1, 2, 3, 4, 5, 6], dtype=dtypes.float32, shape=[6] + ) float_graph_def.node.extend([offset_constant]) - bias_add_node = QuantizeGraphHelper.create_node("BiasAdd", bias_add_name, - [mat_mul_name, offset_constant_name]) + bias_add_node = QuantizeGraphHelper.create_node("BiasAdd", bias_add_name, [mat_mul_name, offset_constant_name]) QuantizeGraphHelper.set_attr_dtype(bias_add_node, "T", dtypes.float32) float_graph_def.node.extend([bias_add_node]) @@ -126,17 +113,16 @@ def test_graph_search_pattern_straight(self): post_relu_node = QuantizeGraphHelper.create_node("Relu", post_relu_name, [bias_add_name]) float_graph_def.node.extend([post_relu_node]) - last_identity_node_name = 'last_identity' - last_identity_node = QuantizeGraphHelper.create_node("Identity", last_identity_node_name, - [post_relu_name]) + last_identity_node_name = "last_identity" + last_identity_node = QuantizeGraphHelper.create_node("Identity", last_identity_node_name, [post_relu_name]) float_graph_def.node.extend([last_identity_node]) analyzer = GraphAnalyzer() analyzer.graph = float_graph_def analyzer.parse_graph() - res = analyzer.query_fusion_pattern_nodes([['MatMul'], ("BiasAdd"), ("Relu")]) + res = analyzer.query_fusion_pattern_nodes([["MatMul"], ("BiasAdd"), ("Relu")]) self.assertEqual(3, len(res[0][-1])) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_switch_optimizer.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_switch_optimizer.py index 8b62fcf7a1c..712cef305c5 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_switch_optimizer.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_switch_optimizer.py @@ -1,17 +1,18 @@ - import imp -import unittest import os -import yaml -import numpy as np -from neural_compressor.adaptor.tf_utils.util import disable_random +import unittest +import numpy as np import tensorflow as tf +import yaml from tensorflow.compat.v1 import graph_util from tensorflow.python.ops import control_flow_ops +from neural_compressor.adaptor.tf_utils.util import disable_random + + def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ 
model: name: fake_yaml framework: tensorflow @@ -38,9 +39,9 @@ def build_fake_yaml(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() @@ -52,7 +53,7 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") @disable_random() def test_switch_optimizer(self): @@ -61,26 +62,25 @@ def test_switch_optimizer(self): x_pad = tf.pad(x, paddings, "CONSTANT") y = tf.compat.v1.placeholder_with_default(True, [], name="place_true") - conv_weights = tf.constant(np.random.random((3,3,16,16)).astype(np.float32), name='y') + conv_weights = tf.constant(np.random.random((3, 3, 16, 16)).astype(np.float32), name="y") _, switch_true = control_flow_ops.switch(conv_weights, y) conv = tf.nn.conv2d(x_pad, switch_true, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) - relu = tf.nn.relu(normed, name='op_to_store') - out_name = relu.name.split(':')[0] + relu = tf.nn.relu(normed, name="op_to_store") + out_name = relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.switch_optimizer import SwitchOptimizer convert_graph = SwitchOptimizer(output_graph_def).do_transformation() found_switch = False for node in convert_graph.node: - if node.op == 'Switch': - found_switch = True + if node.op == "Switch": + found_switch = True self.assertEqual(found_switch, False) @disable_random() @@ -90,28 +90,29 @@ def test_switch_optimizer_with_const_boolean(self): x_pad = tf.pad(x, paddings, "CONSTANT") place = tf.constant(True) y = tf.compat.v1.placeholder_with_default(place, [], name="place_true") - conv_weights = tf.constant(np.random.random((3,3,16,16)).astype(np.float32), name='y') + conv_weights = tf.constant(np.random.random((3, 3, 16, 16)).astype(np.float32), name="y") _, switch_true = control_flow_ops.switch(conv_weights, y) conv = tf.nn.conv2d(x_pad, switch_true, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) - relu = tf.nn.relu(normed, name='op_to_store') - out_name = relu.name.split(':')[0] + relu = tf.nn.relu(normed, name="op_to_store") + out_name = relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) + from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.convert_placeholder_to_const import ( + ConvertPlaceholderToConst, + ) from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.switch_optimizer import SwitchOptimizer - from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.convert_placeholder_to_const \ - import ConvertPlaceholderToConst + convert_graph = ConvertPlaceholderToConst(output_graph_def).do_transformation() convert_graph = SwitchOptimizer(convert_graph).do_transformation() found_switch = False for node in 
convert_graph.node: - if node.op == 'Switch': - found_switch = True + if node.op == "Switch": + found_switch = True self.assertEqual(found_switch, False) @disable_random() @@ -121,27 +122,27 @@ def test_switch_optimizer_invalid(self): x_pad = tf.pad(x, paddings, "CONSTANT") y = tf.compat.v1.placeholder_with_default(True, [], name="place_true") - conv_weights = tf.constant(np.random.random((3,3,16,16)).astype(np.float32), name='y') + conv_weights = tf.constant(np.random.random((3, 3, 16, 16)).astype(np.float32), name="y") switch_false, _ = control_flow_ops.switch(conv_weights, y) conv = tf.nn.conv2d(x_pad, switch_false, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) - relu = tf.nn.relu(normed, name='op_to_store') - out_name = relu.name.split(':')[0] + relu = tf.nn.relu(normed, name="op_to_store") + out_name = relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.switch_optimizer import SwitchOptimizer convert_graph = SwitchOptimizer(output_graph_def).do_transformation() found_switch = False for node in convert_graph.node: - if node.op == 'Switch': - found_switch = True + if node.op == "Switch": + found_switch = True self.assertEqual(found_switch, True) + if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_util.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_util.py index 1d20ea2eaf9..16373723fbd 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_util.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_util.py @@ -1,15 +1,15 @@ -import unittest import copy import re -import numpy as np -from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer, GraphRewriterHelper +import unittest +import numpy as np import tensorflow as tf -from tensorflow.core.framework import attr_value_pb2 -from tensorflow.core.framework import graph_pb2 -from tensorflow.core.framework import node_def_pb2 +from tensorflow.core.framework import attr_value_pb2, graph_pb2, node_def_pb2 from tensorflow.python.framework import tensor_util +from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer, GraphRewriterHelper + + class TestGraph_util(unittest.TestCase): x_node = node_def_pb2.NodeDef() x_node.name = "placeholder" @@ -20,16 +20,20 @@ class TestGraph_util(unittest.TestCase): input0_node.op = "Const" input0_value = np.float32(np.abs(np.random.randn(4, 3, 2))) input0_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - input0_value, input0_value.dtype.type, input0_value.shape))) + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(input0_value, input0_value.dtype.type, input0_value.shape) + ) + ) input1_node = node_def_pb2.NodeDef() input1_node.name = "input1" input1_node.op = "Const" input1_value = np.float32(np.abs(np.random.randn(4, 1, 1))) input1_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - input1_value, input1_value.dtype.type, input1_value.shape))) + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(input1_value, input1_value.dtype.type, input1_value.shape) + ) + ) add_node = 
node_def_pb2.NodeDef() add_node.op = "Add" @@ -41,16 +45,20 @@ class TestGraph_util(unittest.TestCase): input2_node.op = "Const" input2_value = np.float32(np.abs(np.random.randn(1))) input2_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - input2_value, input2_value.dtype.type, input2_value.shape))) + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(input2_value, input2_value.dtype.type, input2_value.shape) + ) + ) input3_node = node_def_pb2.NodeDef() input3_node.name = "input3" input3_node.op = "Const" input3_value = np.float32(np.abs(np.random.randn(1))) input3_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - input3_value, input3_value.dtype.type, input3_value.shape))) + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(input3_value, input3_value.dtype.type, input3_value.shape) + ) + ) mul_node = node_def_pb2.NodeDef() mul_node.op = "Mul" @@ -83,10 +91,22 @@ class TestGraph_util(unittest.TestCase): end_node.input.extend([block_node.name, res_node.name]) graph_def = graph_pb2.GraphDef() - graph_def.node.extend([ - x_node, input0_node, input1_node, input2_node, input3_node, add_node, mul_node, sqrt_node, - sqrt1_node, block_node, res_node, end_node - ]) + graph_def.node.extend( + [ + x_node, + input0_node, + input1_node, + input2_node, + input3_node, + add_node, + mul_node, + sqrt_node, + sqrt1_node, + block_node, + res_node, + end_node, + ] + ) def test_replace_constant_graph_with_constant_node(self): graph_analyzer = GraphAnalyzer() @@ -97,36 +117,36 @@ def test_replace_constant_graph_with_constant_node(self): new_constant_value = np.random.random([4, 1]) new_constant_type = tf.as_dtype(np.float32(new_constant_value).dtype) new_constant_node = GraphRewriterHelper.create_constant_node( - self.add_node.name + "_const", new_constant_value, new_constant_type) - assert graph_analyzer.replace_constant_graph_with_constant_node( - new_constant_node, self.add_node.name) + self.add_node.name + "_const", new_constant_value, new_constant_type + ) + assert graph_analyzer.replace_constant_graph_with_constant_node(new_constant_node, self.add_node.name) result_graph = graph_analyzer.dump_graph() assert len(list(result_graph.node)) == 10 new_constant_value = np.random.random([4, 1]) new_constant_type = tf.as_dtype(np.float32(new_constant_value).dtype) new_constant_node = GraphRewriterHelper.create_constant_node( - self.mul_node.name + "_const", new_constant_value, new_constant_type) - assert graph_analyzer.replace_constant_graph_with_constant_node( - new_constant_node, self.mul_node.name) + self.mul_node.name + "_const", new_constant_value, new_constant_type + ) + assert graph_analyzer.replace_constant_graph_with_constant_node(new_constant_node, self.mul_node.name) result_graph = graph_analyzer.dump_graph() assert len(list(result_graph.node)) == 8 new_constant_value = np.random.random([4, 1]) new_constant_type = tf.as_dtype(np.float32(new_constant_value).dtype) new_constant_node = GraphRewriterHelper.create_constant_node( - self.sqrt_node.name + "_const", new_constant_value, new_constant_type) - assert graph_analyzer.replace_constant_graph_with_constant_node( - new_constant_node, self.sqrt_node.name) + self.sqrt_node.name + "_const", new_constant_value, new_constant_type + ) + assert graph_analyzer.replace_constant_graph_with_constant_node(new_constant_node, self.sqrt_node.name) result_graph = graph_analyzer.dump_graph() assert len(list(result_graph.node)) == 7 new_constant_value = 
np.random.random([4, 1]) new_constant_type = tf.as_dtype(np.float32(new_constant_value).dtype) new_constant_node = GraphRewriterHelper.create_constant_node( - self.block_node.name + "_const", new_constant_value, new_constant_type) - assert not graph_analyzer.replace_constant_graph_with_constant_node( - new_constant_node, self.block_node.name) + self.block_node.name + "_const", new_constant_value, new_constant_type + ) + assert not graph_analyzer.replace_constant_graph_with_constant_node(new_constant_node, self.block_node.name) def test_replace_node(self): graph_analyzer = GraphAnalyzer() @@ -143,16 +163,16 @@ def test_replace_node(self): assert self.add_node not in list(result_graph.node) assert new_add_node in list(result_graph.node) - def test_freeze_value_regrex(self): - sample_str_1 = ';efficientnet-b3/model/blocks_14/se/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.35420851e+09][2.59383834e+09]' - sample_str_2 = ';efficientnet-b3/model/blocks_15/se/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.254][2.59383834]' - print_suffix = '__print__' - postfix = '__requant_min_max' + sample_str_1 = ";efficientnet-b3/model/blocks_14/se/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-2.35420851e+09][2.59383834e+09]" + sample_str_2 = ";efficientnet-b3/model/blocks_15/se/conv2d/Conv2D_eightbit_requant_range__print__;__requant_min_max:[-1.254][2.59383834]" + print_suffix = "__print__" + postfix = "__requant_min_max" res_1 = re.search(r"{};{}:\[\-?\d+\.?\d*e?\+?\d*\]".format(print_suffix, postfix), sample_str_1) res_2 = re.search(r"{};{}:\[\-?\d+\.?\d*e?\+?\d*\]".format(print_suffix, postfix), sample_str_2) self.assertNotEqual(res_1, None) self.assertNotEqual(res_2, None) + if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_grappler_pass.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_grappler_pass.py index c1a3abf5144..1374355ef4a 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_grappler_pass.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_grappler_pass.py @@ -1,19 +1,21 @@ import unittest + import numpy as np +import tensorflow.compat.v1 as tf + from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.grappler_pass import GrapplerOptimizer from neural_compressor.adaptor.tf_utils.util import disable_random -import tensorflow.compat.v1 as tf + class TestGrapplerPass(unittest.TestCase): @disable_random() def test_grappler_pass(self): - g = tf.Graph() with g.as_default(): x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) z_data = np.array([[2, 4], [6, 8]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) z = tf.constant(z_data, dtype=tf.float32, shape=[2, 2]) y1 = tf.math.add(y, z) @@ -21,20 +23,23 @@ def test_grappler_pass(self): z = tf.matmul(x, y2) z = tf.nn.bias_add(z, [1, 2]) p = tf.identity(z) - z = tf.identity(p, name='op_to_store') + z = tf.identity(p, name="op_to_store") with tf.Session() as sess: sess.run(z, feed_dict={x: x_data, y: y_data}) float_graph_def = sess.graph.as_graph_def() opt_cfg = { - 'pruning': True, - 'shape': True, 'dependency': True, 'debug_stripper': True, 'loop': True, - 'constfold': True, 'arithmetic': True + "pruning": True, + "shape": True, + "dependency": True, + "debug_stripper": True, + "loop": True, + "constfold": True, + "arithmetic": 
True, } - optimized_graph = GrapplerOptimizer( - float_graph_def, ['op_to_store'], opt_cfg).do_transformation() + optimized_graph = GrapplerOptimizer(float_graph_def, ["op_to_store"], opt_cfg).do_transformation() identity_count = 0 for i in optimized_graph.node: - if i.op == 'Identity': + if i.op == "Identity": identity_count += 1 self.assertEqual(identity_count, 1) diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_inspect_tensor.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_inspect_tensor.py index b4d1c6d1aec..c3dea04349f 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_inspect_tensor.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_inspect_tensor.py @@ -7,19 +7,21 @@ Note: use '-s' to disable pytest capturing the sys.stderr which will be used in quantization process """ +import logging import os -import unittest -import yaml -import numpy as np import pickle -import logging import shutil +import unittest + +import numpy as np +import yaml from packaging import version + np.random.seed(0) def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -37,51 +39,56 @@ def build_fake_yaml(): accuracy: metric: topk: 1 - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', 'w', encoding='utf-8') as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() def build_fake_model(): import tensorflow as tf + graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: tf.compat.v1.set_random_seed(0) - x = tf.compat.v1.placeholder(tf.float32, [1, 64, 64, 3], name='input') - conv_weights1 = tf.compat.v1.get_variable('weight1', [2, 2, 3, 3], - initializer=tf.compat.v1.random_normal_initializer()) - x = tf.nn.conv2d(x, conv_weights1, strides=[1, 2, 2, 1], padding='SAME', name='conv2d_1') + x = tf.compat.v1.placeholder(tf.float32, [1, 64, 64, 3], name="input") + conv_weights1 = tf.compat.v1.get_variable( + "weight1", [2, 2, 3, 3], initializer=tf.compat.v1.random_normal_initializer() + ) + x = tf.nn.conv2d(x, conv_weights1, strides=[1, 2, 2, 1], padding="SAME", name="conv2d_1") x = tf.nn.relu(x) - conv_weights2 = tf.compat.v1.get_variable('weight2', [3, 3, 3, 3], - initializer=tf.compat.v1.random_normal_initializer()) - x = tf.nn.conv2d(x, conv_weights2, strides=[1, 3, 3, 1], padding='SAME', name='conv2d_2') + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 3, 3], initializer=tf.compat.v1.random_normal_initializer() + ) + x = tf.nn.conv2d(x, conv_weights2, strides=[1, 3, 3, 1], padding="SAME", name="conv2d_2") x = tf.compat.v1.layers.batch_normalization(x) x = tf.nn.relu(x) - depthwise_weights = tf.compat.v1.get_variable('depthwise_weights', [3, 3, 3, 6], - initializer=tf.compat.v1.random_normal_initializer()) - x = tf.nn.depthwise_conv2d(x, depthwise_weights, strides=[1, 1, 1, 1], padding='VALID', - name='depthwise_conv2d_1') - x = tf.nn.max_pool(x, ksize=2, strides=[1, 2, 2, 1], padding='SAME', name='pool_1') + depthwise_weights = tf.compat.v1.get_variable( + "depthwise_weights", [3, 3, 3, 6], initializer=tf.compat.v1.random_normal_initializer() + ) + x = tf.nn.depthwise_conv2d( + x, depthwise_weights, strides=[1, 1, 1, 1], padding="VALID", name="depthwise_conv2d_1" + ) + x = tf.nn.max_pool(x, ksize=2, strides=[1, 2, 2, 1], padding="SAME", name="pool_1") # TODO to support inspect max_pool - x = tf.nn.relu(x, name='output') + x = tf.nn.relu(x, name="output") sess.run(tf.compat.v1.global_variables_initializer()) 
constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[x.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[x.name.split(":")[0]] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") return graph + def build_fake_diagnosis_yaml(): - fake_diagnosis_yaml = ''' + fake_diagnosis_yaml = """ model: name: fake_diagnosis_yaml framework: tensorflow @@ -105,15 +112,15 @@ def build_fake_diagnosis_yaml(): accuracy_criterion: relative: -0.01 diagnosis: True - ''' + """ y = yaml.load(fake_diagnosis_yaml, Loader=yaml.SafeLoader) - with open('fake_diagnosis_yaml.yaml', 'w', encoding='utf-8') as f: + with open("fake_diagnosis_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() def build_fake_diagnosis_yaml2(): - fake_diagnosis_yaml2 = ''' + fake_diagnosis_yaml2 = """ model: name: fake_diagnosis_yaml2 framework: tensorflow @@ -137,30 +144,31 @@ def build_fake_diagnosis_yaml2(): accuracy_criterion: relative: -0.01 diagnosis: True - ''' + """ y = yaml.load(fake_diagnosis_yaml2, Loader=yaml.SafeLoader) - with open('fake_diagnosis_yaml2.yaml', 'w', encoding='utf-8') as f: + with open("fake_diagnosis_yaml2.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() -class TestTensorflowInspectTensor(unittest.TestCase): +class TestTensorflowInspectTensor(unittest.TestCase): @classmethod def setUpClass(self): from neural_compressor.config import options + build_fake_yaml() build_fake_diagnosis_yaml() build_fake_diagnosis_yaml2() self.model = build_fake_model() - self.fp32_dumped_tensor_path = os.path.join(os.getcwd(), './fake_graph_inspect_res_fp32/') - self.quan_dumped_tensor_path = os.path.join(os.getcwd(), './fake_graph_inspect_res_quan/') - self.fp32_dumped_tensor_file_path = os.path.join(self.fp32_dumped_tensor_path, 'inspect_result.pkl') - self.quan_dumped_tensor_file_path = os.path.join(self.quan_dumped_tensor_path, 'inspect_result.pkl') + self.fp32_dumped_tensor_path = os.path.join(os.getcwd(), "./fake_graph_inspect_res_fp32/") + self.quan_dumped_tensor_path = os.path.join(os.getcwd(), "./fake_graph_inspect_res_quan/") + self.fp32_dumped_tensor_file_path = os.path.join(self.fp32_dumped_tensor_path, "inspect_result.pkl") + self.quan_dumped_tensor_file_path = os.path.join(self.quan_dumped_tensor_path, "inspect_result.pkl") self.workspace = os.path.abspath(options.workspace) @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") os.remove(self.fp32_dumped_tensor_file_path) os.rmdir(self.fp32_dumped_tensor_path) os.remove(self.quan_dumped_tensor_file_path) @@ -169,12 +177,14 @@ def tearDownClass(self): # shutil.rmtree(os.path.join(os.getcwd(), 'save_path_test')) def test_tensorflow_inspect_tensor(self): + import tensorflow.compat.v1 as tf + from neural_compressor.experimental import Quantization, common from neural_compressor.utils.utility import load_data_from_pkl - import tensorflow.compat.v1 as tf + tf.disable_v2_behavior() - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(128, 64, 64, 3), label=True) + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(128, 64, 64, 3), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = self.model @@ -182,72 
+192,85 @@ def test_tensorflow_inspect_tensor(self): self.quan_graph_def = q_model.graph_def self.fp32_graph_def = quantizer.model.graph_def self.dataloader = quantizer.calib_dataloader - self.node_list = ['conv2d_1', 'conv2d_2', 'depthwise_conv2d_1'] + self.node_list = ["conv2d_1", "conv2d_2", "depthwise_conv2d_1"] # Tensorflow 2.5.0 enabled the s8 input for pooling op - # TODO check the specific version - if version.parse(tf.version.VERSION) >= version.parse('2.6.0'): - self.node_list.append('pool_1') + # TODO check the specific version + if version.parse(tf.version.VERSION) >= version.parse("2.6.0"): + self.node_list.append("pool_1") self.quantizer = quantizer self.iteration_list = [1, 5] - logging.getLogger().debug(f'Start to inspect tensor :{self.node_list} in fp32 model.') + logging.getLogger().debug(f"Start to inspect tensor :{self.node_list} in fp32 model.") quantizer = self.quantizer - quantizer.strategy.adaptor.inspect_tensor(self.fp32_graph_def, dataloader=self.dataloader, - op_list=self.node_list, iteration_list=self.iteration_list, - inspect_type='all', save_to_disk=True, - save_path=self.fp32_dumped_tensor_path, - quantization_cfg=quantizer.strategy.tune_cfg) + quantizer.strategy.adaptor.inspect_tensor( + self.fp32_graph_def, + dataloader=self.dataloader, + op_list=self.node_list, + iteration_list=self.iteration_list, + inspect_type="all", + save_to_disk=True, + save_path=self.fp32_dumped_tensor_path, + quantization_cfg=quantizer.strategy.tune_cfg, + ) self.assertEqual(os.path.exists(self.fp32_dumped_tensor_file_path), True) - logging.getLogger().debug(f'Start to inspect tensor :{self.node_list} in quan model.') + logging.getLogger().debug(f"Start to inspect tensor :{self.node_list} in quan model.") quantizer = self.quantizer - quantizer.strategy.adaptor.inspect_tensor(self.quan_graph_def, dataloader=self.dataloader, - op_list=self.node_list, iteration_list=self.iteration_list, - inspect_type='all', save_to_disk=True, - save_path=self.quan_dumped_tensor_path, - quantization_cfg=quantizer.strategy.tune_cfg) + quantizer.strategy.adaptor.inspect_tensor( + self.quan_graph_def, + dataloader=self.dataloader, + op_list=self.node_list, + iteration_list=self.iteration_list, + inspect_type="all", + save_to_disk=True, + save_path=self.quan_dumped_tensor_path, + quantization_cfg=quantizer.strategy.tune_cfg, + ) self.assertEqual(os.path.exists(self.quan_dumped_tensor_file_path), True) - - fp32_data = load_data_from_pkl(self.fp32_dumped_tensor_path, 'inspect_result.pkl') - quan_data = load_data_from_pkl(self.quan_dumped_tensor_path, 'inspect_result.pkl') + fp32_data = load_data_from_pkl(self.fp32_dumped_tensor_path, "inspect_result.pkl") + quan_data = load_data_from_pkl(self.quan_dumped_tensor_path, "inspect_result.pkl") self.assertEqual(fp32_data.keys(), quan_data.keys()) - self.assertIn('activation', fp32_data) - self.assertEqual(len(fp32_data['activation']), len(quan_data['activation'])) # have same itertaion index - self.assertEqual(len(self.iteration_list),len(fp32_data['activation'])) + self.assertIn("activation", fp32_data) + self.assertEqual(len(fp32_data["activation"]), len(quan_data["activation"])) # have same itertaion index + self.assertEqual(len(self.iteration_list), len(fp32_data["activation"])) for iter_indx, iter in enumerate(self.iteration_list): - fp32_iter_data = fp32_data['activation'][iter_indx] - quan_iter_data = quan_data['activation'][iter_indx] + fp32_iter_data = fp32_data["activation"][iter_indx] + quan_iter_data = quan_data["activation"][iter_indx] for node_name 
in fp32_iter_data.keys(): self.assertEqual(fp32_iter_data[node_name][node_name].shape, quan_iter_data[node_name][node_name].shape) def test_tensorflow_diagnosis(self): - from neural_compressor.experimental import Quantization, common import tensorflow.compat.v1 as tf + + from neural_compressor.experimental import Quantization, common + tf.disable_v2_behavior() - quantizer = Quantization('fake_diagnosis_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(128, 64, 64, 3), label=True) + quantizer = Quantization("fake_diagnosis_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(128, 64, 64, 3), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = self.model quantizer.fit() - self.assertEqual(os.path.exists(os.path.join(self.workspace, 'inspect_saved/fp32/inspect_result.pkl')), True) - self.assertEqual(os.path.exists(os.path.join(self.workspace, 'inspect_saved/quan/inspect_result.pkl')), True) + self.assertEqual(os.path.exists(os.path.join(self.workspace, "inspect_saved/fp32/inspect_result.pkl")), True) + self.assertEqual(os.path.exists(os.path.join(self.workspace, "inspect_saved/quan/inspect_result.pkl")), True) def test_tensorflow_diagnosis2(self): - from neural_compressor.experimental import Quantization, common import tensorflow.compat.v1 as tf + + from neural_compressor.experimental import Quantization, common + tf.disable_v2_behavior() - quantizer = Quantization('fake_diagnosis_yaml2.yaml') - dataset = quantizer.dataset('dummy', shape=(128, 64, 64, 3), label=True) + quantizer = Quantization("fake_diagnosis_yaml2.yaml") + dataset = quantizer.dataset("dummy", shape=(128, 64, 64, 3), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = self.model quantizer.fit() - self.assertEqual(os.path.exists(os.path.join(self.workspace, 'inspect_saved/fp32/inspect_result.pkl')), True) - self.assertEqual(os.path.exists(os.path.join(self.workspace, 'inspect_saved/quan/inspect_result.pkl')), True) + self.assertEqual(os.path.exists(os.path.join(self.workspace, "inspect_saved/fp32/inspect_result.pkl")), True) + self.assertEqual(os.path.exists(os.path.join(self.workspace, "inspect_saved/quan/inspect_result.pkl")), True) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_inspect_tensor_in_mse_tuning.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_inspect_tensor_in_mse_tuning.py index a8527417714..f11d1e3b9fd 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_inspect_tensor_in_mse_tuning.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_inspect_tensor_in_mse_tuning.py @@ -8,19 +8,21 @@ Note: use '-s' to disable pytest capturing the sys.stderr which will be used in quantization process """ +import logging import os +import pickle import platform +import shutil import unittest -import yaml + import numpy as np -import pickle -import logging -import shutil +import yaml + np.random.seed(0) def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -45,96 +47,99 @@ def build_fake_yaml(): relative: -0.01 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() def build_fake_model(): import tensorflow as tf + graph 
= tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: tf.compat.v1.set_random_seed(0) x = tf.compat.v1.placeholder(tf.float32, [1, 28, 28, 1], name="input") - conv_weights1 = tf.compat.v1.get_variable("weight1", [2, 2, 1, 1], - initializer=tf.compat.v1.random_normal_initializer()) - x = tf.nn.conv2d(x, conv_weights1, strides=[1, 2, 2, 1], padding="SAME", name='conv2d_1') + conv_weights1 = tf.compat.v1.get_variable( + "weight1", [2, 2, 1, 1], initializer=tf.compat.v1.random_normal_initializer() + ) + x = tf.nn.conv2d(x, conv_weights1, strides=[1, 2, 2, 1], padding="SAME", name="conv2d_1") x = tf.nn.relu(x) - conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 1, 1], - initializer=tf.compat.v1.random_normal_initializer()) - x = tf.nn.conv2d(x, conv_weights2, strides=[1, 3, 3, 1], padding="SAME", name='conv2d_2') + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 1, 1], initializer=tf.compat.v1.random_normal_initializer() + ) + x = tf.nn.conv2d(x, conv_weights2, strides=[1, 3, 3, 1], padding="SAME", name="conv2d_2") x = tf.compat.v1.layers.batch_normalization(x) x = tf.nn.relu(x) - x = tf.nn.max_pool(x, ksize=1, strides=[1, 2, 2, 1], padding="SAME", name='pool_1') + x = tf.nn.max_pool(x, ksize=1, strides=[1, 2, 2, 1], padding="SAME", name="pool_1") # TODO to support inspect max_pool - x = tf.nn.relu(x, name='output') + x = tf.nn.relu(x, name="output") sess.run(tf.compat.v1.global_variables_initializer()) constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[x.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[x.name.split(":")[0]] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") return graph def load_data_from_pkl(path, filename): try: file_path = os.path.join(path, filename) - with open(file_path, 'rb') as fp: + with open(file_path, "rb") as fp: data = pickle.load(fp) return data except FileExistsError: - logging.getLogger().info('Can not open %s.' % path) + logging.getLogger().info("Can not open %s." 
% path) class TestTensorflowInspectTensortinMSETuning(unittest.TestCase): - @classmethod def setUpClass(self): build_fake_yaml() if platform.system().lower() == "linux": - self.cfg_path = os.path.join(os.getcwd(), './nc_workspace/') - self.dumped_tensor_path = os.path.join(os.getcwd(), './nc_workspace/') + self.cfg_path = os.path.join(os.getcwd(), "./nc_workspace/") + self.dumped_tensor_path = os.path.join(os.getcwd(), "./nc_workspace/") else: - self.cfg_path = os.path.join(os.getcwd(), 'nc_workspace\\') - self.dumped_tensor_path = os.path.join(os.getcwd(), 'nc_workspace\\') - self.cfg_file_path = os.path.join(self.cfg_path, 'cfg.pkl') - self.dumped_tensor_file_path = os.path.join(self.dumped_tensor_path, 'inspect_result.pkl') + self.cfg_path = os.path.join(os.getcwd(), "nc_workspace\\") + self.dumped_tensor_path = os.path.join(os.getcwd(), "nc_workspace\\") + self.cfg_file_path = os.path.join(self.cfg_path, "cfg.pkl") + self.dumped_tensor_file_path = os.path.join(self.dumped_tensor_path, "inspect_result.pkl") @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") os.remove(self.dumped_tensor_file_path) shutil.rmtree(self.dumped_tensor_path) def test_tensorflow_inspect_tensort_in_mse_tuning(self): - from neural_compressor.experimental import Quantization, common import tensorflow.compat.v1 as tf + + from neural_compressor.experimental import Quantization, common + tf.disable_v2_behavior() model = build_fake_model() - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(128, 28, 28, 1), label=True) + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(128, 28, 28, 1), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = model quantizer.fit() self.assertEqual(os.path.exists(self.dumped_tensor_path), True) - data = load_data_from_pkl(self.dumped_tensor_path, 'inspect_result.pkl') - self.assertEqual('activation' in data, True) - self.assertEqual(set(data['activation'][0].keys()), set(['pool_1', 'conv2d_2', 'conv2d_1'])) - self.assertEqual(len(data['activation'][0].keys()), 3) - self.assertEqual(data['activation'][0]['pool_1']['pool_1'].shape, (1, 3, 3, 1)) - self.assertEqual(data['activation'][0]['conv2d_1']['conv2d_1'].shape, (1, 14, 14, 1)) - self.assertEqual(data['activation'][0]['conv2d_2']['conv2d_2'].shape, (1, 5, 5, 1)) + data = load_data_from_pkl(self.dumped_tensor_path, "inspect_result.pkl") + self.assertEqual("activation" in data, True) + self.assertEqual(set(data["activation"][0].keys()), set(["pool_1", "conv2d_2", "conv2d_1"])) + self.assertEqual(len(data["activation"][0].keys()), 3) + self.assertEqual(data["activation"][0]["pool_1"]["pool_1"].shape, (1, 3, 3, 1)) + self.assertEqual(data["activation"][0]["conv2d_1"]["conv2d_1"].shape, (1, 14, 14, 1)) + self.assertEqual(data["activation"][0]["conv2d_2"]["conv2d_2"].shape, (1, 5, 5, 1)) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_move_squeeze_after_relu.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_move_squeeze_after_relu.py index 1bb016c4f04..9d379f09586 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_move_squeeze_after_relu.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_move_squeeze_after_relu.py @@ -1,18 +1,18 @@ - import imp -import unittest import os -from numpy.core.fromnumeric import squeeze -import yaml 
-import numpy as np -from neural_compressor.adaptor.tf_utils.util import disable_random +import unittest +import numpy as np import tensorflow as tf +import yaml +from numpy.core.fromnumeric import squeeze from tensorflow.compat.v1 import graph_util +from neural_compressor.adaptor.tf_utils.util import disable_random + def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -39,9 +39,9 @@ def build_fake_yaml(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() @@ -53,33 +53,34 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") @disable_random() def test_move_squeeze_after_relu(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - - conv_weights = tf.constant(np.random.random((3,3,16,16)).astype(np.float32), name='y') + + conv_weights = tf.constant(np.random.random((3, 3, 16, 16)).astype(np.float32), name="y") conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") sque = tf.squeeze(conv) - bias_add = tf.nn.bias_add(sque, tf.constant(np.random.random((16)).astype(np.float32), name='bias_add')) - relu = tf.nn.relu(bias_add, name='op_to_store') - out_name = relu.name.split(':')[0] + bias_add = tf.nn.bias_add(sque, tf.constant(np.random.random((16)).astype(np.float32), name="bias_add")) + relu = tf.nn.relu(bias_add, name="op_to_store") + out_name = relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) - from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.move_squeeze_after_relu import MoveSqueezeAfterReluOptimizer + from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.move_squeeze_after_relu import ( + MoveSqueezeAfterReluOptimizer, + ) convert_graph = MoveSqueezeAfterReluOptimizer(output_graph_def).do_transformation() move_relu = False for node in convert_graph.node: - if node.op == 'Squeeze' and node.input[0] == 'op_to_store': + if node.op == "Squeeze" and node.input[0] == "op_to_store": move_relu = True self.assertEqual(move_relu, True) @@ -88,27 +89,28 @@ def test_move_squeeze_after_relu_with_outputs(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - - conv_weights = tf.constant(np.random.random((3,3,16,16)).astype(np.float32), name='y') + + conv_weights = tf.constant(np.random.random((3, 3, 16, 16)).astype(np.float32), name="y") conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") sque = tf.squeeze(conv) - bias_add = tf.nn.bias_add(sque, tf.constant(np.random.random((16)).astype(np.float32), name='bias_add')) - relu = tf.nn.relu(bias_add, name='relu') - smax = tf.nn.softmax(relu, name='op_to_store') - out_name = smax.name.split(':')[0] + bias_add = tf.nn.bias_add(sque, tf.constant(np.random.random((16)).astype(np.float32), name="bias_add")) + relu = tf.nn.relu(bias_add, 
name="relu") + smax = tf.nn.softmax(relu, name="op_to_store") + out_name = smax.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) - from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.move_squeeze_after_relu import MoveSqueezeAfterReluOptimizer + from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.move_squeeze_after_relu import ( + MoveSqueezeAfterReluOptimizer, + ) convert_graph = MoveSqueezeAfterReluOptimizer(output_graph_def).do_transformation() move_relu = False for node in convert_graph.node: - if node.op == 'Squeeze' and node.input[0] == 'op_to_store': + if node.op == "Squeeze" and node.input[0] == "op_to_store": move_relu = True self.assertEqual(move_relu, False) @@ -117,26 +119,27 @@ def test_move_squeeze_after_sigmoid(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - - conv_weights = tf.constant(np.random.random((3,3,16,16)).astype(np.float32), name='y') + + conv_weights = tf.constant(np.random.random((3, 3, 16, 16)).astype(np.float32), name="y") conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") sque = tf.squeeze(conv) - bias_add = tf.nn.bias_add(sque, tf.constant(np.random.random((16)).astype(np.float32), name='bias_add')) - sigmoid = tf.nn.sigmoid(bias_add, name='op_to_store') - out_name = sigmoid.name.split(':')[0] + bias_add = tf.nn.bias_add(sque, tf.constant(np.random.random((16)).astype(np.float32), name="bias_add")) + sigmoid = tf.nn.sigmoid(bias_add, name="op_to_store") + out_name = sigmoid.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) - from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.move_squeeze_after_relu import MoveSqueezeAfterReluOptimizer + from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.move_squeeze_after_relu import ( + MoveSqueezeAfterReluOptimizer, + ) convert_graph = MoveSqueezeAfterReluOptimizer(output_graph_def).do_transformation() move_sigmoid = False for node in convert_graph.node: - if node.op == 'Squeeze' and node.input[0] == 'op_to_store': + if node.op == "Squeeze" and node.input[0] == "op_to_store": move_sigmoid = True self.assertEqual(move_sigmoid, True) @@ -145,28 +148,30 @@ def test_move_reshape_after_relu(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - - conv_weights = tf.constant(np.random.random((3,3,16,16)).astype(np.float32), name='y') + + conv_weights = tf.constant(np.random.random((3, 3, 16, 16)).astype(np.float32), name="y") conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") - bias_add = tf.nn.bias_add(conv, tf.constant(np.random.random((16)).astype(np.float32), name='bias_add')) - reshape = tf.reshape(bias_add, [1,28,28,16]) - relu = tf.nn.relu(reshape, name='op_to_store') - out_name = relu.name.split(':')[0] + bias_add = 
tf.nn.bias_add(conv, tf.constant(np.random.random((16)).astype(np.float32), name="bias_add")) + reshape = tf.reshape(bias_add, [1, 28, 28, 16]) + relu = tf.nn.relu(reshape, name="op_to_store") + out_name = relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) - from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.move_squeeze_after_relu import MoveSqueezeAfterReluOptimizer + from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.move_squeeze_after_relu import ( + MoveSqueezeAfterReluOptimizer, + ) convert_graph = MoveSqueezeAfterReluOptimizer(output_graph_def).do_transformation() move_relu = False for node in convert_graph.node: - if node.op == 'Reshape' and node.input[0] == 'op_to_store': + if node.op == "Reshape" and node.input[0] == "op_to_store": move_relu = True self.assertEqual(move_relu, True) + if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_quantize_input.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_quantize_input.py index 82b6578a497..d321254a84e 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_quantize_input.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_quantize_input.py @@ -1,15 +1,17 @@ import os +import shutil import unittest + +import tensorflow as tf import yaml -import shutil +from tensorflow.compat.v1 import graph_util + from neural_compressor.adaptor.tensorflow import TensorFlowAdaptor from neural_compressor.adaptor.tf_utils.util import disable_random -import tensorflow as tf -from tensorflow.compat.v1 import graph_util def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -25,11 +27,13 @@ def build_fake_yaml(): relative: 0.0001 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() + + class TestQuantizeInput(unittest.TestCase): @classmethod def setUpClass(self): @@ -37,52 +41,62 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') - shutil.rmtree('./saved', ignore_errors=True) + os.remove("fake_yaml.yaml") + shutil.rmtree("./saved", ignore_errors=True) @disable_random() - @unittest.skipIf(tf.version.VERSION < '2.1.0', "Quantize input needs tensorflow 2.1.0 and newer, so test_quantize_input is skipped") + @unittest.skipIf( + tf.version.VERSION < "2.1.0", + "Quantize input needs tensorflow 2.1.0 and newer, so test_quantize_input is skipped", + ) def test_quantize_input(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") - conv_bias = tf.compat.v1.get_variable("bias", [16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_bias = tf.compat.v1.get_variable("bias", [16], 
initializer=tf.compat.v1.random_normal_initializer()) conv_bias = tf.math.add(conv, conv_bias) - relu6 = tf.nn.relu6(conv_bias, name='op_to_store') + relu6 = tf.nn.relu6(conv_bias, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) constant_graph = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) for i in constant_graph.node: - if i.op.find('Add') != -1: - i.op = 'Add' + if i.op.find("Add") != -1: + i.op = "Add" from neural_compressor.experimental import Quantization, common + quantizer = Quantization("./fake_yaml.yaml") - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = constant_graph q_model = quantizer.fit() - framework_specific_info = {'device': 'cpu', 'approach': 'post_training_static_quant', \ - 'random_seed': 1978, 'inputs': ['input'], 'outputs': ['op_to_store'], \ - 'workspace_path': 'saved', 'format': 'default', 'backend': 'default'} + framework_specific_info = { + "device": "cpu", + "approach": "post_training_static_quant", + "random_seed": 1978, + "inputs": ["input"], + "outputs": ["op_to_store"], + "workspace_path": "saved", + "format": "default", + "backend": "default", + } quantize_input_graph, _ = TensorFlowAdaptor(framework_specific_info).quantize_input(q_model.graph) Not_found_QuantizedV2 = True for i in quantize_input_graph.as_graph_def().node: - if i.op == 'QuantizeV2': + if i.op == "QuantizeV2": Not_found_QuantizedV2 = False break self.assertEqual(Not_found_QuantizedV2, True) diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_query_yaml.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_query_yaml.py index 1b93a01a77b..d0ef998bb31 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_query_yaml.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_query_yaml.py @@ -1,18 +1,19 @@ # # -*- coding: utf-8 -*- # -import unittest -import yaml import os -from neural_compressor.adaptor.tensorflow import TensorflowQuery -from neural_compressor.adaptor.tf_utils.util import disable_random +import unittest import tensorflow as tf +import yaml from tensorflow.compat.v1 import graph_util +from neural_compressor.adaptor.tensorflow import TensorflowQuery +from neural_compressor.adaptor.tf_utils.util import disable_random + def build_fake_yaml_on_grappler(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -41,14 +42,15 @@ def build_fake_yaml_on_grappler(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml_grappler.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml_grappler.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() + def build_fake_framework_yaml(): - fake_yaml = ''' + fake_yaml = """ --- - version: @@ -56,7 +58,7 @@ def build_fake_framework_yaml(): bf16: ['Conv2D', 'MatMul', 'ConcatV2', 'MaxPool', 'AvgPool', 'DepthwiseConv2dNative'] - int8: { + int8: { 'static': { 'Conv2D': { 'weight': { @@ -93,11 +95,11 @@ def build_fake_framework_yaml(): - version: - name: ['default'] + name: ['default'] bf16: ['Conv2D', 'MatMul', 'ConcatV2', 'MaxPool', 'AvgPool', 
'DepthwiseConv2dNative'] - int8: { + int8: { 'static': { 'Conv2D': { 'weight': { @@ -145,14 +147,14 @@ def build_fake_framework_yaml(): 'dynamic': { } } - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_framework.yaml', "w", encoding="utf-8") as f: + with open("fake_framework.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) - f.close() + f.close() -class TestTFQueryYaml(unittest.TestCase): +class TestTFQueryYaml(unittest.TestCase): @classmethod def setUpClass(self): self.tf_yaml_path = os.path.join(os.getcwd() + "/../neural_compressor/adaptor/tensorflow.yaml") @@ -164,70 +166,69 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml_grappler.yaml') + os.remove("fake_yaml_grappler.yaml") def test_unique_version(self): - versions = [i['version']['name'] for i in self.content] + versions = [i["version"]["name"] for i in self.content] registered_version_name = [] for i in versions: - if isinstance(i, list): - registered_version_name.extend(i) - else: - registered_version_name.append(i) + if isinstance(i, list): + registered_version_name.extend(i) + else: + registered_version_name.append(i) self.assertEqual(len(registered_version_name), len(set(registered_version_name))) def test_int8_sequences(self): patterns = self.query_handler.get_eightbit_patterns() - has_conv2d = bool('Conv2D' in patterns) - has_matmul = bool('MatMul' in patterns) + has_conv2d = bool("Conv2D" in patterns) + has_matmul = bool("MatMul" in patterns) self.assertEqual(has_conv2d, True) self.assertEqual(has_matmul, True) - self.assertGreaterEqual(len(patterns['Conv2D']), 13) - self.assertGreaterEqual(len(patterns['MatMul']), 3) - self.assertEqual(len(patterns['ConcatV2']), 1) - self.assertEqual(len(patterns['MaxPool']), 1) - self.assertEqual(len(patterns['AvgPool']), 1) + self.assertGreaterEqual(len(patterns["Conv2D"]), 13) + self.assertGreaterEqual(len(patterns["MatMul"]), 3) + self.assertEqual(len(patterns["ConcatV2"]), 1) + self.assertEqual(len(patterns["MaxPool"]), 1) + self.assertEqual(len(patterns["AvgPool"]), 1) def test_convert_internal_patterns(self): internal_patterns = self.query_handler.generate_internal_patterns() - self.assertEqual([['MaxPool']] in internal_patterns, True) - self.assertEqual([['ConcatV2']] in internal_patterns, True) - self.assertEqual([['AvgPool']] in internal_patterns, True) - self.assertEqual([['MatMul'], ('BiasAdd',), ('Relu',)] in internal_patterns, True) + self.assertEqual([["MaxPool"]] in internal_patterns, True) + self.assertEqual([["ConcatV2"]] in internal_patterns, True) + self.assertEqual([["AvgPool"]] in internal_patterns, True) + self.assertEqual([["MatMul"], ("BiasAdd",), ("Relu",)] in internal_patterns, True) @disable_random() def test_grappler_cfg(self): x = tf.compat.v1.placeholder(tf.float32, [1, 30, 30, 1], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [2, 2, 1, 1], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [1], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [2, 2, 1, 1], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [1], initializer=tf.compat.v1.random_normal_initializer()) x = tf.nn.relu(x) - conv = tf.nn.conv2d(x, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name='last') + conv = tf.nn.conv2d(x, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name="last") normed = 
tf.compat.v1.layers.batch_normalization(conv) relu = tf.nn.relu(normed) relu2 = tf.nn.relu(relu) - pool = tf.nn.max_pool(relu2, ksize=1, strides=[1, 2, 2, 1], name='maxpool', padding="SAME") - conv1 = tf.nn.conv2d(pool, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name='last') + pool = tf.nn.max_pool(relu2, ksize=1, strides=[1, 2, 2, 1], name="maxpool", padding="SAME") + conv1 = tf.nn.conv2d(pool, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name="last") conv_bias = tf.nn.bias_add(conv1, conv_bias) x = tf.nn.relu(conv_bias) - final_node = tf.nn.relu(x, name='op_to_store') + final_node = tf.nn.relu(x, name="op_to_store") - out_name = final_node.name.split(':')[0] + out_name = final_node.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml_grappler.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 30, 30, 1), label=True) + quantizer = Quantization("fake_yaml_grappler.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 30, 30, 1), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -235,15 +236,15 @@ def test_grappler_cfg(self): disable_arithmetic = False for i in output_graph.graph_def.node: - if i.name == 'maxpool_eightbit_quantize_Relu_2' and i.input[0] == 'Relu_2': - disable_arithmetic = True + if i.name == "maxpool_eightbit_quantize_Relu_2" and i.input[0] == "Relu_2": + disable_arithmetic = True # if tf.version.VERSION >= '2.3.0': # self.assertEqual(False, disable_arithmetic) # else: self.assertEqual(True, disable_arithmetic) -class TestFrameworkQueryYaml(unittest.TestCase): +class TestFrameworkQueryYaml(unittest.TestCase): @classmethod def setUpClass(self): build_fake_framework_yaml() @@ -255,13 +256,14 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_framework.yaml') + os.remove("fake_framework.yaml") def test_version_fallback(self): if self.query_handler.version >= "2.1.0": - self.assertEqual(True, 'Conv2D' in self.query_handler.get_op_types()['int8']) + self.assertEqual(True, "Conv2D" in self.query_handler.get_op_types()["int8"]) else: - self.assertEqual(True, 'BatchMatMul' in self.query_handler.get_op_types()['int8']) + self.assertEqual(True, "BatchMatMul" in self.query_handler.get_op_types()["int8"]) + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_remove_training_nodes.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_remove_training_nodes.py index a82e34880a8..c372cc348e1 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_remove_training_nodes.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_remove_training_nodes.py @@ -2,13 +2,15 @@ # -*- coding: utf-8 -*- # import unittest -from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper import tensorflow as tf from tensorflow.core.framework import graph_pb2 from tensorflow.python.framework import dtypes -class TestRemoveTrainingNodes(unittest.TestCase): +from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper + + +class 
TestRemoveTrainingNodes(unittest.TestCase): def test_remove_training_nodes(self): tf.compat.v1.disable_eager_execution() @@ -16,13 +18,10 @@ def test_remove_training_nodes(self): relu_name = "relu" float_graph_def = graph_pb2.GraphDef() input_constant = QuantizeGraphHelper.create_constant_node( - input_constant_name, - value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], - dtype=dtypes.float32, - shape=[1, 2, 6, 1]) + input_constant_name, value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=dtypes.float32, shape=[1, 2, 6, 1] + ) float_graph_def.node.extend([input_constant]) - relu_node = QuantizeGraphHelper.create_node("Relu", relu_name, - [input_constant_name]) + relu_node = QuantizeGraphHelper.create_node("Relu", relu_name, [input_constant_name]) QuantizeGraphHelper.set_attr_dtype(relu_node, "T", dtypes.float32) float_graph_def.node.extend([relu_node]) @@ -30,62 +29,55 @@ def test_remove_training_nodes(self): mat_mul_name = "mat_mul" identity_name = "identity" b_constant = QuantizeGraphHelper.create_constant_node( - b_constant_name, value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=dtypes.float32, shape=[2, 6]) + b_constant_name, value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=dtypes.float32, shape=[2, 6] + ) float_graph_def.node.extend([b_constant]) - mat_mul_node = QuantizeGraphHelper.create_node("MatMul", mat_mul_name, - [relu_name, b_constant_name]) + mat_mul_node = QuantizeGraphHelper.create_node("MatMul", mat_mul_name, [relu_name, b_constant_name]) QuantizeGraphHelper.set_attr_dtype(mat_mul_node, "T", dtypes.float32) QuantizeGraphHelper.set_attr_bool(mat_mul_node, "transpose_a", False) QuantizeGraphHelper.set_attr_bool(mat_mul_node, "transpose_b", False) float_graph_def.node.extend([mat_mul_node]) - identity_node = QuantizeGraphHelper.create_node("Identity", identity_name, - [mat_mul_name]) + identity_node = QuantizeGraphHelper.create_node("Identity", identity_name, [mat_mul_name]) float_graph_def.node.extend([identity_node]) bias_add_name = "bias_add" offset_constant_name = "offset_constant" offset_constant = QuantizeGraphHelper.create_constant_node( - offset_constant_name, - value=[1, 2, 3, 4, 5, 6], - dtype=dtypes.float32, - shape=[6]) + offset_constant_name, value=[1, 2, 3, 4, 5, 6], dtype=dtypes.float32, shape=[6] + ) float_graph_def.node.extend([offset_constant]) - bias_add_node = QuantizeGraphHelper.create_node( - "BiasAdd", bias_add_name, [identity_name, offset_constant_name]) + bias_add_node = QuantizeGraphHelper.create_node("BiasAdd", bias_add_name, [identity_name, offset_constant_name]) QuantizeGraphHelper.set_attr_dtype(bias_add_node, "T", dtypes.float32) float_graph_def.node.extend([bias_add_node]) post_relu_name = "post_relu" - post_relu_node = QuantizeGraphHelper.create_node("Relu", post_relu_name, - [bias_add_name]) + post_relu_node = QuantizeGraphHelper.create_node("Relu", post_relu_name, [bias_add_name]) float_graph_def.node.extend([post_relu_node]) - last_identity_node_name = 'last_identity' - last_identity_node = QuantizeGraphHelper.create_node("Identity", last_identity_node_name, - [post_relu_name]) + last_identity_node_name = "last_identity" + last_identity_node = QuantizeGraphHelper.create_node("Identity", last_identity_node_name, [post_relu_name]) float_graph_def.node.extend([last_identity_node]) left_relu_name = "final_relu" - left_relu_node = QuantizeGraphHelper.create_node("Relu", left_relu_name, - [last_identity_node_name]) + left_relu_node = QuantizeGraphHelper.create_node("Relu", left_relu_name, [last_identity_node_name]) 
float_graph_def.node.extend([left_relu_node]) right_relu_name = "final_relu" - right_relu_node = QuantizeGraphHelper.create_node("Relu", right_relu_name, - [last_identity_node_name]) + right_relu_node = QuantizeGraphHelper.create_node("Relu", right_relu_name, [last_identity_node_name]) float_graph_def.node.extend([right_relu_node]) post_graph = QuantizeGraphHelper().remove_training_nodes( - float_graph_def, protected_nodes=[right_relu_name, left_relu_name]) + float_graph_def, protected_nodes=[right_relu_name, left_relu_name] + ) found_identity_node_name = [] for i in post_graph.node: - if i.op == 'Identity': + if i.op == "Identity": found_identity_node_name.append(i.name) break - self.assertEqual(found_identity_node_name,[]) + self.assertEqual(found_identity_node_name, []) def test_remove_training_nodes_save_last_identity(self): tf.compat.v1.disable_eager_execution() @@ -94,13 +86,10 @@ def test_remove_training_nodes_save_last_identity(self): relu_name = "relu" float_graph_def = graph_pb2.GraphDef() input_constant = QuantizeGraphHelper.create_constant_node( - input_constant_name, - value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], - dtype=dtypes.float32, - shape=[1, 2, 6, 1]) + input_constant_name, value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=dtypes.float32, shape=[1, 2, 6, 1] + ) float_graph_def.node.extend([input_constant]) - relu_node = QuantizeGraphHelper.create_node("Relu", relu_name, - [input_constant_name]) + relu_node = QuantizeGraphHelper.create_node("Relu", relu_name, [input_constant_name]) QuantizeGraphHelper.set_attr_dtype(relu_node, "T", dtypes.float32) float_graph_def.node.extend([relu_node]) @@ -108,55 +97,50 @@ def test_remove_training_nodes_save_last_identity(self): mat_mul_name = "mat_mul" identity_name = "identity" b_constant = QuantizeGraphHelper.create_constant_node( - b_constant_name, value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=dtypes.float32, shape=[2, 6]) + b_constant_name, value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=dtypes.float32, shape=[2, 6] + ) float_graph_def.node.extend([b_constant]) - mat_mul_node = QuantizeGraphHelper.create_node("MatMul", mat_mul_name, - [relu_name, b_constant_name]) + mat_mul_node = QuantizeGraphHelper.create_node("MatMul", mat_mul_name, [relu_name, b_constant_name]) QuantizeGraphHelper.set_attr_dtype(mat_mul_node, "T", dtypes.float32) QuantizeGraphHelper.set_attr_bool(mat_mul_node, "transpose_a", False) QuantizeGraphHelper.set_attr_bool(mat_mul_node, "transpose_b", False) float_graph_def.node.extend([mat_mul_node]) - identity_node = QuantizeGraphHelper.create_node("Identity", identity_name, - [mat_mul_name]) + identity_node = QuantizeGraphHelper.create_node("Identity", identity_name, [mat_mul_name]) float_graph_def.node.extend([identity_node]) bias_add_name = "bias_add" offset_constant_name = "offset_constant" offset_constant = QuantizeGraphHelper.create_constant_node( - offset_constant_name, - value=[1, 2, 3, 4, 5, 6], - dtype=dtypes.float32, - shape=[6]) + offset_constant_name, value=[1, 2, 3, 4, 5, 6], dtype=dtypes.float32, shape=[6] + ) float_graph_def.node.extend([offset_constant]) - bias_add_node = QuantizeGraphHelper.create_node( - "BiasAdd", bias_add_name, [identity_name, offset_constant_name]) + bias_add_node = QuantizeGraphHelper.create_node("BiasAdd", bias_add_name, [identity_name, offset_constant_name]) QuantizeGraphHelper.set_attr_dtype(bias_add_node, "T", dtypes.float32) float_graph_def.node.extend([bias_add_node]) post_relu_name = "post_relu" - post_relu_node = QuantizeGraphHelper.create_node("Relu", 
post_relu_name, - [bias_add_name]) + post_relu_node = QuantizeGraphHelper.create_node("Relu", post_relu_name, [bias_add_name]) float_graph_def.node.extend([post_relu_node]) - last_identity_node_name = 'last_identity' - last_identity_node = QuantizeGraphHelper.create_node("Identity", last_identity_node_name, - [post_relu_name]) + last_identity_node_name = "last_identity" + last_identity_node = QuantizeGraphHelper.create_node("Identity", last_identity_node_name, [post_relu_name]) float_graph_def.node.extend([last_identity_node]) - post_graph = QuantizeGraphHelper().remove_training_nodes( - float_graph_def, protected_nodes=[last_identity_node_name]) + float_graph_def, protected_nodes=[last_identity_node_name] + ) found_identity_node_name = [] for i in post_graph.node: - if i.op == 'Identity': + if i.op == "Identity": found_identity_node_name.append(i.name) break - self.assertEqual(found_identity_node_name[0], 'last_identity') + self.assertEqual(found_identity_node_name[0], "last_identity") + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_rnn.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_rnn.py index 070e019875a..76ff838dda3 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_rnn.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_rnn.py @@ -1,16 +1,17 @@ - -import unittest import os +import unittest + import numpy as np +import tensorflow as tf import yaml -from neural_compressor.adaptor.tf_utils.util import disable_random +from tensorflow.compat.v1 import graph_util + from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper +from neural_compressor.adaptor.tf_utils.util import disable_random -import tensorflow as tf -from tensorflow.compat.v1 import graph_util def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -36,26 +37,28 @@ def build_fake_yaml(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() -def quantize(model,q_data, e_data): - from neural_compressor.quantization import Quantization + +def quantize(model, q_data, e_data): from neural_compressor.data import DataLoader + from neural_compressor.quantization import Quantization - quantizer = Quantization('fake_yaml.yaml') + quantizer = Quantization("fake_yaml.yaml") q_dataloader = DataLoader(dataset=list(zip(q_data[0], q_data[1]))) e_dataloader = DataLoader(dataset=list(zip(e_data[0], e_data[1]))) - quantizer.model= model + quantizer.model = model quantizer.calib_dataloader = q_dataloader quantizer.eval_dataloader = e_dataloader quantized_model = quantizer.fit() return quantized_model + class TestTensorflowRnn(unittest.TestCase): @classmethod def setUpClass(self): @@ -63,53 +66,43 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") - @unittest.skipUnless(bool( - tf.version.VERSION.find('1.15.0-up2') != -1), 'not supported the current tf version.') + @unittest.skipUnless(bool(tf.version.VERSION.find("1.15.0-up2") != -1), "not supported the current tf version.") @disable_random() def test_tensorflow_dynamic_rnn(self): - X = np.random.randn(3, 6, 4) - - X[1, 4:] = 0 - X_lengths = [6, 4, 6] + X = np.random.randn(3, 6, 4) - rnn_hidden_size = 5 - rnn_type= 'ltsm1' - if rnn_type == 
'lstm': - cell = tf.contrib.rnn.BasicLSTMCell(num_units=rnn_hidden_size, state_is_tuple=True) - else: - cell = tf.contrib.rnn.GRUCell(num_units=rnn_hidden_size) + X[1, 4:] = 0 + X_lengths = [6, 4, 6] - outputs, last_states = tf.nn.dynamic_rnn( - cell=cell, - dtype=tf.float64, - sequence_length=X_lengths, - inputs=X) + rnn_hidden_size = 5 + rnn_type = "ltsm1" + if rnn_type == "lstm": + cell = tf.contrib.rnn.BasicLSTMCell(num_units=rnn_hidden_size, state_is_tuple=True) + else: + cell = tf.contrib.rnn.GRUCell(num_units=rnn_hidden_size) - with tf.Session() as sess: - sess.run(tf.global_variables_initializer()) - o1, s1 = sess.run([outputs, last_states]) - rs = Helper.analysis_rnn_model(sess.graph.as_graph_def()) - self.assertEqual(len(rs.keys()), 2) + outputs, last_states = tf.nn.dynamic_rnn(cell=cell, dtype=tf.float64, sequence_length=X_lengths, inputs=X) + with tf.Session() as sess: + sess.run(tf.global_variables_initializer()) + o1, s1 = sess.run([outputs, last_states]) + rs = Helper.analysis_rnn_model(sess.graph.as_graph_def()) + self.assertEqual(len(rs.keys()), 2) - @unittest.skipUnless(bool( - tf.version.VERSION.find('1.15.0-up2') != -1), 'not supported the current tf version.') + @unittest.skipUnless(bool(tf.version.VERSION.find("1.15.0-up2") != -1), "not supported the current tf version.") @disable_random() def test_tensorflow_rnn(self): inp = tf.keras.layers.Input(shape=(None, 4)) - lstm_1 = tf.keras.layers.LSTM(units=10, - return_sequences=True)(inp) + lstm_1 = tf.keras.layers.LSTM(units=10, return_sequences=True)(inp) dropout_1 = tf.keras.layers.Dropout(0.2)(lstm_1) - lstm_2 = tf.keras.layers.LSTM(units=10, - return_sequences=False)(dropout_1) + lstm_2 = tf.keras.layers.LSTM(units=10, return_sequences=False)(dropout_1) dropout_2 = tf.keras.layers.Dropout(0.2)(lstm_2) out = tf.keras.layers.Dense(1)(dropout_2) model = tf.keras.models.Model(inputs=inp, outputs=out) - model.compile(loss="mse", - optimizer=tf.keras.optimizers.RMSprop()) + model.compile(loss="mse", optimizer=tf.keras.optimizers.RMSprop()) input_names = [t.name.split(":")[0] for t in model.inputs] output_names = [t.name.split(":")[0] for t in model.outputs] @@ -127,14 +120,12 @@ def test_tensorflow_rnn(self): output_names, ) with tf.Graph().as_default() as g: - tf.import_graph_def(graph_def, name='') - s = quantize(g, - q_data=(q_data, label), - e_data=(q_data, label)) + tf.import_graph_def(graph_def, name="") + s = quantize(g, q_data=(q_data, label), e_data=(q_data, label)) convert_count = 0 for i in s.graph_def.node: - if i.op == 'QuantizedMatMulWithBiasAndDequantize': + if i.op == "QuantizedMatMulWithBiasAndDequantize": convert_count += 1 self.assertEqual(convert_count, 9) diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_set_tensor.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_set_tensor.py index 71e882173d8..a53f3ae724f 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_set_tensor.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_set_tensor.py @@ -1,16 +1,18 @@ import os +import shutil import unittest -import yaml + import numpy as np -import shutil +import tensorflow as tf +import yaml +from tensorflow.compat.v1 import graph_util + from neural_compressor.adaptor.tensorflow import TensorFlowAdaptor from neural_compressor.adaptor.tf_utils.util import disable_random -import tensorflow as tf -from tensorflow.compat.v1 import graph_util def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -26,11 +28,13 @@ def build_fake_yaml(): 
relative: 0.0001 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() + + class TestSetTensor(unittest.TestCase): @classmethod def setUpClass(self): @@ -38,115 +42,132 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') - shutil.rmtree('./saved', ignore_errors=True) - + os.remove("fake_yaml.yaml") + shutil.rmtree("./saved", ignore_errors=True) + @disable_random() def test_fp32bias(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") - conv_bias = tf.compat.v1.get_variable("bias", [16], dtype=tf.float32, - initializer=tf.compat.v1.random_normal_initializer()) + conv_bias = tf.compat.v1.get_variable( + "bias", [16], dtype=tf.float32, initializer=tf.compat.v1.random_normal_initializer() + ) conv_bias = tf.math.add(conv, conv_bias) - relu6 = tf.nn.relu6(conv_bias, name='op_to_store') + relu6 = tf.nn.relu6(conv_bias, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) constant_graph = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common + quantizer = Quantization("./fake_yaml.yaml") - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = constant_graph q_model = quantizer.fit() - - framework_specific_info = {'device': 'cpu', 'workspace_path': 'saved',\ - 'random_seed': 1978, 'inputs': ['input'], 'outputs': ['op_to_store'], \ - 'approach': 'post_training_static_quant', 'format': 'default', - 'backend': 'default'} + + framework_specific_info = { + "device": "cpu", + "workspace_path": "saved", + "random_seed": 1978, + "inputs": ["input"], + "outputs": ["op_to_store"], + "approach": "post_training_static_quant", + "format": "default", + "backend": "default", + } adaptor = TensorFlowAdaptor(framework_specific_info) - adaptor.set_tensor(q_model, {'bias': np.random.random(16)}) + adaptor.set_tensor(q_model, {"bias": np.random.random(16)}) from tensorflow.core.framework import attr_value_pb2 from tensorflow.python.framework import dtypes + for node in q_model.graph_def.node: - if node.name == 'bias': - self.assertEqual(node.attr['dtype'], attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + if node.name == "bias": + self.assertEqual(node.attr["dtype"], attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) @disable_random() def test_int32bias(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, 
"CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") conv_bias = tf.compat.v1.get_variable("bias", [16], dtype=tf.float32) conv_bias = tf.math.add(conv, conv_bias) - relu6 = tf.nn.relu6(conv_bias, name='relu_0') + relu6 = tf.nn.relu6(conv_bias, name="relu_0") - conv_weights1 = tf.compat.v1.get_variable("weight1", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights1 = tf.compat.v1.get_variable( + "weight1", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv1 = tf.nn.conv2d(relu6, conv_weights1, strides=[1, 2, 2, 1], padding="VALID") conv_bias1 = tf.compat.v1.get_variable("bias1", [16], dtype=tf.float32) conv_bias1 = tf.math.add(conv1, conv_bias1) - relu6 = tf.nn.relu6(conv_bias1, name='relu_1') + relu6 = tf.nn.relu6(conv_bias1, name="relu_1") - conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(relu6, conv_weights2, strides=[1, 2, 2, 1], padding="VALID") conv_bias2 = tf.compat.v1.get_variable("bias2", [16], dtype=tf.float32) conv_bias2 = tf.math.add(conv2, conv_bias2) - relu6 = tf.nn.relu6(conv_bias2, name='op_to_store') - out_name = relu6.name.split(':')[0] + relu6 = tf.nn.relu6(conv_bias2, name="op_to_store") + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) constant_graph = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) for i in constant_graph.node: - if i.op.find('Add') != -1: - i.op = 'Add' + if i.op.find("Add") != -1: + i.op = "Add" from neural_compressor.experimental import Quantization, common + quantizer = Quantization("./fake_yaml.yaml") - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = constant_graph q_model = quantizer.fit() - framework_specific_info = {'device': 'cpu', 'workspace_path': 'saved',\ - 'random_seed': 1978, 'inputs': ['input'], 'outputs': ['op_to_store'], \ - 'approach': 'post_training_static_quant', 'format': 'default', - 'backend': 'default'} + framework_specific_info = { + "device": "cpu", + "workspace_path": "saved", + "random_seed": 1978, + "inputs": ["input"], + "outputs": ["op_to_store"], + "approach": "post_training_static_quant", + "format": "default", + "backend": "default", + } adaptor = TensorFlowAdaptor(framework_specific_info) - adaptor.set_tensor(q_model, {'bias1': np.random.randint(6,size=2, dtype='int32')}) + adaptor.set_tensor(q_model, {"bias1": np.random.randint(6, size=2, dtype="int32")}) from tensorflow.core.framework import attr_value_pb2 from tensorflow.python.framework import dtypes + for node in q_model.graph_def.node: - if node.name == 'bias2': - self.assertEqual(node.attr['dtype'], attr_value_pb2.AttrValue( - type=dtypes.qint32.as_datatype_enum)) + if node.name == 
"bias2": + self.assertEqual(node.attr["dtype"], attr_value_pb2.AttrValue(type=dtypes.qint32.as_datatype_enum)) if __name__ == "__main__": diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_share_nodes_graph.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_share_nodes_graph.py index 1aa856c3e04..445d957a50c 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_share_nodes_graph.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_share_nodes_graph.py @@ -1,43 +1,50 @@ # # -*- coding: utf-8 -*- # -import unittest -import os import copy +import os import platform import tarfile -from neural_compressor.adaptor.tf_utils.util import read_graph -from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper +import unittest + from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.split_shared_input import SplitSharedInputOptimizer +from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper +from neural_compressor.adaptor.tf_utils.util import read_graph + + class TestTensorflowShareNodesGraphParsing(unittest.TestCase): - ssd_resnet50_model = 'http://download.tensorflow.org/models/object_detection/ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03.tar.gz' - dst_path = '/tmp/.neural_compressor/ssd_resnet50_v1.tgz' + ssd_resnet50_model = "http://download.tensorflow.org/models/object_detection/ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03.tar.gz" + dst_path = "/tmp/.neural_compressor/ssd_resnet50_v1.tgz" platform = platform.system().lower() if platform == "windows": - unzipped_folder_name = 'C:\\tmp\\.neural_compressor\ssd_resnet50_v1\\ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03' + unzipped_folder_name = "C:\\tmp\\.neural_compressor\ssd_resnet50_v1\\ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03" else: - unzipped_folder_name = 'ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03' + unzipped_folder_name = "ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03" + @classmethod def setUpClass(self): if self.platform == "linux": if not os.path.exists(self.dst_path): - os.system( - "mkdir -p /tmp/.neural_compressor && wget {} -O {}".format( - self.ssd_resnet50_model, self.dst_path, self.dst_path)) + os.system( + "mkdir -p /tmp/.neural_compressor && wget {} -O {}".format( + self.ssd_resnet50_model, + self.dst_path, + ) + ) os.system("tar xvf {}".format(self.dst_path)) elif self.platform == "windows": if not os.path.exists(self.unzipped_folder_name): - os.system('md C:\\tmp\.neural_compressor && cd C:\\tmp\.neural_compressor') + os.system("md C:\\tmp\.neural_compressor && cd C:\\tmp\.neural_compressor") from urllib import request - request.urlretrieve(self.ssd_resnet50_model,self.dst_path) + + request.urlretrieve(self.ssd_resnet50_model, self.dst_path) tar = tarfile.open(self.dst_path) tar.extractall(self.unzipped_folder_name) @classmethod def tearDownClass(self): if self.platform == "linux": - os.system( - 'rm -rf {}'.format(self.unzipped_folder_name)) + os.system("rm -rf {}".format(self.unzipped_folder_name)) def test_parse_pb_contains_share_nodes(self): original_graphdef = read_graph(os.path.join(self.unzipped_folder_name, "frozen_inference_graph.pb")) @@ -47,5 +54,6 @@ def test_parse_pb_contains_share_nodes(self): self.assertGreater(len(parsed_graphdef.node), len(original_graphdef.node)) self.assertEqual(len(legacy_graphdef.node), len(parsed_graphdef.node)) -if __name__ == 
'__main__': + +if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_strip_equivalent_nodes.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_strip_equivalent_nodes.py index 4b5bc3664b4..8051d92b941 100644 --- a/test/adaptor/tensorflow_adaptor/test_tensorflow_strip_equivalent_nodes.py +++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_strip_equivalent_nodes.py @@ -2,17 +2,20 @@ # -*- coding: utf-8 -*- # -import unittest import os -import yaml +import unittest + import numpy as np import tensorflow as tf +import yaml from tensorflow.compat.v1 import graph_util + from neural_compressor.adaptor.tf_utils.util import disable_random from neural_compressor.experimental import Quantization, common + def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -38,12 +41,13 @@ def build_fake_yaml(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() + class TestConvBiasAddAddReluFusion(unittest.TestCase): @classmethod def setUpClass(self): @@ -51,7 +55,7 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") @disable_random() def test_conv_relu_fusion(self): @@ -59,22 +63,22 @@ def test_conv_relu_fusion(self): top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv1 = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") conv2 = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") conv_add = tf.math.add(conv1, conv2) relu6 = tf.nn.relu6(conv_add) - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -87,5 +91,5 @@ def test_conv_relu_fusion(self): self.assertEqual(found_conv_num, 1) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/adaptor/tensorflow_adaptor/test_tf_util.py b/test/adaptor/tensorflow_adaptor/test_tf_util.py index 89db04750fb..8fc779e5f39 100644 --- a/test/adaptor/tensorflow_adaptor/test_tf_util.py +++ b/test/adaptor/tensorflow_adaptor/test_tf_util.py @@ -1,151 +1,151 @@ import os import unittest -import numpy as np -from neural_compressor.model import Model -from neural_compressor.adaptor.tf_utils.util import get_graph_def -from neural_compressor.adaptor.tf_utils.util import disable_random -from 
neural_compressor.adaptor.tf_utils.util import is_ckpt_format -from neural_compressor.adaptor.tf_utils.util import collate_tf_preds -from neural_compressor.adaptor.tf_utils.util import get_tensor_by_name -from neural_compressor.adaptor.tf_utils.util import generate_feed_dict -from neural_compressor.adaptor.tf_utils.util import get_model_input_shape -from neural_compressor.adaptor.tf_utils.util import fix_ref_type_of_graph_def -from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper +import numpy as np import tensorflow as tf from tensorflow.core.framework import graph_pb2 from tensorflow.python.framework import dtypes +from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper +from neural_compressor.adaptor.tf_utils.util import ( + collate_tf_preds, + disable_random, + fix_ref_type_of_graph_def, + generate_feed_dict, + get_graph_def, + get_model_input_shape, + get_tensor_by_name, + is_ckpt_format, +) +from neural_compressor.model import Model + + def build_fake_graphdef(): graph_def = graph_pb2.GraphDef() - constant_1_name = 'moving_1/switch_input_const' - constant_1 = Helper.create_constant_node( - constant_1_name, - value=0., - dtype=dtypes.float32) - - constant_3_name = 'moving_1/switch_input_const/read' - constant_3 = Helper.create_constant_node( - constant_3_name, - value=[1], - dtype=dtypes.float32) - - constant_2_name = 'switch_input_const2' - constant_2 = Helper.create_constant_node( - constant_2_name, - value=2., - dtype=dtypes.float32) - equal_name = 'equal' + constant_1_name = "moving_1/switch_input_const" + constant_1 = Helper.create_constant_node(constant_1_name, value=0.0, dtype=dtypes.float32) + + constant_3_name = "moving_1/switch_input_const/read" + constant_3 = Helper.create_constant_node(constant_3_name, value=[1], dtype=dtypes.float32) + + constant_2_name = "switch_input_const2" + constant_2 = Helper.create_constant_node(constant_2_name, value=2.0, dtype=dtypes.float32) + equal_name = "equal" equal = Helper.create_node("Equal", equal_name, [constant_1_name, constant_2_name]) - Helper.set_attr_dtype(equal, 'T', dtypes.float32) - - refswitch_name = 'refswitch' - refswitch_node = Helper.create_node("RefSwitch", refswitch_name, - [constant_1_name ,equal_name]) - Helper.set_attr_dtype(refswitch_node, 'T', dtypes.float32) - - variable_name = 'variable' - variable_node = Helper.create_node("VariableV2", variable_name, - []) - Helper.set_attr_dtype(variable_node, 'T', dtypes.float32) - - assign_name = 'assign' - assign_node = Helper.create_node("Assign", assign_name, - [variable_name,refswitch_name]) - Helper.set_attr_bool(assign_node, 'use_locking', True) - Helper.set_attr_bool(assign_node, 'validate_shape', True) - Helper.set_attr_dtype(assign_node, 'T', dtypes.float32) - - assignsub_name = 'assignsub' - assignsub_node = Helper.create_node("AssignSub", assignsub_name, - [assign_name,constant_1_name]) - Helper.set_attr_bool(assignsub_node, 'use_locking', True) - Helper.set_attr_dtype(assignsub_node, 'T', dtypes.float32) - - assignadd_name = 'assignadd' - assignadd_node = Helper.create_node("AssignAdd", assignadd_name, - [assignsub_name,constant_2_name]) - Helper.set_attr_bool(assignadd_node, 'use_locking', True) - Helper.set_attr_dtype(assignadd_node, 'T', dtypes.float32) - - graph_def.node.extend([ - constant_1, - constant_2, - constant_3, - equal, - refswitch_node, - variable_node, - assign_node, - assignsub_node, - assignadd_node - ]) + Helper.set_attr_dtype(equal, "T", dtypes.float32) + + refswitch_name = 
"refswitch" + refswitch_node = Helper.create_node("RefSwitch", refswitch_name, [constant_1_name, equal_name]) + Helper.set_attr_dtype(refswitch_node, "T", dtypes.float32) + + variable_name = "variable" + variable_node = Helper.create_node("VariableV2", variable_name, []) + Helper.set_attr_dtype(variable_node, "T", dtypes.float32) + + assign_name = "assign" + assign_node = Helper.create_node("Assign", assign_name, [variable_name, refswitch_name]) + Helper.set_attr_bool(assign_node, "use_locking", True) + Helper.set_attr_bool(assign_node, "validate_shape", True) + Helper.set_attr_dtype(assign_node, "T", dtypes.float32) + + assignsub_name = "assignsub" + assignsub_node = Helper.create_node("AssignSub", assignsub_name, [assign_name, constant_1_name]) + Helper.set_attr_bool(assignsub_node, "use_locking", True) + Helper.set_attr_dtype(assignsub_node, "T", dtypes.float32) + + assignadd_name = "assignadd" + assignadd_node = Helper.create_node("AssignAdd", assignadd_name, [assignsub_name, constant_2_name]) + Helper.set_attr_bool(assignadd_node, "use_locking", True) + Helper.set_attr_dtype(assignadd_node, "T", dtypes.float32) + + graph_def.node.extend( + [ + constant_1, + constant_2, + constant_3, + equal, + refswitch_node, + variable_node, + assign_node, + assignsub_node, + assignadd_node, + ] + ) return graph_def + def build_fake_graphdef2(): - input_placeholder = tf.compat.v1.placeholder(tf.float32, shape=(32, 224, 224, 3), name='input_placeholder') + input_placeholder = tf.compat.v1.placeholder(tf.float32, shape=(32, 224, 224, 3), name="input_placeholder") - conv_filter = tf.Variable(tf.random.normal([3, 3, 3, 32], stddev=0.1), name='conv_filter') - conv_bias = tf.Variable(tf.zeros([32]), name='conv_bias') - conv_output = tf.nn.conv2d(input_placeholder, conv_filter, strides=[1, 1, 1, 1], padding='SAME') + conv_filter = tf.Variable(tf.random.normal([3, 3, 3, 32], stddev=0.1), name="conv_filter") + conv_bias = tf.Variable(tf.zeros([32]), name="conv_bias") + conv_output = tf.nn.conv2d(input_placeholder, conv_filter, strides=[1, 1, 1, 1], padding="SAME") conv_output = tf.nn.bias_add(conv_output, conv_bias) - conv_output = tf.nn.relu(conv_output, name='conv_output') - - pool_output = tf.nn.max_pool2d(conv_output, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool_output') - - fc_weights = tf.Variable(tf.random.normal([int(pool_output.shape[1]) * int(pool_output.shape[2]) * int(pool_output.shape[3]), 10], stddev=0.1), name='fc_weights') - fc_bias = tf.Variable(tf.zeros([10]), name='fc_bias') + conv_output = tf.nn.relu(conv_output, name="conv_output") + + pool_output = tf.nn.max_pool2d( + conv_output, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME", name="pool_output" + ) + + fc_weights = tf.Variable( + tf.random.normal( + [int(pool_output.shape[1]) * int(pool_output.shape[2]) * int(pool_output.shape[3]), 10], stddev=0.1 + ), + name="fc_weights", + ) + fc_bias = tf.Variable(tf.zeros([10]), name="fc_bias") fc_input = tf.reshape(pool_output, [-1, int(fc_weights.shape[0])]) fc_output = tf.matmul(fc_input, fc_weights) + fc_bias - - output = tf.reduce_sum(tf.nn.softmax(fc_output, name='output'), axis=-1) + + output = tf.reduce_sum(tf.nn.softmax(fc_output, name="output"), axis=-1) graph_def = tf.compat.v1.get_default_graph().as_graph_def() return graph_def -class TestTFutil(unittest.TestCase): +class TestTFutil(unittest.TestCase): @classmethod def tearDownClass(self): - os.remove('test.pb') - os.removedirs('fake_ckpt') + os.remove("test.pb") + os.removedirs("fake_ckpt") 
@disable_random() def test_fix_ref_type(self): graph_def = build_fake_graphdef() new_graph_def = fix_ref_type_of_graph_def(graph_def) - f = tf.io.gfile.GFile('./test.pb', 'wb') + f = tf.io.gfile.GFile("./test.pb", "wb") f.write(new_graph_def.SerializeToString()) find_Assign_prefix = False for node in new_graph_def.node: - if 'Assign' in node.op: + if "Assign" in node.op: find_Assign_prefix = True self.assertFalse(find_Assign_prefix, False) @disable_random() def test_collate_tf_preds(self): - results = [[1],[np.array([2])]] + results = [[1], [np.array([2])]] data = collate_tf_preds(results) - self.assertEqual(data,[1,np.array([2])]) + self.assertEqual(data, [1, np.array([2])]) results = [[np.array([2])], [[1]]] data = collate_tf_preds(results) - self.assertEqual(data[0].all(),np.array([2, 1]).all()) + self.assertEqual(data[0].all(), np.array([2, 1]).all()) @disable_random() def test_get_graph_def(self): - graph_def = get_graph_def('./test.pb', outputs="assignadd") - self.assertIsInstance(graph_def, tf.compat.v1.GraphDef) + graph_def = get_graph_def("./test.pb", outputs="assignadd") + self.assertIsInstance(graph_def, tf.compat.v1.GraphDef) @disable_random() def test_judge_ckpt_format(self): - os.mkdir('fake_ckpt') - ckpt_format = is_ckpt_format('fake_ckpt') + os.mkdir("fake_ckpt") + ckpt_format = is_ckpt_format("fake_ckpt") self.assertEqual(ckpt_format, False) @disable_random() def test_get_model_input_shape(self): graph_def = build_fake_graphdef2() try: - tensor = get_tensor_by_name(graph_def, 'fake:0') + tensor = get_tensor_by_name(graph_def, "fake:0") except: print("This code is for UT coverage of the exception handling") model = Model(graph_def) @@ -154,16 +154,16 @@ def test_get_model_input_shape(self): @disable_random() def test_generate_feed_dict(self): - input_0 = [[1., 3.],[3., 7.]] + input_0 = [[1.0, 3.0], [3.0, 7.0]] input_tensor_0 = tf.convert_to_tensor(input_0) - input_1 = [[1., 3.]] + input_1 = [[1.0, 3.0]] input_tensor_1 = tf.convert_to_tensor(input_1) feed_dict = generate_feed_dict([input_tensor_0], input_0) - self.assertEqual(feed_dict, {input_tensor_0:input_0}) + self.assertEqual(feed_dict, {input_tensor_0: input_0}) feed_dict = generate_feed_dict([input_tensor_0], {"Const": input_0}) - self.assertEqual(feed_dict, {input_tensor_0:input_0}) + self.assertEqual(feed_dict, {input_tensor_0: input_0}) feed_dict = generate_feed_dict([input_tensor_0, input_tensor_1], [input_0, input_1]) self.assertEqual(feed_dict[input_tensor_0], input_0) diff --git a/test/algorithm/modeling_gptj.py b/test/algorithm/modeling_gptj.py index 96478b992d9..aa9f945d9a7 100644 --- a/test/algorithm/modeling_gptj.py +++ b/test/algorithm/modeling_gptj.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-""" PyTorch GPT-J model.""" +"""PyTorch GPT-J model.""" import warnings from typing import Optional, Tuple, Union @@ -22,7 +22,6 @@ import torch.utils.checkpoint from torch import nn from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss - from transformers.activations import ACT2FN from transformers.modeling_outputs import ( BaseModelOutputWithPast, @@ -31,6 +30,7 @@ SequenceClassifierOutputWithPast, ) from transformers.modeling_utils import PreTrainedModel +from transformers.models.gptj.configuration_gptj import GPTJConfig from transformers.utils import ( add_code_sample_docstrings, add_start_docstrings, @@ -39,8 +39,6 @@ logging, ) from transformers.utils.model_parallel_utils import assert_device_map, get_device_map -from transformers.models.gptj.configuration_gptj import GPTJConfig - logger = logging.get_logger(__name__) @@ -114,9 +112,7 @@ def __init__(self, config): self.embed_positions = create_sinusoidal_positions(max_positions, pos_embd_dim) def _split_heads(self, tensor, num_attention_heads, attn_head_size, rotary): - """ - Splits hidden dim into attn_head_size and num_attention_heads - """ + """Splits hidden dim into attn_head_size and num_attention_heads.""" new_shape = tensor.size()[:-1] + (num_attention_heads, attn_head_size) tensor = tensor.view(new_shape) if rotary: @@ -129,9 +125,7 @@ def _split_heads(self, tensor, num_attention_heads, attn_head_size, rotary): raise ValueError(f"Input tensor rank should be one of [4, 5], but is: {len(tensor.shape)}") def _merge_heads(self, tensor, num_attention_heads, attn_head_size): - """ - Merges attn_head_size dim and num_attn_heads dim into hidden dim - """ + """Merges attn_head_size dim and num_attn_heads dim into hidden dim.""" if len(tensor.shape) == 5: tensor = tensor.permute(0, 1, 3, 2, 4).contiguous() elif len(tensor.shape) == 4: @@ -329,10 +323,8 @@ def forward( class GPTJPreTrainedModel(PreTrainedModel): - """ - An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained - models. - """ + """An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained + models.""" config_class = GPTJConfig base_model_prefix = "transformer" @@ -599,7 +591,9 @@ def forward( past_length = past_key_values[0][0].size(-2) if position_ids is None: - position_ids = torch.arange(past_length, torch.tensor(input_shape[-1]) + torch.tensor(past_length), dtype=torch.long, device=device) + position_ids = torch.arange( + past_length, torch.tensor(input_shape[-1]) + torch.tensor(past_length), dtype=torch.long, device=device + ) position_ids = position_ids.unsqueeze(0).view(-1, input_shape[-1]) # Attention mask. @@ -841,11 +835,11 @@ def forward( output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, ) -> Union[Tuple, CausalLMOutputWithPast]: - r""" - labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*): - Labels for language modeling. Note that the labels **are shifted** inside the model, i.e. you can set - `labels = input_ids` Indices are selected in `[-100, 0, ..., config.vocab_size]` All labels set to `-100` - are ignored (masked), the loss is only computed for labels in `[0, ..., config.vocab_size]` + r"""Labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*): + + Labels for language modeling. Note that the labels **are shifted** inside the model, i.e. 
you can set + `labels = input_ids` Indices are selected in `[-100, 0, ..., config.vocab_size]` All labels set to `-100` + are ignored (masked), the loss is only computed for labels in `[0, ..., config.vocab_size]` """ return_dict = return_dict if return_dict is not None else self.config.use_return_dict @@ -902,9 +896,10 @@ def forward( def _reorder_cache( past_key_values: Tuple[Tuple[torch.Tensor]], beam_idx: torch.Tensor ) -> Tuple[Tuple[torch.Tensor]]: - """ - This function is used to re-order the `past_key_values` cache if [`~PretrainedModel.beam_search`] or - [`~PretrainedModel.beam_sample`] is called. This is required to match `past_key_values` with the correct + """This function is used to re-order the `past_key_values` cache if [`~PretrainedModel.beam_search`] or + [`~PretrainedModel.beam_sample`] is called. + + This is required to match `past_key_values` with the correct beam_idx at every generation step. """ return tuple( @@ -966,11 +961,11 @@ def forward( output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, ) -> Union[Tuple, SequenceClassifierOutputWithPast]: - r""" - labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*): - Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., - config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If - `config.num_labels > 1` a classification loss is computed (Cross-Entropy). + r"""Labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*): + + Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., + config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If + `config.num_labels > 1` a classification loss is computed (Cross-Entropy). """ return_dict = return_dict if return_dict is not None else self.config.use_return_dict @@ -1091,9 +1086,9 @@ def forward( output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, ) -> Union[Tuple, QuestionAnsweringModelOutput]: - r""" - start_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*): - Labels for position (index) of the start of the labelled span for computing the token classification loss. + r"""start_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*): + + Labels for position (index) of the start of the labelled span for computing the token classification loss. Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence are not taken into account for computing the loss. 
end_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*): diff --git a/test/algorithm/test_algorithm.py b/test/algorithm/test_algorithm.py index 2e941345d2b..c551756d5a2 100644 --- a/test/algorithm/test_algorithm.py +++ b/test/algorithm/test_algorithm.py @@ -1,13 +1,15 @@ -"""Tests for neural_compressor quantization""" -import numpy as np -import unittest +"""Tests for neural_compressor quantization.""" +import importlib import os +import shutil +import unittest + +import numpy as np import yaml -import importlib -import shutil + def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -25,17 +27,18 @@ def build_fake_yaml(): topk: 1 tuning: accuracy_criterion: - relative: 0.01 + relative: 0.01 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: + yaml.dump(y, f) f.close() + def build_fake_yaml2(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -57,57 +60,63 @@ def build_fake_yaml2(): workspace: path: saved resume: ./saved/history.snapshot - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml2.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) + with open("fake_yaml2.yaml", "w", encoding="utf-8") as f: + yaml.dump(y, f) f.close() -def build_fake_model(): + +def build_fake_model(): import tensorflow as tf + try: graph = tf.Graph() graph_def = tf.GraphDef() with tf.Session() as sess: - x = tf.placeholder(tf.float32, shape=(1,3,3,1), name='x') - y = tf.constant(np.random.random((2,2,1,1)), dtype=tf.float32, name='y') - relu_0 = tf.nn.relu(x, name='relu') - conv = tf.nn.conv2d(input=relu_0, filters=y, strides=[1,1,1,1], padding='VALID', name='conv') + x = tf.placeholder(tf.float32, shape=(1, 3, 3, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)), dtype=tf.float32, name="y") + relu_0 = tf.nn.relu(x, name="relu") + conv = tf.nn.conv2d(input=relu_0, filters=y, strides=[1, 1, 1, 1], padding="VALID", name="conv") bias = tf.Variable(tf.ones([1], tf.float32)) - conv_add = tf.nn.bias_add(conv, bias, name='bias_add') + conv_add = tf.nn.bias_add(conv, bias, name="bias_add") relu = tf.nn.relu(conv_add) - op = tf.identity(relu, name='identity') + op = tf.identity(relu, name="identity") sess.run(tf.global_variables_initializer()) from tensorflow.compat.v1.graph_util import convert_variables_to_constants - constant_graph = convert_variables_to_constants(sess, sess.graph_def, ['identity']) + + constant_graph = convert_variables_to_constants(sess, sess.graph_def, ["identity"]) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") except: import tensorflow as tf + graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') - y = tf.compat.v1.constant(np.random.random((2,2,1,1)), dtype=tf.float32, name='y') - relu_0 = tf.nn.relu(x, name='relu') - conv = tf.nn.conv2d(input=relu_0, filters=y, strides=[1,1,1,1], padding='VALID', name='conv') + x = tf.compat.v1.placeholder(tf.float32, shape=(1, 3, 3, 1), name="x") + y = tf.compat.v1.constant(np.random.random((2, 2, 1, 1)), dtype=tf.float32, name="y") + relu_0 = tf.nn.relu(x, name="relu") + conv = tf.nn.conv2d(input=relu_0, filters=y, strides=[1, 
1, 1, 1], padding="VALID", name="conv") bias = tf.Variable(tf.ones([1], tf.float32)) - conv_add = tf.nn.bias_add(conv, bias, name='bias_add') + conv_add = tf.nn.bias_add(conv, bias, name="bias_add") relu = tf.nn.relu(conv_add) - op = tf.identity(relu, name='identity') + op = tf.identity(relu, name="identity") sess.run(tf.compat.v1.global_variables_initializer()) from tensorflow.compat.v1.graph_util import convert_variables_to_constants - constant_graph = convert_variables_to_constants(sess, sess.graph_def, ['identity']) + + constant_graph = convert_variables_to_constants(sess, sess.graph_def, ["identity"]) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") return graph + class TestQuantization(unittest.TestCase): @classmethod def setUpClass(self): @@ -117,14 +126,15 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') - os.remove('fake_yaml2.yaml') - shutil.rmtree('./saved', ignore_errors=True) + os.remove("fake_yaml.yaml") + os.remove("fake_yaml2.yaml") + shutil.rmtree("./saved", ignore_errors=True) def test_fast_bias_correction(self): from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 3, 3, 1), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 3, 3, 1), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph @@ -132,8 +142,9 @@ def test_fast_bias_correction(self): def test_weight_correction(self): from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml2.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 3, 3, 1), label=True) + + quantizer = Quantization("fake_yaml2.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 3, 3, 1), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph diff --git a/test/algorithm/test_layer_wise_quant.py b/test/algorithm/test_layer_wise_quant.py index 774d0f694e0..e25036bb021 100644 --- a/test/algorithm/test_layer_wise_quant.py +++ b/test/algorithm/test_layer_wise_quant.py @@ -1,20 +1,20 @@ -import unittest -import sys import shutil +import sys +import unittest -sys.path.insert(0, './') +sys.path.insert(0, "./") import torch from torch.utils.data import DataLoader, Dataset from transformers import AutoModelForCausalLM, AutoTokenizer -from neural_compressor.adaptor.torch_utils.layer_wise_quant import load_shell + from neural_compressor import PostTrainingQuantConfig, quantization +from neural_compressor.adaptor.torch_utils.layer_wise_quant import load_shell from neural_compressor.utils.pytorch import load class TestLayerWise(unittest.TestCase): def test_layer_wise(self): - - model_name_or_path = 'facebook/opt-125m' + model_name_or_path = "facebook/opt-125m" fp32_model = load_shell(model_name_or_path, AutoModelForCausalLM, torchscript=True) class TestDataset(Dataset): @@ -38,9 +38,10 @@ def __len__(self): "layer_wise_quant_args": { "model_path": "facebook/opt-125m", "smooth_quant": True, - "smooth_quant_alpha": 0.5 - } - }) + "smooth_quant_alpha": 0.5, + }, + }, + ) q_model = quantization.fit( fp32_model, @@ -48,10 +49,9 @@ def __len__(self): 
calib_dataloader=eval_dataloader, eval_func=lambda x: 0.1, ) - ouput_dir = './saved_model' + ouput_dir = "./saved_model" q_model.save(ouput_dir) - load_model = load(ouput_dir, - AutoModelForCausalLM.from_pretrained(model_name_or_path)) + load_model = load(ouput_dir, AutoModelForCausalLM.from_pretrained(model_name_or_path)) lm_weight = q_model._model.lm_head.module.weight() test_value = load_model.lm_head.module.weight().equal(lm_weight) self.assertTrue(test_value) @@ -61,17 +61,17 @@ def test_util(self): from neural_compressor.adaptor.torch_utils.layer_wise_quant.utils import ( get_children, get_named_children, - get_super_module_by_name + get_super_module_by_name, ) - model_name_or_path = 'facebook/opt-125m' + model_name_or_path = "facebook/opt-125m" model = load_shell(model_name_or_path, AutoModelForCausalLM, torchscript=True) children = get_children(model) named_children = get_named_children(model) self.assertEqual(children, [v for k, v in named_children]) module = get_super_module_by_name(model, named_children[0][0]) - self.assertTrue(hasattr(module, named_children[0][0].split('.')[-1])) + self.assertTrue(hasattr(module, named_children[0][0].split(".")[-1])) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/algorithm/test_smooth_quant.py b/test/algorithm/test_smooth_quant.py index 1a34f07c0dc..926f494d35a 100644 --- a/test/algorithm/test_smooth_quant.py +++ b/test/algorithm/test_smooth_quant.py @@ -1,26 +1,31 @@ import copy +import math +import shutil +import sys import unittest + import numpy as np -import shutil import torch -import sys -import math import transformers from packaging.version import Version -sys.path.append('./') +sys.path.append("./") + +import logging + +from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer -from neural_compressor.data import Datasets from neural_compressor import PostTrainingQuantConfig, quantization -from neural_compressor.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor.adaptor.torch_utils.smooth_quant import TorchSmoothQuant from neural_compressor.adaptor.torch_utils.model_wrapper import SQLinearWrapper -from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer -import logging +from neural_compressor.adaptor.torch_utils.smooth_quant import TorchSmoothQuant +from neural_compressor.data import Datasets +from neural_compressor.data.dataloaders.pytorch_dataloader import PyTorchDataLoader + logger = logging.getLogger("neural_compressor") try: import intel_extension_for_pytorch as ipex + TEST_IPEX = True except: TEST_IPEX = False @@ -37,9 +42,11 @@ def forward(self, x): out = self.fc2(out) return out + class DemoCalibDataloader: def __init__(self): self.batch_size = 1 + def __iter__(self): yield torch.randn([1, 3]) @@ -47,6 +54,7 @@ def __iter__(self): class LLMCalibDataloader: def __init__(self): self.batch_size = 1 + def __iter__(self): yield torch.ones([1, 3], dtype=torch.long) @@ -65,12 +73,13 @@ def __iter__(self): @classmethod def test_sq_dw_conv_relu6_auto(self): - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(10, 3, 1, 1), low=0., high=1.0) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(10, 3, 1, 1), low=0.0, high=1.0) dummy_dataloader = PyTorchDataLoader(dummy_dataset) class Model(torch.nn.Module): - device = torch.device('cpu') + device = torch.device("cpu") + def __init__(self): super(Model, self).__init__() self.conv1 = torch.nn.Conv2d(3, 3, 1, 1, 
groups=3) @@ -89,19 +98,20 @@ def forward(self, x): output = model(data) sq = TorchSmoothQuant(model, dummy_dataloader) - sq.transform(alpha='auto', calib_iter=1, folding=True) + sq.transform(alpha="auto", calib_iter=1, folding=True) output_sq = model(data) assert torch.sum(torch.abs(output - output_sq)) < 1e-3 assert len(sq.absorb_to_layer) == 1 @classmethod def test_sq_dw_conv_relu6(self): - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(10, 3, 1, 1), low=0., high=1.0) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(10, 3, 1, 1), low=0.0, high=1.0) dummy_dataloader = PyTorchDataLoader(dummy_dataset) class Model(torch.nn.Module): - device = torch.device('cpu') + device = torch.device("cpu") + def __init__(self): super(Model, self).__init__() self.conv1 = torch.nn.Conv2d(3, 3, 1, 1) @@ -140,12 +150,13 @@ def __iter__(self): @classmethod def test_sq_conv_relu6(self): - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(10, 3, 2, 2), low=0., high=1.0) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(10, 3, 2, 2), low=0.0, high=1.0) dummy_dataloader = PyTorchDataLoader(dummy_dataset) class Model(torch.nn.Module): - device = torch.device('cpu') + device = torch.device("cpu") + def __init__(self): super(Model, self).__init__() self.conv1 = torch.nn.Conv2d(3, 4, 1, 1) @@ -161,7 +172,7 @@ def forward(self, x): model = Model() sq = TorchSmoothQuant(model, dummy_dataloader) - sq.transform(alpha='auto', calib_iter=3, folding=True) + sq.transform(alpha="auto", calib_iter=3, folding=True) assert len(sq.absorb_to_layer) == 1 @@ -180,7 +191,8 @@ def __iter__(self): @classmethod def test_sq_conv_relu6(self): class Model(torch.nn.Module): - device = torch.device('cpu') + device = torch.device("cpu") + def __init__(self): super(Model, self).__init__() self.conv1 = torch.nn.Conv2d(3, 4, 1, 1) @@ -202,7 +214,8 @@ def forward(self, x): @classmethod def test_sq_conv_relu(self): class Model(torch.nn.Module): - device = torch.device('cpu') + device = torch.device("cpu") + def __init__(self): super(Model, self).__init__() self.conv1 = torch.nn.Conv2d(3, 4, 1, 1) @@ -224,7 +237,8 @@ def forward(self, x): @classmethod def test_sq_conv_gelu(self): class Model(torch.nn.Module): - device = torch.device('cpu') + device = torch.device("cpu") + def __init__(self): super(Model, self).__init__() self.conv1 = torch.nn.Conv2d(3, 4, 1, 1) @@ -246,7 +260,8 @@ def forward(self, x): @classmethod def test_sq_conv_bn(self): class Model(torch.nn.Module): - device = torch.device('cpu') + device = torch.device("cpu") + def __init__(self): super(Model, self).__init__() self.conv1 = torch.nn.Conv2d(3, 4, 1, 1) @@ -269,7 +284,8 @@ def forward(self, x): def test_sq_conv_gn(self): class Model(torch.nn.Module): - device = torch.device('cpu') + device = torch.device("cpu") + def __init__(self): super(Model, self).__init__() self.conv1 = torch.nn.Conv2d(3, 4, 1, 1) @@ -291,11 +307,12 @@ def forward(self, x): sq = TorchSmoothQuant(model, self.conv_dl) sq.transform(alpha=0.6, calib_iter=2, folding=True) - assert len(sq.absorb_to_layer['norm']) == 2 + assert len(sq.absorb_to_layer["norm"]) == 2 def test_sq_add(self): class Model(torch.nn.Module): - device = torch.device('cpu') + device = torch.device("cpu") + def __init__(self): super(Model, self).__init__() self.conv1 = torch.nn.Conv2d(3, 3, 1, 1) @@ -322,9 +339,7 @@ def forward(self, x): class LlamaRMSNorm(nn.Module): def __init__(self, hidden_size, eps=1e-6): - """ - LlamaRMSNorm is 
equivalent to T5LayerNorm - """ + """LlamaRMSNorm is equivalent to T5LayerNorm.""" super().__init__() self.weight = nn.Parameter(torch.ones(hidden_size)) self.variance_epsilon = eps @@ -342,8 +357,9 @@ def forward(self, hidden_states): class T5LayerNorm(nn.Module): def __init__(self, hidden_size, eps=1e-6): - """ - Construct a layernorm module in the T5 style. No bias and no subtraction of mean. + """Construct a layernorm module in the T5 style. + + No bias and no subtraction of mean. """ super().__init__() self.weight = nn.Parameter(torch.ones(hidden_size)) @@ -388,7 +404,8 @@ def __iter__(self): @classmethod def test_sq_linear_LlamaRMSNorm(self): class Model(torch.nn.Module): - device = torch.device('cpu') + device = torch.device("cpu") + def __init__(self): super(Model, self).__init__() self.fc1 = torch.nn.Linear(3, 4) @@ -410,7 +427,8 @@ def forward(self, x): @classmethod def test_sq_linear_LlamaRMSNorm_tuple(self): class Model(torch.nn.Module): - device = torch.device('cpu') + device = torch.device("cpu") + def __init__(self): super(Model, self).__init__() self.fc1 = torch.nn.Linear(3, 4) @@ -445,7 +463,8 @@ def __iter__(self): @classmethod def test_sq_linear_LlamaRMSNorm_auto(self): class Model(torch.nn.Module): - device = torch.device('cpu') + device = torch.device("cpu") + def __init__(self): super(Model, self).__init__() self.fc1 = torch.nn.Linear(3, 4) @@ -461,7 +480,7 @@ def forward(self, x): model = Model() sq = TorchSmoothQuant(model, self.linear_dl) - sq.transform(alpha='auto', calib_iter=1, folding=True) + sq.transform(alpha="auto", calib_iter=1, folding=True) assert len(sq.absorb_to_layer) == 1 @@ -480,7 +499,8 @@ def __iter__(self): @classmethod def test_sq_linear_LlamaRMSNorm(self): class Model(torch.nn.Module): - device = torch.device('cpu') + device = torch.device("cpu") + def __init__(self): super(Model, self).__init__() self.fc1 = torch.nn.Linear(3, 4) @@ -502,7 +522,8 @@ def forward(self, x): @classmethod def test_sq_linear_T5Norm(self): class Model(torch.nn.Module): - device = torch.device('cpu') + device = torch.device("cpu") + def __init__(self): super(Model, self).__init__() self.fc1 = torch.nn.Linear(3, 4) @@ -524,7 +545,8 @@ def forward(self, x): @classmethod def test_sq_linear_relu6(self): class Model(torch.nn.Module): - device = torch.device('cpu') + device = torch.device("cpu") + def __init__(self): super(Model, self).__init__() self.fc1 = torch.nn.Linear(3, 4) @@ -546,7 +568,8 @@ def forward(self, x): @classmethod def test_sq_linear_norm(self): class Model(torch.nn.Module): - device = torch.device('cpu') + device = torch.device("cpu") + def __init__(self): super(Model, self).__init__() self.fc1 = torch.nn.Linear(3, 4) @@ -568,7 +591,8 @@ def forward(self, x): @classmethod def test_sq_linear_norm_linear(self): class Model(torch.nn.Module): - device = torch.device('cpu') + device = torch.device("cpu") + def __init__(self): super(Model, self).__init__() self.norm_1 = torch.nn.LayerNorm(3) @@ -592,7 +616,8 @@ def forward(self, x): @classmethod def test_sq_linear_gelu_norm(self): class Model(torch.nn.Module): - device = torch.device('cpu') + device = torch.device("cpu") + def __init__(self): super(Model, self).__init__() self.fc1 = torch.nn.Linear(3, 4) @@ -615,7 +640,8 @@ def forward(self, x): def test_sq_linear(self): class Model(torch.nn.Module): - device = torch.device('cpu') + device = torch.device("cpu") + def __init__(self): super(Model, self).__init__() self.fc1 = torch.nn.Linear(3, 4) @@ -629,22 +655,24 @@ def forward(self, x): model = Model() sq = 
TorchSmoothQuant(model, self.linear_dl) - sq.transform(alpha=0.5, calib_iter=1) # By default, folding=False + sq.transform(alpha=0.5, calib_iter=1) # By default, folding=False assert isinstance(sq.model.fc1, SQLinearWrapper) def test_sq_qkv(self): model = transformers.AutoModelForCausalLM.from_pretrained( - 'facebook/opt-125m', torchscript=True,) + "facebook/opt-125m", + torchscript=True, + ) sq = TorchSmoothQuant(model, LLMCalibDataloader()) sq.transform(alpha=0.5, calib_iter=-1, folding=False) - assert isinstance( - sq.model.model.decoder.layers[0].self_attn.k_proj, SQLinearWrapper - ) + assert isinstance(sq.model.model.decoder.layers[0].self_attn.k_proj, SQLinearWrapper) def test_sq_quant(self): from neural_compressor import PostTrainingQuantConfig, quantization + class Model(torch.nn.Module): - device = torch.device('cpu') + device = torch.device("cpu") + def __init__(self): super(Model, self).__init__() self.fc1 = torch.nn.Linear(3, 4) @@ -658,17 +686,19 @@ def forward(self, x): input_ids = torch.randn([3, 3]) fp32_model = Model() output1 = fp32_model(input_ids) - + conf = PostTrainingQuantConfig( calibration_sampling_size=8, - recipes={"smooth_quant": True, - "smooth_quant_args": {'alpha': 'auto', 'folding': False}} - )# By default, folding args: {IPEX: False, ONNX RT: False, Stock PT: True} + recipes={"smooth_quant": True, "smooth_quant_args": {"alpha": "auto", "folding": False}}, + ) # By default, folding args: {IPEX: False, ONNX RT: False, Stock PT: True} + class CalibDataloader: def __init__(self): self.batch_size = 1 + def __iter__(self): yield input_ids + def calib_func(model): for i in range(10): model(input_ids) @@ -684,8 +714,9 @@ def calib_func(model): print(output1, output2) self.assertTrue(torch.allclose(output1, output2, atol=1e-02)) - q_model.save('saved_result') + q_model.save("saved_result") from neural_compressor.utils.pytorch import load + model_origin = Model() qdq_model = load("./saved_result", model_origin) @@ -693,9 +724,8 @@ def calib_func(model): origin_bias = float(fp32_model.fc1.bias[0]) conf = PostTrainingQuantConfig( calibration_sampling_size=8, - recipes={"smooth_quant": True, - "smooth_quant_args": {'alpha': 'auto', 'folding': True}} - )# By default, folding args: {IPEX: False, ONNX RT: False, Stock PT: True} + recipes={"smooth_quant": True, "smooth_quant_args": {"alpha": "auto", "folding": True}}, + ) # By default, folding args: {IPEX: False, ONNX RT: False, Stock PT: True} q_model = quantization.fit( fp32_model, conf, @@ -706,23 +736,23 @@ def calib_func(model): # with calib_func conf = PostTrainingQuantConfig( - example_inputs=input_ids, - recipes={"smooth_quant": True, - "smooth_quant_args": {'alpha': 'auto', 'folding': False}} - ) + example_inputs=input_ids, + recipes={"smooth_quant": True, "smooth_quant_args": {"alpha": "auto", "folding": False}}, + ) fp32_model = Model() q_model = quantization.fit( - fp32_model, - conf, - calib_func=calib_func, - eval_func=lambda x: 0.1, - ) + fp32_model, + conf, + calib_func=calib_func, + eval_func=lambda x: 0.1, + ) self.assertTrue(isinstance(q_model.model.fc1, SQLinearWrapper)) @unittest.skipIf(not TEST_IPEX, "Please install Intel extension for Pytorch") def test_sq_quant_ipex(self): class Model(torch.nn.Module): - device = torch.device('cpu') + device = torch.device("cpu") + def __init__(self): super(Model, self).__init__() self.fc1 = torch.nn.Linear(3, 4) @@ -740,17 +770,19 @@ def forward(self, x): def calib_func(model): model(input_ids) - ipex_version = Version(ipex.__version__.split('+')[0]) + ipex_version 
= Version(ipex.__version__.split("+")[0]) # pure ipex quantization - if ipex_version >= Version('2.1.0'): + if ipex_version >= Version("2.1.0"): qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5) - from intel_extension_for_pytorch.quantization import prepare, convert + from intel_extension_for_pytorch.quantization import convert, prepare + user_model = copy.deepcopy(fp32_model) user_model = prepare(user_model.eval(), qconfig, example_inputs=input_ids, inplace=True) calib_func(user_model) - user_model.save_qconf_summary(qconf_summary='ipex.json') + user_model.save_qconf_summary(qconf_summary="ipex.json") import json - with open('ipex.json', 'r') as f: + + with open("ipex.json", "r") as f: ipex_config_json = json.load(f) with torch.no_grad(): user_model = convert(user_model.eval(), inplace=True).eval() @@ -764,9 +796,9 @@ def calib_func(model): conf = PostTrainingQuantConfig( backend="ipex", calibration_sampling_size=8, - excluded_precisions=['bf16'], + excluded_precisions=["bf16"], example_inputs=(input_ids,), - recipes={"smooth_quant": True, "smooth_quant_args": {'alpha': 0.5}} + recipes={"smooth_quant": True, "smooth_quant_args": {"alpha": 0.5}}, ) tmp_model = copy.deepcopy(fp32_model) q_model = quantization.fit( @@ -774,15 +806,17 @@ def calib_func(model): conf, calib_func=calib_func, ) - q_model.save('saved') + q_model.save("saved") # compare ipex and inc quantization - with open('saved/best_configure.json', 'r') as f: + with open("saved/best_configure.json", "r") as f: inc_config_json = json.load(f) inc_out = q_model.model(input_ids) - ipex_sq_weight_scale = torch.tensor(ipex_config_json[' ']['q_op_infos']['0']\ - ['weight_tensor_infos'][0]['smooth_quant_scaling_factor']) - inc_sq_weight_scale = torch.tensor(inc_config_json[' ']['q_op_infos']['0']\ - ['weight_tensor_infos'][0]['smooth_quant_scaling_factor']) + ipex_sq_weight_scale = torch.tensor( + ipex_config_json[" "]["q_op_infos"]["0"]["weight_tensor_infos"][0]["smooth_quant_scaling_factor"] + ) + inc_sq_weight_scale = torch.tensor( + inc_config_json[" "]["q_op_infos"]["0"]["weight_tensor_infos"][0]["smooth_quant_scaling_factor"] + ) self.assertTrue(torch.allclose(inc_sq_weight_scale, ipex_sq_weight_scale)) # set a big atol to avoid random issue self.assertTrue(torch.allclose(ipex_out, inc_out, atol=1e-02)) @@ -791,13 +825,15 @@ def calib_func(model): class CalibDataloader: def __init__(self): self.batch_size = 1 + def __iter__(self): yield input_ids + conf = PostTrainingQuantConfig( backend="ipex", calibration_sampling_size=8, - excluded_precisions=['bf16'], - recipes={"smooth_quant": True, "smooth_quant_args": {'alpha': 'auto'}} + excluded_precisions=["bf16"], + recipes={"smooth_quant": True, "smooth_quant_args": {"alpha": "auto"}}, ) tmp_model = copy.deepcopy(fp32_model) q_model = quantization.fit( @@ -812,8 +848,8 @@ def __iter__(self): conf = PostTrainingQuantConfig( backend="ipex", calibration_sampling_size=8, - excluded_precisions=['bf16'], - recipes={"smooth_quant": True, "smooth_quant_args": {'alpha': 0.5, 'folding': True}} + excluded_precisions=["bf16"], + recipes={"smooth_quant": True, "smooth_quant_args": {"alpha": 0.5, "folding": True}}, ) tmp_model = copy.deepcopy(fp32_model) q_model = quantization.fit( @@ -832,15 +868,17 @@ def setUpClass(self): class RandDataloader: def __init__(self): pass + def __iter__(self): yield torch.rand((1, 4)) self.linear_dl = RandDataloader() - @classmethod + @classmethod def test_sq_skip_op_auto(self): class Model(torch.nn.Module): - device = torch.device('cpu') + 
device = torch.device("cpu") + def __init__(self): super(Model, self).__init__() self.linear0 = nn.Linear(4, 4, bias=False) @@ -863,21 +901,22 @@ def forward(self, x): x = self.linear3(x) x = self.ac3(x) return x - + model = Model() sq = TorchSmoothQuant(model, self.linear_dl) - sq.transform(alpha='auto', calib_iter=1, folding=True) - #the layernorm could not used for sq-absorb because it outputs to an add op. + sq.transform(alpha="auto", calib_iter=1, folding=True) + # the layernorm could not used for sq-absorb because it outputs to an add op. assert len(sq.absorb_to_layer) == 0 def test_sq_no_skip_op_auto(self): model = transformers.AutoModelForCausalLM.from_pretrained( - 'facebook/opt-125m', torchscript=True, + "facebook/opt-125m", + torchscript=True, ) sq = TorchSmoothQuant(model, LLMCalibDataloader()) - sq.transform(alpha='auto', calib_iter=0, folding=False) + sq.transform(alpha="auto", calib_iter=0, folding=False) # folding=False will absorb all Linears with mul, kqv will use same input. - assert len(sq.absorb_to_layer['model.decoder.layers.2.self_attn.q_proj']) == 3 + assert len(sq.absorb_to_layer["model.decoder.layers.2.self_attn.q_proj"]) == 3 class TestSqSkipOp_attn(unittest.TestCase): @@ -886,23 +925,26 @@ def setUpClass(self): class RandDataloader: def __init__(self): pass + def __iter__(self): yield torch.rand((1, 4)) + self.linear_dl = RandDataloader() - @classmethod + @classmethod def test_sq_skip_op_attn_auto(self): class Model(torch.nn.Module): - device = torch.device('cpu') + device = torch.device("cpu") + def __init__(self): super(Model, self).__init__() self.hidden_size = 4 - self.linear0 = nn.Linear(self.hidden_size, self.hidden_size,bias=False) + self.linear0 = nn.Linear(self.hidden_size, self.hidden_size, bias=False) self.layernorm1 = nn.LayerNorm(self.hidden_size) self.dim_k, self.dim_v = 8, 4 self.linear_q = nn.Linear(self.hidden_size, self.dim_k, bias=False) self.linear_k = nn.Linear(self.hidden_size, self.dim_k, bias=False) - self.linear_v = nn.Linear(self.hidden_size, self.dim_v, bias=False) + self.linear_v = nn.Linear(self.hidden_size, self.dim_v, bias=False) self.ac1 = nn.ReLU() self.ac2 = nn.LeakyReLU() self.linear3 = nn.Linear(self.hidden_size, 3, bias=True) @@ -924,12 +966,11 @@ def forward(self, x): x = self.ac3(x) return x - model = Model() sq = TorchSmoothQuant(model, self.linear_dl) - sq.transform(alpha='auto', calib_iter=1, folding=True) - #the layernorm could not used for sq-absorb because it outputs to an add op. - assert len(sq.absorb_to_layer) == 0 + sq.transform(alpha="auto", calib_iter=1, folding=True) + # the layernorm could not used for sq-absorb because it outputs to an add op. 
+ assert len(sq.absorb_to_layer) == 0 class TestTuneSqAlpha(unittest.TestCase): @@ -945,78 +986,77 @@ def tearDownClass(self): def test_sq_tune_alpha_ipex(self): from neural_compressor import quantization from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion + tuning_criterion = TuningCriterion(max_trials=5) for folding in [True, False]: for fp32_model, dataloader in [ - (DemoModel(), DemoCalibDataloader()), + (DemoModel(), DemoCalibDataloader()), ( transformers.AutoModelForCausalLM.from_pretrained( - 'facebook/opt-125m', torchscript=True,), - LLMCalibDataloader() - ) + "facebook/opt-125m", + torchscript=True, + ), + LLMCalibDataloader(), + ), ]: conf = PostTrainingQuantConfig( - backend='ipex', + backend="ipex", quant_level=1, tuning_criterion=tuning_criterion, calibration_sampling_size=8, - recipes={"smooth_quant": True, - "smooth_quant_args": {'folding': folding, - "alpha": np.arange(0.1, 0.4, 0.05).tolist()} - } + recipes={ + "smooth_quant": True, + "smooth_quant_args": {"folding": folding, "alpha": np.arange(0.1, 0.4, 0.05).tolist()}, + }, ) eval_result_lst = [0.98, 0.9, 0.8, 0.7, 1.1] + def fake_eval(model): acc = eval_result_lst.pop(0) return acc - q_model = quantization.fit( - fp32_model, - conf, - calib_dataloader=dataloader, - eval_func=fake_eval - ) + q_model = quantization.fit(fp32_model, conf, calib_dataloader=dataloader, eval_func=fake_eval) def test_sq_tune_alpha(self): from neural_compressor import quantization from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion + tuning_criterion = TuningCriterion(max_trials=5) for folding in [False, True]: for fp32_model, dataloader in [ - (DemoModel(), DemoCalibDataloader()), + (DemoModel(), DemoCalibDataloader()), ( transformers.AutoModelForCausalLM.from_pretrained( - 'facebook/opt-125m', torchscript=True,), - LLMCalibDataloader() - ) + "facebook/opt-125m", + torchscript=True, + ), + LLMCalibDataloader(), + ), ]: conf = PostTrainingQuantConfig( quant_level=1, tuning_criterion=tuning_criterion, calibration_sampling_size=8, - recipes={"smooth_quant": True, - "smooth_quant_args": {'folding': folding, - "alpha": np.arange(0.1, 0.4, 0.05).tolist()} - } + recipes={ + "smooth_quant": True, + "smooth_quant_args": {"folding": folding, "alpha": np.arange(0.1, 0.4, 0.05).tolist()}, + }, ) eval_result_lst = [0.98, 0.9, 0.8, 0.7, 1.1] + def fake_eval(model): acc = eval_result_lst.pop(0) return acc - - q_model = quantization.fit( - fp32_model, - conf, - calib_dataloader=dataloader, - eval_func=fake_eval - ) + + q_model = quantization.fit(fp32_model, conf, calib_dataloader=dataloader, eval_func=fake_eval) q_model.save(self.ns_workspace + "saved_result") def _test_sq_tune_alpha_common(self, eval_func, alpha=np.arange(0.1, 0.2, 0.05).tolist(), quant_level=1): from neural_compressor import quantization from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion + tuning_criterion = TuningCriterion(max_trials=8) fp32_model = DemoModel() @@ -1024,11 +1064,13 @@ def _test_sq_tune_alpha_common(self, eval_func, alpha=np.arange(0.1, 0.2, 0.05). quant_level=quant_level, tuning_criterion=tuning_criterion, calibration_sampling_size=8, - recipes={"smooth_quant": True, - "smooth_quant_args": {'folding': False, - "alpha": alpha, - } - } + recipes={ + "smooth_quant": True, + "smooth_quant_args": { + "folding": False, + "alpha": alpha, + }, + }, ) q_model = quantization.fit( fp32_model, @@ -1040,27 +1082,36 @@ def _test_sq_tune_alpha_common(self, eval_func, alpha=np.arange(0.1, 0.2, 0.05). 
def test_tune_sq_alpha(self): from functools import partial + def fake_eval(model, eval_result_lst): acc = eval_result_lst.pop(0) return acc - + # test for alpha is a list for eval_result_lst, note in [ - ([1, 0.8, 1.1, 0.7, 1.1], "Expect tuning ends at 2nd trial with alpha is 0.15"), - ([1, 0.8, 0.9, 0.7, 1.1], "Expect tuning ends at 4th trial with alpha is 0.15"), - ([1, 0.9, 0.8, 0.7, 1.1], "Expect tuning ends at 4th trial with alpha is 0.10") - ]: + ([1, 0.8, 1.1, 0.7, 1.1], "Expect tuning ends at 2nd trial with alpha is 0.15"), + ([1, 0.8, 0.9, 0.7, 1.1], "Expect tuning ends at 4th trial with alpha is 0.15"), + ([1, 0.9, 0.8, 0.7, 1.1], "Expect tuning ends at 4th trial with alpha is 0.10"), + ]: logger.info(f"test_sq_tune_alpha_common with eval_result_lst: {eval_result_lst}") logger.info(note) - partial_fake_eval = partial(fake_eval, eval_result_lst = eval_result_lst ) + partial_fake_eval = partial(fake_eval, eval_result_lst=eval_result_lst) self._test_sq_tune_alpha_common(partial_fake_eval) - + # test for various alphas for eval_result_lst, alpha, note in [ - ([1, 0.8, 1.1, 0.7, 1.1], 0.5 ,"Expect tuning ends at 2nd trial with alpha is 0.5 and not tune sq's alpha."), - ([1, 0.8, 0.9, 0.7, 1.1], [0.5], "Expect tuning ends at 4th trial with alpha is 0.5 and not tune sq's alpha."), - ([1, 0.9, 0.8, 0.7, 1.1], [0.5, 0.7, 0.9] ,"Expect tuning ends at 4th trial with alpha is 0.5") - ]: + ( + [1, 0.8, 1.1, 0.7, 1.1], + 0.5, + "Expect tuning ends at 2nd trial with alpha is 0.5 and not tune sq's alpha.", + ), + ( + [1, 0.8, 0.9, 0.7, 1.1], + [0.5], + "Expect tuning ends at 4th trial with alpha is 0.5 and not tune sq's alpha.", + ), + ([1, 0.9, 0.8, 0.7, 1.1], [0.5, 0.7, 0.9], "Expect tuning ends at 4th trial with alpha is 0.5"), + ]: logger.info(f"test_sq_tune_alpha_common with eval_result_lst: {eval_result_lst}, alpha: {alpha}") logger.info(note) partial_fake_eval = partial(fake_eval, eval_result_lst=eval_result_lst) @@ -1068,43 +1119,41 @@ def fake_eval(model, eval_result_lst): # test for quant_level is auto or 0 for eval_result_lst, alpha, quant_level, note in [ - ( - [1, 0.8, 1.1, 0.7, 1.1], - np.arange(0.1, 0.2, 0.05).tolist(), - "auto", - "Expect tuning ends at 2nd trial with alpha is 0.15." - ), - ( - [1, 0.8, 0.9, 0.7, 1.1], - np.arange(0.1, 0.2, 0.05).tolist(), - "auto", - "Expect tuning ends at 4th trial with alpha is 0.15 at basic strategy." 
- ), - ( - [1, 1.1, 0.8, 0.7, 1.1], - np.arange(0.1, 0.2, 0.05).tolist(), - 0, - "Expect tuning ends at 1th trial with alpha is 0.1") - ]: - logger.info(f"test_sq_tune_alpha_common with ") + ( + [1, 0.8, 1.1, 0.7, 1.1], + np.arange(0.1, 0.2, 0.05).tolist(), + "auto", + "Expect tuning ends at 2nd trial with alpha is 0.15.", + ), + ( + [1, 0.8, 0.9, 0.7, 1.1], + np.arange(0.1, 0.2, 0.05).tolist(), + "auto", + "Expect tuning ends at 4th trial with alpha is 0.15 at basic strategy.", + ), + ( + [1, 1.1, 0.8, 0.7, 1.1], + np.arange(0.1, 0.2, 0.05).tolist(), + 0, + "Expect tuning ends at 1th trial with alpha is 0.1", + ), + ]: + logger.info("test_sq_tune_alpha_common with ") logger.info(f"eval_result_lst: {eval_result_lst}, alpha: {alpha}, quant_level: {quant_level}") logger.info(note) partial_fake_eval = partial(fake_eval, eval_result_lst=eval_result_lst) self._test_sq_tune_alpha_common(partial_fake_eval, alpha=alpha, quant_level=quant_level) + class TestTextGeneration(unittest.TestCase): @classmethod def setUpClass(self): from modeling_gptj import GPTJForCausalLM - self.clm_model = GPTJForCausalLM.from_pretrained( - "hf-internal-testing/tiny-random-gptj", - torchscript=True - ) + + self.clm_model = GPTJForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gptj", torchscript=True) def test_text_generation(self): - input_ids = torch.tensor([[531, 574, 658, 492, 156], - [309, 296, 471, 817, 435], - [182, 176, 756, 944, 768]]) + input_ids = torch.tensor([[531, 574, 658, 492, 156], [309, 296, 471, 817, 435], [182, 176, 756, 944, 768]]) input_bs, input_len = input_ids.shape new_shape = [input_bs, 4, 1, 8] dummy_tensor = torch.ones(size=new_shape) @@ -1112,7 +1161,7 @@ def test_text_generation(self): past_key_values = tuple(tuple(pkv) for _ in range(28)) attention_mask = torch.ones(input_bs, input_len + 1) - attention_mask[:,0] = 0 + attention_mask[:, 0] = 0 example_inputs = ( input_ids, tuple(past_key_values), @@ -1126,7 +1175,9 @@ def calib_func(prepared_model): past_key_values=past_key_values, attention_mask=attention_mask, ) + from neural_compressor import PostTrainingQuantConfig, quantization + recipes = {"smooth_quant": True, "smooth_quant_args": {"alpha": 0.5}} conf = PostTrainingQuantConfig( backend="ipex", @@ -1145,7 +1196,6 @@ def calib_func(prepared_model): self.assertEqual(indices[1], torch.tensor([362])) self.assertEqual(indices[2], torch.tensor([504])) - -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/algorithm/test_smooth_quant_onnx.py b/test/algorithm/test_smooth_quant_onnx.py index 13d1f4f5bdd..82ab5a13504 100644 --- a/test/algorithm/test_smooth_quant_onnx.py +++ b/test/algorithm/test_smooth_quant_onnx.py @@ -1,14 +1,18 @@ -import onnx -from onnx import helper, TensorProto, numpy_helper import copy +import logging +import shutil import unittest + import numpy as np -import shutil -from neural_compressor.data import Datasets, DATALOADERS +import onnx +from onnx import TensorProto, helper, numpy_helper + from neural_compressor.adaptor.ox_utils.smooth_quant import ORTSmoothQuant -import logging +from neural_compressor.data import DATALOADERS, Datasets + logger = logging.getLogger("neural_compressor") + def check_model_is_same(model_proto1, model_proto2): # Compare if both models have the same number of nodes if len(model_proto1.graph.node) != len(model_proto2.graph.node): @@ -18,10 +22,12 @@ def check_model_is_same(model_proto1, model_proto2): for node1, node2 in zip(model_proto1.graph.node, model_proto2.graph.node): print(node1.name, 
node2.name) # Check node name, input, output, and op_type - if node1.name != node2.name or \ - node1.op_type != node2.op_type or \ - node1.input != node2.input or \ - node1.output != node2.output: + if ( + node1.name != node2.name + or node1.op_type != node2.op_type + or node1.input != node2.input + or node1.output != node2.output + ): return False # Check node attribure @@ -42,44 +48,44 @@ def check_model_is_same(model_proto1, model_proto2): init1 = {init.name: init for init in model_proto1.graph.initializer} init2 = {init.name: init for init in model_proto2.graph.initializer} for name in init1.keys(): - if name not in init2 or \ - not (numpy_helper.to_array(init1[name]) == numpy_helper.to_array(init2[name])).all(): + if name not in init2 or not (numpy_helper.to_array(init1[name]) == numpy_helper.to_array(init2[name])).all(): return False # Compare model inputs and outputs - if model_proto1.graph.input != model_proto2.graph.input or \ - model_proto1.graph.output != model_proto2.graph.output: + if model_proto1.graph.input != model_proto2.graph.input or model_proto1.graph.output != model_proto2.graph.output: return False - + return True def build_onnx_model(): - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 5, 5]) - C = helper.make_tensor_value_info('C', TensorProto.FLOAT, [1, 5, 2]) - H = helper.make_tensor_value_info('H', TensorProto.FLOAT, [1, 5, 2]) + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 5, 5]) + C = helper.make_tensor_value_info("C", TensorProto.FLOAT, [1, 5, 2]) + H = helper.make_tensor_value_info("H", TensorProto.FLOAT, [1, 5, 2]) g_value = np.random.uniform(low=0.001, high=0.5, size=(25)).astype(np.float32) - G_init = helper.make_tensor('G', TensorProto.FLOAT, [5, 5], g_value.reshape(25).tolist()) - matmul_node = onnx.helper.make_node('MatMul', ['A', 'G'], ['C'], name='Matmul') + G_init = helper.make_tensor("G", TensorProto.FLOAT, [5, 5], g_value.reshape(25).tolist()) + matmul_node = onnx.helper.make_node("MatMul", ["A", "G"], ["C"], name="Matmul") b_value = np.random.uniform(low=0.001, high=0.5, size=(10)).astype(np.float32) - B_init = helper.make_tensor('B', TensorProto.FLOAT, [5, 2], b_value.reshape(10).tolist()) - matmul_node2 = onnx.helper.make_node('MatMul', ['C', 'B'], ['I'], name='Matmul2') + B_init = helper.make_tensor("B", TensorProto.FLOAT, [5, 2], b_value.reshape(10).tolist()) + matmul_node2 = onnx.helper.make_node("MatMul", ["C", "B"], ["I"], name="Matmul2") e_value = np.random.uniform(low=0.001, high=0.5, size=(10)).astype(np.float32) - E_init = helper.make_tensor('E', TensorProto.FLOAT, [5, 2], e_value.reshape(10).tolist()) - matmul_node3 = onnx.helper.make_node('MatMul', ['C', 'E'], ['K'], name='Matmul3') + E_init = helper.make_tensor("E", TensorProto.FLOAT, [5, 2], e_value.reshape(10).tolist()) + matmul_node3 = onnx.helper.make_node("MatMul", ["C", "E"], ["K"], name="Matmul3") - add = onnx.helper.make_node('Add', ['I', 'E'], ['D'], name='add') + add = onnx.helper.make_node("Add", ["I", "E"], ["D"], name="add") f_value = np.random.uniform(low=0.001, high=0.5, size=(10)).astype(np.float32) - F_init = helper.make_tensor('F', TensorProto.FLOAT, [5, 2], f_value.reshape(10).tolist()) - add2 = onnx.helper.make_node('Add', ['D', 'F'], ['H'], name='add2') + F_init = helper.make_tensor("F", TensorProto.FLOAT, [5, 2], f_value.reshape(10).tolist()) + add2 = onnx.helper.make_node("Add", ["D", "F"], ["H"], name="add2") - graph = helper.make_graph([matmul_node, matmul_node2, matmul_node3, add, add2], 'test_graph_1', [A], [H], [B_init, 
E_init, F_init, G_init]) + graph = helper.make_graph( + [matmul_node, matmul_node2, matmul_node3, add, add2], "test_graph_1", [A], [H], [B_init, E_init, F_init, G_init] + ) model = helper.make_model(graph) - model = helper.make_model(graph, **{'opset_imports': [helper.make_opsetid('', 13)]}) + model = helper.make_model(graph, **{"opset_imports": [helper.make_opsetid("", 13)]}) return model @@ -87,10 +93,10 @@ class TestORTSq(unittest.TestCase): @classmethod def setUpClass(self): self.model = build_onnx_model() - dataset = Datasets("onnxrt_qdq")["dummy_v2"]((5,5), (5,1)) - self.dataloader = DATALOADERS['onnxrt_qlinearops'](dataset) - fixed_dataset = Datasets("onnxrt_qdq")['dummy'](shape=(5,5,5), label=True) - self.fixed_dataloader = DATALOADERS['onnxrt_qlinearops'](fixed_dataset) + dataset = Datasets("onnxrt_qdq")["dummy_v2"]((5, 5), (5, 1)) + self.dataloader = DATALOADERS["onnxrt_qlinearops"](dataset) + fixed_dataset = Datasets("onnxrt_qdq")["dummy"](shape=(5, 5, 5), label=True) + self.fixed_dataloader = DATALOADERS["onnxrt_qlinearops"](fixed_dataset) @classmethod def tearDownClass(self): @@ -99,7 +105,7 @@ def tearDownClass(self): def test_sq(self): sq = ORTSmoothQuant(copy.deepcopy(self.model), self.dataloader) model = sq.transform(calib_iter=5, scales_per_op=False) - self.assertEqual(len([i for i in model.model.graph.node if i.op_type == 'Mul']), 1) + self.assertEqual(len([i for i in model.model.graph.node if i.op_type == "Mul"]), 1) sq.recover() self.assertEqual(len(sq.model.nodes()), len(self.model.graph.node)) for init in self.model.graph.initializer: @@ -109,7 +115,7 @@ def test_sq(self): sq = ORTSmoothQuant(copy.deepcopy(self.model), self.dataloader) model = sq.transform(calib_iter=5, folding=False, scales_per_op=False) - self.assertEqual(len([i for i in model.model.graph.node if i.op_type == 'Mul']), 2) + self.assertEqual(len([i for i in model.model.graph.node if i.op_type == "Mul"]), 2) sq.recover() self.assertEqual(len(sq.model.nodes()), len(self.model.graph.node)) for init in self.model.graph.initializer: @@ -119,7 +125,7 @@ def test_sq(self): sq = ORTSmoothQuant(copy.deepcopy(self.model), self.dataloader) model = sq.transform(calib_iter=5, folding=False, scales_per_op=True) - self.assertEqual(len([i for i in model.model.graph.node if i.op_type == 'Mul']), 3) + self.assertEqual(len([i for i in model.model.graph.node if i.op_type == "Mul"]), 3) sq.recover() self.assertEqual(len(sq.model.nodes()), len(self.model.graph.node)) for init in self.model.graph.initializer: @@ -129,7 +135,7 @@ def test_sq(self): sq = ORTSmoothQuant(copy.deepcopy(self.model), self.dataloader) model = sq.transform(calib_iter=5, scales_per_op=True) - self.assertEqual(len([i for i in model.model.graph.node if i.op_type == 'Mul']), 3) + self.assertEqual(len([i for i in model.model.graph.node if i.op_type == "Mul"]), 3) sq.recover() self.assertEqual(len(sq.model.nodes()), len(self.model.graph.node)) for init in self.model.graph.initializer: @@ -138,8 +144,8 @@ def test_sq(self): self.assertAlmostEqual(tensor[0][0], sq_tensor[0][0], 4) sq = ORTSmoothQuant(copy.deepcopy(self.model), self.dataloader) - model = sq.transform(calib_iter=5, scales_per_op=True, alpha='auto') - self.assertEqual(len([i for i in model.model.graph.node if i.op_type == 'Mul']), 3) + model = sq.transform(calib_iter=5, scales_per_op=True, alpha="auto") + self.assertEqual(len([i for i in model.model.graph.node if i.op_type == "Mul"]), 3) sq.recover() self.assertEqual(len(sq.model.nodes()), len(self.model.graph.node)) for init in 
self.model.graph.initializer: @@ -147,10 +153,9 @@ def test_sq(self): sq_tensor = numpy_helper.to_array(sq.model.get_initializer(init.name)) self.assertAlmostEqual(tensor[0][0], sq_tensor[0][0], 4) - sq = ORTSmoothQuant(copy.deepcopy(self.model), self.dataloader) - model = sq.transform(calib_iter=5, alpha='auto', scales_per_op=False) - self.assertEqual(len([i for i in model.model.graph.node if i.op_type == 'Mul']), 1) + model = sq.transform(calib_iter=5, alpha="auto", scales_per_op=False) + self.assertEqual(len([i for i in model.model.graph.node if i.op_type == "Mul"]), 1) sq.recover() self.assertEqual(len(sq.model.nodes()), len(self.model.graph.node)) for init in self.model.graph.initializer: @@ -161,6 +166,7 @@ def test_sq(self): def _test_sq_tune_alpha_common(self, eval_func, alpha=np.arange(0.1, 0.2, 0.05).tolist(), quant_level=1): from neural_compressor import quantization from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion + tuning_criterion = TuningCriterion(max_trials=8) fp32_model = copy.deepcopy(self.model) @@ -168,9 +174,7 @@ def _test_sq_tune_alpha_common(self, eval_func, alpha=np.arange(0.1, 0.2, 0.05). quant_level=quant_level, tuning_criterion=tuning_criterion, calibration_sampling_size=4, - recipes={"smooth_quant": True, - "smooth_quant_args": {"alpha": alpha} - } + recipes={"smooth_quant": True, "smooth_quant_args": {"alpha": alpha}}, ) q_model = quantization.fit( fp32_model, @@ -183,34 +187,43 @@ def _test_sq_tune_alpha_common(self, eval_func, alpha=np.arange(0.1, 0.2, 0.05). def test_tune_sq_alpha(self): from functools import partial + def fake_eval(model, eval_result_lst): acc = eval_result_lst.pop(0) return acc - + # test for quantized models generated by int alpha and list alpha whether they are the same - partial_fake_eval = partial(fake_eval, eval_result_lst = [1, 1.1] ) + partial_fake_eval = partial(fake_eval, eval_result_lst=[1, 1.1]) q_model_without_tune = self._test_sq_tune_alpha_common(partial_fake_eval, alpha=0.5) - partial_fake_eval = partial(fake_eval, eval_result_lst = [1, 0.8, 1.1] ) + partial_fake_eval = partial(fake_eval, eval_result_lst=[1, 0.8, 1.1]) q_model_with_tune = self._test_sq_tune_alpha_common(partial_fake_eval, alpha=[0.4, 0.5]) self.assertTrue(check_model_is_same(q_model_without_tune.model, q_model_with_tune.model)) # test for alpha is a list for eval_result_lst, note in [ - ([1, 0.8, 1.1, 0.7, 1.1], "Expect tuning ends at 2nd trial with alpha is 0.15"), - ([1, 0.8, 0.9, 0.7, 1.1], "Expect tuning ends at 4th trial with alpha is 0.15"), - ([1, 0.9, 0.8, 0.7, 1.1], "Expect tuning ends at 4th trial with alpha is 0.10") - ]: + ([1, 0.8, 1.1, 0.7, 1.1], "Expect tuning ends at 2nd trial with alpha is 0.15"), + ([1, 0.8, 0.9, 0.7, 1.1], "Expect tuning ends at 4th trial with alpha is 0.15"), + ([1, 0.9, 0.8, 0.7, 1.1], "Expect tuning ends at 4th trial with alpha is 0.10"), + ]: logger.info(f"test_sq_tune_alpha_common with eval_result_lst: {eval_result_lst}") logger.info(note) - partial_fake_eval = partial(fake_eval, eval_result_lst = eval_result_lst ) + partial_fake_eval = partial(fake_eval, eval_result_lst=eval_result_lst) self._test_sq_tune_alpha_common(partial_fake_eval) # test for various alphas for eval_result_lst, alpha, note in [ - ([1, 0.8, 1.1, 0.7, 1.1], 0.5 ,"Expect tuning ends at 2nd trial with alpha is 0.5 and not tune sq's alpha."), - ([1, 0.8, 0.9, 0.7, 1.1], [0.5], "Expect tuning ends at 4th trial with alpha is 0.5 and not tune sq's alpha."), - ([1, 0.9, 0.8, 0.7, 1.1], [0.5, 0.7, 0.9] ,"Expect tuning ends at 
4th trial with alpha is 0.5") - ]: + ( + [1, 0.8, 1.1, 0.7, 1.1], + 0.5, + "Expect tuning ends at 2nd trial with alpha is 0.5 and not tune sq's alpha.", + ), + ( + [1, 0.8, 0.9, 0.7, 1.1], + [0.5], + "Expect tuning ends at 4th trial with alpha is 0.5 and not tune sq's alpha.", + ), + ([1, 0.9, 0.8, 0.7, 1.1], [0.5, 0.7, 0.9], "Expect tuning ends at 4th trial with alpha is 0.5"), + ]: logger.info(f"test_sq_tune_alpha_common with eval_result_lst: {eval_result_lst}, alpha: {alpha}") logger.info(note) partial_fake_eval = partial(fake_eval, eval_result_lst=eval_result_lst) @@ -218,29 +231,31 @@ def fake_eval(model, eval_result_lst): # test for quant_level is auto or 0 for eval_result_lst, alpha, quant_level, note in [ - ( - [1, 0.8, 1.1, 0.7, 1.1], - np.arange(0.1, 0.2, 0.05).tolist(), - "auto", - "Expect tuning ends at 2nd trial with alpha is 0.15." - ), - ( - [1, 0.8, 0.9, 0.7, 1.1], - np.arange(0.1, 0.2, 0.05).tolist(), - "auto", - "Expect tuning ends at 4th trial with alpha is 0.15 at basic strategy." - ), - ( - [1, 1.1, 0.8, 0.7, 1.1], - np.arange(0.1, 0.2, 0.05).tolist(), - 0, - "Expect tuning ends at 1th trial with alpha is 0.1") - ]: - logger.info(f"test_sq_tune_alpha_common with ") + ( + [1, 0.8, 1.1, 0.7, 1.1], + np.arange(0.1, 0.2, 0.05).tolist(), + "auto", + "Expect tuning ends at 2nd trial with alpha is 0.15.", + ), + ( + [1, 0.8, 0.9, 0.7, 1.1], + np.arange(0.1, 0.2, 0.05).tolist(), + "auto", + "Expect tuning ends at 4th trial with alpha is 0.15 at basic strategy.", + ), + ( + [1, 1.1, 0.8, 0.7, 1.1], + np.arange(0.1, 0.2, 0.05).tolist(), + 0, + "Expect tuning ends at 1th trial with alpha is 0.1", + ), + ]: + logger.info("test_sq_tune_alpha_common with ") logger.info(f"eval_result_lst: {eval_result_lst}, alpha: {alpha}, quant_level: {quant_level}") logger.info(note) partial_fake_eval = partial(fake_eval, eval_result_lst=eval_result_lst) self._test_sq_tune_alpha_common(partial_fake_eval, alpha=alpha, quant_level=quant_level) - -if __name__ == '__main__': + + +if __name__ == "__main__": unittest.main() diff --git a/test/benchmark/test_benchmark.py b/test/benchmark/test_benchmark.py index 4181d4fab39..dafddb1f47b 100644 --- a/test/benchmark/test_benchmark.py +++ b/test/benchmark/test_benchmark.py @@ -1,18 +1,20 @@ -"""Tests for neural_compressor benchmark""" -import psutil -import unittest +"""Tests for neural_compressor benchmark.""" import os -import yaml -import numpy as np -import tempfile -import re import platform -from neural_compressor.adaptor.tf_utils.util import write_graph +import re +import tempfile +import unittest +import numpy as np +import psutil import tensorflow as tf +import yaml + +from neural_compressor.adaptor.tf_utils.util import write_graph + def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -28,19 +30,19 @@ def build_fake_yaml(): iteration: 10 configs: cores_per_instance: 4 - num_of_instance: 2 + num_of_instance: 2 tuning: accuracy_criterion: relative: 0.01 - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() def build_benchmark(): - seq = ''' + seq = """ from argparse import ArgumentParser arg_parser = ArgumentParser(description='Parse args') arg_parser.add_argument('--input_model', dest='input_model', default='input_model', help='input odel') @@ -53,9 +55,9 @@ def build_benchmark(): benchmarker.b_dataloader = common.DataLoader(dataset, batch_size=10) 
benchmarker.model = args.input_model benchmarker.fit() - ''' + """ - seq1 = ''' + seq1 = """ from argparse import ArgumentParser arg_parser = ArgumentParser(description='Parse args') arg_parser.add_argument('--input_model', dest='input_model', default='input_model', help='input odel') @@ -69,42 +71,41 @@ def build_benchmark(): benchmarker.b_dataloader = common.DataLoader(dataset, batch_size=10) benchmarker.model = args.input_model benchmarker.fit() - ''' + """ # test normal case - with open('fake.py', "w", encoding="utf-8") as f: + with open("fake.py", "w", encoding="utf-8") as f: f.writelines(seq) # test batchsize > len(dataset), use first batch - fake_data_5 = seq.replace('100, 32, 32, 1', '5, 32, 32, 1') - with open('fake_data_5.py', "w", encoding="utf-8") as f: + fake_data_5 = seq.replace("100, 32, 32, 1", "5, 32, 32, 1") + with open("fake_data_5.py", "w", encoding="utf-8") as f: f.writelines(fake_data_5) # test batchsize < len(dataset) < 2*batchsize, discard first batch - fake_data_15 = seq1.replace('100, 32, 32, 1', '15, 32, 32, 1') - with open('fake_data_15.py', "w", encoding="utf-8") as f: + fake_data_15 = seq1.replace("100, 32, 32, 1", "15, 32, 32, 1") + with open("fake_data_15.py", "w", encoding="utf-8") as f: f.writelines(fake_data_15) # test 2*batchsize < len(dataset) < warmup*batchsize, discard last batch - fake_data_25 = seq1.replace('100, 32, 32, 1', '25, 32, 32, 1') - with open('fake_data_25.py', "w", encoding="utf-8") as f: + fake_data_25 = seq1.replace("100, 32, 32, 1", "25, 32, 32, 1") + with open("fake_data_25.py", "w", encoding="utf-8") as f: f.writelines(fake_data_25) + def build_benchmark2(): seq = [ "from argparse import ArgumentParser\n", "arg_parser = ArgumentParser(description='Parse args')\n", "arg_parser.add_argument('--input_model', dest='input_model', default='input_model', help='input model')\n", "args = arg_parser.parse_args()\n", - "from neural_compressor.data import Datasets\n", "dataset = Datasets('tensorflow')['dummy']((5, 32, 32, 1), label=True)\n", - "from neural_compressor.experimental import Benchmark, common\n", "benchmarker = Benchmark()\n", "benchmarker.model = args.input_model\n", "benchmarker.b_dataloader = common.DataLoader(dataset)\n", - "benchmarker.fit()\n" + "benchmarker.fit()\n", ] - seq1 = ''' + seq1 = """ from argparse import ArgumentParser arg_parser = ArgumentParser(description='Parse args') arg_parser.add_argument('--input_model', dest='input_model', default='input_model', help='input odel') @@ -116,9 +117,9 @@ def build_benchmark2(): benchmarker = Benchmark(conf) benchmarker.model = args.input_model benchmarker.fit() - ''' + """ - seq2 = ''' + seq2 = """ from argparse import ArgumentParser arg_parser = ArgumentParser(description='Parse args') arg_parser.add_argument('--input_model', dest='input_model', default='input_model', help='input model') @@ -141,32 +142,30 @@ def result(self): benchmarker.model = args.input_model benchmarker.metric = Metric() benchmarker.fit('accuracy') - ''' + """ - with open('fake2.py', "w", encoding="utf-8") as f: + with open("fake2.py", "w", encoding="utf-8") as f: f.writelines(seq) - with open('fake3.py', "w", encoding="utf-8") as f: + with open("fake3.py", "w", encoding="utf-8") as f: f.writelines(seq1) - with open('fake4.py', "w", encoding="utf-8") as f: + with open("fake4.py", "w", encoding="utf-8") as f: f.writelines(seq2) def build_fake_model(): - graph_path = tempfile.mkstemp(suffix='.pb')[1] + graph_path = tempfile.mkstemp(suffix=".pb")[1] try: graph = tf.Graph() graph_def = tf.GraphDef() with 
tf.Session(graph=graph) as sess: - x = tf.placeholder(tf.float64, shape=(None, 32, 32, 1), name='x') - y_1 = tf.constant(np.random.random((3, 3, 1, 1)), name='y_1') - y_2 = tf.constant(np.random.random((3, 3, 1, 1)), name='y_2') - conv1 = tf.nn.conv2d(input=x, filter=y_1, strides=[1, 1, 1, 1], \ - padding='VALID', name='conv1') - op = tf.nn.conv2d(input=conv1, filter=y_2, strides=[1, 1, 1, 1], \ - padding='VALID', name='op_to_store') + x = tf.placeholder(tf.float64, shape=(None, 32, 32, 1), name="x") + y_1 = tf.constant(np.random.random((3, 3, 1, 1)), name="y_1") + y_2 = tf.constant(np.random.random((3, 3, 1, 1)), name="y_2") + conv1 = tf.nn.conv2d(input=x, filter=y_1, strides=[1, 1, 1, 1], padding="VALID", name="conv1") + op = tf.nn.conv2d(input=conv1, filter=y_2, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.global_variables_initializer()) - constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ["op_to_store"]) graph_def.ParseFromString(constant_graph.SerializeToString()) write_graph(graph_def, graph_path) @@ -174,21 +173,22 @@ def build_fake_model(): graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session(graph=graph) as sess: - x = tf.compat.v1.placeholder(tf.float64, shape=(None, 32, 32, 1), name='x') - y_1 = tf.constant(np.random.random((3, 3, 1, 1)), name='y_1') - y_2 = tf.constant(np.random.random((3, 3, 1, 1)), name='y_2') - conv1 = tf.nn.conv2d(input=x, filters=y_1, strides=[1, 1, 1, 1], \ - padding='VALID', name='conv1') - op = tf.nn.conv2d(input=conv1, filters=y_2, strides=[1, 1, 1, 1], \ - padding='VALID', name='op_to_store') + x = tf.compat.v1.placeholder(tf.float64, shape=(None, 32, 32, 1), name="x") + y_1 = tf.constant(np.random.random((3, 3, 1, 1)), name="y_1") + y_2 = tf.constant(np.random.random((3, 3, 1, 1)), name="y_2") + conv1 = tf.nn.conv2d(input=x, filters=y_1, strides=[1, 1, 1, 1], padding="VALID", name="conv1") + op = tf.nn.conv2d(input=conv1, filters=y_2, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) write_graph(graph_def, graph_path) return graph_path + class TestObjective(unittest.TestCase): @classmethod def setUpClass(self): @@ -201,27 +201,27 @@ def setUpClass(self): @classmethod def tearDownClass(self): - if os.path.exists('fake_yaml.yaml'): - os.remove('fake_yaml.yaml') - if os.path.exists('fake.py'): - os.remove('fake.py') - if os.path.exists('fake2.py'): - os.remove('fake2.py') - if os.path.exists('fake3.py'): - os.remove('fake3.py') - if os.path.exists('fake4.py'): - os.remove('fake4.py') - if os.path.exists('fake_data_5.py'): - os.remove('fake_data_5.py') - if os.path.exists('fake_data_15.py'): - os.remove('fake_data_15.py') - if os.path.exists('fake_data_25.py'): - os.remove('fake_data_25.py') + if os.path.exists("fake_yaml.yaml"): + os.remove("fake_yaml.yaml") + if os.path.exists("fake.py"): + os.remove("fake.py") + if os.path.exists("fake2.py"): + os.remove("fake2.py") + if os.path.exists("fake3.py"): + os.remove("fake3.py") + if os.path.exists("fake4.py"): + os.remove("fake4.py") + if 
os.path.exists("fake_data_5.py"): + os.remove("fake_data_5.py") + if os.path.exists("fake_data_15.py"): + os.remove("fake_data_15.py") + if os.path.exists("fake_data_25.py"): + os.remove("fake_data_25.py") def test_benchmark(self): os.system("python fake.py --input_model={}".format(self.graph_path)) for i in range(2): - with open(f'2_4_{i}.log', "r") as f: + with open(f"2_4_{i}.log", "r") as f: for line in f: throughput = re.search(r"Throughput:\s+(\d+(\.\d+)?) images/sec", line) self.assertIsNotNone(throughput) @@ -230,7 +230,7 @@ def test_benchmark(self): def test_benchmark_data_5(self): os.system("python fake_data_5.py --input_model={}".format(self.graph_path)) for i in range(2): - with open(f'2_4_{i}.log', "r") as f: + with open(f"2_4_{i}.log", "r") as f: for line in f: throughput = re.search(r"Throughput:\s+(\d+(\.\d+)?) images/sec", line) self.assertIsNotNone(throughput) @@ -239,7 +239,7 @@ def test_benchmark_data_5(self): def test_benchmark_data_15(self): os.system("python fake_data_15.py --input_model={}".format(self.graph_path)) for i in range(2): - with open(f'2_4_{i}.log', "r") as f: + with open(f"2_4_{i}.log", "r") as f: for line in f: throughput = re.search(r"Throughput:\s+(\d+(\.\d+)?) images/sec", line) self.assertIsNotNone(throughput) @@ -248,7 +248,7 @@ def test_benchmark_data_15(self): def test_benchmark_data_25(self): os.system("python fake_data_25.py --input_model={}".format(self.graph_path)) for i in range(2): - with open(f'2_4_{i}.log', "r") as f: + with open(f"2_4_{i}.log", "r") as f: for line in f: throughput = re.search(r"Throughput:\s+(\d+(\.\d+)?) images/sec", line) self.assertIsNotNone(throughput) @@ -256,7 +256,7 @@ def test_benchmark_data_25(self): def test_benchmark_without_yaml(self): os.system("python fake2.py --input_model={} 2>&1 | tee benchmark.log".format(self.graph_path)) - with open('benchmark.log', "r") as f: + with open("benchmark.log", "r") as f: for line in f: throughput = re.search(r"Throughput sum: (\d+(\.\d+)?)", line) self.assertIsNotNone(throughput) @@ -264,7 +264,7 @@ def test_benchmark_without_yaml(self): def test_benchmark_with_conf(self): os.system("python fake3.py --input_model={}".format(self.graph_path)) - with open(f'1_{self.cpu_counts}_0.log', "r") as f: + with open(f"1_{self.cpu_counts}_0.log", "r") as f: for line in f: throughput = re.search(r"Throughput:\s+(\d+(\.\d+)?) 
images/sec", line) self.assertIsNotNone(throughput) @@ -272,11 +272,12 @@ def test_benchmark_with_conf(self): def test_benchmark_with_custom_metric(self): os.system("python fake4.py --input_model={} 2>&1 | tee benchmark.log".format(self.graph_path)) - with open('benchmark.log', "r") as f: + with open("benchmark.log", "r") as f: for line in f: accuracy = re.search(r"Accuracy is\s+(\d+(\.\d+)?)", line) self.assertIsNotNone(accuracy) os.system("rm *.log") + if __name__ == "__main__": unittest.main() diff --git a/test/benchmark/test_benchmark_2.x.py b/test/benchmark/test_benchmark_2.x.py index ae3651b01fa..6043c349843 100644 --- a/test/benchmark/test_benchmark_2.x.py +++ b/test/benchmark/test_benchmark_2.x.py @@ -1,19 +1,21 @@ -"""Tests for neural_compressor benchmark""" +"""Tests for neural_compressor benchmark.""" import os -import psutil +import re import shutil +import tempfile import unittest + import numpy as np +import psutil import tensorflow as tf -import tempfile -import re + from neural_compressor.adaptor.tf_utils.util import write_graph from neural_compressor.benchmark import benchmark_with_raw_cmd from neural_compressor.config import BenchmarkConfig def build_benchmark(): - seq = ''' + seq = """ from argparse import ArgumentParser arg_parser = ArgumentParser(description='Parse args') arg_parser.add_argument('--input_model', dest='input_model', default='input_model', help='input odel') @@ -26,9 +28,9 @@ def build_benchmark(): b_dataloader = DataLoader(framework="tensorflow", dataset=dataset, batch_size=10) conf = BenchmarkConfig(warmup=5, iteration=10, cores_per_instance=4, num_of_instance=2) fit(args.input_model, conf, b_dataloader=b_dataloader) - ''' + """ - seq1 = ''' + seq1 = """ from argparse import ArgumentParser arg_parser = ArgumentParser(description='Parse args') arg_parser.add_argument('--input_model', dest='input_model', default='input_model', help='input odel') @@ -41,22 +43,22 @@ def build_benchmark(): conf = BenchmarkConfig(warmup=5, iteration=10, cores_per_instance=4, num_of_instance=2) b_dataloader = DataLoader(framework="tensorflow", dataset=dataset, batch_size=10) fit(args.input_model, conf, b_dataloader=b_dataloader) - ''' + """ # test normal case - with open('fake.py', "w", encoding="utf-8") as f: + with open("fake.py", "w", encoding="utf-8") as f: f.writelines(seq) # test batchsize > len(dataset), use first batch - fake_data_5 = seq.replace('100, 32, 32, 1', '5, 32, 32, 1') - with open('fake_data_5.py', "w", encoding="utf-8") as f: + fake_data_5 = seq.replace("100, 32, 32, 1", "5, 32, 32, 1") + with open("fake_data_5.py", "w", encoding="utf-8") as f: f.writelines(fake_data_5) # test batchsize < len(dataset) < 2*batchsize, discard first batch - fake_data_15 = seq1.replace('100, 32, 32, 1', '15, 32, 32, 1') - with open('fake_data_15.py', "w", encoding="utf-8") as f: + fake_data_15 = seq1.replace("100, 32, 32, 1", "15, 32, 32, 1") + with open("fake_data_15.py", "w", encoding="utf-8") as f: f.writelines(fake_data_15) # test 2*batchsize < len(dataset) < warmup*batchsize, discard last batch - fake_data_25 = seq1.replace('100, 32, 32, 1', '25, 32, 32, 1') - with open('fake_data_25.py', "w", encoding="utf-8") as f: + fake_data_25 = seq1.replace("100, 32, 32, 1", "25, 32, 32, 1") + with open("fake_data_25.py", "w", encoding="utf-8") as f: f.writelines(fake_data_25) @@ -72,7 +74,6 @@ def build_benchmark2(): "from neural_compressor.data import Datasets\n", "from neural_compressor.model import Model\n", "dataset = Datasets('tensorflow')['dummy']((5, 32, 32, 1), 
label=True)\n", - "from neural_compressor.data.dataloaders.dataloader import DataLoader\n", "b_dataloader = DataLoader(framework='tensorflow', dataset=dataset)\n", "model = Model(args.input_model)\n", @@ -91,29 +92,27 @@ def build_benchmark2(): " break\n", "latency = np.array(latency_list).mean()\n", "print('Latency: {:.3f} ms'.format(latency * 1000))\n", - "print('Throughput: {:.3f} images/sec'.format(1. / latency))\n" + "print('Throughput: {:.3f} images/sec'.format(1. / latency))\n", ] - with open('fake_raw_cmd.py', "w", encoding="utf-8") as f: + with open("fake_raw_cmd.py", "w", encoding="utf-8") as f: f.writelines(seq) def build_fake_model(): - graph_path = tempfile.mkstemp(suffix='.pb')[1] + graph_path = tempfile.mkstemp(suffix=".pb")[1] try: graph = tf.Graph() graph_def = tf.GraphDef() with tf.Session(graph=graph) as sess: - x = tf.placeholder(tf.float64, shape=(None, 32, 32, 1), name='x') - y_1 = tf.constant(np.random.random((3, 3, 1, 1)), name='y_1') - y_2 = tf.constant(np.random.random((3, 3, 1, 1)), name='y_2') - conv1 = tf.nn.conv2d(input=x, filter=y_1, strides=[1, 1, 1, 1], \ - padding='VALID', name='conv1') - op = tf.nn.conv2d(input=conv1, filter=y_2, strides=[1, 1, 1, 1], \ - padding='VALID', name='op_to_store') + x = tf.placeholder(tf.float64, shape=(None, 32, 32, 1), name="x") + y_1 = tf.constant(np.random.random((3, 3, 1, 1)), name="y_1") + y_2 = tf.constant(np.random.random((3, 3, 1, 1)), name="y_2") + conv1 = tf.nn.conv2d(input=x, filter=y_1, strides=[1, 1, 1, 1], padding="VALID", name="conv1") + op = tf.nn.conv2d(input=conv1, filter=y_2, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.global_variables_initializer()) - constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ["op_to_store"]) graph_def.ParseFromString(constant_graph.SerializeToString()) write_graph(graph_def, graph_path) @@ -121,21 +120,22 @@ def build_fake_model(): graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session(graph=graph) as sess: - x = tf.compat.v1.placeholder(tf.float64, shape=(None, 32, 32, 1), name='x') - y_1 = tf.constant(np.random.random((3, 3, 1, 1)), name='y_1') - y_2 = tf.constant(np.random.random((3, 3, 1, 1)), name='y_2') - conv1 = tf.nn.conv2d(input=x, filters=y_1, strides=[1, 1, 1, 1], \ - padding='VALID', name='conv1') - op = tf.nn.conv2d(input=conv1, filters=y_2, strides=[1, 1, 1, 1], \ - padding='VALID', name='op_to_store') + x = tf.compat.v1.placeholder(tf.float64, shape=(None, 32, 32, 1), name="x") + y_1 = tf.constant(np.random.random((3, 3, 1, 1)), name="y_1") + y_2 = tf.constant(np.random.random((3, 3, 1, 1)), name="y_2") + conv1 = tf.nn.conv2d(input=x, filters=y_1, strides=[1, 1, 1, 1], padding="VALID", name="conv1") + op = tf.nn.conv2d(input=conv1, filters=y_2, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) write_graph(graph_def, graph_path) return graph_path + class TestObjective(unittest.TestCase): @classmethod def setUpClass(self): @@ -146,22 +146,22 @@ def setUpClass(self): @classmethod def tearDownClass(self): - if 
os.path.exists('fake.py'):
-            os.remove('fake.py')
-        if os.path.exists('fake_data_5.py'):
-            os.remove('fake_data_5.py')
-        if os.path.exists('fake_data_15.py'):
-            os.remove('fake_data_15.py')
-        if os.path.exists('fake_data_25.py'):
-            os.remove('fake_data_25.py')
-        if os.path.exists('fake_raw_cmd.py'):
-            os.remove('fake_raw_cmd.py')
-        shutil.rmtree('nc_workspace', ignore_errors=True)
+        if os.path.exists("fake.py"):
+            os.remove("fake.py")
+        if os.path.exists("fake_data_5.py"):
+            os.remove("fake_data_5.py")
+        if os.path.exists("fake_data_15.py"):
+            os.remove("fake_data_15.py")
+        if os.path.exists("fake_data_25.py"):
+            os.remove("fake_data_25.py")
+        if os.path.exists("fake_raw_cmd.py"):
+            os.remove("fake_raw_cmd.py")
+        shutil.rmtree("nc_workspace", ignore_errors=True)
 
     def test_benchmark(self):
         os.system("python fake.py --input_model={}".format(self.graph_path))
         for i in range(2):
-            with open(f'2_4_{i}.log', "r") as f:
+            with open(f"2_4_{i}.log", "r") as f:
                 for line in f:
                     throughput = re.search(r"Throughput:\s+(\d+(\.\d+)?) images/sec", line)
                     self.assertIsNotNone(throughput)
@@ -170,7 +170,7 @@ def test_benchmark(self):
     def test_benchmark_data_5(self):
         os.system("python fake_data_5.py --input_model={}".format(self.graph_path))
         for i in range(2):
-            with open(f'2_4_{i}.log', "r") as f:
+            with open(f"2_4_{i}.log", "r") as f:
                 for line in f:
                     throughput = re.search(r"Throughput:\s+(\d+(\.\d+)?) images/sec", line)
                     self.assertIsNotNone(throughput)
@@ -179,7 +179,7 @@ def test_benchmark_data_5(self):
     def test_benchmark_data_15(self):
         os.system("python fake_data_15.py --input_model={}".format(self.graph_path))
         for i in range(2):
-            with open(f'2_4_{i}.log', "r") as f:
+            with open(f"2_4_{i}.log", "r") as f:
                 for line in f:
                     throughput = re.search(r"Throughput:\s+(\d+(\.\d+)?) images/sec", line)
                     self.assertIsNotNone(throughput)
@@ -188,7 +188,7 @@ def test_benchmark_data_15(self):
     def test_benchmark_data_25(self):
         os.system("python fake_data_25.py --input_model={}".format(self.graph_path))
         for i in range(2):
-            with open(f'2_4_{i}.log', "r") as f:
+            with open(f"2_4_{i}.log", "r") as f:
                 for line in f:
                     throughput = re.search(r"Throughput:\s+(\d+(\.\d+)?) images/sec", line)
                     self.assertIsNotNone(throughput)
@@ -199,7 +199,7 @@ def test_benchmark_raw_cmd(self):
         raw_cmd = "python fake_raw_cmd.py --input_model={}".format(self.graph_path)
         benchmark_with_raw_cmd(raw_cmd, conf=conf)
         for i in range(2):
-            with open(f'2_4_{i}.log', "r") as f:
+            with open(f"2_4_{i}.log", "r") as f:
                 for line in f:
                     throughput = re.search(r"Throughput:\s+(\d+(\.\d+)?) 
images/sec", line) self.assertIsNotNone(throughput) diff --git a/test/config/test_config_1.x.py b/test/config/test_config_1.x.py index 03adc0153d0..dd8173e544f 100644 --- a/test/config/test_config_1.x.py +++ b/test/config/test_config_1.x.py @@ -1,183 +1,198 @@ -"""Tests for 1.x config file""" -import unittest +"""Tests for 1.x config file.""" import os +import unittest + from neural_compressor.conf import config as conf from neural_compressor.utils.constant import * + def helper(content): - with open('fake_conf.yaml', 'w', encoding="utf-8") as f: + with open("fake_conf.yaml", "w", encoding="utf-8") as f: f.write(content) + class TestConfig(unittest.TestCase): def test_config(self): from neural_compressor import PostTrainingQuantConfig + config = PostTrainingQuantConfig() - self.assertEqual(config.recipes['smooth_quant'], False) - self.assertEqual(config.recipes['fast_bias_correction'], False) - self.assertEqual(config.recipes['weight_correction'], False) - self.assertEqual(config.recipes['dedicated_qdq_pair'], False) - self.assertEqual(config.recipes['add_qdq_pair_to_weight'], False) - self.assertEqual(config.recipes['graph_optimization_level'], None) + self.assertEqual(config.recipes["smooth_quant"], False) + self.assertEqual(config.recipes["fast_bias_correction"], False) + self.assertEqual(config.recipes["weight_correction"], False) + self.assertEqual(config.recipes["dedicated_qdq_pair"], False) + self.assertEqual(config.recipes["add_qdq_pair_to_weight"], False) + self.assertEqual(config.recipes["graph_optimization_level"], None) + class TestPyConf(unittest.TestCase): def test_config(self): - from neural_compressor.conf.config import conf, QuantConf, PruningConf, \ - GraphOptConf, BenchmarkConf, DistillationConf + from neural_compressor.conf.config import ( + BenchmarkConf, + DistillationConf, + GraphOptConf, + PruningConf, + QuantConf, + conf, + ) conf.tuning.accuracy_criterion.relative = 0.2 a = QuantConf(conf) self.assertEqual(a.usr_cfg.tuning.accuracy_criterion.relative, 0.2) conf.quantization.op_wise = { - 'op1': FP32, - 'op2': {'activation': INT8_SYM_KL_PERTENSOR}, - 'op3': {'activation': INT8_SYM_KL_PERCHANNEL, 'weight': INT8_SYM_MINMAX_PERTENSOR}} - conf.quantization.model_wise = { - 'activation': INT8_SYM_KL_PERTENSOR, - 'weight': INT8_SYM_MINMAX_PERTENSOR} + "op1": FP32, + "op2": {"activation": INT8_SYM_KL_PERTENSOR}, + "op3": {"activation": INT8_SYM_KL_PERCHANNEL, "weight": INT8_SYM_MINMAX_PERTENSOR}, + } + conf.quantization.model_wise = {"activation": INT8_SYM_KL_PERTENSOR, "weight": INT8_SYM_MINMAX_PERTENSOR} a = QuantConf(conf) - self.assertEqual(a.usr_cfg.quantization.model_wise.weight.scheme, ['sym']) - - conf.evaluation.performance.dataloader.dataset = {'dummy': {'shape': '224,224,3'}} - conf.evaluation.accuracy.dataloader.dataset = {'dummy': {'shape': '224,224,3', 'low': '0.1'}} - + self.assertEqual(a.usr_cfg.quantization.model_wise.weight.scheme, ["sym"]) + + conf.evaluation.performance.dataloader.dataset = {"dummy": {"shape": "224,224,3"}} + conf.evaluation.accuracy.dataloader.dataset = {"dummy": {"shape": "224,224,3", "low": "0.1"}} + conf.evaluation.performance.dataloader.transform = { - 'Resize': {'size': [100, 100]}, - 'BilinearImagenet': {'height':300, 'width':300, 'mean_value':[0.2,0.2,0.2]} - } + "Resize": {"size": [100, 100]}, + "BilinearImagenet": {"height": 300, "width": 300, "mean_value": [0.2, 0.2, 0.2]}, + } conf.evaluation.performance.dataloader.batch_size = 6 - conf.evaluation.accuracy.metric = {'RMSE': {}} - conf.tuning.strategy.name = 'mse' + 
conf.evaluation.accuracy.metric = {"RMSE": {}} + conf.tuning.strategy.name = "mse" a = BenchmarkConf(conf) self.assertEqual(a.usr_cfg.evaluation.performance.dataloader.batch_size, 6) - self.assertEqual(a.usr_cfg.evaluation.performance.dataloader.dataset, {'dummy': {'shape': (224,224,3)}}) - self.assertEqual(a.usr_cfg.evaluation.accuracy.metric, {'RMSE': {}}) + self.assertEqual(a.usr_cfg.evaluation.performance.dataloader.dataset, {"dummy": {"shape": (224, 224, 3)}}) + self.assertEqual(a.usr_cfg.evaluation.accuracy.metric, {"RMSE": {}}) a = QuantConf(conf) - self.assertEqual(a.usr_cfg.tuning.strategy.name, 'mse') - - conf.evaluation.accuracy.metric = {'topk': 5} - conf.graph_optimization.precisions = 'bf16' - conf.pruning.train.criterion = {'CrossEntropyLoss': {}} + self.assertEqual(a.usr_cfg.tuning.strategy.name, "mse") + + conf.evaluation.accuracy.metric = {"topk": 5} + conf.graph_optimization.precisions = "bf16" + conf.pruning.train.criterion = {"CrossEntropyLoss": {}} conf.pruning.train.optimizer = {} a = PruningConf(conf) - self.assertEqual(a.usr_cfg.pruning.train.criterion, {'CrossEntropyLoss': {'from_logits': False, 'reduction': 'mean'}}) + self.assertEqual( + a.usr_cfg.pruning.train.criterion, {"CrossEntropyLoss": {"from_logits": False, "reduction": "mean"}} + ) - self.assertEqual(a.usr_cfg.evaluation.accuracy.metric, {'topk': 5}) + self.assertEqual(a.usr_cfg.evaluation.accuracy.metric, {"topk": 5}) conf.graph_optimization.op_wise = BF16 a = GraphOptConf(conf) - self.assertEqual(a.usr_cfg.graph_optimization.op_wise, {'weight': {'dtype': ['bf16']}, 'activation': {'dtype': ['bf16']}}) + self.assertEqual( + a.usr_cfg.graph_optimization.op_wise, {"weight": {"dtype": ["bf16"]}, "activation": {"dtype": ["bf16"]}} + ) conf.distillation.train.iteration = 900 a = DistillationConf(conf) self.assertEqual(a.usr_cfg.distillation.train.iteration, 900) + class TestConf(unittest.TestCase): @classmethod def tearDownClass(self): - os.remove('fake_conf.yaml') + os.remove("fake_conf.yaml") def test_main_key(self): - test = ''' + test = """ model: name: main_key_yaml framework: pytorch test: cpu - ''' + """ helper(test) - self.assertRaises(RuntimeError, conf.Conf, 'fake_conf.yaml') + self.assertRaises(RuntimeError, conf.Conf, "fake_conf.yaml") def test_framework(self): - test = ''' + test = """ model: - name: framework_yaml + name: framework_yaml framework: pytorch, mxnet - ''' + """ helper(test) - self.assertRaises(RuntimeError, conf.Conf, 'fake_conf.yaml') + self.assertRaises(RuntimeError, conf.Conf, "fake_conf.yaml") - test = ''' + test = """ device: cpu - ''' + """ helper(test) - self.assertRaises(RuntimeError, conf.Conf, 'fake_conf.yaml') + self.assertRaises(RuntimeError, conf.Conf, "fake_conf.yaml") def test_device(self): - test = ''' + test = """ model: - name: device_yaml + name: device_yaml framework: mxnet device: xpu - ''' + """ helper(test) - self.assertRaises(RuntimeError, conf.Conf, 'fake_conf.yaml') + self.assertRaises(RuntimeError, conf.Conf, "fake_conf.yaml") - test = ''' + test = """ model: - name: device_yaml + name: device_yaml framework: mxnet device: cpu, gpu - ''' + """ helper(test) - self.assertRaises(RuntimeError, conf.Conf, 'fake_conf.yaml') + self.assertRaises(RuntimeError, conf.Conf, "fake_conf.yaml") def test_version(self): - test = ''' + test = """ model: - name: version_yaml + name: version_yaml framework: mxnet - ''' + """ helper(test) - config = conf.Conf('fake_conf.yaml') + config = conf.Conf("fake_conf.yaml") self.assertEqual(config.usr_cfg.version, 2.0) - test = ''' + 
test = """ version: 2.0 model: - name: version_yaml + name: version_yaml framework: mxnet - ''' + """ helper(test) - config = conf.Conf('fake_conf.yaml') + config = conf.Conf("fake_conf.yaml") self.assertEqual(config.usr_cfg.version, 2.0) def test_calibration(self): - test = ''' + test = """ model: - name: calib_yaml + name: calib_yaml framework: mxnet quantization: calibration: sampling_sizes: 10 - ''' + """ helper(test) - self.assertRaises(RuntimeError, conf.Conf, 'fake_conf.yaml') + self.assertRaises(RuntimeError, conf.Conf, "fake_conf.yaml") - test = ''' + test = """ model: - name: calib_yaml + name: calib_yaml framework: mxnet quantization: calibration: sampling_size: - ''' + """ helper(test) - self.assertRaises(RuntimeError, conf.Conf, 'fake_conf.yaml') + self.assertRaises(RuntimeError, conf.Conf, "fake_conf.yaml") - test = ''' + test = """ model: - name: calib_yaml + name: calib_yaml framework: mxnet quantization: calibration: dataloader: - ''' + """ helper(test) - self.assertRaises(RuntimeError, conf.Conf, 'fake_conf.yaml') + self.assertRaises(RuntimeError, conf.Conf, "fake_conf.yaml") - test = ''' + test = """ model: - name: calib_yaml + name: calib_yaml framework: mxnet quantization: calibration: @@ -187,47 +202,47 @@ def test_calibration(self): } } - ''' + """ helper(test) - self.assertRaises(RuntimeError, conf.Conf, 'fake_conf.yaml') + self.assertRaises(RuntimeError, conf.Conf, "fake_conf.yaml") def test_quantization(self): - test = ''' + test = """ model: - name: quant_yaml + name: quant_yaml framework: mxnet quantization: model_wise: weights: granularity: per_channel - ''' + """ helper(test) - self.assertRaises(RuntimeError, conf.Conf, 'fake_conf.yaml') + self.assertRaises(RuntimeError, conf.Conf, "fake_conf.yaml") - test = ''' + test = """ model: - name: quant_yaml + name: quant_yaml framework: mxnet quantization: model_wise: approach: - ''' + """ helper(test) - self.assertRaises(RuntimeError, conf.Conf, 'fake_conf.yaml') + self.assertRaises(RuntimeError, conf.Conf, "fake_conf.yaml") - test = ''' + test = """ model: - name: quant_yaml + name: quant_yaml framework: mxnet quantization: approach: post_training_static_quant, quant_aware_training - ''' + """ helper(test) - self.assertRaises(RuntimeError, conf.Conf, 'fake_conf.yaml') + self.assertRaises(RuntimeError, conf.Conf, "fake_conf.yaml") - test = ''' + test = """ model: - name: quant_yaml + name: quant_yaml framework: mxnet quantization: model_wise: @@ -237,13 +252,13 @@ def test_quantization(self): weight: scheme: asym dtype: int8 - ''' + """ helper(test) - conf.Conf('fake_conf.yaml') + conf.Conf("fake_conf.yaml") - test = ''' + test = """ model: - name: quant_yaml + name: quant_yaml framework: mxnet quantization: model_wise: @@ -253,168 +268,168 @@ def test_quantization(self): weight: scheme: asym dtype: int8 - ''' + """ helper(test) - self.assertRaises(RuntimeError, conf.Conf, 'fake_conf.yaml') + self.assertRaises(RuntimeError, conf.Conf, "fake_conf.yaml") def test_tuning(self): - test = ''' + test = """ model: - name: tuning_yaml + name: tuning_yaml framework: mxnet tuning: accuracy_criterion: relative: 0.01 strategy: name: basic, mse - ''' + """ helper(test) - self.assertRaises(RuntimeError, conf.Conf, 'fake_conf.yaml') + self.assertRaises(RuntimeError, conf.Conf, "fake_conf.yaml") - test = ''' + test = """ model: - name: tuning_yaml + name: tuning_yaml framework: mxnet tuning: accuracy_criterion: relative: 0.01 - ''' + """ helper(test) - self.assertRaises(RuntimeError, conf.Conf, 'fake_conf.yaml') + 
self.assertRaises(RuntimeError, conf.Conf, "fake_conf.yaml") - test = ''' + test = """ model: - name: tuning_yaml + name: tuning_yaml framework: mxnet tuning: accuracy_criterion: relative: 0.01 strategy: name: fake - ''' + """ helper(test) - self.assertRaises(RuntimeError, conf.Conf, 'fake_conf.yaml') + self.assertRaises(RuntimeError, conf.Conf, "fake_conf.yaml") - test = ''' + test = """ model: - name: tuning_yaml + name: tuning_yaml framework: mxnet tuning: accuracy_criterion: relative: strategy: name: basic - ''' + """ helper(test) - self.assertRaises(RuntimeError, conf.Conf, 'fake_conf.yaml') + self.assertRaises(RuntimeError, conf.Conf, "fake_conf.yaml") - test = ''' + test = """ model: - name: tuning_yaml + name: tuning_yaml framework: mxnet tuning: accuracy_criterion: exit_policy: timeout: 3 - ''' + """ helper(test) - self.assertRaises(RuntimeError, conf.Conf, 'fake_conf.yaml') + self.assertRaises(RuntimeError, conf.Conf, "fake_conf.yaml") - test = ''' + test = """ model: - name: tuning_yaml + name: tuning_yaml framework: mxnet tuning: accuracy_criterion: relative: 0.01 absolute: 0.01 - ''' + """ helper(test) - self.assertRaises(RuntimeError, conf.Conf, 'fake_conf.yaml') + self.assertRaises(RuntimeError, conf.Conf, "fake_conf.yaml") def test_workspace(self): - test = ''' + test = """ model: - name: workspace_yaml + name: workspace_yaml framework: mxnet tuning: - workspace: + workspace: -path: ./workspace - ''' + """ helper(test) - self.assertRaises(RuntimeError, conf.Conf, 'fake_conf.yaml') + self.assertRaises(RuntimeError, conf.Conf, "fake_conf.yaml") def test_inputs_outputs(self): - test = ''' + test = """ model: - name: inout_yaml + name: inout_yaml framework: mxnet inputs: x, y - ''' + """ helper(test) - config = conf.Conf('fake_conf.yaml') - self.assertEqual(config.usr_cfg.model.inputs, ['x', 'y']) + config = conf.Conf("fake_conf.yaml") + self.assertEqual(config.usr_cfg.model.inputs, ["x", "y"]) def test_objective(self): - test = ''' + test = """ model: - name: inout_yaml + name: inout_yaml framework: mxnet inputs: x, y tuning: multi_objectives: objective: accuracy higher_is_better: True - ''' + """ helper(test) - config = conf.Conf('fake_conf.yaml') + config = conf.Conf("fake_conf.yaml") self.assertEqual(config.usr_cfg.tuning.multi_objectives.higher_is_better, [True]) - test = ''' + test = """ model: - name: inout_yaml + name: inout_yaml framework: mxnet inputs: x, y tuning: multi_objectives: objective: accuracy, performance higher_is_better: True, False - ''' + """ helper(test) - config = conf.Conf('fake_conf.yaml') + config = conf.Conf("fake_conf.yaml") self.assertEqual(config.usr_cfg.tuning.multi_objectives.higher_is_better, [True, False]) - test = ''' + test = """ model: - name: inout_yaml + name: inout_yaml framework: mxnet inputs: x, y tuning: multi_objectives: objective: accuracy, performance higher_is_better: True False - ''' + """ helper(test) - config = conf.Conf('fake_conf.yaml') + config = conf.Conf("fake_conf.yaml") self.assertEqual(config.usr_cfg.tuning.multi_objectives.higher_is_better, [True, False]) - test = ''' + test = """ model: - name: inout_yaml + name: inout_yaml framework: mxnet inputs: x, y tuning: multi_objectives: objective: accuracy, performance higher_is_better: [True, False] - ''' + """ helper(test) - config = conf.Conf('fake_conf.yaml') + config = conf.Conf("fake_conf.yaml") self.assertEqual(config.usr_cfg.tuning.multi_objectives.higher_is_better, [True, False]) - test = ''' + test = """ model: - name: inout_yaml + name: inout_yaml framework: 
mxnet inputs: x, y tuning: @@ -422,14 +437,14 @@ def test_objective(self): objective: accuracy, performance higher_is_better: True False weight: [0.2, 0.1, 0.7] - ''' + """ helper(test) - self.assertRaises(RuntimeError, conf.Conf, 'fake_conf.yaml') + self.assertRaises(RuntimeError, conf.Conf, "fake_conf.yaml") def test_modelwise_conf_merge(self): - test = ''' + test = """ model: - name: inout_yaml + name: inout_yaml framework: mxnet quantization: model_wise: @@ -437,52 +452,52 @@ def test_modelwise_conf_merge(self): algorithm: minmax activation: algorithm: minmax - ''' + """ helper(test) - config = conf.QuantConf('fake_conf.yaml') + config = conf.QuantConf("fake_conf.yaml") framework_modelwise_capability = { - 'CONV2D': { - 'activation': { - 'dtype': ['uint8', 'fp32'], - 'scheme': ['asym', 'sym'], - 'granularity': ['per_tensor'], - 'algorithm': ['minmax', 'kl'] + "CONV2D": { + "activation": { + "dtype": ["uint8", "fp32"], + "scheme": ["asym", "sym"], + "granularity": ["per_tensor"], + "algorithm": ["minmax", "kl"], }, - 'weight': { - 'dtype': ['int8', 'fp32'], - 'scheme': [ - 'sym', + "weight": { + "dtype": ["int8", "fp32"], + "scheme": [ + "sym", ], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax'] + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax"], }, }, } tune_space = config.modelwise_tune_space(framework_modelwise_capability) - self.assertEqual(tune_space['CONV2D']['activation']['algorithm'], ['minmax']) + self.assertEqual(tune_space["CONV2D"]["activation"]["algorithm"], ["minmax"]) def test_metric(self): - test = ''' + test = """ model: - name: metric_yaml + name: metric_yaml framework: mxnet evaluation: accuracy: multi_metrics: topk: 1 MSE: {} - ''' + """ helper(test) - metrics = {'topk': 1, 'MSE': {}} - config = conf.QuantConf('fake_conf.yaml') + metrics = {"topk": 1, "MSE": {}} + config = conf.QuantConf("fake_conf.yaml") self.assertEqual(config.usr_cfg.evaluation.accuracy.multi_metrics, metrics) - test = ''' + test = """ model: - name: metric_yaml + name: metric_yaml framework: mxnet evaluation: accuracy: @@ -490,13 +505,13 @@ def test_metric(self): weight: 0.5 0.5 0.6 topk: 1 MSE: {} - ''' + """ helper(test) - self.assertRaises((AssertionError, RuntimeError), conf.Conf, 'fake_conf.yaml') + self.assertRaises((AssertionError, RuntimeError), conf.Conf, "fake_conf.yaml") - test = ''' + test = """ model: - name: metric_yaml + name: metric_yaml framework: mxnet evaluation: accuracy: @@ -504,15 +519,15 @@ def test_metric(self): higher_is_better: True, False topk: 1 MSE: {} - ''' + """ helper(test) - config = conf.QuantConf('fake_conf.yaml') + config = conf.QuantConf("fake_conf.yaml") self.assertEqual(config.usr_cfg.evaluation.accuracy.multi_metrics.higher_is_better, [True, False]) def test_modelwise_conf_merge2(self): - test = ''' + test = """ model: - name: inout_yaml + name: inout_yaml framework: mxnet quantization: model_wise: @@ -521,38 +536,38 @@ def test_modelwise_conf_merge2(self): activation: algorithm: minmax dtype: ['uint8', 'fp32'] - ''' + """ helper(test) - config = conf.QuantConf('fake_conf.yaml') + config = conf.QuantConf("fake_conf.yaml") framework_modelwise_capability = { - 'CONV2D': { - 'activation': { - 'dtype': ['iint8', 'fp32'], - 'scheme': ['asym', 'sym'], - 'granularity': ['per_tensor'], - 'algorithm': ['minmax', 'kl'] + "CONV2D": { + "activation": { + "dtype": ["iint8", "fp32"], + "scheme": ["asym", "sym"], + "granularity": ["per_tensor"], + "algorithm": ["minmax", "kl"], }, - 'weight': { - 'dtype': ['int8', 'fp32'], - 
'scheme': [ - 'sym', + "weight": { + "dtype": ["int8", "fp32"], + "scheme": [ + "sym", ], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax'] + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax"], }, }, } tune_space = config.modelwise_tune_space(framework_modelwise_capability) - self.assertEqual(tune_space['CONV2D']['activation']['dtype'], ['fp32']) - + self.assertEqual(tune_space["CONV2D"]["activation"]["dtype"], ["fp32"]) + def test_prune(self): - test_pytorch_prune = ''' + test_pytorch_prune = """ model: name: imagenet_prune framework: pytorch - + pruning: train: start_epoch: 0 @@ -565,9 +580,9 @@ def test_prune(self): optimizer: SGD: learning_rate: 0.1 - momentum: 0.1 + momentum: 0.1 nesterov: True - weight_decay: 0.1 + weight_decay: 0.1 criterion: CrossEntropyLoss: reduction: sum @@ -580,17 +595,17 @@ def test_prune(self): start_epoch: 1 end_epoch: 3 names: ['layer1.0.conv1.weight'] - + - !Pruner start_epoch: 0 end_epoch: 4 target_sparsity: 0.6 update_frequency: 2 names: ['layer1.0.conv2.weight'] - ''' + """ helper(test_pytorch_prune) - config = conf.Conf('fake_conf.yaml') - test_tensorflow_prune = ''' + config = conf.Conf("fake_conf.yaml") + test_tensorflow_prune = """ model: name: vit framework: tensorflow @@ -629,12 +644,12 @@ def test_prune(self): exit_policy: timeout: 0 random_seed: 9527 - ''' + """ helper(test_tensorflow_prune) - config = conf.Conf('fake_conf.yaml') - + config = conf.Conf("fake_conf.yaml") + def test_data_type(self): - test = ''' + test = """ model: name: test framework: tensorflow @@ -650,17 +665,17 @@ def test_data_type(self): high: [128., 127] low: 1, 0 dtype: ['float32', 'int8'] - ''' + """ helper(test) - cfg = conf.Conf('fake_conf.yaml').usr_cfg - dataset = cfg['quantization']['calibration']['dataloader']['dataset']['dummy'] - self.assertTrue(isinstance(dataset['shape'][0], tuple)) - self.assertTrue(isinstance(dataset['shape'], list)) - self.assertTrue(isinstance(dataset['high'][1], float)) - self.assertTrue(isinstance(dataset['high'][0], float)) - self.assertTrue(isinstance(dataset['low'][0], float)) + cfg = conf.Conf("fake_conf.yaml").usr_cfg + dataset = cfg["quantization"]["calibration"]["dataloader"]["dataset"]["dummy"] + self.assertTrue(isinstance(dataset["shape"][0], tuple)) + self.assertTrue(isinstance(dataset["shape"], list)) + self.assertTrue(isinstance(dataset["high"][1], float)) + self.assertTrue(isinstance(dataset["high"][0], float)) + self.assertTrue(isinstance(dataset["low"][0], float)) - test = ''' + test = """ model: name: test framework: tensorflow @@ -676,14 +691,14 @@ def test_data_type(self): high: 128 low: 0.1 dtype: ['float32', 'int8'] - ''' + """ helper(test) - cfg = conf.Conf('fake_conf.yaml').usr_cfg - dataset = cfg['quantization']['calibration']['dataloader']['dataset']['dummy'] - self.assertTrue(isinstance(dataset['shape'], tuple)) - self.assertTrue(isinstance(dataset['high'], float)) + cfg = conf.Conf("fake_conf.yaml").usr_cfg + dataset = cfg["quantization"]["calibration"]["dataloader"]["dataset"]["dummy"] + self.assertTrue(isinstance(dataset["shape"], tuple)) + self.assertTrue(isinstance(dataset["high"], float)) - test = ''' + test = """ model: name: test framework: tensorflow @@ -704,16 +719,16 @@ def test_data_type(self): size: 10 scale: [0.07, 0.99] ratio: [0.6, 0.8] - ''' + """ helper(test) - cfg = conf.Conf('fake_conf.yaml').usr_cfg - shape_cfg = cfg['quantization']['calibration']['dataloader']['dataset']['style_transfer']['resize_shape'] - self.assertTrue(isinstance(shape_cfg, 
list)) - transform_cfg = cfg['quantization']['calibration']['dataloader']['transform']['RandomResizedCrop'] - self.assertTrue(isinstance(transform_cfg['scale'], list)) - self.assertTrue(isinstance(transform_cfg['ratio'], list)) + cfg = conf.Conf("fake_conf.yaml").usr_cfg + shape_cfg = cfg["quantization"]["calibration"]["dataloader"]["dataset"]["style_transfer"]["resize_shape"] + self.assertTrue(isinstance(shape_cfg, list)) + transform_cfg = cfg["quantization"]["calibration"]["dataloader"]["transform"]["RandomResizedCrop"] + self.assertTrue(isinstance(transform_cfg["scale"], list)) + self.assertTrue(isinstance(transform_cfg["ratio"], list)) - test = ''' + test = """ model: name: test framework: tensorflow @@ -729,13 +744,13 @@ def test_data_type(self): style_folder: test crop_ratio: 0.5 resize_shape: [10,10] - ''' + """ helper(test) - cfg = conf.Conf('fake_conf.yaml').usr_cfg - shape_cfg = cfg['quantization']['calibration']['dataloader']['dataset']['style_transfer']['resize_shape'] - self.assertTrue(isinstance(shape_cfg, list)) + cfg = conf.Conf("fake_conf.yaml").usr_cfg + shape_cfg = cfg["quantization"]["calibration"]["dataloader"]["dataset"]["style_transfer"]["resize_shape"] + self.assertTrue(isinstance(shape_cfg, list)) - test = ''' + test = """ model: name: test framework: tensorflow @@ -753,37 +768,39 @@ def test_data_type(self): height: 224 width: 224 mean_value: 123.68 116.78 103.94 - ''' + """ helper(test) - cfg = conf.Conf('fake_conf.yaml').usr_cfg - shape_cfg = cfg['quantization']['calibration']['dataloader']['dataset']['dummy']['shape'] - self.assertTrue(isinstance(shape_cfg, tuple)) - transform_cfg = cfg['quantization']['calibration']['dataloader']['transform']['BilinearImagenet'] - self.assertTrue(isinstance(transform_cfg['mean_value'], list)) + cfg = conf.Conf("fake_conf.yaml").usr_cfg + shape_cfg = cfg["quantization"]["calibration"]["dataloader"]["dataset"]["dummy"]["shape"] + self.assertTrue(isinstance(shape_cfg, tuple)) + transform_cfg = cfg["quantization"]["calibration"]["dataloader"]["transform"]["BilinearImagenet"] + self.assertTrue(isinstance(transform_cfg["mean_value"], list)) def test_yaml_detection(self): try: - cfg = conf.Conf('not_exist.yaml').usr_cfg + cfg = conf.Conf("not_exist.yaml").usr_cfg except: pass def test_deep_set(self): - from neural_compressor.conf.dotdict import DotDict, deep_set - cfg = {'evaluation': {'accuracy': {}}} - dot_cfg = DotDict(cfg) - deep_set(dot_cfg, 'evaluation.accuracy.metric', 'iou') - deep_set(dot_cfg, 'evaluation.accuracy.multi_metrics.weight', [0.1, 0,9]) - deep_set(dot_cfg, 'evaluation.accuracy.multi_metrics.mAP.anno_path', 'anno_path_test') - self.assertTrue(dot_cfg.evaluation == dot_cfg['evaluation']) - self.assertTrue(dot_cfg.evaluation.accuracy == dot_cfg['evaluation']['accuracy']) - self.assertTrue(dot_cfg.evaluation.accuracy.metric == dot_cfg['evaluation']['accuracy']['metric']) - self.assertTrue(dot_cfg.evaluation.accuracy.multi_metrics == dot_cfg['evaluation']['accuracy']['multi_metrics']) - self.assertTrue(dot_cfg.evaluation.accuracy.multi_metrics.weight == [0.1, 0,9]) - self.assertTrue(dot_cfg.evaluation.accuracy.multi_metrics.mAP.anno_path == 'anno_path_test') - multi_metrics1 = dot_cfg.evaluation.accuracy.multi_metrics - multi_metrics2 = dot_cfg['evaluation']['accuracy']['multi_metrics'] - self.assertTrue(multi_metrics1 == multi_metrics2) - self.assertTrue(list(multi_metrics1.keys()) == ['weight', 'mAP']) + from neural_compressor.conf.dotdict import DotDict, deep_set + + cfg = {"evaluation": {"accuracy": {}}} + dot_cfg 
= DotDict(cfg) + deep_set(dot_cfg, "evaluation.accuracy.metric", "iou") + deep_set(dot_cfg, "evaluation.accuracy.multi_metrics.weight", [0.1, 0, 9]) + deep_set(dot_cfg, "evaluation.accuracy.multi_metrics.mAP.anno_path", "anno_path_test") + self.assertTrue(dot_cfg.evaluation == dot_cfg["evaluation"]) + self.assertTrue(dot_cfg.evaluation.accuracy == dot_cfg["evaluation"]["accuracy"]) + self.assertTrue(dot_cfg.evaluation.accuracy.metric == dot_cfg["evaluation"]["accuracy"]["metric"]) + self.assertTrue(dot_cfg.evaluation.accuracy.multi_metrics == dot_cfg["evaluation"]["accuracy"]["multi_metrics"]) + self.assertTrue(dot_cfg.evaluation.accuracy.multi_metrics.weight == [0.1, 0, 9]) + self.assertTrue(dot_cfg.evaluation.accuracy.multi_metrics.mAP.anno_path == "anno_path_test") + multi_metrics1 = dot_cfg.evaluation.accuracy.multi_metrics + multi_metrics2 = dot_cfg["evaluation"]["accuracy"]["multi_metrics"] + self.assertTrue(multi_metrics1 == multi_metrics2) + self.assertTrue(list(multi_metrics1.keys()) == ["weight", "mAP"]) + if __name__ == "__main__": unittest.main() diff --git a/test/config/test_config_2.x.py b/test/config/test_config_2.x.py index 9b23244feb2..08081710420 100644 --- a/test/config/test_config_2.x.py +++ b/test/config/test_config_2.x.py @@ -1,42 +1,38 @@ -"""Tests for 2.x config file""" +"""Tests for 2.x config file.""" import unittest -from neural_compressor import set_workspace, set_random_seed, set_resume_from, set_tensorboard + +from neural_compressor import set_random_seed, set_resume_from, set_tensorboard, set_workspace +from neural_compressor.config import BenchmarkConfig, MixedPrecisionConfig, MXNet, PostTrainingQuantConfig from neural_compressor.config import _Config as conf -from neural_compressor.config import PostTrainingQuantConfig, BenchmarkConfig, MixedPrecisionConfig, MXNet, options +from neural_compressor.config import options from neural_compressor.utils.constant import * def helper(content): - with open('fake_conf.yaml', 'w', encoding="utf-8") as f: + with open("fake_conf.yaml", "w", encoding="utf-8") as f: f.write(content) + class TestConfig(unittest.TestCase): def test_config(self): config = PostTrainingQuantConfig() - self.assertEqual(config.recipes['smooth_quant'], False) - self.assertEqual(config.recipes['fast_bias_correction'], False) - self.assertEqual(config.recipes['weight_correction'], False) - self.assertEqual(config.recipes['dedicated_qdq_pair'], False) - self.assertEqual(config.recipes['add_qdq_pair_to_weight'], False) - self.assertEqual(config.recipes['graph_optimization_level'], None) + self.assertEqual(config.recipes["smooth_quant"], False) + self.assertEqual(config.recipes["fast_bias_correction"], False) + self.assertEqual(config.recipes["weight_correction"], False) + self.assertEqual(config.recipes["dedicated_qdq_pair"], False) + self.assertEqual(config.recipes["add_qdq_pair_to_weight"], False) + self.assertEqual(config.recipes["graph_optimization_level"], None) + class TestGeneralConf(unittest.TestCase): def test_config(self): cfg = PostTrainingQuantConfig() - cfg.op_type_dict = {'Conv': { - 'weight': { - 'dtype': ['fp32']}, - 'activation': { - 'dtype': ['fp32']}} - } - cfg.op_name_dict = {"layer1.0.conv1": { - "activation": { - "dtype": ["fp32"]}, - "weight": { - "dtype": ["fp32"]}}, - } + cfg.op_type_dict = {"Conv": {"weight": {"dtype": ["fp32"]}, "activation": {"dtype": ["fp32"]}}} + cfg.op_name_dict = { + "layer1.0.conv1": {"activation": {"dtype": ["fp32"]}, "weight": {"dtype": ["fp32"]}}, + } a = conf(quantization=cfg) - 
self.assertEqual(a.quantization.op_type_dict['Conv']['weight']['dtype'], ['fp32']) + self.assertEqual(a.quantization.op_type_dict["Conv"]["weight"]["dtype"], ["fp32"]) cfg = BenchmarkConfig() cfg.cores_per_instance = 4 @@ -71,6 +67,5 @@ def test_config(self): set_tensorboard(tmp_tensorboard) - if __name__ == "__main__": unittest.main() diff --git a/test/config/test_config_regex.py b/test/config/test_config_regex.py index 5587a7070fe..8001aa89778 100644 --- a/test/config/test_config_regex.py +++ b/test/config/test_config_regex.py @@ -3,13 +3,15 @@ # import os import unittest -from neural_compressor.adaptor.tf_utils.util import disable_random import tensorflow as tf from tensorflow.compat.v1 import graph_util +from neural_compressor.adaptor.tf_utils.util import disable_random + + def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -37,14 +39,14 @@ def build_fake_yaml(): performance_only: True workspace: path: saved - ''' - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + """ + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) f.close() def build_fake_yaml_invalid_model_wise(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -76,8 +78,8 @@ def build_fake_yaml_invalid_model_wise(): relative: 0.05 workspace: path: saved - ''' - with open('fake_yaml_with_invalid_cfg.yaml', "w", encoding="utf-8") as f: + """ + with open("fake_yaml_with_invalid_cfg.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) f.close() @@ -90,8 +92,8 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') - os.remove('fake_yaml_with_invalid_cfg.yaml') + os.remove("fake_yaml.yaml") + os.remove("fake_yaml_with_invalid_cfg.yaml") @disable_random() def test_config_regex(self): @@ -99,38 +101,37 @@ def test_config_regex(self): top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_weights_2 = tf.compat.v1.get_variable("weight_2", [3, 8, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) - conv = tf.nn.conv2d(x_pad, conv_weights, strides=[ - 1, 2, 2, 1], padding="VALID", name='conv1_1') + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_weights_2 = tf.compat.v1.get_variable( + "weight_2", [3, 8, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) + conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID", name="conv1_1") normed1 = tf.compat.v1.layers.batch_normalization(conv) relu = tf.nn.relu(normed1) max_pool = tf.nn.max_pool(relu, ksize=1, strides=[1, 2, 2, 1], padding="SAME") - conv_bias = tf.compat.v1.get_variable("bias", [16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[ - 1, 2, 2, 1], padding="VALID", name='conv1_3') + conv_bias = tf.compat.v1.get_variable("bias", [16], initializer=tf.compat.v1.random_normal_initializer()) + conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[1, 2, 2, 1], padding="VALID", name="conv1_3") conv_bias = tf.math.add(conv_1, conv_bias) - relu6 = tf.nn.relu6(conv_bias, name='op_to_store') + relu6 = tf.nn.relu6(conv_bias, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with 
tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) for i in output_graph_def.node: - if i.op.find('Add') != -1: - i.op = 'Add' + if i.op.find("Add") != -1: + i.op = "Add" from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -139,10 +140,10 @@ def test_config_regex(self): found_fp32_conv = False found_quantized_conv = False for i in output_graph.graph_def.node: - if i.op == 'Conv2D' and i.name == 'conv1_1': + if i.op == "Conv2D" and i.name == "conv1_1": found_fp32_conv = True - if i.op.find("QuantizedConv2D") != -1 and i.name == 'conv1_3_eightbit_requantize': + if i.op.find("QuantizedConv2D") != -1 and i.name == "conv1_3_eightbit_requantize": found_quantized_conv = True self.assertEqual(found_fp32_conv, True) @@ -156,38 +157,37 @@ def test_config_regex_with_invalid_cfg(self): top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_weights_2 = tf.compat.v1.get_variable("weight_2", [3, 8, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) - conv = tf.nn.conv2d(x_pad, conv_weights, strides=[ - 1, 2, 2, 1], padding="VALID", name='conv1_1') + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_weights_2 = tf.compat.v1.get_variable( + "weight_2", [3, 8, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) + conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID", name="conv1_1") normed1 = tf.compat.v1.layers.batch_normalization(conv) relu = tf.nn.relu(normed1) max_pool = tf.nn.max_pool(relu, ksize=1, strides=[1, 2, 2, 1], padding="SAME") - conv_bias = tf.compat.v1.get_variable("bias", [16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[ - 1, 2, 2, 1], padding="VALID", name='conv1_3') + conv_bias = tf.compat.v1.get_variable("bias", [16], initializer=tf.compat.v1.random_normal_initializer()) + conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[1, 2, 2, 1], padding="VALID", name="conv1_3") conv_bias = tf.math.add(conv_1, conv_bias) - relu6 = tf.nn.relu6(conv_bias, name='op_to_store') + relu6 = tf.nn.relu6(conv_bias, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) for i in output_graph_def.node: - if i.op.find('Add') != -1: - i.op = 'Add' + if i.op.find("Add") != -1: + i.op = "Add" from 
neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml_with_invalid_cfg.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml_with_invalid_cfg.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -196,15 +196,15 @@ def test_config_regex_with_invalid_cfg(self): found_fp32_conv = False found_quantized_conv = False for i in output_graph.graph_def.node: - if i.op == 'Conv2D' and i.name == 'conv1_1': + if i.op == "Conv2D" and i.name == "conv1_1": found_fp32_conv = True - if i.op.find("QuantizedConv2D") != -1 and i.name == 'conv1_3_eightbit_requantize': + if i.op.find("QuantizedConv2D") != -1 and i.name == "conv1_3_eightbit_requantize": found_quantized_conv = True self.assertEqual(found_fp32_conv, True) self.assertEqual(found_quantized_conv, True) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/config/test_pythonic_config.py b/test/config/test_pythonic_config.py index 1c9a5fcbd27..a350cc0e4ad 100644 --- a/test/config/test_pythonic_config.py +++ b/test/config/test_pythonic_config.py @@ -1,103 +1,114 @@ -"""Tests for pythonic config file""" +"""Tests for pythonic config file.""" import copy -import unittest import os -from neural_compressor.conf.pythonic_config import config -import onnxruntime as ort -from onnx import helper, TensorProto +import shutil +import unittest + import numpy as np import onnx -import shutil -import torch +import onnxruntime as ort import tensorflow as tf -from tensorflow.core.framework import attr_value_pb2 -from tensorflow.core.framework import graph_pb2 -from tensorflow.core.framework import node_def_pb2 -from tensorflow.python.framework import tensor_util -from tensorflow.python.framework import dtypes import torch +from onnx import TensorProto, helper +from tensorflow.core.framework import attr_value_pb2, graph_pb2, node_def_pb2 +from tensorflow.python.framework import dtypes, tensor_util from torch import nn -from neural_compressor.conf.pythonic_config import OpQuantConf, ActivationConf, WeightConf -from neural_compressor.data import Datasets -from neural_compressor.experimental import Quantization, Distillation, NAS, common -from neural_compressor.experimental.pruning_v2 import Pruning -from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader from neural_compressor.adaptor import FRAMEWORKS from neural_compressor.adaptor.torch_utils.bf16_convert import BF16ModuleWrapper +from neural_compressor.conf.pythonic_config import ActivationConf, OpQuantConf, WeightConf, config +from neural_compressor.data import Datasets +from neural_compressor.experimental import NAS, Distillation, Quantization, common +from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader +from neural_compressor.experimental.pruning_v2 import Pruning -def build_matmul_model(): - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 1, 5, 5]) - B_init = helper.make_tensor('B', TensorProto.FLOAT, [1, 1, 5, 1], np.random.random([1, 1, 5, 1]).reshape(5).tolist()) - C = helper.make_tensor_value_info('C', TensorProto.FLOAT, [1, 1, 5, 1]) - D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [1, 1, 5, 1]) - H = helper.make_tensor_value_info('H', TensorProto.FLOAT, [1, 1, 5, 1]) - matmul_node 
= onnx.helper.make_node('MatMul', ['A', 'B'], ['C'], name='Matmul') +def build_matmul_model(): + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 1, 5, 5]) + B_init = helper.make_tensor( + "B", TensorProto.FLOAT, [1, 1, 5, 1], np.random.random([1, 1, 5, 1]).reshape(5).tolist() + ) + C = helper.make_tensor_value_info("C", TensorProto.FLOAT, [1, 1, 5, 1]) + D = helper.make_tensor_value_info("D", TensorProto.FLOAT, [1, 1, 5, 1]) + H = helper.make_tensor_value_info("H", TensorProto.FLOAT, [1, 1, 5, 1]) + + matmul_node = onnx.helper.make_node("MatMul", ["A", "B"], ["C"], name="Matmul") e_value = np.random.randint(2, size=(5)).astype(np.float32) - E_init = helper.make_tensor('E', TensorProto.FLOAT, [1, 1, 5, 1], e_value.reshape(5).tolist()) - add = onnx.helper.make_node('Add', ['C', 'E'], ['D'], name='add') + E_init = helper.make_tensor("E", TensorProto.FLOAT, [1, 1, 5, 1], e_value.reshape(5).tolist()) + add = onnx.helper.make_node("Add", ["C", "E"], ["D"], name="add") f_value = np.random.randint(2, size=(5)).astype(np.float32) - F_init = helper.make_tensor('F', TensorProto.FLOAT, [1, 1, 5, 1], e_value.reshape(5).tolist()) - add2 = onnx.helper.make_node('Add', ['D', 'F'], ['H'], name='add2') - graph = helper.make_graph([matmul_node, add, add2], 'test_graph_1', [A], [H], [E_init, F_init, B_init]) + F_init = helper.make_tensor("F", TensorProto.FLOAT, [1, 1, 5, 1], e_value.reshape(5).tolist()) + add2 = onnx.helper.make_node("Add", ["D", "F"], ["H"], name="add2") + graph = helper.make_graph([matmul_node, add, add2], "test_graph_1", [A], [H], [E_init, F_init, B_init]) model = helper.make_model(graph) - model = helper.make_model(graph, **{'opset_imports': [helper.make_opsetid('', 13)]}) - return model + model = helper.make_model(graph, **{"opset_imports": [helper.make_opsetid("", 13)]}) + return model + def build_conv2d_model(): input_node = node_def_pb2.NodeDef() input_node.name = "input" input_node.op = "Placeholder" - input_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + input_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv1_weight_node = node_def_pb2.NodeDef() conv1_weight_node.name = "conv1_weights" conv1_weight_node.op = "Const" - conv1_weight_value = np.float32(np.abs(np.random.randn(3,3,3,32))) - conv1_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - conv1_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - conv1_weight_value, conv1_weight_value.dtype.type, conv1_weight_value.shape))) + conv1_weight_value = np.float32(np.abs(np.random.randn(3, 3, 3, 32))) + conv1_weight_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv1_weight_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv1_weight_value, conv1_weight_value.dtype.type, conv1_weight_value.shape + ) + ) + ) conv1_node = node_def_pb2.NodeDef() conv1_node.name = "conv1" conv1_node.op = "Conv2D" - conv1_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + conv1_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv1_node.input.extend([input_node.name, conv1_weight_node.name]) - conv1_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv1_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( 
- list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv1_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) - conv1_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + conv1_node.attr["strides"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv1_node.attr["dilations"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv1_node.attr["padding"].CopyFrom(attr_value_pb2.AttrValue(s=b"SAME")) + conv1_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) bias_node = node_def_pb2.NodeDef() bias_node.name = "conv1_bias" bias_node.op = "Const" bias_value = np.float32(np.abs(np.random.randn(32))) - bias_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - bias_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - bias_value, bias_value.dtype.type, bias_value.shape))) + bias_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(bias_value, bias_value.dtype.type, bias_value.shape) + ) + ) bias_add_node = node_def_pb2.NodeDef() bias_add_node.name = "out" bias_add_node.op = "BiasAdd" - bias_add_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_add_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) bias_add_node.input.extend([conv1_node.name, bias_node.name]) - bias_add_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) - + bias_add_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) + test_graph = graph_pb2.GraphDef() - test_graph.node.extend([input_node, - conv1_weight_node, - conv1_node, - bias_node, - bias_add_node, - ]) + test_graph.node.extend( + [ + input_node, + conv1_weight_node, + conv1_node, + bias_node, + bias_add_node, + ] + ) return test_graph + class ConvNet(torch.nn.Module): def __init__(self, channels, dimensions): super().__init__() @@ -115,11 +126,13 @@ def forward(self, inputs): outputs = self.activation(outputs) return outputs + def model_builder(model_arch_params): - channels = model_arch_params['channels'] - dimensions = model_arch_params['dimensions'] + channels = model_arch_params["channels"] + dimensions = model_arch_params["dimensions"] return ConvNet(channels, dimensions) + class torch_model(torch.nn.Module): def __init__(self): super().__init__() @@ -139,16 +152,16 @@ def tearDownClass(self): shutil.rmtree("./nc_workspace", ignore_errors=True) def test_config_setting(self): - config.quantization.inputs = ['image'] - config.quantization.outputs = ['out'] - config.quantization.approach = 'post_training_dynamic_quant' - config.quantization.device = 'gpu' - config.quantization.op_type_dict = {'Conv': {'weight': {'dtype': ['fp32']} - , 'activation': {'dtype': ['fp32']}}} - config.quantization.op_name_dict = {"layer1.0.conv1": {"activation": {"dtype": ["fp32"]} - ,"weight": {"dtype": ["fp32"]}}} - config.quantization.strategy = 'mse' - config.quantization.objective = 'accuracy' + config.quantization.inputs = ["image"] + config.quantization.outputs = ["out"] + config.quantization.approach = "post_training_dynamic_quant" + config.quantization.device = "gpu" + config.quantization.op_type_dict = {"Conv": {"weight": {"dtype": ["fp32"]}, "activation": {"dtype": ["fp32"]}}} + 
config.quantization.op_name_dict = { + "layer1.0.conv1": {"activation": {"dtype": ["fp32"]}, "weight": {"dtype": ["fp32"]}} + } + config.quantization.strategy = "mse" + config.quantization.objective = "accuracy" config.quantization.timeout = 100 config.quantization.max_trials = 100 config.quantization.accuracy_criterion.relative = 0.5 @@ -156,16 +169,20 @@ def test_config_setting(self): config.quantization.use_bf16 = False config.benchmark.cores_per_instance = 10 - self.assertEqual(config.quantization.inputs, ['image']) - self.assertEqual(config.quantization.outputs, ['out']) - self.assertEqual(config.quantization.approach, 'post_training_dynamic_quant') - self.assertEqual(config.quantization.device, 'gpu') - self.assertEqual(config.quantization.op_type_dict, - {'Conv': {'weight': {'dtype': ['fp32']}, 'activation': {'dtype': ['fp32']}}}) - self.assertEqual(config.quantization.op_name_dict, - {"layer1.0.conv1": {"activation": {"dtype": ["fp32"]},"weight": {"dtype": ["fp32"]}}}) - self.assertEqual(config.quantization.strategy, 'mse') - self.assertEqual(config.quantization.objective, 'accuracy') + self.assertEqual(config.quantization.inputs, ["image"]) + self.assertEqual(config.quantization.outputs, ["out"]) + self.assertEqual(config.quantization.approach, "post_training_dynamic_quant") + self.assertEqual(config.quantization.device, "gpu") + self.assertEqual( + config.quantization.op_type_dict, + {"Conv": {"weight": {"dtype": ["fp32"]}, "activation": {"dtype": ["fp32"]}}}, + ) + self.assertEqual( + config.quantization.op_name_dict, + {"layer1.0.conv1": {"activation": {"dtype": ["fp32"]}, "weight": {"dtype": ["fp32"]}}}, + ) + self.assertEqual(config.quantization.strategy, "mse") + self.assertEqual(config.quantization.objective, "accuracy") self.assertEqual(config.quantization.timeout, 100) self.assertEqual(config.quantization.max_trials, 100) self.assertEqual(config.quantization.accuracy_criterion.relative, 0.5) @@ -174,15 +191,15 @@ def test_config_setting(self): config.quantization.accuracy_criterion.absolute = 0.4 self.assertEqual(config.quantization.accuracy_criterion.absolute, 0.4) self.assertEqual(config.quantization.accuracy_criterion.relative, None) - - config.onnxruntime.precisions = ['int8', 'uint8'] - config.onnxruntime.graph_optimization_level = 'DISABLE_ALL' + + config.onnxruntime.precisions = ["int8", "uint8"] + config.onnxruntime.graph_optimization_level = "DISABLE_ALL" q = Quantization(config) q.model = build_matmul_model() self.assertEqual(q.conf.usr_cfg.reduce_range, False) self.assertEqual(q.conf.usr_cfg.use_bf16, False) q.pre_process() - self.assertEqual(q.strategy.adaptor.query_handler.get_precisions(), ['int8', 'uint8']) + self.assertEqual(q.strategy.adaptor.query_handler.get_precisions(), ["int8", "uint8"]) self.assertNotEqual(config.mxnet, None) self.assertNotEqual(config.tensorflow, None) self.assertNotEqual(config.pytorch, None) @@ -192,42 +209,42 @@ def test_weight_activation_op(self): opconf = OpQuantConf() self.assertEqual(opconf.op_type, None) - opconf = OpQuantConf('MatMul') - self.assertEqual(opconf.op_type, 'MatMul') + opconf = OpQuantConf("MatMul") + self.assertEqual(opconf.op_type, "MatMul") self.assertNotEqual(opconf.weight, None) self.assertNotEqual(opconf.activation, None) - opconf.weight.datatype = ['int8'] - opconf.activation.datatype = ['uint8'] - opconf.weight.scheme = ['asym'] - opconf.activation.scheme = ['sym'] - opconf.weight.granularity = ['per_channel'] - opconf.activation.granularity = ['per_tensor'] - opconf.weight.algorithm = ['minmax'] - 
opconf.activation.algorithm = ['minmax'] - self.assertEqual(opconf.weight.datatype, ['int8']) - self.assertEqual(opconf.activation.datatype, ['uint8']) - self.assertEqual(opconf.weight.scheme, ['asym']) - self.assertEqual(opconf.activation.scheme, ['sym']) - self.assertEqual(opconf.weight.granularity, ['per_channel']) - self.assertEqual(opconf.activation.granularity, ['per_tensor']) - self.assertEqual(opconf.weight.algorithm, ['minmax']) - self.assertEqual(opconf.activation.algorithm, ['minmax']) - + opconf.weight.datatype = ["int8"] + opconf.activation.datatype = ["uint8"] + opconf.weight.scheme = ["asym"] + opconf.activation.scheme = ["sym"] + opconf.weight.granularity = ["per_channel"] + opconf.activation.granularity = ["per_tensor"] + opconf.weight.algorithm = ["minmax"] + opconf.activation.algorithm = ["minmax"] + self.assertEqual(opconf.weight.datatype, ["int8"]) + self.assertEqual(opconf.activation.datatype, ["uint8"]) + self.assertEqual(opconf.weight.scheme, ["asym"]) + self.assertEqual(opconf.activation.scheme, ["sym"]) + self.assertEqual(opconf.weight.granularity, ["per_channel"]) + self.assertEqual(opconf.activation.granularity, ["per_tensor"]) + self.assertEqual(opconf.weight.algorithm, ["minmax"]) + self.assertEqual(opconf.activation.algorithm, ["minmax"]) + def test_quantization(self): q = Quantization(config) q.model = build_matmul_model() q_model = q() - self.assertTrue(any([i.name.endswith('_quant') for i in q_model.nodes()])) + self.assertTrue(any([i.name.endswith("_quant") for i in q_model.nodes()])) - config.onnxruntime.precisions = ['fp32'] + config.onnxruntime.precisions = ["fp32"] q = Quantization(config) q.model = build_matmul_model() q_model = q() - self.assertTrue(all([not i.name.endswith('_quant') for i in q_model.nodes()])) + self.assertTrue(all([not i.name.endswith("_quant") for i in q_model.nodes()])) def test_distillation(self): - config.quantization.device = 'cpu' + config.quantization.device = "cpu" distiller = Distillation(config) model = ConvNet(16, 32) origin_weight = copy.deepcopy(model.out.weight) @@ -235,9 +252,10 @@ def test_distillation(self): distiller.teacher_model = ConvNet(16, 32) # Customized train, evaluation - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(32, 3, 64, 64), low=0., high=1., label=True) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(32, 3, 64, 64), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) + def train_func(model): epochs = 3 iters = 10 @@ -247,7 +265,7 @@ def train_func(model): model.train() cnt = 0 for image, target in dummy_dataloader: - print('.', end='') + print(".", end="") cnt += 1 output = model(image).unsqueeze(dim=0) loss = criterion(output, target) @@ -257,13 +275,15 @@ def train_func(model): optimizer.step() if cnt >= iters: break + def eval_func(model): model.eval() acc = 0 for image, target in dummy_dataloader: output = model(image).cpu().detach().numpy() - acc += np.sum(output==target) - return {'acc': acc / len(dummy_dataset)} + acc += np.sum(output == target) + return {"acc": acc / len(dummy_dataset)} + distiller.train_func = train_func distiller.eval_func = eval_func model = distiller() @@ -277,9 +297,10 @@ def test_pruning(self): prune.model = model # Customized train, evaluation - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(32, 3, 64, 64), low=0., high=1., label=True) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(32, 3, 64, 64), low=0.0, high=1.0, 
label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) + def train_func(model): epochs = 3 iters = 10 @@ -290,7 +311,7 @@ def train_func(model): cnt = 0 prune.on_epoch_begin(nepoch) for image, target in dummy_dataloader: - print('.', end='') + print(".", end="") cnt += 1 prune.on_step_begin(cnt) output = model(image).unsqueeze(dim=0) @@ -302,13 +323,15 @@ def train_func(model): if cnt >= iters: break prune.on_epoch_end() + def eval_func(model): model.eval() acc = 0 for image, target in dummy_dataloader: output = model(image).cpu().detach().numpy() - acc += np.sum(output==target) - return {'acc': acc / len(dummy_dataset)} + acc += np.sum(output == target) + return {"acc": acc / len(dummy_dataset)} + prune.train_func = train_func prune.eval_func = eval_func model = prune() @@ -316,26 +339,26 @@ def eval_func(model): self.assertTrue(torch.any(weight != origin_weight)) def test_use_bf16(self): - config.quantization.device = 'cpu' - config.quantization.approach = 'post_training_dynamic_quant' + config.quantization.device = "cpu" + config.quantization.approach = "post_training_dynamic_quant" config.quantization.use_bf16 = False q = Quantization(config) q.model = torch_model() - os.environ['FORCE_BF16'] = '1' + os.environ["FORCE_BF16"] = "1" q_model = q() - del os.environ['FORCE_BF16'] + del os.environ["FORCE_BF16"] self.assertEqual(isinstance(q_model.model.linear, BF16ModuleWrapper), False) def test_quantization_pytorch(self): - config.quantization.device = 'cpu' - config.quantization.backend = 'default' - config.quantization.approach = 'post_training_dynamic_quant' + config.quantization.device = "cpu" + config.quantization.backend = "default" + config.quantization.approach = "post_training_dynamic_quant" config.quantization.use_bf16 = False q = Quantization(config) q.model = torch_model() q_model = q() - self.assertEqual(isinstance(q_model.model.linear, - torch.nn.quantized.dynamic.modules.linear.Linear),True) + self.assertEqual(isinstance(q_model.model.linear, torch.nn.quantized.dynamic.modules.linear.Linear), True) + class TestTFPyhonicConf(unittest.TestCase): @classmethod @@ -343,23 +366,24 @@ def tearDownClass(self): shutil.rmtree("./nc_workspace", ignore_errors=True) def test_tf_quantization(self): - config.quantization.inputs = ['input'] - config.quantization.outputs = ['out'] - config.quantization.approach = 'post_training_static_quant' - config.quantization.device = 'cpu' - config.quantization.strategy = 'basic' - config.quantization.objective = 'accuracy' + config.quantization.inputs = ["input"] + config.quantization.outputs = ["out"] + config.quantization.approach = "post_training_static_quant" + config.quantization.device = "cpu" + config.quantization.strategy = "basic" + config.quantization.objective = "accuracy" config.quantization.timeout = 100 config.quantization.accuracy_criterion.relative = 0.5 config.quantization.reduce_range = False q = Quantization(config) q.model = build_conv2d_model() - dataset = q.dataset('dummy', shape=(1, 224, 224, 3), label=True) + dataset = q.dataset("dummy", shape=(1, 224, 224, 3), label=True) q.calib_dataloader = common.DataLoader(dataset) q_model = q() - - self.assertTrue(any([i.name.endswith('_requantize') for i in q_model.graph_def.node])) + + self.assertTrue(any([i.name.endswith("_requantize") for i in q_model.graph_def.node])) + if __name__ == "__main__": unittest.main() diff --git a/test/data/test_dataloader.py b/test/data/test_dataloader.py index 024b6408b4c..39672820e50 100644 --- a/test/data/test_dataloader.py +++ 
b/test/data/test_dataloader.py @@ -1,793 +1,904 @@ """Tests for the dataloader module.""" +import os import platform +import shutil import unittest -import os + import numpy as np -import shutil -from neural_compressor.utils.create_obj_from_config import create_dataset, create_dataloader -from neural_compressor.data import DataLoader -from neural_compressor.data import Datasets, DATALOADERS, TRANSFORMS from PIL import Image +from neural_compressor.data import DATALOADERS, TRANSFORMS, DataLoader, Datasets +from neural_compressor.utils.create_obj_from_config import create_dataloader, create_dataset + + class TestBuiltinDataloader(unittest.TestCase): @classmethod def tearDownClass(cls): - os.remove('./t10k-labels-idx1-ubyte.gz') - os.remove('./t10k-images-idx3-ubyte.gz') - os.remove('./train-images-idx3-ubyte.gz') - os.remove('./train-labels-idx1-ubyte.gz') - os.remove('./mnist.npz') + os.remove("./t10k-labels-idx1-ubyte.gz") + os.remove("./t10k-images-idx3-ubyte.gz") + os.remove("./train-images-idx3-ubyte.gz") + os.remove("./train-labels-idx1-ubyte.gz") + os.remove("./mnist.npz") def test_pytorch_dataset(self): dataloader_args = { - 'batch_size': 2, - 'dataset': {"CIFAR10": {'root': './', 'train':False, 'download':False}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "batch_size": 2, + "dataset": {"CIFAR10": {"root": "./", "train": False, "download": False}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - self.assertRaises(RuntimeError, create_dataloader, 'pytorch', dataloader_args) + self.assertRaises(RuntimeError, create_dataloader, "pytorch", dataloader_args) dataloader_args = { - 'batch_size': 2, - 'dataset': {"CIFAR100": {'root': './', 'train':False, 'download':False}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "batch_size": 2, + "dataset": {"CIFAR100": {"root": "./", "train": False, "download": False}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - self.assertRaises(RuntimeError, create_dataloader, 'pytorch', dataloader_args) + self.assertRaises(RuntimeError, create_dataloader, "pytorch", dataloader_args) dataloader_args = { - 'dataset': {"MNIST": {'root': './test', 'train':False, 'download':False}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "dataset": {"MNIST": {"root": "./test", "train": False, "download": False}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - self.assertRaises(RuntimeError, create_dataloader, 'pytorch', dataloader_args) + self.assertRaises(RuntimeError, create_dataloader, "pytorch", dataloader_args) dataloader_args = { - 'batch_size': 2, - 'dataset': {"MNIST": {'root': './', 'train':False, 'download':True}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "batch_size": 2, + "dataset": {"MNIST": {"root": "./", "train": False, "download": True}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - dataloader = create_dataloader('pytorch', dataloader_args) + dataloader = create_dataloader("pytorch", dataloader_args) for data in dataloader: self.assertEqual(len(data[0]), 2) - self.assertEqual(data[0][0].shape, (24,24)) + self.assertEqual(data[0][0].shape, (24, 24)) break dataloader_args = { - 'batch_size': 2, - 'dataset': {"FashionMNIST": {'root': './', 'train':False, 'download':True}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "batch_size": 2, + "dataset": {"FashionMNIST": {"root": "./", "train": False, "download": True}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - dataloader = create_dataloader('pytorch', 
dataloader_args) + dataloader = create_dataloader("pytorch", dataloader_args) for data in dataloader: self.assertEqual(len(data[0]), 2) - self.assertEqual(data[0][0].shape, (24,24)) + self.assertEqual(data[0][0].shape, (24, 24)) break dataloader_args = { - 'batch_size': 2, - 'shuffle': True, - 'dataset': {"FashionMNIST": {'root': './', 'train':False, 'download':True}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "batch_size": 2, + "shuffle": True, + "dataset": {"FashionMNIST": {"root": "./", "train": False, "download": True}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - dataloader = create_dataloader('pytorch', dataloader_args) - self.assertEqual(dataloader.dataloader.sampler.__class__.__name__, 'RandomSampler') + dataloader = create_dataloader("pytorch", dataloader_args) + self.assertEqual(dataloader.dataloader.sampler.__class__.__name__, "RandomSampler") for data in dataloader: self.assertEqual(len(data[0]), 2) - self.assertEqual(data[0][0].shape, (24,24)) + self.assertEqual(data[0][0].shape, (24, 24)) break dataloader_args = { - 'batch_size': 2, - 'dataset': {"FashionMNIST": {'root': './', 'train':True, 'download':True}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None, - 'distributed': True + "batch_size": 2, + "dataset": {"FashionMNIST": {"root": "./", "train": True, "download": True}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, + "distributed": True, } - dataloader = create_dataloader('pytorch', dataloader_args) - self.assertEqual(dataloader.dataloader.sampler.__class__.__name__, 'DistributedSampler') + dataloader = create_dataloader("pytorch", dataloader_args) + self.assertEqual(dataloader.dataloader.sampler.__class__.__name__, "DistributedSampler") for data in dataloader: self.assertEqual(len(data[0]), 2) - self.assertEqual(data[0][0].shape, (24,24)) + self.assertEqual(data[0][0].shape, (24, 24)) break - + @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows yet") def test_mxnet_dataset(self): dataloader_args = { - 'batch_size': 2, - 'dataset': {"CIFAR10": {'root': './', 'train':False, 'download':False}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "batch_size": 2, + "dataset": {"CIFAR10": {"root": "./", "train": False, "download": False}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - self.assertRaises(RuntimeError, create_dataloader, 'mxnet', dataloader_args) + self.assertRaises(RuntimeError, create_dataloader, "mxnet", dataloader_args) dataloader_args = { - 'batch_size': 2, - 'dataset': {"CIFAR100": {'root': './', 'train':False, 'download':False}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "batch_size": 2, + "dataset": {"CIFAR100": {"root": "./", "train": False, "download": False}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - self.assertRaises(RuntimeError, create_dataloader, 'mxnet', dataloader_args) + self.assertRaises(RuntimeError, create_dataloader, "mxnet", dataloader_args) dataloader_args = { - 'dataset': {"MNIST": {'root': './test', 'train':False, 'download':False}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "dataset": {"MNIST": {"root": "./test", "train": False, "download": False}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - self.assertRaises(RuntimeError, create_dataloader, 'mxnet', dataloader_args) + self.assertRaises(RuntimeError, create_dataloader, "mxnet", dataloader_args) dataloader_args = { - 'batch_size': 2, - 'dataset': {"MNIST": {'root': './', 'train':True, 
'download':True}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "batch_size": 2, + "dataset": {"MNIST": {"root": "./", "train": True, "download": True}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - dataloader = create_dataloader('mxnet', dataloader_args) + dataloader = create_dataloader("mxnet", dataloader_args) for data in dataloader: self.assertEqual(len(data[0]), 2) - self.assertEqual(data[0][0].shape, (24,24,1)) + self.assertEqual(data[0][0].shape, (24, 24, 1)) break dataloader_args = { - 'batch_size': 2, - 'dataset': {"FashionMNIST": {'root': './', 'train':False, 'download':True}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "batch_size": 2, + "dataset": {"FashionMNIST": {"root": "./", "train": False, "download": True}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - dataloader = create_dataloader('mxnet', dataloader_args) + dataloader = create_dataloader("mxnet", dataloader_args) for data in dataloader: self.assertEqual(len(data[0]), 2) - self.assertEqual(data[0][0].shape, (24,24,1)) + self.assertEqual(data[0][0].shape, (24, 24, 1)) break dataloader_args = { - 'batch_size': 2, - 'shuffle': True, - 'dataset': {"MNIST": {'root': './', 'train':True, 'download':True}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "batch_size": 2, + "shuffle": True, + "dataset": {"MNIST": {"root": "./", "train": True, "download": True}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } with self.assertLogs() as cm: - dataloader = create_dataloader('mxnet', dataloader_args) - self.assertEqual(cm.output, ['WARNING:root:Shuffle is not supported yet in' \ - ' MXNetDataLoader, ignoring shuffle keyword.']) + dataloader = create_dataloader("mxnet", dataloader_args) + self.assertEqual( + cm.output, ["WARNING:root:Shuffle is not supported yet in" " MXNetDataLoader, ignoring shuffle keyword."] + ) def test_tf_dataset(self): dataloader_args = { - 'batch_size': 2, - 'dataset': {"CIFAR10": {'root': './', 'train':False, 'download':False}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "batch_size": 2, + "dataset": {"CIFAR10": {"root": "./", "train": False, "download": False}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - self.assertRaises(RuntimeError, create_dataloader, 'tensorflow', dataloader_args) + self.assertRaises(RuntimeError, create_dataloader, "tensorflow", dataloader_args) dataloader_args = { - 'batch_size': 2, - 'dataset': {"CIFAR100": {'root': './', 'train':False, 'download':False}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "batch_size": 2, + "dataset": {"CIFAR100": {"root": "./", "train": False, "download": False}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - self.assertRaises(RuntimeError, create_dataloader, 'tensorflow', dataloader_args) + self.assertRaises(RuntimeError, create_dataloader, "tensorflow", dataloader_args) dataloader_args = { - 'dataset': {"MNIST": {'root': './test', 'train':False, 'download':False}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "dataset": {"MNIST": {"root": "./test", "train": False, "download": False}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - self.assertRaises(RuntimeError, create_dataloader, 'tensorflow', dataloader_args) + self.assertRaises(RuntimeError, create_dataloader, "tensorflow", dataloader_args) dataloader_args = { - 'batch_size': 2, - 'dataset': {"MNIST": {'root': './', 'train':False, 'download':True}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + 
"batch_size": 2, + "dataset": {"MNIST": {"root": "./", "train": False, "download": True}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - dataloader = create_dataloader('tensorflow', dataloader_args) + dataloader = create_dataloader("tensorflow", dataloader_args) for data in dataloader: self.assertEqual(len(data[0]), 2) - self.assertEqual(data[0][0].shape, (24,24)) + self.assertEqual(data[0][0].shape, (24, 24)) break dataloader_args = { - 'batch_size': 2, - 'dataset': {"FashionMNIST": {'root': './', 'train':False, 'download':True}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "batch_size": 2, + "dataset": {"FashionMNIST": {"root": "./", "train": False, "download": True}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - dataloader = create_dataloader('tensorflow', dataloader_args) + dataloader = create_dataloader("tensorflow", dataloader_args) for data in dataloader: self.assertEqual(len(data[0]), 2) - self.assertEqual(data[0][0].shape, (24,24)) + self.assertEqual(data[0][0].shape, (24, 24)) break dataloader_args = { - 'batch_size': 2, - 'shuffle': True, - 'dataset': {"FashionMNIST": {'root': './', 'train':False, 'download':True}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "batch_size": 2, + "shuffle": True, + "dataset": {"FashionMNIST": {"root": "./", "train": False, "download": True}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } with self.assertLogs() as cm: - dataloader = create_dataloader('tensorflow', dataloader_args) - self.assertEqual(cm.output, ['WARNING:root:Shuffle is not supported yet in' \ - ' TensorflowDataLoader, ignoring shuffle keyword.']) + dataloader = create_dataloader("tensorflow", dataloader_args) + self.assertEqual( + cm.output, + ["WARNING:root:Shuffle is not supported yet in" " TensorflowDataLoader, ignoring shuffle keyword."], + ) def test_onnx_dataset(self): dataloader_args = { - 'batch_size': 2, - 'dataset': {"CIFAR10": {'root': './', 'train':False, 'download':False}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "batch_size": 2, + "dataset": {"CIFAR10": {"root": "./", "train": False, "download": False}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - self.assertRaises(RuntimeError, create_dataloader, - 'onnxrt_qlinearops', dataloader_args) + self.assertRaises(RuntimeError, create_dataloader, "onnxrt_qlinearops", dataloader_args) dataloader_args = { - 'batch_size': 2, - 'dataset': {"CIFAR100": {'root': './', 'train':False, 'download':False}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "batch_size": 2, + "dataset": {"CIFAR100": {"root": "./", "train": False, "download": False}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - self.assertRaises(RuntimeError, create_dataloader, - 'onnxrt_qlinearops', dataloader_args) + self.assertRaises(RuntimeError, create_dataloader, "onnxrt_qlinearops", dataloader_args) dataloader_args = { - 'dataset': {"MNIST": {'root': './test', 'train':False, 'download':False}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "dataset": {"MNIST": {"root": "./test", "train": False, "download": False}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - self.assertRaises(RuntimeError, create_dataloader, - 'onnxrt_qlinearops', dataloader_args) + self.assertRaises(RuntimeError, create_dataloader, "onnxrt_qlinearops", dataloader_args) dataloader_args = { - 'batch_size': 2, - 'dataset': {"MNIST": {'root': './', 'train':False, 'download':True}}, - 'transform': {'Resize': {'size': 24}}, 
- 'filter': None + "batch_size": 2, + "dataset": {"MNIST": {"root": "./", "train": False, "download": True}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - dataloader = create_dataloader('onnxrt_qlinearops', dataloader_args) + dataloader = create_dataloader("onnxrt_qlinearops", dataloader_args) for data in dataloader: self.assertEqual(len(data[0]), 2) - self.assertEqual(data[0][0].shape, (24,24,1)) + self.assertEqual(data[0][0].shape, (24, 24, 1)) break dataloader_args = { - 'batch_size': 2, - 'dataset': {"FashionMNIST": {'root': './', 'train':False, 'download':True}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "batch_size": 2, + "dataset": {"FashionMNIST": {"root": "./", "train": False, "download": True}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - dataloader = create_dataloader('onnxrt_qlinearops', dataloader_args) + dataloader = create_dataloader("onnxrt_qlinearops", dataloader_args) for data in dataloader: self.assertEqual(len(data[0]), 2) - self.assertEqual(data[0][0].shape, (24,24,1)) + self.assertEqual(data[0][0].shape, (24, 24, 1)) break dataloader_args = { - 'batch_size': 2, - 'shuffle': True, - 'dataset': {"FashionMNIST": {'root': './', 'train':False, 'download':True}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "batch_size": 2, + "shuffle": True, + "dataset": {"FashionMNIST": {"root": "./", "train": False, "download": True}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } with self.assertLogs() as cm: - dataloader = create_dataloader('onnxrt_qlinearops', dataloader_args) - self.assertEqual(cm.output, ['WARNING:root:Shuffle is not supported yet' \ - ' in ONNXRTDataLoader, ignoring shuffle keyword.']) + dataloader = create_dataloader("onnxrt_qlinearops", dataloader_args) + self.assertEqual( + cm.output, ["WARNING:root:Shuffle is not supported yet" " in ONNXRTDataLoader, ignoring shuffle keyword."] + ) + class TestImagenetRaw(unittest.TestCase): @classmethod def setUpClass(cls): - os.makedirs('val', exist_ok=True) - random_array = np.random.random_sample([100,100,3]) * 255 + os.makedirs("val", exist_ok=True) + random_array = np.random.random_sample([100, 100, 3]) * 255 random_array = random_array.astype(np.uint8) random_array = random_array.astype(np.uint8) im = Image.fromarray(random_array) - im.save('val/test.jpg') - with open('val/val.txt', 'w') as f: - f.write('test.jpg 0') + im.save("val/test.jpg") + with open("val/val.txt", "w") as f: + f.write("test.jpg 0") @classmethod def tearDownClass(cls): - if os.path.exists('val'): - shutil.rmtree('val') + if os.path.exists("val"): + shutil.rmtree("val") def test_tensorflow(self): dataloader_args = { - 'dataset': {"ImagenetRaw": {'data_path': './val', 'image_list':None}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "dataset": {"ImagenetRaw": {"data_path": "./val", "image_list": None}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - dataloader = create_dataloader('tensorflow', dataloader_args) + dataloader = create_dataloader("tensorflow", dataloader_args) for data in dataloader: - self.assertEqual(data[0][0].shape, (24,24,3)) + self.assertEqual(data[0][0].shape, (24, 24, 3)) break dataloader_args = { - 'dataset': {"ImagenetRaw": {'data_path':'val', 'image_list':'val/val.txt'}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "dataset": {"ImagenetRaw": {"data_path": "val", "image_list": "val/val.txt"}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - dataloader = create_dataloader('tensorflow', 
dataloader_args) + dataloader = create_dataloader("tensorflow", dataloader_args) for data in dataloader: - self.assertEqual(data[0][0].shape, (24,24,3)) + self.assertEqual(data[0][0].shape, (24, 24, 3)) break def test_pytorch(self): dataloader_args = { - 'dataset': {"ImagenetRaw": {'data_path': 'val', 'image_list':None}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "dataset": {"ImagenetRaw": {"data_path": "val", "image_list": None}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - dataloader = create_dataloader('pytorch', dataloader_args) + dataloader = create_dataloader("pytorch", dataloader_args) for data in dataloader: - self.assertEqual(data[0][0].shape, (24,24,3)) + self.assertEqual(data[0][0].shape, (24, 24, 3)) break dataloader_args = { - 'dataset': {"ImagenetRaw": {'data_path':'val', 'image_list':'val/val.txt'}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "dataset": {"ImagenetRaw": {"data_path": "val", "image_list": "val/val.txt"}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - dataloader = create_dataloader('pytorch', dataloader_args) + dataloader = create_dataloader("pytorch", dataloader_args) for data in dataloader: - self.assertEqual(data[0][0].shape, (24,24,3)) + self.assertEqual(data[0][0].shape, (24, 24, 3)) break - + @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows yet") def test_mxnet(self): import mxnet as mx + dataloader_args = { - 'dataset': {"ImagenetRaw": {'data_path': 'val', 'image_list':None}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "dataset": {"ImagenetRaw": {"data_path": "val", "image_list": None}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - dataloader = create_dataloader('mxnet', dataloader_args) + dataloader = create_dataloader("mxnet", dataloader_args) for data in dataloader: - self.assertEqual(data[0][0].shape, (24,24,3)) + self.assertEqual(data[0][0].shape, (24, 24, 3)) break dataloader_args = { - 'dataset': {"ImagenetRaw": {'data_path':'val', 'image_list':'val/val.txt'}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "dataset": {"ImagenetRaw": {"data_path": "val", "image_list": "val/val.txt"}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - dataloader = create_dataloader('mxnet', dataloader_args) + dataloader = create_dataloader("mxnet", dataloader_args) for data in dataloader: - self.assertEqual(data[0][0].shape, (24,24,3)) + self.assertEqual(data[0][0].shape, (24, 24, 3)) break def test_onnx(self): dataloader_args = { - 'dataset': {"ImagenetRaw": {'data_path': 'val', 'image_list':None}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "dataset": {"ImagenetRaw": {"data_path": "val", "image_list": None}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - dataloader = create_dataloader('onnxrt_integerops', dataloader_args) + dataloader = create_dataloader("onnxrt_integerops", dataloader_args) for data in dataloader: - self.assertEqual(data[0][0].shape, (24,24,3)) + self.assertEqual(data[0][0].shape, (24, 24, 3)) break dataloader_args = { - 'dataset': {"ImagenetRaw": {'data_path':'val', 'image_list':'val/val.txt'}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "dataset": {"ImagenetRaw": {"data_path": "val", "image_list": "val/val.txt"}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - dataloader = create_dataloader('onnxrt_integerops', dataloader_args) + dataloader = create_dataloader("onnxrt_integerops", dataloader_args) for data in 
dataloader: - self.assertEqual(data[0][0].shape, (24,24,3)) + self.assertEqual(data[0][0].shape, (24, 24, 3)) break # test old api - eval_dataset = create_dataset('onnxrt_integerops', {'Imagenet':{'root':'./'}}, None, None) - dataloader = DataLoader('onnxrt_integerops', dataset=eval_dataset, batch_size=1) + eval_dataset = create_dataset("onnxrt_integerops", {"Imagenet": {"root": "./"}}, None, None) + dataloader = DataLoader("onnxrt_integerops", dataset=eval_dataset, batch_size=1) for data in dataloader: - self.assertEqual(data[0][0].shape, (100,100,3)) + self.assertEqual(data[0][0].shape, (100, 100, 3)) break - with open('val/fake_map.txt', 'w') as f: - f.write('test.jpg 0 \n') - f.write('test2.jpg 1') + with open("val/fake_map.txt", "w") as f: + f.write("test.jpg 0 \n") + f.write("test2.jpg 1") dataset_args = { - "ImagenetRaw": {'data_path':'val', 'image_list':'val/fake_map.txt'}, + "ImagenetRaw": {"data_path": "val", "image_list": "val/fake_map.txt"}, } - dataset = create_dataset('onnxrt_integerops', dataset_args, None, None) + dataset = create_dataset("onnxrt_integerops", dataset_args, None, None) self.assertEqual(len(dataset), 1) - with open('val/fake_map.txt', 'w') as f: - f.write('test2.jpg 1') + with open("val/fake_map.txt", "w") as f: + f.write("test2.jpg 1") dataloader_args = { - 'dataset': {"ImagenetRaw": {'data_path':'val', 'image_list':'val/fake_map.txt'}}, - 'transform': None, - 'filter': None + "dataset": {"ImagenetRaw": {"data_path": "val", "image_list": "val/fake_map.txt"}}, + "transform": None, + "filter": None, } - self.assertRaises(ValueError, create_dataloader, 'onnxrt_integerops', dataloader_args) + self.assertRaises(ValueError, create_dataloader, "onnxrt_integerops", dataloader_args) - with open('val/not_found_map.txt', 'w') as f: - f.write('test.jpg 0' + '\n') - f.write('not_found.jpg 1') + with open("val/not_found_map.txt", "w") as f: + f.write("test.jpg 0" + "\n") + f.write("not_found.jpg 1") dataloader_args = { - 'dataset': {"ImagenetRaw": {'data_path':'val', 'image_list':'val/not_found_map.txt'}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "dataset": {"ImagenetRaw": {"data_path": "val", "image_list": "val/not_found_map.txt"}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - dataloader = create_dataloader('onnxrt_integerops', dataloader_args) + dataloader = create_dataloader("onnxrt_integerops", dataloader_args) for data in dataloader: - self.assertEqual(data[0][0].shape, (24,24,3)) + self.assertEqual(data[0][0].shape, (24, 24, 3)) break - with open('val/blank.txt', 'w') as f: - f.write('blank.jpg 0') + with open("val/blank.txt", "w") as f: + f.write("blank.jpg 0") dataloader_args = { - 'dataset': {"ImagenetRaw": {'data_path':'val', 'image_list':'val/blank.txt'}}, - 'transform': None, - 'filter': None + "dataset": {"ImagenetRaw": {"data_path": "val", "image_list": "val/blank.txt"}}, + "transform": None, + "filter": None, } - self.assertRaises(ValueError, create_dataloader, - 'onnxrt_qlinearops', dataloader_args) + self.assertRaises(ValueError, create_dataloader, "onnxrt_qlinearops", dataloader_args) class TestImageFolder(unittest.TestCase): @classmethod def setUpClass(cls): - os.makedirs('val', exist_ok=True) - os.makedirs('val/0', exist_ok=True) - random_array = np.random.random_sample([100,100,3]) * 255 + os.makedirs("val", exist_ok=True) + os.makedirs("val/0", exist_ok=True) + random_array = np.random.random_sample([100, 100, 3]) * 255 random_array = random_array.astype(np.uint8) random_array = random_array.astype(np.uint8) im = 
Image.fromarray(random_array) - im.save('val/0/test.jpg') + im.save("val/0/test.jpg") @classmethod def tearDownClass(cls): - if os.path.exists('val'): - shutil.rmtree('val') + if os.path.exists("val"): + shutil.rmtree("val") def test_tensorflow(self): dataloader_args = { - 'dataset': {"ImageFolder": {'root': './val'}}, - 'transform': {'RandomResizedCrop': {'size': 24}}, - 'filter': None + "dataset": {"ImageFolder": {"root": "./val"}}, + "transform": {"RandomResizedCrop": {"size": 24}}, + "filter": None, } - dataloader = create_dataloader('tensorflow', dataloader_args) + dataloader = create_dataloader("tensorflow", dataloader_args) for data in dataloader: - self.assertEqual(data[0][0].shape, (24,24,3)) + self.assertEqual(data[0][0].shape, (24, 24, 3)) break def test_pytorch(self): dataloader_args = { - 'dataset': {"ImageFolder": {'root': './val'}}, - 'transform': {'Resize': {'size': 24}, 'ToTensor':{}}, - 'filter': None + "dataset": {"ImageFolder": {"root": "./val"}}, + "transform": {"Resize": {"size": 24}, "ToTensor": {}}, + "filter": None, } - dataloader = create_dataloader('pytorch', dataloader_args) + dataloader = create_dataloader("pytorch", dataloader_args) for data in dataloader: - self.assertEqual(data[0][0].shape, (3,24,24)) + self.assertEqual(data[0][0].shape, (3, 24, 24)) break @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows yet") def test_mxnet(self): dataloader_args = { - 'dataset': {"ImageFolder": {'root': './val'}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "dataset": {"ImageFolder": {"root": "./val"}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - dataloader = create_dataloader('mxnet', dataloader_args) + dataloader = create_dataloader("mxnet", dataloader_args) for data in dataloader: - self.assertEqual(data[0][0].shape, (24,24,3)) + self.assertEqual(data[0][0].shape, (24, 24, 3)) break def test_onnx(self): dataloader_args = { - 'dataset': {"ImageFolder": {'root': './val'}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None + "dataset": {"ImageFolder": {"root": "./val"}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, } - dataloader = create_dataloader('onnxrt_integerops', dataloader_args) + dataloader = create_dataloader("onnxrt_integerops", dataloader_args) for data in dataloader: - self.assertEqual(data[0][0].shape, (24,24,3)) + self.assertEqual(data[0][0].shape, (24, 24, 3)) break + class TestDataloader(unittest.TestCase): def test_iterable_dataset(self): class iter_dataset(object): def __iter__(self): for i in range(100): yield np.zeros([256, 256, 3]) + dataset = iter_dataset() - data_loader = DATALOADERS['tensorflow'](dataset) + data_loader = DATALOADERS["tensorflow"](dataset) iterator = iter(data_loader) data = next(iterator) self.assertEqual(data.shape, (1, 256, 256, 3)) def test_tensorflow_bert(self): import collections - import tensorflow as tf import json - label = [{ - "paragraphs":[ - {'context': - 'Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season.', - 'qas': [{ - 'answers': [ - {'answer_start': 177, 'text': 'Denver Broncos'}, - {'answer_start': 177, 'text': 'Denver Broncos'}, - {'answer_start': 177, 'text': 'Denver Broncos'}], - 'question': 'Which NFL team represented the AFC at Super Bowl 50?', - 'id': '56be4db0acb8001400a502ec'}] - } - ] - }] - fake_json = json.dumps({'data': label, 'version': '1.1'}) - with open('dev.json', 'w') as f: + + import tensorflow as tf + + label = [ + { + 
"paragraphs": [ + { + "context": "Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season.", + "qas": [ + { + "answers": [ + {"answer_start": 177, "text": "Denver Broncos"}, + {"answer_start": 177, "text": "Denver Broncos"}, + {"answer_start": 177, "text": "Denver Broncos"}, + ], + "question": "Which NFL team represented the AFC at Super Bowl 50?", + "id": "56be4db0acb8001400a502ec", + } + ], + } + ] + } + ] + fake_json = json.dumps({"data": label, "version": "1.1"}) + with open("dev.json", "w") as f: f.write(fake_json) unique_id = 1000000000 example_index = 0 doc_span_index = 0 - tokens = ['[SEP]', 'super', 'bowl', '50', 'was', 'an', 'american', 'football', 'game', 'to', 'determine', \ - 'the', 'champion', 'of', 'the', 'national', 'football', 'league', '(', 'nfl', ')', 'for', 'the', \ - '2015', 'season', '.', '[SEP]'] - token_to_orig_map = {13: 0, 14: 1, 15: 2, 16: 3, 17: 4, 18: 5, 19: 6, 20: 7, 21: 8, 22: 9, 23: 10, 24: 11, 25: 12, \ - 26: 13, 27: 14, 28: 15, 29: 16, 30: 17, 31: 17, 32: 17, 33: 18, 34: 19, 35: 20, 36: 21, 37: 21, \ - 38: 22, 39: 23, 40: 24, 41: 25} - token_is_max_context = {13: True, 14: True, 15: True, 16: True, 17: True, 18: True, 19: True, 20: True, 21: True, 22: True, \ - 23: True, 24: True, 25: True, 26: True, 27: True, 28: True, 29: True, 30: True, 31: True, 32: True, 33: True, 34: True, \ - 35: True, 36: True, 37: True, 38: True, 39: True, 40: True, 41: True} - input_ids = [101, 2029, 5088, 2136, 3421, 1996, 10511, 2012, 3565, 4605, 2753, 1029, 102, 3565, 4605, 2753,\ - 1007, 2005, 1996, 2325, 2161, 1012, 1996, 2137, 2374, 3034, 1006] + tokens = [ + "[SEP]", + "super", + "bowl", + "50", + "was", + "an", + "american", + "football", + "game", + "to", + "determine", + "the", + "champion", + "of", + "the", + "national", + "football", + "league", + "(", + "nfl", + ")", + "for", + "the", + "2015", + "season", + ".", + "[SEP]", + ] + token_to_orig_map = { + 13: 0, + 14: 1, + 15: 2, + 16: 3, + 17: 4, + 18: 5, + 19: 6, + 20: 7, + 21: 8, + 22: 9, + 23: 10, + 24: 11, + 25: 12, + 26: 13, + 27: 14, + 28: 15, + 29: 16, + 30: 17, + 31: 17, + 32: 17, + 33: 18, + 34: 19, + 35: 20, + 36: 21, + 37: 21, + 38: 22, + 39: 23, + 40: 24, + 41: 25, + } + token_is_max_context = { + 13: True, + 14: True, + 15: True, + 16: True, + 17: True, + 18: True, + 19: True, + 20: True, + 21: True, + 22: True, + 23: True, + 24: True, + 25: True, + 26: True, + 27: True, + 28: True, + 29: True, + 30: True, + 31: True, + 32: True, + 33: True, + 34: True, + 35: True, + 36: True, + 37: True, + 38: True, + 39: True, + 40: True, + 41: True, + } + input_ids = [ + 101, + 2029, + 5088, + 2136, + 3421, + 1996, + 10511, + 2012, + 3565, + 4605, + 2753, + 1029, + 102, + 3565, + 4605, + 2753, + 1007, + 2005, + 1996, + 2325, + 2161, + 1012, + 1996, + 2137, + 2374, + 3034, + 1006, + ] input_mask = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] segment_ids = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] - with tf.io.TFRecordWriter('./test.record') as writer: + with tf.io.TFRecordWriter("./test.record") as writer: features = collections.OrderedDict() - features["unique_ids"] = tf.train.Feature( - int64_list=tf.train.Int64List(value=list([unique_id]))) - features["input_ids"] = tf.train.Feature( - int64_list=tf.train.Int64List(value=list(input_ids))) - features["input_mask"] = tf.train.Feature( - int64_list=tf.train.Int64List(value=list(input_mask))) - features["segment_ids"] = 
tf.train.Feature( - int64_list=tf.train.Int64List(value=list(segment_ids))) + features["unique_ids"] = tf.train.Feature(int64_list=tf.train.Int64List(value=list([unique_id]))) + features["input_ids"] = tf.train.Feature(int64_list=tf.train.Int64List(value=list(input_ids))) + features["input_mask"] = tf.train.Feature(int64_list=tf.train.Int64List(value=list(input_mask))) + features["segment_ids"] = tf.train.Feature(int64_list=tf.train.Int64List(value=list(segment_ids))) tf_example = tf.train.Example(features=tf.train.Features(feature=features)) writer.write(tf_example.SerializeToString()) eval_dataset = create_dataset( - 'tensorflow', - {'bert':{'root':'test.record', 'label_file': './dev.json'}}, - None, - None) - dataloader = DATALOADERS['tensorflow'](dataset=eval_dataset, batch_size=1) + "tensorflow", {"bert": {"root": "test.record", "label_file": "./dev.json"}}, None, None + ) + dataloader = DATALOADERS["tensorflow"](dataset=eval_dataset, batch_size=1) for inputs, labels in dataloader: self.assertEqual(inputs[1], 1) self.assertEqual(len(labels), 1) - os.remove('test.record') - os.remove('dev.json') + os.remove("test.record") + os.remove("dev.json") def test_onnx_imagenet(self): - os.makedirs('val', exist_ok=True) - os.makedirs('val/0', exist_ok=True) - random_array = np.random.random_sample([100,100,3]) * 255 + os.makedirs("val", exist_ok=True) + os.makedirs("val/0", exist_ok=True) + random_array = np.random.random_sample([100, 100, 3]) * 255 random_array = random_array.astype(np.uint8) random_array = random_array.astype(np.uint8) im = Image.fromarray(random_array) - im.save('val/test.jpg') - args = {'ImageFolder': {'root': './val'}} - ds = create_dataset('onnxrt_qlinearops', args, None, None) - dataloader = DATALOADERS['onnxrt_qlinearops'](ds) + im.save("val/test.jpg") + args = {"ImageFolder": {"root": "./val"}} + ds = create_dataset("onnxrt_qlinearops", args, None, None) + dataloader = DATALOADERS["onnxrt_qlinearops"](ds) for image, label in dataloader: - self.assertEqual(image[0].size, (100,100)) - shutil.rmtree('val') + self.assertEqual(image[0].size, (100, 100)) + shutil.rmtree("val") def test_voc_record(self): - import six import collections import collections.abc + + import six import tensorflow as tf + tf.compat.v1.disable_eager_execution() def _bytes_list_feature(values): def norm2bytes(value): return value.encode() if isinstance(value, str) and six.PY3 else value - return tf.train.Feature( - bytes_list=tf.train.BytesList(value=[norm2bytes(values)])) + + return tf.train.Feature(bytes_list=tf.train.BytesList(value=[norm2bytes(values)])) def _int64_list_feature(values): if not isinstance(values, collections.abc.Iterable): values = [values] return tf.train.Feature(int64_list=tf.train.Int64List(value=values)) - random_array = np.random.random_sample([100,100,3]) * 255 + random_array = np.random.random_sample([100, 100, 3]) * 255 random_array = random_array.astype(np.uint8) im = Image.fromarray(random_array) - im.save('test.jpg') - random_array = np.random.random_sample([100,100,3]) * 0 + im.save("test.jpg") + random_array = np.random.random_sample([100, 100, 3]) * 0 random_array = random_array.astype(np.uint8) im = Image.fromarray(random_array) - im.save('test.png') - image_data = tf.compat.v1.gfile.GFile('test.jpg', 'rb').read() - seg_data = tf.compat.v1.gfile.GFile('test.png', 'rb').read() - filename = 'test' - - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': _bytes_list_feature(image_data), - 'image/filename': _bytes_list_feature(filename), - 
'image/format': _bytes_list_feature('png'), - 'image/height': _int64_list_feature(100), - 'image/width': _int64_list_feature(100), - 'image/channels': _int64_list_feature(3), - 'image/segmentation/class/encoded': ( - _bytes_list_feature(seg_data)), - 'image/segmentation/class/format': _bytes_list_feature('png'), - })) - - if not os.path.exists('./test_record'): - os.mkdir('./test_record') - with tf.io.TFRecordWriter('./test_record/val-test.record') as writer: + im.save("test.png") + image_data = tf.compat.v1.gfile.GFile("test.jpg", "rb").read() + seg_data = tf.compat.v1.gfile.GFile("test.png", "rb").read() + filename = "test" + + example = tf.train.Example( + features=tf.train.Features( + feature={ + "image/encoded": _bytes_list_feature(image_data), + "image/filename": _bytes_list_feature(filename), + "image/format": _bytes_list_feature("png"), + "image/height": _int64_list_feature(100), + "image/width": _int64_list_feature(100), + "image/channels": _int64_list_feature(3), + "image/segmentation/class/encoded": (_bytes_list_feature(seg_data)), + "image/segmentation/class/format": _bytes_list_feature("png"), + } + ) + ) + + if not os.path.exists("./test_record"): + os.mkdir("./test_record") + with tf.io.TFRecordWriter("./test_record/val-test.record") as writer: writer.write(example.SerializeToString()) eval_dataset = create_dataset( - 'tensorflow', {'VOCRecord':{'root':'./test_record'}}, {'ParseDecodeVoc':{}}, None) - dataloader = DATALOADERS['tensorflow'](dataset=eval_dataset, batch_size=1) - for (inputs, labels) in dataloader: - self.assertEqual(inputs.shape, (1,100,100,3)) - self.assertEqual(labels[0].shape, (100,100,1)) + "tensorflow", {"VOCRecord": {"root": "./test_record"}}, {"ParseDecodeVoc": {}}, None + ) + dataloader = DATALOADERS["tensorflow"](dataset=eval_dataset, batch_size=1) + for inputs, labels in dataloader: + self.assertEqual(inputs.shape, (1, 100, 100, 3)) + self.assertEqual(labels[0].shape, (100, 100, 1)) - os.remove('./test_record/val-test.record') - os.remove('test.jpg') - os.remove('test.png') - shutil.rmtree('./test_record') + os.remove("./test_record/val-test.record") + os.remove("test.jpg") + os.remove("test.png") + shutil.rmtree("./test_record") def test_coco_record(self): import tensorflow as tf - random_array = np.random.random_sample([100,100,3]) * 255 + + random_array = np.random.random_sample([100, 100, 3]) * 255 random_array = random_array.astype(np.uint8) im = Image.fromarray(random_array) - im.save('test.jpeg') - - image = tf.compat.v1.gfile.FastGFile('test.jpeg','rb').read() - source_id = '000000397133.jpg'.encode('utf-8') - label = 'person'.encode('utf-8') - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded':tf.train.Feature( - bytes_list=tf.train.BytesList(value=[image])), - 'image/object/class/text':tf.train.Feature( - bytes_list=tf.train.BytesList(value=[label])), - 'image/source_id':tf.train.Feature( - bytes_list=tf.train.BytesList(value=[source_id])), - 'image/object/bbox/xmin':tf.train.Feature( - float_list=tf.train.FloatList(value=[10])), - 'image/object/bbox/ymin':tf.train.Feature( - float_list=tf.train.FloatList(value=[10])), - 'image/object/bbox/xmax':tf.train.Feature( - float_list=tf.train.FloatList(value=[100])), - 'image/object/bbox/ymax':tf.train.Feature( - float_list=tf.train.FloatList(value=[100])), - })) - - with tf.io.TFRecordWriter('test.record') as writer: + im.save("test.jpeg") + + image = tf.compat.v1.gfile.FastGFile("test.jpeg", "rb").read() + source_id = "000000397133.jpg".encode("utf-8") + label = 
"person".encode("utf-8") + example = tf.train.Example( + features=tf.train.Features( + feature={ + "image/encoded": tf.train.Feature(bytes_list=tf.train.BytesList(value=[image])), + "image/object/class/text": tf.train.Feature(bytes_list=tf.train.BytesList(value=[label])), + "image/source_id": tf.train.Feature(bytes_list=tf.train.BytesList(value=[source_id])), + "image/object/bbox/xmin": tf.train.Feature(float_list=tf.train.FloatList(value=[10])), + "image/object/bbox/ymin": tf.train.Feature(float_list=tf.train.FloatList(value=[10])), + "image/object/bbox/xmax": tf.train.Feature(float_list=tf.train.FloatList(value=[100])), + "image/object/bbox/ymax": tf.train.Feature(float_list=tf.train.FloatList(value=[100])), + } + ) + ) + + with tf.io.TFRecordWriter("test.record") as writer: writer.write(example.SerializeToString()) eval_dataset = create_dataset( - 'tensorflow', - {'COCORecord':{'root':'test.record'}}, - {'RandomVerticalFlip': {}, - 'RandomHorizontalFlip': {}, - 'CropResize':{'x':0, 'y':0, 'width':10, 'height':10, 'size':[5,5]}, - 'Transpose':{'perm': [2, 0, 1]} + "tensorflow", + {"COCORecord": {"root": "test.record"}}, + { + "RandomVerticalFlip": {}, + "RandomHorizontalFlip": {}, + "CropResize": {"x": 0, "y": 0, "width": 10, "height": 10, "size": [5, 5]}, + "Transpose": {"perm": [2, 0, 1]}, }, - None) - dataloader = DATALOADERS['tensorflow'](dataset=eval_dataset, batch_size=1) + None, + ) + dataloader = DATALOADERS["tensorflow"](dataset=eval_dataset, batch_size=1) for inputs, labels in dataloader: - self.assertEqual(inputs.shape, (1,3,5,5)) - self.assertEqual(labels[0].shape, (1,1,4)) - os.remove('test.record') - os.remove('test.jpeg') + self.assertEqual(inputs.shape, (1, 3, 5, 5)) + self.assertEqual(labels[0].shape, (1, 1, 4)) + os.remove("test.record") + os.remove("test.jpeg") def test_coco_record_disable_eager(self): import tensorflow as tf + tf.compat.v1.disable_eager_execution() self.test_coco_record() @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows yet") def test_coco_raw(self): - import json import collections - from neural_compressor.data import TRANSFORMS + import json + import mxnet as mx - random_array = np.random.random_sample([100,100,3]) * 255 + + from neural_compressor.data import TRANSFORMS + + random_array = np.random.random_sample([100, 100, 3]) * 255 random_array = random_array.astype(np.uint8) im = Image.fromarray(random_array) - im.save('test_0.jpg') - im.save('test_1.jpg') + im.save("test_0.jpg") + im.save("test_1.jpg") fake_dict = { - 'info': { - 'description': 'COCO 2017 Dataset', - 'url': 'http://cocodataset.org', - 'version': '1.0', - 'year': 2017, - 'contributor': 'COCO Consortium', - 'date_created': '2017/09/01' - }, - 'licenses':{ - - }, - 'images':[{ - 'file_name': 'test_0.jpg', - 'height': 100, - 'width': 100, - 'id': 0 + "info": { + "description": "COCO 2017 Dataset", + "url": "http://cocodataset.org", + "version": "1.0", + "year": 2017, + "contributor": "COCO Consortium", + "date_created": "2017/09/01", }, - { - 'file_name': 'test_1.jpg', - 'height': 100, - 'width': 100, - 'id': 1 - }, - { - 'file_name': 'test_2.jpg', - 'height': 100, - 'width': 100, - 'id': 2 - }], - 'annotations':[{ - 'category_id': 18, - 'id': 1767, - 'iscrowd': 0, - 'image_id': 0, - 'bbox': [473.07, 395.93, 38.65, 28.67], - }, - { - 'category_id': 18, - 'id': 1768, - 'iscrowd': 0, - 'image_id': 1, - 'bbox': [473.07, 395.93, 38.65, 28.67], - }, - { - 'category_id': 18, - 'id': 1769, - 'iscrowd': 0, - 'image_id': 2, - 'bbox': [], - }], - 
'categories':[{ - 'supercategory': 'animal', - 'id': 18, - 'name': 'dog' - }] + "licenses": {}, + "images": [ + {"file_name": "test_0.jpg", "height": 100, "width": 100, "id": 0}, + {"file_name": "test_1.jpg", "height": 100, "width": 100, "id": 1}, + {"file_name": "test_2.jpg", "height": 100, "width": 100, "id": 2}, + ], + "annotations": [ + { + "category_id": 18, + "id": 1767, + "iscrowd": 0, + "image_id": 0, + "bbox": [473.07, 395.93, 38.65, 28.67], + }, + { + "category_id": 18, + "id": 1768, + "iscrowd": 0, + "image_id": 1, + "bbox": [473.07, 395.93, 38.65, 28.67], + }, + { + "category_id": 18, + "id": 1769, + "iscrowd": 0, + "image_id": 2, + "bbox": [], + }, + ], + "categories": [{"supercategory": "animal", "id": 18, "name": "dog"}], } fake_json = json.dumps(fake_dict) - with open('anno.json', 'w') as f: + with open("anno.json", "w") as f: f.write(fake_json) - args = {'COCORaw': {'root': './', 'img_dir': '', 'anno_dir': 'anno.json'}} - ds = create_dataset('tensorflow', args, None, None) - dataloader = DATALOADERS['tensorflow'](ds) + args = {"COCORaw": {"root": "./", "img_dir": "", "anno_dir": "anno.json"}} + ds = create_dataset("tensorflow", args, None, None) + dataloader = DATALOADERS["tensorflow"](ds) for image, label in dataloader: - self.assertEqual(image[0].shape, (100,100,3)) + self.assertEqual(image[0].shape, (100, 100, 3)) - trans_args = {'Transpose': {'perm': [2, 0, 1]}} - ds = create_dataset('tensorflow', args, trans_args, None) - dataloader = DATALOADERS['tensorflow'](ds) + trans_args = {"Transpose": {"perm": [2, 0, 1]}} + ds = create_dataset("tensorflow", args, trans_args, None) + dataloader = DATALOADERS["tensorflow"](ds) for image, label in dataloader: - self.assertEqual(image[0].shape, (3,100,100)) + self.assertEqual(image[0].shape, (3, 100, 100)) - args = {'COCORaw': {'root': './', 'img_dir': '', 'anno_dir': 'anno.json'}} - ds = create_dataset('onnxrt_qlinearops', args, None, None) - dataloader = DATALOADERS['onnxrt_qlinearops'](ds) + args = {"COCORaw": {"root": "./", "img_dir": "", "anno_dir": "anno.json"}} + ds = create_dataset("onnxrt_qlinearops", args, None, None) + dataloader = DATALOADERS["onnxrt_qlinearops"](ds) for image, label in dataloader: - self.assertEqual(image[0].shape, (100,100,3)) + self.assertEqual(image[0].shape, (100, 100, 3)) + + args = {"COCORaw": {"root": "./", "img_dir": "", "anno_dir": "anno.json"}} + ds = create_dataset("mxnet", args, None, None) - args = {'COCORaw': {'root': './', 'img_dir': '', 'anno_dir': 'anno.json'}} - ds = create_dataset('mxnet', args, None, None) def collate(batch): elem = batch[0] if isinstance(elem, mx.ndarray.NDArray): @@ -801,12 +912,14 @@ def collate(batch): return np.stack(batch) else: return batch - dataloader = DATALOADERS['mxnet'](ds, collate_fn=collate) + + dataloader = DATALOADERS["mxnet"](ds, collate_fn=collate) for image, label in dataloader: - self.assertEqual(image[0].shape, (100,100,3)) + self.assertEqual(image[0].shape, (100, 100, 3)) + + args = {"COCORaw": {"root": "./", "img_dir": "", "anno_dir": "anno.json"}} + ds = create_dataset("pytorch", args, None, None) - args = {'COCORaw': {'root': './', 'img_dir': '', 'anno_dir': 'anno.json'}} - ds = create_dataset('pytorch', args, None, None) def collate(batch): elem = batch[0] if isinstance(elem, collections.abc.Mapping): @@ -818,30 +931,33 @@ def collate(batch): return np.stack(batch) else: return batch - dataloader = DATALOADERS['pytorch'](dataset=ds, collate_fn=collate) + + dataloader = DATALOADERS["pytorch"](dataset=ds, collate_fn=collate) for image, 
label in dataloader: - self.assertEqual(image[0].shape, (100,100,3)) + self.assertEqual(image[0].shape, (100, 100, 3)) - os.remove('test_0.jpg') - os.remove('test_1.jpg') - os.remove('anno.json') + os.remove("test_0.jpg") + os.remove("test_1.jpg") + os.remove("anno.json") @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows yet") def test_coco_npy(self): - import json import collections - from neural_compressor.data import TRANSFORMS - import mxnet as mx + import json + import cv2 + import mxnet as mx import numpy as np + from neural_compressor.data import TRANSFORMS + def maybe_resize(img, dims): img = np.array(img, dtype=np.float32) if len(img.shape) < 3 or img.shape[2] != 3: # some images might be grayscale img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) - if dims != None: + if dims is not None: im_height, im_width, _ = dims img = cv2.resize(img, (im_width, im_height), interpolation=cv2.INTER_LINEAR) return img @@ -849,101 +965,85 @@ def maybe_resize(img, dims): def convert_npy(src): img = cv2.imread(src) dims = [100, 100, 3] - image_format="NHWC" + image_format = "NHWC" need_transpose = True if image_format == "NCHW" else False img = maybe_resize(img, dims) mean = np.array([0.485, 0.456, 0.406], dtype=np.float32) std = np.array([0.229, 0.224, 0.225], dtype=np.float32) - img = img / 255. - mean + img = img / 255.0 - mean img = img / std if need_transpose: img = img.transpose([2, 0, 1]) return img - random_array = np.random.random_sample([100,100,3]) * 255 + random_array = np.random.random_sample([100, 100, 3]) * 255 random_array = random_array.astype(np.uint8) im = Image.fromarray(random_array) - im.save('test_0.jpg') - im.save('test_1.jpg') - im_npy = convert_npy('test_0.jpg') - np.save('test_0.jpg.npy', im_npy) - np.save('test_1.jpg.npy', im_npy) + im.save("test_0.jpg") + im.save("test_1.jpg") + im_npy = convert_npy("test_0.jpg") + np.save("test_0.jpg.npy", im_npy) + np.save("test_1.jpg.npy", im_npy) fake_dict = { - 'info': { - 'description': 'COCO 2017 Dataset', - 'url': 'http://cocodataset.org', - 'version': '1.0', - 'year': 2017, - 'contributor': 'COCO Consortium', - 'date_created': '2017/09/01' - }, - 'licenses':{ - - }, - 'images':[{ - 'file_name': 'test_0.jpg', - 'height': 100, - 'width': 100, - 'id': 0 - }, - { - 'file_name': 'test_1.jpg', - 'height': 100, - 'width': 100, - 'id': 1 + "info": { + "description": "COCO 2017 Dataset", + "url": "http://cocodataset.org", + "version": "1.0", + "year": 2017, + "contributor": "COCO Consortium", + "date_created": "2017/09/01", }, - { - 'file_name': 'test_2.jpg', - 'height': 100, - 'width': 100, - 'id': 2 - }], - 'annotations':[{ - 'category_id': 18, - 'id': 1767, - 'iscrowd': 0, - 'image_id': 0, - 'bbox': [473.07, 395.93, 38.65, 28.67], - }, - { - 'category_id': 18, - 'id': 1768, - 'iscrowd': 0, - 'image_id': 1, - 'bbox': [473.07, 395.93, 38.65, 28.67], - }, - { - 'category_id': 18, - 'id': 1769, - 'iscrowd': 0, - 'image_id': 2, - 'bbox': [], - }], - 'categories':[{ - 'supercategory': 'animal', - 'id': 18, - 'name': 'dog' - }] + "licenses": {}, + "images": [ + {"file_name": "test_0.jpg", "height": 100, "width": 100, "id": 0}, + {"file_name": "test_1.jpg", "height": 100, "width": 100, "id": 1}, + {"file_name": "test_2.jpg", "height": 100, "width": 100, "id": 2}, + ], + "annotations": [ + { + "category_id": 18, + "id": 1767, + "iscrowd": 0, + "image_id": 0, + "bbox": [473.07, 395.93, 38.65, 28.67], + }, + { + "category_id": 18, + "id": 1768, + "iscrowd": 0, + 
"image_id": 1, + "bbox": [473.07, 395.93, 38.65, 28.67], + }, + { + "category_id": 18, + "id": 1769, + "iscrowd": 0, + "image_id": 2, + "bbox": [], + }, + ], + "categories": [{"supercategory": "animal", "id": 18, "name": "dog"}], } fake_json = json.dumps(fake_dict) - with open('anno.json', 'w') as f: + with open("anno.json", "w") as f: f.write(fake_json) - args = {'COCONpy': {'root': './', 'npy_dir': '', 'anno_dir': 'anno.json'}} - ds = create_dataset('tensorflow', args, None, None) - dataloader = DATALOADERS['tensorflow'](ds) + args = {"COCONpy": {"root": "./", "npy_dir": "", "anno_dir": "anno.json"}} + ds = create_dataset("tensorflow", args, None, None) + dataloader = DATALOADERS["tensorflow"](ds) for image, label in dataloader: - self.assertEqual(image[0].shape, (100,100,3)) + self.assertEqual(image[0].shape, (100, 100, 3)) - args = {'COCONpy': {'root': './', 'npy_dir': '', 'anno_dir': 'anno.json'}} - ds = create_dataset('onnxrt_qlinearops', args, None, None) - dataloader = DATALOADERS['onnxrt_qlinearops'](ds) + args = {"COCONpy": {"root": "./", "npy_dir": "", "anno_dir": "anno.json"}} + ds = create_dataset("onnxrt_qlinearops", args, None, None) + dataloader = DATALOADERS["onnxrt_qlinearops"](ds) for image, label in dataloader: - self.assertEqual(image[0].shape, (100,100,3)) + self.assertEqual(image[0].shape, (100, 100, 3)) + + args = {"COCONpy": {"root": "./", "npy_dir": "", "anno_dir": "anno.json"}} + ds = create_dataset("mxnet", args, None, None) - args = {'COCONpy': {'root': './', 'npy_dir': '', 'anno_dir': 'anno.json'}} - ds = create_dataset('mxnet', args, None, None) def collate(batch): elem = batch[0] if isinstance(elem, mx.ndarray.NDArray): @@ -957,12 +1057,14 @@ def collate(batch): return np.stack(batch) else: return batch - dataloader = DATALOADERS['mxnet'](ds, collate_fn=collate) + + dataloader = DATALOADERS["mxnet"](ds, collate_fn=collate) for image, label in dataloader: - self.assertEqual(image[0].shape, (100,100,3)) + self.assertEqual(image[0].shape, (100, 100, 3)) + + args = {"COCONpy": {"root": "./", "npy_dir": "", "anno_dir": "anno.json"}} + ds = create_dataset("pytorch", args, None, None) - args = {'COCONpy': {'root': './', 'npy_dir': '', 'anno_dir': 'anno.json'}} - ds = create_dataset('pytorch', args, None, None) def collate(batch): elem = batch[0] if isinstance(elem, collections.abc.Mapping): @@ -974,168 +1076,159 @@ def collate(batch): return np.stack(batch) else: return batch - dataloader = DATALOADERS['pytorch'](dataset=ds, collate_fn=collate) + + dataloader = DATALOADERS["pytorch"](dataset=ds, collate_fn=collate) for image, label in dataloader: - self.assertEqual(image[0].shape, (100,100,3)) + self.assertEqual(image[0].shape, (100, 100, 3)) - os.remove('test_0.jpg') - os.remove('test_1.jpg') - os.remove('test_0.jpg.npy') - os.remove('test_1.jpg.npy') - os.remove('anno.json') + os.remove("test_0.jpg") + os.remove("test_1.jpg") + os.remove("test_0.jpg.npy") + os.remove("test_1.jpg.npy") + os.remove("anno.json") def test_tensorflow_imagenet_dataset(self): import tensorflow as tf + tf.compat.v1.disable_eager_execution() - random_array = np.random.random_sample([100,100,3]) * 255 + random_array = np.random.random_sample([100, 100, 3]) * 255 random_array = random_array.astype(np.uint8) im = Image.fromarray(random_array) - im.save('test.jpeg') - - image = tf.compat.v1.gfile.FastGFile('test.jpeg','rb').read() - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded':tf.train.Feature( - bytes_list=tf.train.BytesList(value=[image])), - 
'image/class/label':tf.train.Feature( - int64_list=tf.train.Int64List(value=[1])), - })) + im.save("test.jpeg") + + image = tf.compat.v1.gfile.FastGFile("test.jpeg", "rb").read() + example = tf.train.Example( + features=tf.train.Features( + feature={ + "image/encoded": tf.train.Feature(bytes_list=tf.train.BytesList(value=[image])), + "image/class/label": tf.train.Feature(int64_list=tf.train.Int64List(value=[1])), + } + ) + ) - with tf.io.TFRecordWriter('validation-00000-of-00000') as writer: + with tf.io.TFRecordWriter("validation-00000-of-00000") as writer: writer.write(example.SerializeToString()) - eval_dataset = create_dataset( - 'tensorflow', {'ImageRecord':{'root':'./'}}, None, None) + eval_dataset = create_dataset("tensorflow", {"ImageRecord": {"root": "./"}}, None, None) - dataloader = DATALOADERS['tensorflow'](dataset=eval_dataset, batch_size=1) - for (inputs, labels) in dataloader: - self.assertEqual(inputs.shape, (1,100,100,3)) + dataloader = DATALOADERS["tensorflow"](dataset=eval_dataset, batch_size=1) + for inputs, labels in dataloader: + self.assertEqual(inputs.shape, (1, 100, 100, 3)) self.assertEqual(labels.shape, (1, 1)) # test old api - eval_dataset = create_dataset( - 'tensorflow', {'Imagenet':{'root':'./'}}, None, None) - dataloader = DataLoader('tensorflow', dataset=eval_dataset, batch_size=1) - for (inputs, labels) in dataloader: - self.assertEqual(inputs.shape, (1,100,100,3)) + eval_dataset = create_dataset("tensorflow", {"Imagenet": {"root": "./"}}, None, None) + dataloader = DataLoader("tensorflow", dataset=eval_dataset, batch_size=1) + for inputs, labels in dataloader: + self.assertEqual(inputs.shape, (1, 100, 100, 3)) self.assertEqual(labels.shape, (1, 1)) - os.remove('validation-00000-of-00000') - os.remove('test.jpeg') + os.remove("validation-00000-of-00000") + os.remove("test.jpeg") def test_pytorch_bert_dataset(self): - dataset = [[ - [101,2043,2001], - [1,1,1], - [[0,0,0,0,0,0,0], - [0,0,0,0,0,0,0], - [0,0,0,0,0,0,0]], - [1,1,1], - [1,1,1], - [[0,0,0,0,0,0,0], - [0,0,0,0,0,0,0], - [0,0,0,0,0,0,0]] - ]] + dataset = [ + [ + [101, 2043, 2001], + [1, 1, 1], + [[0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0]], + [1, 1, 1], + [1, 1, 1], + [[0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0]], + ] + ] with self.assertRaises(AssertionError): - create_dataset('pytorch', {'bert': {'dataset':dataset, 'task':'test'}}, - None, None) + create_dataset("pytorch", {"bert": {"dataset": dataset, "task": "test"}}, None, None) ds = create_dataset( - 'pytorch', - {'bert': {'dataset':dataset, 'task':'classifier', 'model_type':'distilbert'}}, - None, None) + "pytorch", {"bert": {"dataset": dataset, "task": "classifier", "model_type": "distilbert"}}, None, None + ) self.assertEqual(len(ds), 1) self.assertEqual(3, len(ds[0][0])) ds = create_dataset( - 'pytorch', - {'bert': {'dataset':dataset, 'task':'classifier', 'model_type':'bert'}}, - None, None) + "pytorch", {"bert": {"dataset": dataset, "task": "classifier", "model_type": "bert"}}, None, None + ) self.assertEqual(4, len(ds[0][0])) - ds = create_dataset( - 'pytorch', {'bert': {'dataset':dataset, 'task':'squad'}}, None, None) + ds = create_dataset("pytorch", {"bert": {"dataset": dataset, "task": "squad"}}, None, None) self.assertEqual(3, len(ds[0][0])) ds = create_dataset( - 'pytorch', - {'bert': {'dataset':dataset, 'task':'squad', 'model_type':'distilbert'}}, - None, None) + "pytorch", {"bert": {"dataset": dataset, "task": "squad", "model_type": "distilbert"}}, None, None + ) 
self.assertEqual(2, len(ds[0][0])) ds = create_dataset( - 'pytorch', - {'bert': {'dataset':dataset, 'task':'squad', 'model_type':'xlnet'}}, - None, None) + "pytorch", {"bert": {"dataset": dataset, "task": "squad", "model_type": "xlnet"}}, None, None + ) self.assertEqual(5, len(ds[0][0])) def test_tensorflow_dummy(self): - datasets = Datasets('tensorflow') - dataset = datasets['dummy'](shape=(4, 256, 256, 3)) + datasets = Datasets("tensorflow") + dataset = datasets["dummy"](shape=(4, 256, 256, 3)) - data_loader = DATALOADERS['tensorflow'](dataset) + data_loader = DATALOADERS["tensorflow"](dataset) iterator = iter(data_loader) data = next(iterator) self.assertEqual(data[0].shape, (1, 256, 256, 3)) # dynamic batching - data_loader.batch(batch_size=2, last_batch='rollover') + data_loader.batch(batch_size=2, last_batch="rollover") iterator = iter(data_loader) data = next(iterator) self.assertEqual(data[0].shape, (2, 256, 256, 3)) with self.assertRaises(AssertionError): - dataset = datasets['dummy'](shape=[(4, 256, 256, 3), (256, 256, 3)]) + dataset = datasets["dummy"](shape=[(4, 256, 256, 3), (256, 256, 3)]) with self.assertRaises(AssertionError): - dataset = datasets['dummy'](shape=(4, 256, 256, 3), low=[1., 0.]) + dataset = datasets["dummy"](shape=(4, 256, 256, 3), low=[1.0, 0.0]) with self.assertRaises(AssertionError): - dataset = datasets['dummy'](shape=(4, 256, 256, 3), high=[128., 127.]) + dataset = datasets["dummy"](shape=(4, 256, 256, 3), high=[128.0, 127.0]) with self.assertRaises(AssertionError): - dataset = datasets['dummy'](shape=(4, 256, 256, 3), dtype=['float32', 'int8']) + dataset = datasets["dummy"](shape=(4, 256, 256, 3), dtype=["float32", "int8"]) def test_tensorflow_dummy_v2(self): - datasets = Datasets('tensorflow') + datasets = Datasets("tensorflow") # test with label - dataset = datasets['dummy_v2'](\ - input_shape=(256, 256, 3), label_shape=(1,)) - data_loader = DATALOADERS['tensorflow'](dataset) + dataset = datasets["dummy_v2"](input_shape=(256, 256, 3), label_shape=(1,)) + data_loader = DATALOADERS["tensorflow"](dataset) iterator = iter(data_loader) data = next(iterator) self.assertEqual(data[0].shape, (1, 256, 256, 3)) self.assertEqual(data[1].shape, (1, 1)) # dynamic batching - data_loader.batch(batch_size=2, last_batch='rollover') + data_loader.batch(batch_size=2, last_batch="rollover") iterator = iter(data_loader) data = next(iterator) self.assertEqual(data[0].shape, (2, 256, 256, 3)) self.assertEqual(data[1].shape, (2, 1)) # test without label - dataset = datasets['dummy_v2'](input_shape=(256, 256, 3)) - data_loader = DATALOADERS['tensorflow'](dataset) + dataset = datasets["dummy_v2"](input_shape=(256, 256, 3)) + data_loader = DATALOADERS["tensorflow"](dataset) iterator = iter(data_loader) data = next(iterator) self.assertEqual(data.shape, (1, 256, 256, 3)) # dynamic batching - data_loader.batch(batch_size=2, last_batch='rollover') + data_loader.batch(batch_size=2, last_batch="rollover") iterator = iter(data_loader) data = next(iterator) self.assertEqual(data.shape, (2, 256, 256, 3)) with self.assertRaises(AssertionError): - dataset = datasets['dummy_v2'](\ - input_shape=(256, 256, 3), low=[1., 0.]) + dataset = datasets["dummy_v2"](input_shape=(256, 256, 3), low=[1.0, 0.0]) with self.assertRaises(AssertionError): - dataset = datasets['dummy_v2'](\ - input_shape=(256, 256, 3), high=[128., 127.]) + dataset = datasets["dummy_v2"](input_shape=(256, 256, 3), high=[128.0, 127.0]) with self.assertRaises(AssertionError): - dataset = datasets['dummy_v2'](\ - 
input_shape=(256, 256, 3), dtype=['float32', 'int8']) + dataset = datasets["dummy_v2"](input_shape=(256, 256, 3), dtype=["float32", "int8"]) def test_tensorflow_sparse_dummy_v2(self): - datasets = Datasets('tensorflow') + datasets = Datasets("tensorflow") # test with label - dataset = datasets['sparse_dummy_v2'](\ - dense_shape=[[10, 20], [5, 3]], label_shape=[[1]], sparse_ratio=[0.98, 0.8]) - data_loader = DATALOADERS['tensorflow'](dataset) + dataset = datasets["sparse_dummy_v2"]( + dense_shape=[[10, 20], [5, 3]], label_shape=[[1]], sparse_ratio=[0.98, 0.8] + ) + data_loader = DATALOADERS["tensorflow"](dataset) iterator = iter(data_loader) data = next(iterator) self.assertEqual(data[0][0][0].shape, (1, 4, 2)) @@ -1143,73 +1236,68 @@ def test_tensorflow_sparse_dummy_v2(self): self.assertEqual(data[0][1].shape, (1, 1)) # test without label - dataset = datasets['sparse_dummy_v2'](\ - dense_shape=(256, 256, 3), sparse_ratio=0.3) - data_loader = DATALOADERS['tensorflow'](dataset) + dataset = datasets["sparse_dummy_v2"](dense_shape=(256, 256, 3), sparse_ratio=0.3) + data_loader = DATALOADERS["tensorflow"](dataset) iterator = iter(data_loader) data = next(iterator) self.assertEqual(data[0][0].shape, (1, 137626, 3)) self.assertEqual(data[0][1].shape, (1, 137626)) - + with self.assertRaises(AssertionError): - dataset = datasets['sparse_dummy_v2'](\ - dense_shape=(256, 256, 3), low=[1., 0.]) + dataset = datasets["sparse_dummy_v2"](dense_shape=(256, 256, 3), low=[1.0, 0.0]) with self.assertRaises(AssertionError): - dataset = datasets['sparse_dummy_v2'](\ - dense_shape=(256, 256, 3), high=[128., 127.]) + dataset = datasets["sparse_dummy_v2"](dense_shape=(256, 256, 3), high=[128.0, 127.0]) with self.assertRaises(AssertionError): - dataset = datasets['sparse_dummy_v2'](\ - dense_shape=(256, 256, 3), dtype=['float32', 'int8']) + dataset = datasets["sparse_dummy_v2"](dense_shape=(256, 256, 3), dtype=["float32", "int8"]) with self.assertRaises(AssertionError): - dataset = datasets['sparse_dummy_v2'](\ - dense_shape=(256, 256, 3), dtype=['0.3', '0.5']) + dataset = datasets["sparse_dummy_v2"](dense_shape=(256, 256, 3), dtype=["0.3", "0.5"]) with self.assertRaises(AssertionError): - dataset = datasets['sparse_dummy_v2'](\ - dense_shape=(256, 256, 3), label_shape=[[1], [2], [3]]) + dataset = datasets["sparse_dummy_v2"](dense_shape=(256, 256, 3), label_shape=[[1], [2], [3]]) def test_tensorflow_dataloader_multi_input(self): import tensorflow as tf + x = tf.data.Dataset.from_tensor_slices((np.random.random(20), np.random.random(20))) y = tf.data.Dataset.from_tensor_slices(np.random.random(20)) dataset = tf.data.Dataset.zip((x, y)) - dataloader = DATALOADERS['tensorflow'](dataset) + dataloader = DATALOADERS["tensorflow"](dataset) for i, (x, y) in enumerate(dataloader): self.assertIsNotNone(x) self.assertIsNotNone(y) break - + def test_style_transfer_dataset(self): - random_array = np.random.random_sample([100,100,3]) * 255 + random_array = np.random.random_sample([100, 100, 3]) * 255 random_array = random_array.astype(np.uint8) im = Image.fromarray(random_array) - im.save('test.jpg') + im.save("test.jpg") - datasets = Datasets('tensorflow') - dataset = datasets['style_transfer'](content_folder='./', style_folder='./') + datasets = Datasets("tensorflow") + dataset = datasets["style_transfer"](content_folder="./", style_folder="./") length = len(dataset) image, label = dataset[0] self.assertEqual(image[0].shape, (256, 256, 3)) self.assertEqual(image[1].shape, (256, 256, 3)) - os.remove('test.jpg') + 
os.remove("test.jpg") def test_tensorflow_list_dict(self): - dataset = [{'a':1, 'b':2, 'c':3, 'd':4}, {'a':5, 'b':6, 'c':7, 'd':8}] - data_loader = DATALOADERS['tensorflow'](dataset) + dataset = [{"a": 1, "b": 2, "c": 3, "d": 4}, {"a": 5, "b": 6, "c": 7, "d": 8}] + data_loader = DATALOADERS["tensorflow"](dataset) iterator = iter(data_loader) data = next(iterator) - self.assertEqual(data, {'a':[1], 'b':[2], 'c':[3], 'd':[4]}) + self.assertEqual(data, {"a": [1], "b": [2], "c": [3], "d": [4]}) # test iterable consistent iterator = iter(data_loader) data = next(iterator) - self.assertEqual(data, {'a':[1], 'b':[2], 'c':[3], 'd':[4]}) + self.assertEqual(data, {"a": [1], "b": [2], "c": [3], "d": [4]}) # dynamic batching - data_loader.batch(batch_size=2, last_batch='rollover') + data_loader.batch(batch_size=2, last_batch="rollover") iterator = iter(data_loader) data = next(iterator) - self.assertEqual(data, {'a':[1, 5], 'b':[2, 6], 'c':[3, 7], 'd':[4, 8]}) + self.assertEqual(data, {"a": [1, 5], "b": [2, 6], "c": [3, 7], "d": [4, 8]}) # def test_tensorflow2_dataset(self): # dataset = [[1, 2, 3, 4], [5, 6, 7, 8]] @@ -1223,137 +1311,164 @@ def test_tensorflow_list_dict(self): # self.assertEqual(data[0][1], 2) def test_pytorch_dummy(self): - datasets = Datasets('pytorch') - transform = TRANSFORMS('pytorch', 'preprocess')['Resize'](**{'size':100}) - dataset = datasets['dummy'](shape=[(4, 256, 256, 3), (4, 1)], \ - high=[10., 10.], low=[0., 0.], transform=transform) + datasets = Datasets("pytorch") + transform = TRANSFORMS("pytorch", "preprocess")["Resize"](**{"size": 100}) + dataset = datasets["dummy"]( + shape=[(4, 256, 256, 3), (4, 1)], high=[10.0, 10.0], low=[0.0, 0.0], transform=transform + ) - data_loader = DATALOADERS['pytorch'](dataset) + data_loader = DATALOADERS["pytorch"](dataset) iterator = iter(data_loader) data, label = next(iterator) self.assertEqual(data[0].shape, (1, 256, 256, 3)) # dynamic batching - data_loader.batch(batch_size=2, last_batch='rollover') + data_loader.batch(batch_size=2, last_batch="rollover") iterator = iter(data_loader) data, label = next(iterator) self.assertEqual(data[0].shape, (2, 256, 256, 3)) @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows yet") def test_mxnet_dummy(self): - datasets = Datasets('mxnet') - transform = TRANSFORMS('mxnet', 'preprocess')['Resize'](**{'size':100}) - dataset = datasets['dummy'](shape=(4, 256, 256, 3), transform=transform) + datasets = Datasets("mxnet") + transform = TRANSFORMS("mxnet", "preprocess")["Resize"](**{"size": 100}) + dataset = datasets["dummy"](shape=(4, 256, 256, 3), transform=transform) - data_loader = DATALOADERS['mxnet'](dataset) + data_loader = DATALOADERS["mxnet"](dataset) iterator = iter(data_loader) data = next(iterator) self.assertEqual(data[0].shape, (1, 256, 256, 3)) # dynamic batching - data_loader.batch(batch_size=2, last_batch='rollover') + data_loader.batch(batch_size=2, last_batch="rollover") iterator = iter(data_loader) data = next(iterator) self.assertEqual(data[0].shape, (2, 256, 256, 3)) - dataset = datasets['dummy'](shape=(4, 256, 256, 3), label=True) + dataset = datasets["dummy"](shape=(4, 256, 256, 3), label=True) self.assertEqual(dataset[0][1], 0) def test_onnxrt_qlinear_dummy(self): - datasets = Datasets('onnxrt_qlinearops') - transform = TRANSFORMS('onnxrt_qlinearops', 'preprocess')['Resize'](**{'size':100}) - dataset = datasets['dummy'](shape=(4, 256, 256, 3), transform=transform) + datasets = Datasets("onnxrt_qlinearops") + transform = 
TRANSFORMS("onnxrt_qlinearops", "preprocess")["Resize"](**{"size": 100}) + dataset = datasets["dummy"](shape=(4, 256, 256, 3), transform=transform) - data_loader = DATALOADERS['onnxrt_qlinearops'](dataset) + data_loader = DATALOADERS["onnxrt_qlinearops"](dataset) iterator = iter(data_loader) data = next(iterator) self.assertEqual(data[0].shape, (1, 256, 256, 3)) # dynamic batching - data_loader.batch(batch_size=2, last_batch='rollover') + data_loader.batch(batch_size=2, last_batch="rollover") iterator = iter(data_loader) data = next(iterator) self.assertEqual(data[0].shape, (2, 256, 256, 3)) - dataset = datasets['dummy'](shape=(4, 256, 256, 3), label=False) - data_loader = DATALOADERS['onnxrt_qlinearops'](dataset) + dataset = datasets["dummy"](shape=(4, 256, 256, 3), label=False) + data_loader = DATALOADERS["onnxrt_qlinearops"](dataset) iterator = iter(data_loader) data = next(iterator) self.assertEqual(len(data), 1) with self.assertRaises(AssertionError): - dataset = datasets['dummy'](\ - shape=[(4, 256, 256, 3), (4, 256, 256, 3)], dtype=['float32', 'int8', 'int8']) + dataset = datasets["dummy"](shape=[(4, 256, 256, 3), (4, 256, 256, 3)], dtype=["float32", "int8", "int8"]) def test_onnx_integer_dummy(self): - datasets = Datasets('onnxrt_integerops') - dataset = datasets['dummy'](shape=(4, 256, 256, 3)) + datasets = Datasets("onnxrt_integerops") + dataset = datasets["dummy"](shape=(4, 256, 256, 3)) - data_loader = DATALOADERS['onnxrt_integerops'](dataset) + data_loader = DATALOADERS["onnxrt_integerops"](dataset) iterator = iter(data_loader) data = next(iterator) self.assertEqual(data[0].shape, (1, 256, 256, 3)) # dynamic batching - data_loader.batch(batch_size=2, last_batch='rollover') + data_loader.batch(batch_size=2, last_batch="rollover") iterator = iter(data_loader) data = next(iterator) self.assertEqual(data[0].shape, (2, 256, 256, 3)) def test_onnx_bert(self): import csv - os.mkdir('./MRPC') - with open('./MRPC/msr_paraphrase_test.txt', 'a') as f: - f.write('Quality #1 ID #2 ID #1 String #2 String\n') - f.write("1 1089874 1089925 PCCW 's chief operating officer , Mike Butcher , and Alex Arena , the chief financial officer , will report directly to Mr So . Current Chief Operating Officer Mike Butcher and Group Chief Financial Officer Alex Arena will report to So .") - with open('./MRPC/msr_paraphrase_train.txt', 'a') as f: - f.write('Quality #1 ID #2 ID #1 String #2 String\n') - f.write("""1 702876 702977 Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence . 
Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .""") - with open('./MRPC/dev.tsv', 'a') as f: - tsv_w = csv.writer(f, delimiter='\t') - tsv_w.writerow(['Quality', '#1 ID', '#2 ID', '#1 String', '#2 String']) - tsv_w.writerow(['1', '1355540', '1355592', "He said the foodservice pie business doesn 'tfit thecompany 's long-term growth strategy .", "The foodservice pie businessdoes notfit our long-term growth strategy ."]) - with open('./MRPC/dev_ids.tsv', 'a') as f: - tsv_w = csv.writer(f, delimiter='\t') - tsv_w.writerow(['1606495', '1606619']) - with open('./MRPC/test.tsv', 'a') as f: - tsv_w = csv.writer(f, delimiter='\t') - tsv_w.writerow(['index', '#1 ID', '#2 ID', '#1 String', '#2 String']) - tsv_w.writerow(['0', '1089874', '1089925', "PCCW 's chief operating officer , Mike Butcher , and Alex Arena , the chief financial officer , will report directly to Mr So .", "Current Chief Operating Officer Mike Butcher and Group Chief Financial Officer Alex Arena will report to So ."]) - with open('./MRPC/train.tsv', 'a') as f: - tsv_w = csv.writer(f, delimiter='\t') - tsv_w.writerow(['Quality', '#1 ID', '#2 ID', '#1 String', '#2 String']) - tsv_w.writerow(['1', '702876', '702977', """Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .""", """Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence ."""]) - - datasets = Datasets('onnxrt_integerops') - args = {'GLUE': - {'data_dir': './MRPC', - 'model_name_or_path': 'bert-base-uncased', - 'dynamic_length': True - }} - ds = create_dataset('onnxrt_qlinearops', args, None, None) - - ds = create_dataset('onnxrt_qlinearops', args, None, None) - dataloader = DATALOADERS['onnxrt_qlinearops'](ds) + + os.mkdir("./MRPC") + with open("./MRPC/msr_paraphrase_test.txt", "a") as f: + f.write("Quality #1 ID #2 ID #1 String #2 String\n") + f.write( + "1 1089874 1089925 PCCW 's chief operating officer , Mike Butcher , and Alex Arena , the chief financial officer , will report directly to Mr So . Current Chief Operating Officer Mike Butcher and Group Chief Financial Officer Alex Arena will report to So ." + ) + with open("./MRPC/msr_paraphrase_train.txt", "a") as f: + f.write("Quality #1 ID #2 ID #1 String #2 String\n") + f.write( + """1 702876 702977 Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence . 
Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .""" + ) + with open("./MRPC/dev.tsv", "a") as f: + tsv_w = csv.writer(f, delimiter="\t") + tsv_w.writerow(["Quality", "#1 ID", "#2 ID", "#1 String", "#2 String"]) + tsv_w.writerow( + [ + "1", + "1355540", + "1355592", + "He said the foodservice pie business doesn 'tfit thecompany 's long-term growth strategy .", + "The foodservice pie businessdoes notfit our long-term growth strategy .", + ] + ) + with open("./MRPC/dev_ids.tsv", "a") as f: + tsv_w = csv.writer(f, delimiter="\t") + tsv_w.writerow(["1606495", "1606619"]) + with open("./MRPC/test.tsv", "a") as f: + tsv_w = csv.writer(f, delimiter="\t") + tsv_w.writerow(["index", "#1 ID", "#2 ID", "#1 String", "#2 String"]) + tsv_w.writerow( + [ + "0", + "1089874", + "1089925", + "PCCW 's chief operating officer , Mike Butcher , and Alex Arena , the chief financial officer , will report directly to Mr So .", + "Current Chief Operating Officer Mike Butcher and Group Chief Financial Officer Alex Arena will report to So .", + ] + ) + with open("./MRPC/train.tsv", "a") as f: + tsv_w = csv.writer(f, delimiter="\t") + tsv_w.writerow(["Quality", "#1 ID", "#2 ID", "#1 String", "#2 String"]) + tsv_w.writerow( + [ + "1", + "702876", + "702977", + """Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .""", + """Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .""", + ] + ) + + datasets = Datasets("onnxrt_integerops") + args = {"GLUE": {"data_dir": "./MRPC", "model_name_or_path": "bert-base-uncased", "dynamic_length": True}} + ds = create_dataset("onnxrt_qlinearops", args, None, None) + + ds = create_dataset("onnxrt_qlinearops", args, None, None) + dataloader = DATALOADERS["onnxrt_qlinearops"](ds) for inputs, label in dataloader: self.assertEqual(len(inputs), 3) self.assertEqual(inputs[0].shape[1], 48) self.assertEqual(len(label), 1) break - shutil.rmtree('./dataset_cached') - - args = {'GLUE': - {'data_dir': './MRPC', - 'model_type': 'roberta', - 'model_name_or_path': 'roberta-base', - 'dynamic_length': False - }} - ds = create_dataset('onnxrt_qlinearops', args, None, None) - dataloader = DATALOADERS['onnxrt_qlinearops'](ds) + shutil.rmtree("./dataset_cached") + + args = { + "GLUE": { + "data_dir": "./MRPC", + "model_type": "roberta", + "model_name_or_path": "roberta-base", + "dynamic_length": False, + } + } + ds = create_dataset("onnxrt_qlinearops", args, None, None) + dataloader = DATALOADERS["onnxrt_qlinearops"](ds) for inputs, label in dataloader: self.assertEqual(len(inputs), 2) self.assertEqual(inputs[0].shape[1], 128) self.assertEqual(len(label), 1) break - shutil.rmtree('./MRPC') - shutil.rmtree('./dataset_cached') + shutil.rmtree("./MRPC") + shutil.rmtree("./dataset_cached") if __name__ == "__main__": diff --git a/test/data/test_exp_dataloader.py b/test/data/test_exp_dataloader.py index 021cccb2292..4561f38e4b1 100644 --- a/test/data/test_exp_dataloader.py +++ b/test/data/test_exp_dataloader.py @@ -1,94 +1,94 @@ """Tests for the dataloader module.""" +import os import platform +import shutil import unittest -import os + import numpy as np -import shutil -from neural_compressor.utils.create_obj_from_config import create_dataset, create_dataloader -from neural_compressor.experimental.data import DATALOADERS -from neural_compressor.experimental.data import Datasets, DATALOADERS, TRANSFORMS from PIL import Image +from 
neural_compressor.experimental.data import DATALOADERS, TRANSFORMS, Datasets +from neural_compressor.utils.create_obj_from_config import create_dataloader, create_dataset + + class TestDataloader(unittest.TestCase): def test_iterable_dataset(self): class iter_dataset(object): def __iter__(self): for i in range(100): yield np.zeros([256, 256, 3]) + dataset = iter_dataset() - data_loader = DATALOADERS['tensorflow'](dataset) + data_loader = DATALOADERS["tensorflow"](dataset) iterator = iter(data_loader) data = next(iterator) self.assertEqual(data.shape, (1, 256, 256, 3)) def test_tensorflow_dummy(self): - datasets = Datasets('tensorflow') - dataset = datasets['dummy'](shape=(4, 256, 256, 3)) + datasets = Datasets("tensorflow") + dataset = datasets["dummy"](shape=(4, 256, 256, 3)) - data_loader = DATALOADERS['tensorflow'](dataset) + data_loader = DATALOADERS["tensorflow"](dataset) iterator = iter(data_loader) data = next(iterator) self.assertEqual(data[0].shape, (1, 256, 256, 3)) # dynamic batching - data_loader.batch(batch_size=2, last_batch='rollover') + data_loader.batch(batch_size=2, last_batch="rollover") iterator = iter(data_loader) data = next(iterator) self.assertEqual(data[0].shape, (2, 256, 256, 3)) with self.assertRaises(AssertionError): - dataset = datasets['dummy'](shape=[(4, 256, 256, 3), (256, 256, 3)]) + dataset = datasets["dummy"](shape=[(4, 256, 256, 3), (256, 256, 3)]) with self.assertRaises(AssertionError): - dataset = datasets['dummy'](shape=(4, 256, 256, 3), low=[1., 0.]) + dataset = datasets["dummy"](shape=(4, 256, 256, 3), low=[1.0, 0.0]) with self.assertRaises(AssertionError): - dataset = datasets['dummy'](shape=(4, 256, 256, 3), high=[128., 127.]) + dataset = datasets["dummy"](shape=(4, 256, 256, 3), high=[128.0, 127.0]) with self.assertRaises(AssertionError): - dataset = datasets['dummy'](shape=(4, 256, 256, 3), dtype=['float32', 'int8']) + dataset = datasets["dummy"](shape=(4, 256, 256, 3), dtype=["float32", "int8"]) def test_tensorflow_dummy_v2(self): - datasets = Datasets('tensorflow') + datasets = Datasets("tensorflow") # test with label - dataset = datasets['dummy_v2'](\ - input_shape=(256, 256, 3), label_shape=(1,)) - data_loader = DATALOADERS['tensorflow'](dataset) + dataset = datasets["dummy_v2"](input_shape=(256, 256, 3), label_shape=(1,)) + data_loader = DATALOADERS["tensorflow"](dataset) iterator = iter(data_loader) data = next(iterator) self.assertEqual(data[0].shape, (1, 256, 256, 3)) self.assertEqual(data[1].shape, (1, 1)) # dynamic batching - data_loader.batch(batch_size=2, last_batch='rollover') + data_loader.batch(batch_size=2, last_batch="rollover") iterator = iter(data_loader) data = next(iterator) self.assertEqual(data[0].shape, (2, 256, 256, 3)) self.assertEqual(data[1].shape, (2, 1)) # test without label - dataset = datasets['dummy_v2'](input_shape=(256, 256, 3)) - data_loader = DATALOADERS['tensorflow'](dataset) + dataset = datasets["dummy_v2"](input_shape=(256, 256, 3)) + data_loader = DATALOADERS["tensorflow"](dataset) iterator = iter(data_loader) data = next(iterator) self.assertEqual(data.shape, (1, 256, 256, 3)) # dynamic batching - data_loader.batch(batch_size=2, last_batch='rollover') + data_loader.batch(batch_size=2, last_batch="rollover") iterator = iter(data_loader) data = next(iterator) self.assertEqual(data.shape, (2, 256, 256, 3)) with self.assertRaises(AssertionError): - dataset = datasets['dummy_v2'](\ - input_shape=(256, 256, 3), low=[1., 0.]) + dataset = datasets["dummy_v2"](input_shape=(256, 256, 3), low=[1.0, 0.0]) with 
self.assertRaises(AssertionError): - dataset = datasets['dummy_v2'](\ - input_shape=(256, 256, 3), high=[128., 127.]) + dataset = datasets["dummy_v2"](input_shape=(256, 256, 3), high=[128.0, 127.0]) with self.assertRaises(AssertionError): - dataset = datasets['dummy_v2'](\ - input_shape=(256, 256, 3), dtype=['float32', 'int8']) + dataset = datasets["dummy_v2"](input_shape=(256, 256, 3), dtype=["float32", "int8"]) def test_tensorflow_sparse_dummy_v2(self): - datasets = Datasets('tensorflow') + datasets = Datasets("tensorflow") # test with label - dataset = datasets['sparse_dummy_v2'](\ - dense_shape=[[10, 20], [5, 3]], label_shape=[[1]], sparse_ratio=[0.98, 0.8]) - data_loader = DATALOADERS['tensorflow'](dataset) + dataset = datasets["sparse_dummy_v2"]( + dense_shape=[[10, 20], [5, 3]], label_shape=[[1]], sparse_ratio=[0.98, 0.8] + ) + data_loader = DATALOADERS["tensorflow"](dataset) iterator = iter(data_loader) data = next(iterator) self.assertEqual(data[0][0][0].shape, (1, 4, 2)) @@ -96,145 +96,139 @@ def test_tensorflow_sparse_dummy_v2(self): self.assertEqual(data[0][1].shape, (1, 1)) # test without label - dataset = datasets['sparse_dummy_v2'](\ - dense_shape=(256, 256, 3), sparse_ratio=0.3) - data_loader = DATALOADERS['tensorflow'](dataset) + dataset = datasets["sparse_dummy_v2"](dense_shape=(256, 256, 3), sparse_ratio=0.3) + data_loader = DATALOADERS["tensorflow"](dataset) iterator = iter(data_loader) data = next(iterator) self.assertEqual(data[0][0].shape, (1, 137626, 3)) self.assertEqual(data[0][1].shape, (1, 137626)) - + with self.assertRaises(AssertionError): - dataset = datasets['sparse_dummy_v2'](\ - dense_shape=(256, 256, 3), low=[1., 0.]) + dataset = datasets["sparse_dummy_v2"](dense_shape=(256, 256, 3), low=[1.0, 0.0]) with self.assertRaises(AssertionError): - dataset = datasets['sparse_dummy_v2'](\ - dense_shape=(256, 256, 3), high=[128., 127.]) + dataset = datasets["sparse_dummy_v2"](dense_shape=(256, 256, 3), high=[128.0, 127.0]) with self.assertRaises(AssertionError): - dataset = datasets['sparse_dummy_v2'](\ - dense_shape=(256, 256, 3), dtype=['float32', 'int8']) + dataset = datasets["sparse_dummy_v2"](dense_shape=(256, 256, 3), dtype=["float32", "int8"]) with self.assertRaises(AssertionError): - dataset = datasets['sparse_dummy_v2'](\ - dense_shape=(256, 256, 3), dtype=['0.3', '0.5']) + dataset = datasets["sparse_dummy_v2"](dense_shape=(256, 256, 3), dtype=["0.3", "0.5"]) with self.assertRaises(AssertionError): - dataset = datasets['sparse_dummy_v2'](\ - dense_shape=(256, 256, 3), label_shape=[[1], [2], [3]]) + dataset = datasets["sparse_dummy_v2"](dense_shape=(256, 256, 3), label_shape=[[1], [2], [3]]) def test_tensorflow_dataloader_multi_input(self): import tensorflow as tf + x = tf.data.Dataset.from_tensor_slices((np.random.random(20), np.random.random(20))) y = tf.data.Dataset.from_tensor_slices(np.random.random(20)) dataset = tf.data.Dataset.zip((x, y)) - dataloader = DATALOADERS['tensorflow'](dataset) + dataloader = DATALOADERS["tensorflow"](dataset) for i, (x, y) in enumerate(dataloader): self.assertIsNotNone(x) self.assertIsNotNone(y) break - + def test_style_transfer_dataset(self): - random_array = np.random.random_sample([100,100,3]) * 255 + random_array = np.random.random_sample([100, 100, 3]) * 255 random_array = random_array.astype(np.uint8) im = Image.fromarray(random_array) - im.save('test.jpg') + im.save("test.jpg") - datasets = Datasets('tensorflow') - dataset = datasets['style_transfer'](content_folder='./', style_folder='./') + datasets = 
Datasets("tensorflow") + dataset = datasets["style_transfer"](content_folder="./", style_folder="./") length = len(dataset) image, label = dataset[0] self.assertEqual(image[0].shape, (256, 256, 3)) self.assertEqual(image[1].shape, (256, 256, 3)) - os.remove('test.jpg') + os.remove("test.jpg") def test_tensorflow_list_dict(self): - dataset = [{'a':1, 'b':2, 'c':3, 'd':4}, {'a':5, 'b':6, 'c':7, 'd':8}] - data_loader = DATALOADERS['tensorflow'](dataset) + dataset = [{"a": 1, "b": 2, "c": 3, "d": 4}, {"a": 5, "b": 6, "c": 7, "d": 8}] + data_loader = DATALOADERS["tensorflow"](dataset) iterator = iter(data_loader) data = next(iterator) - self.assertEqual(data, {'a':[1], 'b':[2], 'c':[3], 'd':[4]}) + self.assertEqual(data, {"a": [1], "b": [2], "c": [3], "d": [4]}) # test iterable consistent iterator = iter(data_loader) data = next(iterator) - self.assertEqual(data, {'a':[1], 'b':[2], 'c':[3], 'd':[4]}) + self.assertEqual(data, {"a": [1], "b": [2], "c": [3], "d": [4]}) # dynamic batching - data_loader.batch(batch_size=2, last_batch='rollover') + data_loader.batch(batch_size=2, last_batch="rollover") iterator = iter(data_loader) data = next(iterator) - self.assertEqual(data, {'a':[1, 5], 'b':[2, 6], 'c':[3, 7], 'd':[4, 8]}) - + self.assertEqual(data, {"a": [1, 5], "b": [2, 6], "c": [3, 7], "d": [4, 8]}) def test_pytorch_dummy(self): - datasets = Datasets('pytorch') - transform = TRANSFORMS('pytorch', 'preprocess')['Resize'](**{'size':100}) - dataset = datasets['dummy'](shape=[(4, 256, 256, 3), (4, 1)], \ - high=[10., 10.], low=[0., 0.], transform=transform) + datasets = Datasets("pytorch") + transform = TRANSFORMS("pytorch", "preprocess")["Resize"](**{"size": 100}) + dataset = datasets["dummy"]( + shape=[(4, 256, 256, 3), (4, 1)], high=[10.0, 10.0], low=[0.0, 0.0], transform=transform + ) - data_loader = DATALOADERS['pytorch'](dataset) + data_loader = DATALOADERS["pytorch"](dataset) iterator = iter(data_loader) data, label = next(iterator) self.assertEqual(data[0].shape, (1, 256, 256, 3)) # dynamic batching - data_loader.batch(batch_size=2, last_batch='rollover') + data_loader.batch(batch_size=2, last_batch="rollover") iterator = iter(data_loader) data, label = next(iterator) self.assertEqual(data[0].shape, (2, 256, 256, 3)) @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows yet") def test_mxnet_dummy(self): - datasets = Datasets('mxnet') - transform = TRANSFORMS('mxnet', 'preprocess')['Resize'](**{'size':100}) - dataset = datasets['dummy'](shape=(4, 256, 256, 3), transform=transform) + datasets = Datasets("mxnet") + transform = TRANSFORMS("mxnet", "preprocess")["Resize"](**{"size": 100}) + dataset = datasets["dummy"](shape=(4, 256, 256, 3), transform=transform) - data_loader = DATALOADERS['mxnet'](dataset) + data_loader = DATALOADERS["mxnet"](dataset) iterator = iter(data_loader) data = next(iterator) self.assertEqual(data[0].shape, (1, 256, 256, 3)) # dynamic batching - data_loader.batch(batch_size=2, last_batch='rollover') + data_loader.batch(batch_size=2, last_batch="rollover") iterator = iter(data_loader) data = next(iterator) self.assertEqual(data[0].shape, (2, 256, 256, 3)) - dataset = datasets['dummy'](shape=(4, 256, 256, 3), label=True) + dataset = datasets["dummy"](shape=(4, 256, 256, 3), label=True) self.assertEqual(dataset[0][1], 0) def test_onnxrt_qlinear_dummy(self): - datasets = Datasets('onnxrt_qlinearops') - transform = TRANSFORMS('onnxrt_qlinearops', 'preprocess')['Resize'](**{'size':100}) - dataset = datasets['dummy'](shape=(4, 256, 256, 3), 
transform=transform) + datasets = Datasets("onnxrt_qlinearops") + transform = TRANSFORMS("onnxrt_qlinearops", "preprocess")["Resize"](**{"size": 100}) + dataset = datasets["dummy"](shape=(4, 256, 256, 3), transform=transform) - data_loader = DATALOADERS['onnxrt_qlinearops'](dataset) + data_loader = DATALOADERS["onnxrt_qlinearops"](dataset) iterator = iter(data_loader) data = next(iterator) self.assertEqual(data[0].shape, (1, 256, 256, 3)) # dynamic batching - data_loader.batch(batch_size=2, last_batch='rollover') + data_loader.batch(batch_size=2, last_batch="rollover") iterator = iter(data_loader) data = next(iterator) self.assertEqual(data[0].shape, (2, 256, 256, 3)) - dataset = datasets['dummy'](shape=(4, 256, 256, 3), label=False) - data_loader = DATALOADERS['onnxrt_qlinearops'](dataset) + dataset = datasets["dummy"](shape=(4, 256, 256, 3), label=False) + data_loader = DATALOADERS["onnxrt_qlinearops"](dataset) iterator = iter(data_loader) data = next(iterator) self.assertEqual(len(data), 1) with self.assertRaises(AssertionError): - dataset = datasets['dummy'](\ - shape=[(4, 256, 256, 3), (4, 256, 256, 3)], dtype=['float32', 'int8', 'int8']) + dataset = datasets["dummy"](shape=[(4, 256, 256, 3), (4, 256, 256, 3)], dtype=["float32", "int8", "int8"]) def test_onnx_integer_dummy(self): - datasets = Datasets('onnxrt_integerops') - dataset = datasets['dummy'](shape=(4, 256, 256, 3)) + datasets = Datasets("onnxrt_integerops") + dataset = datasets["dummy"](shape=(4, 256, 256, 3)) - data_loader = DATALOADERS['onnxrt_integerops'](dataset) + data_loader = DATALOADERS["onnxrt_integerops"](dataset) iterator = iter(data_loader) data = next(iterator) self.assertEqual(data[0].shape, (1, 256, 256, 3)) # dynamic batching - data_loader.batch(batch_size=2, last_batch='rollover') + data_loader.batch(batch_size=2, last_batch="rollover") iterator = iter(data_loader) data = next(iterator) self.assertEqual(data[0].shape, (2, 256, 256, 3)) diff --git a/test/data/test_exp_transformers.py b/test/data/test_exp_transformers.py index cbdbc224dd0..591b02051e6 100644 --- a/test/data/test_exp_transformers.py +++ b/test/data/test_exp_transformers.py @@ -1,42 +1,47 @@ """Tests for the transform module.""" -import numpy as np +import os +import platform import random import unittest -import platform -import os -from neural_compressor.experimental.data import TRANSFORMS, DATALOADERS -from neural_compressor.utils.create_obj_from_config import get_postprocess, create_dataset -from neural_compressor.utils.utility import LazyImport + +import numpy as np from PIL import Image -mx = LazyImport('mxnet') -tf = LazyImport('tensorflow') -torch = LazyImport('torch') -torchvision = LazyImport('torchvision') + +from neural_compressor.experimental.data import DATALOADERS, TRANSFORMS +from neural_compressor.utils.create_obj_from_config import create_dataset, get_postprocess +from neural_compressor.utils.utility import LazyImport + +mx = LazyImport("mxnet") +tf = LazyImport("tensorflow") +torch = LazyImport("torch") +torchvision = LazyImport("torchvision") random.seed(1) np.random.seed(1) + class TestMetrics(unittest.TestCase): def test_tensorflow_2(self): image = np.ones([256, 256, 1]) - resize_kwargs = {"size":[224, 224]} + resize_kwargs = {"size": [224, 224]} transforms = TRANSFORMS(framework="tensorflow", process="preprocess") - resize = transforms['Resize'](**resize_kwargs) + resize = transforms["Resize"](**resize_kwargs) random_crop_kwargs = {"size": 128} - random_crop = transforms['RandomCrop'](**random_crop_kwargs) + random_crop = 
transforms["RandomCrop"](**random_crop_kwargs) transform_list = [resize, random_crop] - compose = transforms['Compose'](transform_list) + compose = transforms["Compose"](transform_list) image_result = compose((image, None)) self.assertEqual(image_result[0].shape, (128, 128)) + class TestONNXQLImagenetTransform(unittest.TestCase): @classmethod def setUpClass(cls): - cls.img = np.random.random_sample([600,600])*255 + cls.img = np.random.random_sample([600, 600]) * 255 def testResizeCropImagenetTransform(self): - transforms = TRANSFORMS('onnxrt_qlinearops', "preprocess") - transform = transforms['ResizeCropImagenet'](height=224, width=224, random_crop=True) + transforms = TRANSFORMS("onnxrt_qlinearops", "preprocess") + transform = transforms["ResizeCropImagenet"](height=224, width=224, random_crop=True) sample = (self.img, 0) result = transform(sample) resized_input = result[0] @@ -44,14 +49,15 @@ def testResizeCropImagenetTransform(self): self.assertEqual(len(resized_input[0]), 224) self.assertEqual(len(resized_input[0][0]), 224) + class TestONNXITImagenetTransform(unittest.TestCase): @classmethod def setUpClass(cls): - cls.img = np.random.random_sample([600,600,3])*255 + cls.img = np.random.random_sample([600, 600, 3]) * 255 def testResizeCropImagenetTransform(self): - transforms = TRANSFORMS('onnxrt_integerops', "preprocess") - transform = transforms['ResizeCropImagenet'](height=224, width=224) + transforms = TRANSFORMS("onnxrt_integerops", "preprocess") + transform = transforms["ResizeCropImagenet"](height=224, width=224) sample = (self.img, 0) result = transform(sample) resized_input = result[0] @@ -60,8 +66,8 @@ def testResizeCropImagenetTransform(self): self.assertEqual(len(resized_input[0][0]), 224) def testResizeWithAspectRatio(self): - transforms = TRANSFORMS('onnxrt_integerops', "preprocess") - transform = transforms['ResizeWithAspectRatio'](height=224, width=224) + transforms = TRANSFORMS("onnxrt_integerops", "preprocess") + transform = transforms["ResizeWithAspectRatio"](height=224, width=224) sample = (self.img, 0) result = transform(sample) resized_input = result[0] @@ -69,12 +75,14 @@ def testResizeWithAspectRatio(self): self.assertEqual(len(resized_input[0]), 256) self.assertEqual(len(resized_input[0][0]), 3) + class TestTensorflowImagenetTransform(unittest.TestCase): tf.compat.v1.disable_v2_behavior() + def testBilinearImagenetTransform(self): - transforms = TRANSFORMS('tensorflow', "preprocess") - transform = transforms['BilinearImagenet'](height=224, width=224) - rand_input = np.random.random_sample([600,600,3]).astype(np.float32) + transforms = TRANSFORMS("tensorflow", "preprocess") + transform = transforms["BilinearImagenet"](height=224, width=224) + rand_input = np.random.random_sample([600, 600, 3]).astype(np.float32) sample = (rand_input, 0) result = transform(sample) resized_input = result[0].eval(session=tf.compat.v1.Session()) @@ -82,54 +90,73 @@ def testBilinearImagenetTransform(self): self.assertEqual(len(resized_input[0]), 224) self.assertEqual(len(resized_input[0][0]), 3) - transforms = TRANSFORMS('onnxrt_qlinearops', "preprocess") - transform = transforms['BilinearImagenet'](height=224, width=224) - rand_input = np.random.random_sample([600,600,3]).astype(np.float32) + transforms = TRANSFORMS("onnxrt_qlinearops", "preprocess") + transform = transforms["BilinearImagenet"](height=224, width=224) + rand_input = np.random.random_sample([600, 600, 3]).astype(np.float32) sample = (rand_input, 0) result = transform(sample) self.assertEqual(len(resized_input), 224) 
self.assertEqual(len(resized_input[0]), 224) self.assertEqual(len(resized_input[0][0]), 3) - + def testResizeCropImagenetTransform1(self): - transforms = TRANSFORMS('tensorflow', "preprocess") - rand_input = np.random.random_sample([600,600,3]).astype(np.float32) + transforms = TRANSFORMS("tensorflow", "preprocess") + rand_input = np.random.random_sample([600, 600, 3]).astype(np.float32) sample = (rand_input, 0) - transform = transforms['ResizeCropImagenet'](height=224, width=224, random_crop=True, - random_flip_left_right=True) + transform = transforms["ResizeCropImagenet"]( + height=224, width=224, random_crop=True, random_flip_left_right=True + ) result = transform(sample) resized_input = result[0].eval(session=tf.compat.v1.Session()) self.assertEqual(len(resized_input), 224) self.assertEqual(len(resized_input[0]), 224) self.assertEqual(len(resized_input[0][0]), 3) - @unittest.skipIf(tf.version.VERSION < '2.5.0', "Skip tf.experimental.numpy.moveaxis") + @unittest.skipIf(tf.version.VERSION < "2.5.0", "Skip tf.experimental.numpy.moveaxis") def testResizeCropImagenetTransform2(self): - transforms = TRANSFORMS('tensorflow', "preprocess") - rand_input = np.random.random_sample([600,600,3]).astype(np.float32) + transforms = TRANSFORMS("tensorflow", "preprocess") + rand_input = np.random.random_sample([600, 600, 3]).astype(np.float32) sample = (rand_input, 0) - transform = transforms['ResizeCropImagenet'](height=224, width=224, random_crop=False, - random_flip_left_right=False, data_format='channels_last', subpixels='RGB') + transform = transforms["ResizeCropImagenet"]( + height=224, + width=224, + random_crop=False, + random_flip_left_right=False, + data_format="channels_last", + subpixels="RGB", + ) result = transform(sample) resized_input1 = result[0].eval(session=tf.compat.v1.Session()) - transform = transforms['ResizeCropImagenet'](height=224, width=224, random_crop=False, - random_flip_left_right=False, data_format='channels_last', subpixels='BGR') - result = transform(sample) + transform = transforms["ResizeCropImagenet"]( + height=224, + width=224, + random_crop=False, + random_flip_left_right=False, + data_format="channels_last", + subpixels="BGR", + ) + result = transform(sample) resized_input2 = result[0].eval(session=tf.compat.v1.Session()) - self.assertTrue((resized_input1[...,0]==resized_input2[...,-1]).all()) - - transform = transforms['ResizeCropImagenet'](height=224, width=224, random_crop=False, - random_flip_left_right=False, data_format='channels_first', subpixels='BGR') + self.assertTrue((resized_input1[..., 0] == resized_input2[..., -1]).all()) + + transform = transforms["ResizeCropImagenet"]( + height=224, + width=224, + random_crop=False, + random_flip_left_right=False, + data_format="channels_first", + subpixels="BGR", + ) rand_input = np.moveaxis(rand_input, -1, 0) sample = (rand_input, 0) - result = transform(sample) + result = transform(sample) resized_input3 = result[0].eval(session=tf.compat.v1.Session()) - self.assertTrue((resized_input1[...,0]==resized_input3[...,-1]).all()) + self.assertTrue((resized_input1[..., 0] == resized_input3[..., -1]).all()) def testLabelShift(self): - transforms = TRANSFORMS('tensorflow', "postprocess") - transform = transforms['LabelShift'](label_shift=1) - rand_input = np.random.random_sample([600,600,3]).astype(np.float32) + transforms = TRANSFORMS("tensorflow", "postprocess") + transform = transforms["LabelShift"](label_shift=1) + rand_input = np.random.random_sample([600, 600, 3]).astype(np.float32) sample = (rand_input, 1001) 
label = transform(sample)[1] self.assertEqual(label, 1000) @@ -137,1024 +164,1040 @@ def testLabelShift(self): self.assertTrue(isinstance(label, np.int64) or isinstance(label, np.int32)) else: self.assertTrue(isinstance(label, np.int32)) - - label = transform((rand_input, [(1,2,3)]))[1] + + label = transform((rand_input, [(1, 2, 3)]))[1] self.assertTrue(isinstance(label, list)) self.assertTrue(isinstance(label[0], tuple)) - label = transform((rand_input, [[1,2,3]]))[1] + label = transform((rand_input, [[1, 2, 3]]))[1] self.assertTrue(isinstance(label, list)) self.assertTrue(isinstance(label[0], list)) - label = transform((rand_input, [np.array([1,2,3])]))[1] + label = transform((rand_input, [np.array([1, 2, 3])]))[1] self.assertTrue(isinstance(label, list)) self.assertTrue(isinstance(label[0], np.ndarray)) - def testQuantizedInput(self): - transforms = TRANSFORMS('tensorflow', "preprocess") - transform = transforms['QuantizedInput'](dtype='uint8', scale=100) - rand_input = np.random.random_sample([600,600,3]).astype(np.float32) + transforms = TRANSFORMS("tensorflow", "preprocess") + transform = transforms["QuantizedInput"](dtype="uint8", scale=100) + rand_input = np.random.random_sample([600, 600, 3]).astype(np.float32) sample = (rand_input, 1001) result = transform(sample) quantized_input = result[0].eval(session=tf.compat.v1.Session()) self.assertLessEqual(quantized_input.max(), 255) self.assertGreaterEqual(quantized_input.min(), 0) - transform = transforms['QuantizedInput'](dtype='uint8') + transform = transforms["QuantizedInput"](dtype="uint8") sample = (rand_input, 1001) result = transform(sample) quantized_input = result[0] self.assertLessEqual(quantized_input.max(), 1) self.assertGreaterEqual(quantized_input.min(), 0) + class TestDataConversion(unittest.TestCase): @classmethod def setUpClass(cls): if platform.system().lower() == "windows": cls.skipTest(cls, "not support mxnet on windows yet") - cls.img = np.random.random_sample([10,10,3])*255 - cls.mx_trans = TRANSFORMS('mxnet', 'preprocess') - cls.pt_trans = TRANSFORMS('pytorch', 'preprocess') - + cls.img = np.random.random_sample([10, 10, 3]) * 255 + cls.mx_trans = TRANSFORMS("mxnet", "preprocess") + cls.pt_trans = TRANSFORMS("pytorch", "preprocess") + def testToPILImage(self): - trans = TestDataConversion.pt_trans['ToPILImage']() + trans = TestDataConversion.pt_trans["ToPILImage"]() image, _ = trans((TestDataConversion.img.astype(np.uint8), None)) self.assertTrue(isinstance(image, Image.Image)) def testToTensor(self): - trans = TestDataConversion.pt_trans['ToTensor']() + trans = TestDataConversion.pt_trans["ToTensor"]() image, _ = trans((TestDataConversion.img.astype(np.uint8), None)) self.assertTrue(isinstance(image, torch.Tensor)) - trans = TestDataConversion.mx_trans['ToTensor']() + trans = TestDataConversion.mx_trans["ToTensor"]() image, _ = trans((mx.nd.array(TestDataConversion.img), None)) - self.assertTrue(isinstance(image, mx.ndarray.NDArray)) # pylint: disable=no-member + self.assertTrue(isinstance(image, mx.ndarray.NDArray)) # pylint: disable=no-member def testToNDArray(self): - trans = TestDataConversion.mx_trans['ToNDArray']() + trans = TestDataConversion.mx_trans["ToNDArray"]() image, _ = trans((TestDataConversion.img.astype(np.uint8), None)) self.assertTrue(isinstance(image, mx.ndarray.NDArray)) + class TestSameTransfoms(unittest.TestCase): @classmethod def setUpClass(cls): if platform.system().lower() == "windows": cls.skipTest(cls, "not support mxnet on windows yet") - cls.img = 
np.random.random_sample([10,10,3])*255 - cls.tf_trans = TRANSFORMS('tensorflow', 'preprocess') - cls.pt_trans = TRANSFORMS('pytorch', 'preprocess') - cls.mx_trans = TRANSFORMS('mxnet', 'preprocess') - cls.ox_trans = TRANSFORMS('onnxrt_qlinearops', 'preprocess') + cls.img = np.random.random_sample([10, 10, 3]) * 255 + cls.tf_trans = TRANSFORMS("tensorflow", "preprocess") + cls.pt_trans = TRANSFORMS("pytorch", "preprocess") + cls.mx_trans = TRANSFORMS("mxnet", "preprocess") + cls.ox_trans = TRANSFORMS("onnxrt_qlinearops", "preprocess") cls.mx_img = mx.nd.array(cls.img.astype(np.uint8)) cls.pt_img = Image.fromarray(cls.img.astype(np.uint8)) cls.tf_img = tf.constant(cls.img) - _ = TRANSFORMS('tensorflow', 'postprocess') - _ = TRANSFORMS('pytorch', 'postprocess') - _ = TRANSFORMS('mxnet', 'postprocess') - _ = TRANSFORMS('onnxrt_qlinearops' , 'postprocess') - _ = TRANSFORMS('onnxrt_integerops', 'postprocess') + _ = TRANSFORMS("tensorflow", "postprocess") + _ = TRANSFORMS("pytorch", "postprocess") + _ = TRANSFORMS("mxnet", "postprocess") + _ = TRANSFORMS("onnxrt_qlinearops", "postprocess") + _ = TRANSFORMS("onnxrt_integerops", "postprocess") def testCast(self): - args = {'dtype': 'int64'} - tf_func = TestSameTransfoms.tf_trans['Cast'](**args) + args = {"dtype": "int64"} + tf_func = TestSameTransfoms.tf_trans["Cast"](**args) tf_result = tf_func((TestSameTransfoms.img, None))[0] - self.assertEqual(tf_result[0][0][0].dtype, 'int64') + self.assertEqual(tf_result[0][0][0].dtype, "int64") tf_result = tf_func((TestSameTransfoms.tf_img, None))[0] tf_result = tf_result.eval(session=tf.compat.v1.Session()) - self.assertEqual(tf_result[0][0][0].dtype, 'int64') - mx_func = TestSameTransfoms.mx_trans['Cast'](**args) + self.assertEqual(tf_result[0][0][0].dtype, "int64") + mx_func = TestSameTransfoms.mx_trans["Cast"](**args) mx_result = mx_func((TestSameTransfoms.mx_img, None)) self.assertEqual(mx_result[0][0][0].dtype, np.int64) - ox_func = TestSameTransfoms.ox_trans['Cast'](**args) + ox_func = TestSameTransfoms.ox_trans["Cast"](**args) ox_result = ox_func((TestSameTransfoms.img, None)) - self.assertEqual(ox_result[0][0][0].dtype, 'int64') + self.assertEqual(ox_result[0][0][0].dtype, "int64") - totensor = TestSameTransfoms.pt_trans['ToTensor']() - cast = TestSameTransfoms.pt_trans['Cast'](**args) - pt_func = TestSameTransfoms.pt_trans['Compose']([totensor, cast]) + totensor = TestSameTransfoms.pt_trans["ToTensor"]() + cast = TestSameTransfoms.pt_trans["Cast"](**args) + pt_func = TestSameTransfoms.pt_trans["Compose"]([totensor, cast]) pt_result = pt_func((TestSameTransfoms.pt_img, None)) self.assertEqual(pt_result[0][0][0].dtype, torch.int64) def testCropToBoundingBox(self): - args = {'offset_height':2, 'offset_width':2, 'target_height':5, 'target_width':5} - pt_func = TestSameTransfoms.pt_trans['CropToBoundingBox'](**args) + args = {"offset_height": 2, "offset_width": 2, "target_height": 5, "target_width": 5} + pt_func = TestSameTransfoms.pt_trans["CropToBoundingBox"](**args) pt_result = pt_func((TestSameTransfoms.pt_img, None))[0] - self.assertEqual(pt_result.size, (5,5)) + self.assertEqual(pt_result.size, (5, 5)) - ox_func = TestSameTransfoms.ox_trans['CropToBoundingBox'](**args) + ox_func = TestSameTransfoms.ox_trans["CropToBoundingBox"](**args) ox_result = ox_func((TestSameTransfoms.img, None))[0] - self.assertEqual(ox_result.shape, (5,5,3)) + self.assertEqual(ox_result.shape, (5, 5, 3)) - mx_func = TestSameTransfoms.mx_trans['CropToBoundingBox'](**args) + mx_func = 
TestSameTransfoms.mx_trans["CropToBoundingBox"](**args) mx_result = mx_func((TestSameTransfoms.mx_img, None))[0] - self.assertEqual(mx_result.shape, (5,5,3)) + self.assertEqual(mx_result.shape, (5, 5, 3)) - tf_func = TestSameTransfoms.tf_trans['CropToBoundingBox'](**args) + tf_func = TestSameTransfoms.tf_trans["CropToBoundingBox"](**args) tf_result = tf_func((TestSameTransfoms.img, None))[0] - self.assertEqual(tf_result.shape, (5,5,3)) + self.assertEqual(tf_result.shape, (5, 5, 3)) tf_result = tf_func((TestSameTransfoms.tf_img, None))[0] tf_result = tf_result.eval(session=tf.compat.v1.Session()) - self.assertEqual(tf_result.shape, (5,5,3)) - + self.assertEqual(tf_result.shape, (5, 5, 3)) + def testNormalize(self): args = {} - normalize = TestSameTransfoms.pt_trans['Normalize'](**args) - totensor = TestSameTransfoms.pt_trans['ToTensor']() - pt_func = TestSameTransfoms.pt_trans['Compose']([totensor, normalize]) + normalize = TestSameTransfoms.pt_trans["Normalize"](**args) + totensor = TestSameTransfoms.pt_trans["ToTensor"]() + pt_func = TestSameTransfoms.pt_trans["Compose"]([totensor, normalize]) pt_result = pt_func((TestSameTransfoms.pt_img, None))[0] - self.assertEqual(TestSameTransfoms.img.astype( - np.uint8)[0][0][0]/255., pt_result[0][0][0]) - args = {'std': [0.]} + self.assertEqual(TestSameTransfoms.img.astype(np.uint8)[0][0][0] / 255.0, pt_result[0][0][0]) + args = {"std": [0.0]} with self.assertRaises(ValueError): - TestSameTransfoms.pt_trans['Normalize'](**args) + TestSameTransfoms.pt_trans["Normalize"](**args) def testRescale(self): - ox_func = TestSameTransfoms.ox_trans['Rescale']() + ox_func = TestSameTransfoms.ox_trans["Rescale"]() ox_result = ox_func((TestSameTransfoms.img, None))[0] - self.assertAlmostEqual(ox_result[1][2][0], TestSameTransfoms.img[1][2][0]/255.) 
+ self.assertAlmostEqual(ox_result[1][2][0], TestSameTransfoms.img[1][2][0] / 255.0) def testTranspose(self): - args = {'perm': [2, 0, 1]} - tf_func = TestSameTransfoms.tf_trans['Transpose'](**args) + args = {"perm": [2, 0, 1]} + tf_func = TestSameTransfoms.tf_trans["Transpose"](**args) tf_result = tf_func((TestSameTransfoms.img, None))[0] - ox_func = TestSameTransfoms.ox_trans['Transpose'](**args) + ox_func = TestSameTransfoms.ox_trans["Transpose"](**args) ox_result = ox_func((TestSameTransfoms.img, None))[0] - mx_func = TestSameTransfoms.mx_trans['Transpose'](**args) + mx_func = TestSameTransfoms.mx_trans["Transpose"](**args) mx_result = mx_func((TestSameTransfoms.mx_img, None))[0] - pt_transpose = TestSameTransfoms.pt_trans['Transpose'](**args) - pt_totensor = TestSameTransfoms.pt_trans['ToTensor']() - pt_compose = TestSameTransfoms.pt_trans['Compose']([pt_totensor, pt_transpose]) + pt_transpose = TestSameTransfoms.pt_trans["Transpose"](**args) + pt_totensor = TestSameTransfoms.pt_trans["ToTensor"]() + pt_compose = TestSameTransfoms.pt_trans["Compose"]([pt_totensor, pt_transpose]) pt_result = pt_compose((TestSameTransfoms.pt_img, None))[0] - - self.assertEqual(tf_result.shape, (3,10,10)) - self.assertEqual(ox_result.shape, (3,10,10)) - self.assertEqual(mx_result.shape, (3,10,10)) - self.assertEqual(pt_result.shape, (10,3,10)) + + self.assertEqual(tf_result.shape, (3, 10, 10)) + self.assertEqual(ox_result.shape, (3, 10, 10)) + self.assertEqual(mx_result.shape, (3, 10, 10)) + self.assertEqual(pt_result.shape, (10, 3, 10)) tf_result = tf_func((TestSameTransfoms.tf_img, None))[0] tf_result = tf_result.eval(session=tf.compat.v1.Session()) - self.assertEqual(tf_result.shape, (3,10,10)) - + self.assertEqual(tf_result.shape, (3, 10, 10)) + def testCenterCrop(self): - args = {'size':[4,4]} - tf_func = TestSameTransfoms.tf_trans['CenterCrop'](**args) + args = {"size": [4, 4]} + tf_func = TestSameTransfoms.tf_trans["CenterCrop"](**args) tf_result = tf_func((TestSameTransfoms.img, None))[0] - pt_func = TestSameTransfoms.pt_trans['CenterCrop'](**args) + pt_func = TestSameTransfoms.pt_trans["CenterCrop"](**args) pt_result = pt_func((TestSameTransfoms.pt_img, None))[0] - mx_func = TestSameTransfoms.mx_trans['CenterCrop'](**args) + mx_func = TestSameTransfoms.mx_trans["CenterCrop"](**args) mx_result = mx_func((TestSameTransfoms.mx_img, None))[0] - self.assertEqual(tf_result.shape, (4,4,3)) - self.assertEqual(pt_result.size, (4,4)) - self.assertEqual(mx_result.shape, (4,4,3)) + self.assertEqual(tf_result.shape, (4, 4, 3)) + self.assertEqual(pt_result.size, (4, 4)) + self.assertEqual(mx_result.shape, (4, 4, 3)) self.assertEqual(np.array(pt_result)[0][0][0], mx_result.asnumpy()[0][0][0]) self.assertEqual(np.array(pt_result)[0][0][0], int(tf_result[0][0][0])) tf_result = tf_func((TestSameTransfoms.tf_img, None))[0] tf_result = tf_result.eval(session=tf.compat.v1.Session()) - self.assertEqual(tf_result.shape, (4,4,3)) + self.assertEqual(tf_result.shape, (4, 4, 3)) - tf_result = tf_func((tf.constant(TestSameTransfoms.img.reshape((1,10,10,3))), None))[0] + tf_result = tf_func((tf.constant(TestSameTransfoms.img.reshape((1, 10, 10, 3))), None))[0] tf_result = tf_result.eval(session=tf.compat.v1.Session()) - self.assertEqual(tf_result.shape, (1,4,4,3)) + self.assertEqual(tf_result.shape, (1, 4, 4, 3)) - args = {'size':4} - tf_func = TestSameTransfoms.tf_trans['CenterCrop'](**args) + args = {"size": 4} + tf_func = TestSameTransfoms.tf_trans["CenterCrop"](**args) tf_result = tf_func((TestSameTransfoms.img, 
None))[0] - pt_func = TestSameTransfoms.pt_trans['CenterCrop'](**args) + pt_func = TestSameTransfoms.pt_trans["CenterCrop"](**args) pt_result = pt_func((TestSameTransfoms.pt_img, None))[0] - mx_func = TestSameTransfoms.mx_trans['CenterCrop'](**args) + mx_func = TestSameTransfoms.mx_trans["CenterCrop"](**args) mx_result = mx_func((TestSameTransfoms.mx_img, None))[0] - self.assertEqual(tf_result.shape, (4,4,3)) - self.assertEqual(pt_result.size, (4,4)) - self.assertEqual(mx_result.shape, (4,4,3)) + self.assertEqual(tf_result.shape, (4, 4, 3)) + self.assertEqual(pt_result.size, (4, 4)) + self.assertEqual(mx_result.shape, (4, 4, 3)) self.assertEqual(np.array(pt_result)[0][0][0], mx_result.asnumpy()[0][0][0]) self.assertEqual(np.array(pt_result)[0][0][0], int(tf_result[0][0][0])) - - args = {'size':[4]} - tf_func = TestSameTransfoms.tf_trans['CenterCrop'](**args) + + args = {"size": [4]} + tf_func = TestSameTransfoms.tf_trans["CenterCrop"](**args) tf_result = tf_func((TestSameTransfoms.img, None))[0] - self.assertEqual(tf_result.shape, (4,4,3)) + self.assertEqual(tf_result.shape, (4, 4, 3)) with self.assertRaises(ValueError): - tf_func = TestSameTransfoms.tf_trans['CenterCrop'](**args) + tf_func = TestSameTransfoms.tf_trans["CenterCrop"](**args) tf_result = tf_func((np.array([[TestSameTransfoms.img]]), None)) with self.assertRaises(ValueError): - tf_func = TestSameTransfoms.tf_trans['CenterCrop'](**args) - tf_result = tf_func((tf.constant(TestSameTransfoms.img.reshape((1,1,10,10,3))), None)) + tf_func = TestSameTransfoms.tf_trans["CenterCrop"](**args) + tf_result = tf_func((tf.constant(TestSameTransfoms.img.reshape((1, 1, 10, 10, 3))), None)) - args = {'size':[20]} + args = {"size": [20]} with self.assertRaises(ValueError): - tf_func = TestSameTransfoms.tf_trans['CenterCrop'](**args) + tf_func = TestSameTransfoms.tf_trans["CenterCrop"](**args) tf_result = tf_func((TestSameTransfoms.img, None)) with self.assertRaises(ValueError): - tf_func = TestSameTransfoms.tf_trans['CenterCrop'](**args) + tf_func = TestSameTransfoms.tf_trans["CenterCrop"](**args) tf_result = tf_func((TestSameTransfoms.tf_img, None)) def testResizeWithRatio(self): - args = {'padding': True} - label = [[0.1,0.1,0.5,0.5], [], [], []] - tf_func = TestSameTransfoms.tf_trans['ResizeWithRatio'](**args) + args = {"padding": True} + label = [[0.1, 0.1, 0.5, 0.5], [], [], []] + tf_func = TestSameTransfoms.tf_trans["ResizeWithRatio"](**args) tf_result = tf_func((TestSameTransfoms.img, label))[0] - self.assertEqual(tf_result.shape, (1365,1365,3)) - - args = {'padding': False} - tf_func = TestSameTransfoms.tf_trans['ResizeWithRatio'](**args) + self.assertEqual(tf_result.shape, (1365, 1365, 3)) + + args = {"padding": False} + tf_func = TestSameTransfoms.tf_trans["ResizeWithRatio"](**args) tf_result = tf_func((TestSameTransfoms.img, label))[0] - self.assertTrue((tf_result.shape[0]==800 or tf_result.shape[1] ==1365)) - + self.assertTrue((tf_result.shape[0] == 800 or tf_result.shape[1] == 1365)) + def testResize(self): - tf_func = TestSameTransfoms.tf_trans['Resize'](**{'size':[4,5]}) + tf_func = TestSameTransfoms.tf_trans["Resize"](**{"size": [4, 5]}) tf_result = tf_func((TestSameTransfoms.img, None))[0] - pt_func = TestSameTransfoms.pt_trans['Resize'](**{'size':[4,5]}) + pt_func = TestSameTransfoms.pt_trans["Resize"](**{"size": [4, 5]}) pt_result = pt_func((TestSameTransfoms.pt_img, None))[0] - mx_func = TestSameTransfoms.mx_trans['Resize'](**{'size':[4,5]}) + mx_func = TestSameTransfoms.mx_trans["Resize"](**{"size": [4, 5]}) mx_result = 
mx_func((TestSameTransfoms.mx_img, None))[0] - self.assertEqual(tf_result.shape, (5,4,3)) - self.assertEqual(pt_result.size, (5,4)) - self.assertEqual(mx_result.shape, (4,5,3)) + self.assertEqual(tf_result.shape, (5, 4, 3)) + self.assertEqual(pt_result.size, (5, 4)) + self.assertEqual(mx_result.shape, (4, 5, 3)) tf_result = tf_func((TestSameTransfoms.tf_img, None))[0] tf_result = tf_result.eval(session=tf.compat.v1.Session()) - self.assertEqual(tf_result.shape, (4,5,3)) + self.assertEqual(tf_result.shape, (4, 5, 3)) - args = {'size': 4} - tf_func = TestSameTransfoms.tf_trans['Resize'](**args) + args = {"size": 4} + tf_func = TestSameTransfoms.tf_trans["Resize"](**args) tf_result = tf_func((TestSameTransfoms.img, None))[0] - pt_func = TestSameTransfoms.pt_trans['Resize'](**args) + pt_func = TestSameTransfoms.pt_trans["Resize"](**args) pt_result = pt_func((TestSameTransfoms.pt_img, None))[0] - mx_func = TestSameTransfoms.mx_trans['Resize'](**args) + mx_func = TestSameTransfoms.mx_trans["Resize"](**args) mx_result = mx_func((TestSameTransfoms.mx_img, None))[0] - self.assertEqual(tf_result.shape, (4,4,3)) - self.assertEqual(pt_result.size, (4,4)) - self.assertEqual(mx_result.shape, (4,4,3)) + self.assertEqual(tf_result.shape, (4, 4, 3)) + self.assertEqual(pt_result.size, (4, 4)) + self.assertEqual(mx_result.shape, (4, 4, 3)) - args = {'size': [4]} - tf_func = TestSameTransfoms.tf_trans['Resize'](**args) + args = {"size": [4]} + tf_func = TestSameTransfoms.tf_trans["Resize"](**args) tf_result = tf_func((TestSameTransfoms.img, None))[0] - mx_func = TestSameTransfoms.mx_trans['Resize'](**args) + mx_func = TestSameTransfoms.mx_trans["Resize"](**args) mx_result = mx_func((TestSameTransfoms.mx_img, None))[0] - self.assertEqual(tf_result.shape, (4,4,3)) - self.assertEqual(mx_result.shape, (4,4,3)) + self.assertEqual(tf_result.shape, (4, 4, 3)) + self.assertEqual(mx_result.shape, (4, 4, 3)) - args = {'size': 4, 'interpolation':'test'} + args = {"size": 4, "interpolation": "test"} with self.assertRaises(ValueError): - TestSameTransfoms.tf_trans['Resize'](**args) + TestSameTransfoms.tf_trans["Resize"](**args) with self.assertRaises(ValueError): - TestSameTransfoms.pt_trans['Resize'](**args) + TestSameTransfoms.pt_trans["Resize"](**args) with self.assertRaises(ValueError): - TestSameTransfoms.mx_trans['Resize'](**args) - + TestSameTransfoms.mx_trans["Resize"](**args) + def testRandomResizedCrop(self): - tf_func = TestSameTransfoms.tf_trans['RandomResizedCrop'](**{'size':[4,5]}) + tf_func = TestSameTransfoms.tf_trans["RandomResizedCrop"](**{"size": [4, 5]}) tf_result = tf_func((TestSameTransfoms.img, None))[0] - pt_func = TestSameTransfoms.pt_trans['RandomResizedCrop'](**{'size':[4,5]}) + pt_func = TestSameTransfoms.pt_trans["RandomResizedCrop"](**{"size": [4, 5]}) pt_result = pt_func((TestSameTransfoms.pt_img, None))[0] - mx_func = TestSameTransfoms.mx_trans['RandomResizedCrop'](**{'size':[4,5]}) + mx_func = TestSameTransfoms.mx_trans["RandomResizedCrop"](**{"size": [4, 5]}) mx_result = mx_func((TestSameTransfoms.mx_img, None))[0] - self.assertEqual(tf_result.shape, (5,4,3)) - self.assertEqual(pt_result.size, (5,4)) - self.assertEqual(mx_result.shape, (4,5,3)) - + self.assertEqual(tf_result.shape, (5, 4, 3)) + self.assertEqual(pt_result.size, (5, 4)) + self.assertEqual(mx_result.shape, (4, 5, 3)) + tf_result = tf_func((TestSameTransfoms.tf_img, None))[0] tf_result = tf_result.eval(session=tf.compat.v1.Session()) - self.assertEqual(tf_result.shape, (4,5,3)) + self.assertEqual(tf_result.shape, (4, 5, 3)) - 
args = {'size': [4]} - tf_func = TestSameTransfoms.tf_trans['RandomResizedCrop'](**args) + args = {"size": [4]} + tf_func = TestSameTransfoms.tf_trans["RandomResizedCrop"](**args) tf_result = tf_func((TestSameTransfoms.img, None))[0] - self.assertEqual(tf_result.shape, (4,4,3)) - mx_func = TestSameTransfoms.mx_trans['RandomResizedCrop'](**args) + self.assertEqual(tf_result.shape, (4, 4, 3)) + mx_func = TestSameTransfoms.mx_trans["RandomResizedCrop"](**args) mx_result = mx_func((TestSameTransfoms.mx_img, None))[0] - self.assertEqual(mx_result.shape, (4,4,3)) + self.assertEqual(mx_result.shape, (4, 4, 3)) - args = {'size': 4} - tf_func = TestSameTransfoms.tf_trans['RandomResizedCrop'](**args) + args = {"size": 4} + tf_func = TestSameTransfoms.tf_trans["RandomResizedCrop"](**args) tf_result = tf_func((TestSameTransfoms.img, None))[0] - pt_func = TestSameTransfoms.pt_trans['RandomResizedCrop'](**args) + pt_func = TestSameTransfoms.pt_trans["RandomResizedCrop"](**args) pt_result = pt_func((TestSameTransfoms.pt_img, None))[0] - mx_func = TestSameTransfoms.mx_trans['RandomResizedCrop'](**args) + mx_func = TestSameTransfoms.mx_trans["RandomResizedCrop"](**args) mx_result = mx_func((TestSameTransfoms.mx_img, None))[0] - self.assertEqual(tf_result.shape, (4,4,3)) - self.assertEqual(pt_result.size, (4,4)) - self.assertEqual(mx_result.shape, (4,4,3)) + self.assertEqual(tf_result.shape, (4, 4, 3)) + self.assertEqual(pt_result.size, (4, 4)) + self.assertEqual(mx_result.shape, (4, 4, 3)) - args = {'size': 4, 'scale':(0.8, 0.2)} + args = {"size": 4, "scale": (0.8, 0.2)} with self.assertRaises(ValueError): - TestSameTransfoms.tf_trans['RandomResizedCrop'](**args) + TestSameTransfoms.tf_trans["RandomResizedCrop"](**args) with self.assertRaises(ValueError): - TestSameTransfoms.pt_trans['RandomResizedCrop'](**args) + TestSameTransfoms.pt_trans["RandomResizedCrop"](**args) with self.assertRaises(ValueError): - TestSameTransfoms.mx_trans['RandomResizedCrop'](**args) - - args = {'size': 4, 'interpolation':'test'} + TestSameTransfoms.mx_trans["RandomResizedCrop"](**args) + + args = {"size": 4, "interpolation": "test"} with self.assertRaises(ValueError): - TestSameTransfoms.tf_trans['RandomResizedCrop'](**args) + TestSameTransfoms.tf_trans["RandomResizedCrop"](**args) with self.assertRaises(ValueError): - TestSameTransfoms.pt_trans['RandomResizedCrop'](**args) + TestSameTransfoms.pt_trans["RandomResizedCrop"](**args) with self.assertRaises(ValueError): - TestSameTransfoms.mx_trans['RandomResizedCrop'](**args) + TestSameTransfoms.mx_trans["RandomResizedCrop"](**args) def testCropResize(self): - args = {'x':0, 'y':0, 'width':10, 'height':10, 'size':[5,5]} - tf_func = TestSameTransfoms.tf_trans['CropResize'](**args) + args = {"x": 0, "y": 0, "width": 10, "height": 10, "size": [5, 5]} + tf_func = TestSameTransfoms.tf_trans["CropResize"](**args) tf_result = tf_func((TestSameTransfoms.img, None))[0] - mx_func = TestSameTransfoms.mx_trans['CropResize'](**args) + mx_func = TestSameTransfoms.mx_trans["CropResize"](**args) mx_result = mx_func((TestSameTransfoms.mx_img, None))[0] - ox_func = TestSameTransfoms.ox_trans['CropResize'](**args) + ox_func = TestSameTransfoms.ox_trans["CropResize"](**args) ox_result = ox_func((TestSameTransfoms.img, None))[0] - pt_func = TestSameTransfoms.pt_trans['CropResize'](**args) + pt_func = TestSameTransfoms.pt_trans["CropResize"](**args) pt_result = pt_func((TestSameTransfoms.pt_img, None))[0] - self.assertEqual(tf_result.shape, (5,5,3)) - self.assertEqual(mx_result.shape, (5,5,3)) - 
self.assertEqual(ox_result.shape, (5,5,3)) - self.assertEqual(pt_result.size, (5,5)) + self.assertEqual(tf_result.shape, (5, 5, 3)) + self.assertEqual(mx_result.shape, (5, 5, 3)) + self.assertEqual(ox_result.shape, (5, 5, 3)) + self.assertEqual(pt_result.size, (5, 5)) tf_result = tf_func((TestSameTransfoms.tf_img, None))[0] tf_result = tf_result.eval(session=tf.compat.v1.Session()) - self.assertEqual(tf_result.shape, (5,5,3)) + self.assertEqual(tf_result.shape, (5, 5, 3)) - args = {'x':0, 'y':0, 'width':10, 'height':10, 'size':5} - tf_func = TestSameTransfoms.tf_trans['CropResize'](**args) + args = {"x": 0, "y": 0, "width": 10, "height": 10, "size": 5} + tf_func = TestSameTransfoms.tf_trans["CropResize"](**args) tf_result = tf_func((TestSameTransfoms.img, None))[0] - mx_func = TestSameTransfoms.mx_trans['CropResize'](**args) + mx_func = TestSameTransfoms.mx_trans["CropResize"](**args) mx_result = mx_func((TestSameTransfoms.mx_img, None))[0] - ox_func = TestSameTransfoms.ox_trans['CropResize'](**args) + ox_func = TestSameTransfoms.ox_trans["CropResize"](**args) ox_result = ox_func((TestSameTransfoms.img, None))[0] - self.assertEqual(tf_result.shape, (5,5,3)) - self.assertEqual(mx_result.shape, (5,5,3)) - self.assertEqual(ox_result.shape, (5,5,3)) + self.assertEqual(tf_result.shape, (5, 5, 3)) + self.assertEqual(mx_result.shape, (5, 5, 3)) + self.assertEqual(ox_result.shape, (5, 5, 3)) - args = {'x':0, 'y':0, 'width':10, 'height':10, 'size':[5]} - tf_func = TestSameTransfoms.tf_trans['CropResize'](**args) + args = {"x": 0, "y": 0, "width": 10, "height": 10, "size": [5]} + tf_func = TestSameTransfoms.tf_trans["CropResize"](**args) tf_result = tf_func((TestSameTransfoms.img, None))[0] - mx_func = TestSameTransfoms.mx_trans['CropResize'](**args) + mx_func = TestSameTransfoms.mx_trans["CropResize"](**args) mx_result = mx_func((TestSameTransfoms.mx_img, None))[0] - ox_func = TestSameTransfoms.ox_trans['CropResize'](**args) + ox_func = TestSameTransfoms.ox_trans["CropResize"](**args) ox_result = ox_func((TestSameTransfoms.img, None))[0] - self.assertEqual(tf_result.shape, (5,5,3)) - self.assertEqual(mx_result.shape, (5,5,3)) - self.assertEqual(ox_result.shape, (5,5,3)) + self.assertEqual(tf_result.shape, (5, 5, 3)) + self.assertEqual(mx_result.shape, (5, 5, 3)) + self.assertEqual(ox_result.shape, (5, 5, 3)) - args = {'x':0, 'y':0, 'width':10, 'height':10, 'size':[5,5]} - tf_func = TestSameTransfoms.tf_trans['CropResize'](**args) + args = {"x": 0, "y": 0, "width": 10, "height": 10, "size": [5, 5]} + tf_func = TestSameTransfoms.tf_trans["CropResize"](**args) tf_result = tf_func((TestSameTransfoms.img, None))[0] - mx_func = TestSameTransfoms.mx_trans['CropResize'](**args) + mx_func = TestSameTransfoms.mx_trans["CropResize"](**args) mx_result = mx_func((TestSameTransfoms.mx_img, None))[0] - ox_func = TestSameTransfoms.ox_trans['CropResize'](**args) + ox_func = TestSameTransfoms.ox_trans["CropResize"](**args) ox_result = ox_func((TestSameTransfoms.img, None))[0] - self.assertEqual(tf_result.shape, (5,5,3)) - self.assertEqual(mx_result.shape, (5,5,3)) - self.assertEqual(ox_result.shape, (5,5,3)) + self.assertEqual(tf_result.shape, (5, 5, 3)) + self.assertEqual(mx_result.shape, (5, 5, 3)) + self.assertEqual(ox_result.shape, (5, 5, 3)) - args = {'x':0, 'y':0, 'width':10, 'height':10, 'size':5, 'interpolation':'test'} + args = {"x": 0, "y": 0, "width": 10, "height": 10, "size": 5, "interpolation": "test"} with self.assertRaises(ValueError): - TestSameTransfoms.ox_trans['CropResize'](**args) + 
TestSameTransfoms.ox_trans["CropResize"](**args) with self.assertRaises(ValueError): - TestSameTransfoms.mx_trans['CropResize'](**args) + TestSameTransfoms.mx_trans["CropResize"](**args) with self.assertRaises(ValueError): - TestSameTransfoms.tf_trans['CropResize'](**args) + TestSameTransfoms.tf_trans["CropResize"](**args) with self.assertRaises(ValueError): - TestSameTransfoms.pt_trans['CropResize'](**args) + TestSameTransfoms.pt_trans["CropResize"](**args) def testRandomHorizontalFlip(self): - tf_func = TestSameTransfoms.tf_trans['RandomHorizontalFlip']() + tf_func = TestSameTransfoms.tf_trans["RandomHorizontalFlip"]() tf_result = tf_func((TestSameTransfoms.img, None))[0] - ox_func = TestSameTransfoms.ox_trans['RandomHorizontalFlip']() + ox_func = TestSameTransfoms.ox_trans["RandomHorizontalFlip"]() ox_result = ox_func((TestSameTransfoms.img, None))[0] - pt_func = TestSameTransfoms.pt_trans['RandomHorizontalFlip']() + pt_func = TestSameTransfoms.pt_trans["RandomHorizontalFlip"]() pt_result = pt_func((TestSameTransfoms.pt_img, None))[0] - mx_func = TestSameTransfoms.mx_trans['RandomHorizontalFlip']() + mx_func = TestSameTransfoms.mx_trans["RandomHorizontalFlip"]() mx_result = mx_func((TestSameTransfoms.mx_img, None))[0] self.assertTrue( - (np.array(TestSameTransfoms.pt_img) == np.array(pt_result)).all() or - (np.fliplr(np.array(TestSameTransfoms.pt_img)) == np.array(pt_result)).all() + (np.array(TestSameTransfoms.pt_img) == np.array(pt_result)).all() + or (np.fliplr(np.array(TestSameTransfoms.pt_img)) == np.array(pt_result)).all() ) self.assertTrue( - (TestSameTransfoms.img == tf_result).all() or - (np.fliplr(TestSameTransfoms.img) == tf_result).all() + (TestSameTransfoms.img == tf_result).all() or (np.fliplr(TestSameTransfoms.img) == tf_result).all() ) self.assertTrue( - (TestSameTransfoms.img == ox_result).all() or - (np.fliplr(TestSameTransfoms.img) == ox_result).all() + (TestSameTransfoms.img == ox_result).all() or (np.fliplr(TestSameTransfoms.img) == ox_result).all() ) self.assertTrue( - (TestSameTransfoms.mx_img.asnumpy() == mx_result.asnumpy()).all() or - (np.fliplr(TestSameTransfoms.mx_img.asnumpy()) == mx_result.asnumpy()).all() + (TestSameTransfoms.mx_img.asnumpy() == mx_result.asnumpy()).all() + or (np.fliplr(TestSameTransfoms.mx_img.asnumpy()) == mx_result.asnumpy()).all() ) - + tf_result = tf_func((TestSameTransfoms.tf_img, None))[0] tf_result = tf_result.eval(session=tf.compat.v1.Session()) self.assertTrue( - (TestSameTransfoms.img == tf_result).all() or - (np.fliplr(TestSameTransfoms.img) == tf_result).all() + (TestSameTransfoms.img == tf_result).all() or (np.fliplr(TestSameTransfoms.img) == tf_result).all() ) def testRandomVerticalFlip(self): - tf_func = TestSameTransfoms.tf_trans['RandomVerticalFlip']() + tf_func = TestSameTransfoms.tf_trans["RandomVerticalFlip"]() tf_result = tf_func((TestSameTransfoms.img, None))[0] - ox_func = TestSameTransfoms.ox_trans['RandomVerticalFlip']() + ox_func = TestSameTransfoms.ox_trans["RandomVerticalFlip"]() ox_result = ox_func((TestSameTransfoms.img, None))[0] - pt_func = TestSameTransfoms.pt_trans['RandomVerticalFlip']() + pt_func = TestSameTransfoms.pt_trans["RandomVerticalFlip"]() pt_result = pt_func((TestSameTransfoms.pt_img, None))[0] - mx_func = TestSameTransfoms.mx_trans['RandomVerticalFlip']() + mx_func = TestSameTransfoms.mx_trans["RandomVerticalFlip"]() mx_result = mx_func((TestSameTransfoms.mx_img, None))[0] self.assertTrue( - (np.array(TestSameTransfoms.pt_img) == np.array(pt_result)).all() or - 
(np.flipud(np.array(TestSameTransfoms.pt_img)) == np.array(pt_result)).all() + (np.array(TestSameTransfoms.pt_img) == np.array(pt_result)).all() + or (np.flipud(np.array(TestSameTransfoms.pt_img)) == np.array(pt_result)).all() ) self.assertTrue( - (TestSameTransfoms.img == tf_result).all() or - (np.flipud(TestSameTransfoms.img) == tf_result).all() + (TestSameTransfoms.img == tf_result).all() or (np.flipud(TestSameTransfoms.img) == tf_result).all() ) self.assertTrue( - (TestSameTransfoms.img == ox_result).all() or - (np.flipud(TestSameTransfoms.img) == ox_result).all() + (TestSameTransfoms.img == ox_result).all() or (np.flipud(TestSameTransfoms.img) == ox_result).all() ) self.assertTrue( - (TestSameTransfoms.mx_img.asnumpy() == mx_result.asnumpy()).all() or - (np.flipud(TestSameTransfoms.mx_img.asnumpy()) == mx_result.asnumpy()).all() + (TestSameTransfoms.mx_img.asnumpy() == mx_result.asnumpy()).all() + or (np.flipud(TestSameTransfoms.mx_img.asnumpy()) == mx_result.asnumpy()).all() ) - + tf_result = tf_func((TestSameTransfoms.tf_img, None))[0] tf_result = tf_result.eval(session=tf.compat.v1.Session()) self.assertTrue( - (TestSameTransfoms.img == tf_result).all() or - (np.flipud(TestSameTransfoms.img) == tf_result).all() + (TestSameTransfoms.img == tf_result).all() or (np.flipud(TestSameTransfoms.img) == tf_result).all() ) + class TestTFTransorm(unittest.TestCase): @classmethod def setUpClass(cls): - cls.img = np.ones([10,10,3]) + cls.img = np.ones([10, 10, 3]) cls.tf_img = tf.constant(cls.img) - cls.transforms = TRANSFORMS('tensorflow', 'preprocess') + cls.transforms = TRANSFORMS("tensorflow", "preprocess") cls.tf_img = tf.constant(cls.img) def testRandomCrop(self): - args = {'size': [50]} - transform = TestTFTransorm.transforms['RandomCrop'](**args) + args = {"size": [50]} + transform = TestTFTransorm.transforms["RandomCrop"](**args) self.assertRaises(ValueError, transform, (TestTFTransorm.img, None)) self.assertRaises(ValueError, transform, (TestTFTransorm.tf_img, None)) - - args = {'size': [5, 5]} - transform = TestTFTransorm.transforms['RandomCrop'](**args) + + args = {"size": [5, 5]} + transform = TestTFTransorm.transforms["RandomCrop"](**args) img_result = transform((TestTFTransorm.img, None))[0] - self.assertEqual(img_result.shape, (5,5,3)) - tf_result = transform((tf.constant(TestTFTransorm.img.reshape((1,10,10,3))), None))[0] + self.assertEqual(img_result.shape, (5, 5, 3)) + tf_result = transform((tf.constant(TestTFTransorm.img.reshape((1, 10, 10, 3))), None))[0] tf_result = tf_result.eval(session=tf.compat.v1.Session()) - self.assertEqual(tf_result.shape, (1,5,5,3)) + self.assertEqual(tf_result.shape, (1, 5, 5, 3)) - args = {'size': [10,10]} - transform = TestTFTransorm.transforms['RandomCrop'](**args) + args = {"size": [10, 10]} + transform = TestTFTransorm.transforms["RandomCrop"](**args) img_result = transform((TestTFTransorm.img, None))[0] - self.assertEqual(img_result.shape, (10,10,3)) + self.assertEqual(img_result.shape, (10, 10, 3)) tf_result = transform((TestTFTransorm.tf_img, None))[0] tf_result = tf_result.eval(session=tf.compat.v1.Session()) - self.assertEqual(tf_result.shape, (10,10,3)) + self.assertEqual(tf_result.shape, (10, 10, 3)) def testPaddedCenterCrop(self): - args = {'size':[4,4]} - tf_func = TestTFTransorm.transforms['PaddedCenterCrop'](**args) + args = {"size": [4, 4]} + tf_func = TestTFTransorm.transforms["PaddedCenterCrop"](**args) tf_result = tf_func((TestTFTransorm.img, None))[0] - self.assertEqual(tf_result.shape, (10,10,3)) + 
self.assertEqual(tf_result.shape, (10, 10, 3)) - args = {'size':[4,4], 'crop_padding': 4} - tf_func = TestTFTransorm.transforms['PaddedCenterCrop'](**args) + args = {"size": [4, 4], "crop_padding": 4} + tf_func = TestTFTransorm.transforms["PaddedCenterCrop"](**args) tf_result = tf_func((TestTFTransorm.img, None))[0] - self.assertEqual(tf_result.shape, (5,5,3)) + self.assertEqual(tf_result.shape, (5, 5, 3)) - args = {'size':4} - tf_func = TestTFTransorm.transforms['PaddedCenterCrop'](**args) + args = {"size": 4} + tf_func = TestTFTransorm.transforms["PaddedCenterCrop"](**args) tf_result = tf_func((TestTFTransorm.img, None))[0] - self.assertEqual(tf_result.shape, (10,10,3)) + self.assertEqual(tf_result.shape, (10, 10, 3)) - args = {'size':4, 'crop_padding':4} - tf_func = TestTFTransorm.transforms['PaddedCenterCrop'](**args) + args = {"size": 4, "crop_padding": 4} + tf_func = TestTFTransorm.transforms["PaddedCenterCrop"](**args) tf_result = tf_func((TestTFTransorm.img, None))[0] - self.assertEqual(tf_result.shape, (5,5,3)) + self.assertEqual(tf_result.shape, (5, 5, 3)) - args = {'size':[4]} - tf_func = TestTFTransorm.transforms['PaddedCenterCrop'](**args) + args = {"size": [4]} + tf_func = TestTFTransorm.transforms["PaddedCenterCrop"](**args) tf_result = tf_func((TestTFTransorm.img, None))[0] - self.assertEqual(tf_result.shape, (10,10,3)) - - args = {'size':[4], 'crop_padding':4} - tf_func = TestTFTransorm.transforms['PaddedCenterCrop'](**args) + self.assertEqual(tf_result.shape, (10, 10, 3)) + + args = {"size": [4], "crop_padding": 4} + tf_func = TestTFTransorm.transforms["PaddedCenterCrop"](**args) tf_result = tf_func((TestTFTransorm.img, None))[0] - self.assertEqual(tf_result.shape, (5,5,3)) + self.assertEqual(tf_result.shape, (5, 5, 3)) - args = {'size':[4,5], 'crop_padding':4} + args = {"size": [4, 5], "crop_padding": 4} with self.assertRaises(ValueError): - tf_func = TestTFTransorm.transforms['PaddedCenterCrop'](**args) + tf_func = TestTFTransorm.transforms["PaddedCenterCrop"](**args) tf_result = tf_func((TestTFTransorm.img, None)) def testRescale(self): - transform = TestTFTransorm.transforms['Rescale']() + transform = TestTFTransorm.transforms["Rescale"]() img_result = transform((TestTFTransorm.img, None))[0] - comp_result = np.array(TestTFTransorm.img)/255. 
+ comp_result = np.array(TestTFTransorm.img) / 255.0 self.assertAlmostEqual(img_result[0][0][0], comp_result[0][0][0], places=5) tf_result = transform((TestTFTransorm.tf_img, None))[0] tf_result = tf_result.eval(session=tf.compat.v1.Session()) - self.assertAlmostEqual(tf_result[0][0][0], comp_result[0][0][0], places=5) + self.assertAlmostEqual(tf_result[0][0][0], comp_result[0][0][0], places=5) def testNormalize(self): - args = {'mean':[0.0,0.0,0.0], 'std':[0.2, 0.5, 0.1]} - normalize = TestTFTransorm.transforms['Normalize'](**args) + args = {"mean": [0.0, 0.0, 0.0], "std": [0.2, 0.5, 0.1]} + normalize = TestTFTransorm.transforms["Normalize"](**args) img_result = normalize((TestTFTransorm.img, None))[0] - comp_result = np.array(TestTFTransorm.img)/[0.2, 0.5, 0.1] + comp_result = np.array(TestTFTransorm.img) / [0.2, 0.5, 0.1] self.assertAlmostEqual(img_result[0][0][0], comp_result[0][0][0], places=5) self.assertAlmostEqual(img_result[0][0][1], comp_result[0][0][1], places=5) self.assertAlmostEqual(img_result[0][0][2], comp_result[0][0][2], places=5) - + tf_result = normalize((TestTFTransorm.tf_img, None))[0] tf_result = tf_result.eval(session=tf.compat.v1.Session()) self.assertAlmostEqual(tf_result[0][0][0], comp_result[0][0][0], places=5) - args = {'mean':[0.0,0.0,0.0], 'std':[0, 0, 0]} + args = {"mean": [0.0, 0.0, 0.0], "std": [0, 0, 0]} with self.assertRaises(ValueError): TestTFTransorm.transforms["Normalize"](**args) def testRandomResizedCrop(self): - args = {'size':[50]} + args = {"size": [50]} randomresizedcrop = TestTFTransorm.transforms["RandomResizedCrop"](**args) - compose = TestTFTransorm.transforms['Compose']([randomresizedcrop]) + compose = TestTFTransorm.transforms["Compose"]([randomresizedcrop]) image_result = compose((TestTFTransorm.img, None))[0] - self.assertEqual(image_result.shape, (50,50,3)) - args = {'size':[100, 100]} + self.assertEqual(image_result.shape, (50, 50, 3)) + args = {"size": [100, 100]} randomresizedcrop = TestTFTransorm.transforms["RandomResizedCrop"](**args) - compose = TestTFTransorm.transforms['Compose']([randomresizedcrop]) + compose = TestTFTransorm.transforms["Compose"]([randomresizedcrop]) image_result = compose((TestTFTransorm.img, None))[0] - self.assertEqual(image_result.shape, (100,100,3)) + self.assertEqual(image_result.shape, (100, 100, 3)) tf_result = randomresizedcrop((TestTFTransorm.tf_img, None))[0] tf_result = tf_result.eval(session=tf.compat.v1.Session()) - self.assertEqual(tf_result.shape, (100,100,3)) - args = {'size':[100, 100], 'scale':(0.8, 0.1)} + self.assertEqual(tf_result.shape, (100, 100, 3)) + args = {"size": [100, 100], "scale": (0.8, 0.1)} with self.assertRaises(ValueError): TestTFTransorm.transforms["RandomResizedCrop"](**args) def testSquadV1(self): - import urllib import json - vocab_url = "https://raw.githubusercontent.com/microsoft/SDNet/master/bert_vocab_files/bert-large-uncased-vocab.txt" + import urllib + + vocab_url = ( + "https://raw.githubusercontent.com/microsoft/SDNet/master/bert_vocab_files/bert-large-uncased-vocab.txt" + ) urllib.request.urlretrieve(vocab_url, "./vocab.txt") - label = [{ - "paragraphs":[ - {'context': - 'Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season.', - 'qas': [{ - 'answers': [ - {'answer_start': 177, 'text': 'Denver Broncos'}, - {'answer_start': 177, 'text': 'Denver Broncos'}, - {'answer_start': 177, 'text': 'Denver Broncos'}], - 'question': 'Which NFL team represented the AFC at Super Bowl 50?', - 'id': 
'56be4db0acb8001400a502ec'}] - } - ] - }] - fake_json = json.dumps({'data': label}) - with open('dev.json', 'w') as f: + label = [ + { + "paragraphs": [ + { + "context": "Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season.", + "qas": [ + { + "answers": [ + {"answer_start": 177, "text": "Denver Broncos"}, + {"answer_start": 177, "text": "Denver Broncos"}, + {"answer_start": 177, "text": "Denver Broncos"}, + ], + "question": "Which NFL team represented the AFC at Super Bowl 50?", + "id": "56be4db0acb8001400a502ec", + } + ], + } + ] + } + ] + fake_json = json.dumps({"data": label}) + with open("dev.json", "w") as f: f.write(fake_json) - args = { - 'label_file': './dev.json', - 'vocab_file': './vocab.txt' - } - post_transforms = TRANSFORMS('tensorflow', 'postprocess') - squadv1 = post_transforms['SquadV1'](**args) - + args = {"label_file": "./dev.json", "vocab_file": "./vocab.txt"} + post_transforms = TRANSFORMS("tensorflow", "postprocess") + squadv1 = post_transforms["SquadV1"](**args) + preds_0 = np.array([1000000000]) - preds_1 = np.random.uniform(low=-12.3, high=6.8, size=(1,384)) - preds_2 = np.random.uniform(low=-10.8, high=7.4, size=(1,384)) + preds_1 = np.random.uniform(low=-12.3, high=6.8, size=(1, 384)) + preds_2 = np.random.uniform(low=-10.8, high=7.4, size=(1, 384)) preds = [preds_0, preds_1, preds_2] result = squadv1((preds, label)) - self.assertTrue(result[1][0]['paragraphs'][0]['qas'][0]['id'] in result[0]) - os.remove('dev.json') - os.remove('vocab.txt') - + self.assertTrue(result[1][0]["paragraphs"][0]["qas"][0]["id"] in result[0]) + os.remove("dev.json") + os.remove("vocab.txt") + + class TestAlignImageChannel(unittest.TestCase): @classmethod def setUpClass(cls): - cls.img1 = np.random.random_sample([100,100,3]) * 255 - cls.img2 = np.random.random_sample([100,100]) * 255 - cls.img3 = np.random.random_sample([100,100,4]) * 255 + cls.img1 = np.random.random_sample([100, 100, 3]) * 255 + cls.img2 = np.random.random_sample([100, 100]) * 255 + cls.img3 = np.random.random_sample([100, 100, 4]) * 255 cls.pt_img1 = Image.fromarray(cls.img1.astype(np.uint8)) cls.pt_img2 = Image.fromarray(cls.img2.astype(np.uint8)) cls.pt_img3 = Image.fromarray(cls.img3.astype(np.uint8)) - + def testTensorflow(self): - transforms = TRANSFORMS('tensorflow', 'preprocess') - align = transforms['AlignImageChannel'](**{'dim':1}) + transforms = TRANSFORMS("tensorflow", "preprocess") + align = transforms["AlignImageChannel"](**{"dim": 1}) image, _ = align((TestAlignImageChannel.img1.astype(np.uint8), None)) self.assertEqual(image.shape[-1], 1) - align = transforms['AlignImageChannel'](**{'dim':1}) + align = transforms["AlignImageChannel"](**{"dim": 1}) image, _ = align((TestAlignImageChannel.img2.astype(np.uint8), None)) self.assertEqual(image.shape[-1], 1) - align = transforms['AlignImageChannel'](**{'dim':3}) + align = transforms["AlignImageChannel"](**{"dim": 3}) image, _ = align((TestAlignImageChannel.img3.astype(np.uint8), None)) self.assertEqual(image.shape[-1], 3) - align = transforms['AlignImageChannel'](**{'dim':2}) - self.assertRaises(ValueError, align, - (TestAlignImageChannel.img1.astype(np.uint8), None)) + align = transforms["AlignImageChannel"](**{"dim": 2}) + self.assertRaises(ValueError, align, (TestAlignImageChannel.img1.astype(np.uint8), None)) with self.assertRaises(ValueError): - transforms['AlignImageChannel'](**{'dim':5}) + transforms["AlignImageChannel"](**{"dim": 5}) def testONNX(self): - transforms = 
TRANSFORMS('onnxrt_qlinearops', 'preprocess') - align = transforms['AlignImageChannel'](**{'dim':1}) + transforms = TRANSFORMS("onnxrt_qlinearops", "preprocess") + align = transforms["AlignImageChannel"](**{"dim": 1}) image, _ = align((TestAlignImageChannel.img1.astype(np.uint8), None)) self.assertEqual(image.shape[-1], 1) - align = transforms['AlignImageChannel'](**{'dim':1}) + align = transforms["AlignImageChannel"](**{"dim": 1}) image, _ = align((TestAlignImageChannel.img2.astype(np.uint8), None)) self.assertEqual(image.shape[-1], 1) - align = transforms['AlignImageChannel'](**{'dim':3}) + align = transforms["AlignImageChannel"](**{"dim": 3}) image, _ = align((TestAlignImageChannel.img3.astype(np.uint8), None)) self.assertEqual(image.shape[-1], 3) - align = transforms['AlignImageChannel'](**{'dim':2}) - self.assertRaises(ValueError, align, - (TestAlignImageChannel.img1.astype(np.uint8), None)) + align = transforms["AlignImageChannel"](**{"dim": 2}) + self.assertRaises(ValueError, align, (TestAlignImageChannel.img1.astype(np.uint8), None)) with self.assertRaises(ValueError): - transforms['AlignImageChannel'](**{'dim':5}) + transforms["AlignImageChannel"](**{"dim": 5}) def testPyTorch(self): - transforms = TRANSFORMS('pytorch', 'preprocess') - align = transforms['AlignImageChannel'](**{'dim':1}) + transforms = TRANSFORMS("pytorch", "preprocess") + align = transforms["AlignImageChannel"](**{"dim": 1}) image, _ = align((TestAlignImageChannel.pt_img1, None)) - self.assertEqual(image.mode, 'L') + self.assertEqual(image.mode, "L") - align = transforms['AlignImageChannel'](**{'dim':1}) + align = transforms["AlignImageChannel"](**{"dim": 1}) image, _ = align((TestAlignImageChannel.pt_img2, None)) - self.assertEqual(image.mode, 'L') + self.assertEqual(image.mode, "L") - align = transforms['AlignImageChannel'](**{'dim':3}) + align = transforms["AlignImageChannel"](**{"dim": 3}) image, _ = align((TestAlignImageChannel.pt_img3, None)) - self.assertEqual(image.mode, 'RGB') + self.assertEqual(image.mode, "RGB") with self.assertRaises(ValueError): - align = transforms['AlignImageChannel'](**{'dim':2}) + align = transforms["AlignImageChannel"](**{"dim": 2}) with self.assertRaises(ValueError): - transforms['AlignImageChannel'](**{'dim':5}) + transforms["AlignImageChannel"](**{"dim": 5}) @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows yet") def testMXNet(self): - transforms = TRANSFORMS('mxnet', 'preprocess') - align = transforms['AlignImageChannel'](**{'dim':1}) + transforms = TRANSFORMS("mxnet", "preprocess") + align = transforms["AlignImageChannel"](**{"dim": 1}) image, _ = align((TestAlignImageChannel.img1.astype(np.uint8), None)) self.assertEqual(image.shape[-1], 1) - align = transforms['AlignImageChannel'](**{'dim':1}) + align = transforms["AlignImageChannel"](**{"dim": 1}) image, _ = align((TestAlignImageChannel.img2.astype(np.uint8), None)) self.assertEqual(image.shape[-1], 1) - align = transforms['AlignImageChannel'](**{'dim':3}) + align = transforms["AlignImageChannel"](**{"dim": 3}) image, _ = align((TestAlignImageChannel.img3.astype(np.uint8), None)) self.assertEqual(image.shape[-1], 3) - align = transforms['AlignImageChannel'](**{'dim':2}) - self.assertRaises(ValueError, align, - (TestAlignImageChannel.img1.astype(np.uint8), None)) + align = transforms["AlignImageChannel"](**{"dim": 2}) + self.assertRaises(ValueError, align, (TestAlignImageChannel.img1.astype(np.uint8), None)) with self.assertRaises(ValueError): - 
transforms['AlignImageChannel'](**{'dim':5}) + transforms["AlignImageChannel"](**{"dim": 5}) + class TestToArray(unittest.TestCase): @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows yet") def testParse(self): - random_array = np.random.random_sample([10,10,3]) * 255 + random_array = np.random.random_sample([10, 10, 3]) * 255 random_array = random_array.astype(np.uint8) img1 = Image.fromarray(random_array) - onnx_transforms = TRANSFORMS('onnxrt_qlinearops', 'preprocess') - onnx_parse = onnx_transforms['ToArray']() + onnx_transforms = TRANSFORMS("onnxrt_qlinearops", "preprocess") + onnx_parse = onnx_transforms["ToArray"]() img, _ = onnx_parse((img1, None)) self.assertTrue(isinstance(img, np.ndarray)) - mxnet_transforms = TRANSFORMS('mxnet', 'preprocess') - mxnet_parse = mxnet_transforms['ToArray']() + mxnet_transforms = TRANSFORMS("mxnet", "preprocess") + mxnet_parse = mxnet_transforms["ToArray"]() img, _ = mxnet_parse((mx.nd.array(random_array), None)) self.assertTrue(isinstance(img, np.ndarray)) - self.assertRaises(ValueError, mxnet_parse, ([1,2], None)) + self.assertRaises(ValueError, mxnet_parse, ([1, 2], None)) + class TestMXNetTransform(unittest.TestCase): @classmethod def setUpClass(cls): if platform.system().lower() == "windows": cls.skipTest(cls, "not support mxnet on windows yet") - array = np.random.random_sample([100,100,3]) * 255 + array = np.random.random_sample([100, 100, 3]) * 255 cls.img = mx.nd.array(array) - cls.transforms = TRANSFORMS('mxnet', 'preprocess') + cls.transforms = TRANSFORMS("mxnet", "preprocess") def testRandomCrop(self): - args = {'size':[50]} + args = {"size": [50]} randomcrop = TestMXNetTransform.transforms["RandomCrop"](**args) - compose = TestMXNetTransform.transforms['Compose']([randomcrop]) + compose = TestMXNetTransform.transforms["Compose"]([randomcrop]) image_result = compose((TestMXNetTransform.img, None)) - self.assertEqual(image_result[0].shape, (50,50,3)) + self.assertEqual(image_result[0].shape, (50, 50, 3)) def testNormalize(self): - args = {'mean':[0.0,0.0,0.0], 'std':[0.29, 0.24, 0.25]} - normalize = TestMXNetTransform.transforms['Normalize'](**args) + args = {"mean": [0.0, 0.0, 0.0], "std": [0.29, 0.24, 0.25]} + normalize = TestMXNetTransform.transforms["Normalize"](**args) image_result = normalize((TestMXNetTransform.img, None)) - self.assertAlmostEqual(image_result[0].asnumpy()[0][0][0], - (TestMXNetTransform.img.asnumpy()/[0.29])[0][0][0], places=3) + self.assertAlmostEqual( + image_result[0].asnumpy()[0][0][0], (TestMXNetTransform.img.asnumpy() / [0.29])[0][0][0], places=3 + ) + class TestONNXTransfrom(unittest.TestCase): @classmethod def setUpClass(cls): - cls.img = np.random.random_sample([100,100,3]) * 255 - cls.transforms = TRANSFORMS('onnxrt_qlinearops', 'preprocess') + cls.img = np.random.random_sample([100, 100, 3]) * 255 + cls.transforms = TRANSFORMS("onnxrt_qlinearops", "preprocess") def testResize(self): - args = {'size':[224]} - resize = TestONNXTransfrom.transforms['Resize'](**args) - compose = TestONNXTransfrom.transforms['Compose']([resize]) + args = {"size": [224]} + resize = TestONNXTransfrom.transforms["Resize"](**args) + compose = TestONNXTransfrom.transforms["Compose"]([resize]) image_result = compose((self.img, None)) - self.assertEqual(image_result[0].shape, (224,224,3)) - args = {'size':[100, 100], 'interpolation':'test'} + self.assertEqual(image_result[0].shape, (224, 224, 3)) + args = {"size": [100, 100], "interpolation": "test"} with self.assertRaises(ValueError): - 
TestONNXTransfrom.transforms['Resize'](**args) + TestONNXTransfrom.transforms["Resize"](**args) - args = {'size':224} - resize = TestONNXTransfrom.transforms['Resize'](**args) - compose = TestONNXTransfrom.transforms['Compose']([resize]) + args = {"size": 224} + resize = TestONNXTransfrom.transforms["Resize"](**args) + compose = TestONNXTransfrom.transforms["Compose"]([resize]) image_result = compose((self.img, None)) - self.assertEqual(image_result[0].shape, (224,224,3)) - - args = {'size':[224,224]} - resize = TestONNXTransfrom.transforms['Resize'](**args) - compose = TestONNXTransfrom.transforms['Compose']([resize]) + self.assertEqual(image_result[0].shape, (224, 224, 3)) + + args = {"size": [224, 224]} + resize = TestONNXTransfrom.transforms["Resize"](**args) + compose = TestONNXTransfrom.transforms["Compose"]([resize]) image_result = compose((self.img, None)) - self.assertEqual(image_result[0].shape, (224,224,3)) - + self.assertEqual(image_result[0].shape, (224, 224, 3)) + def testNormalize(self): - args = {'mean':[0.0,0.0,0.0], 'std':[0.29, 0.24, 0.25]} - normalize = TestONNXTransfrom.transforms['Normalize'](**args) - compose = TestONNXTransfrom.transforms['Compose']([normalize]) + args = {"mean": [0.0, 0.0, 0.0], "std": [0.29, 0.24, 0.25]} + normalize = TestONNXTransfrom.transforms["Normalize"](**args) + compose = TestONNXTransfrom.transforms["Compose"]([normalize]) image_result = compose((TestONNXTransfrom.img, None)) - self.assertTrue( - (image_result[0] == np.array(TestONNXTransfrom.img)/[0.29, 0.24, 0.25]).all()) + self.assertTrue((image_result[0] == np.array(TestONNXTransfrom.img) / [0.29, 0.24, 0.25]).all()) - args = {'mean':[0.0,0.0,0.0], 'std':[0,0,0]} + args = {"mean": [0.0, 0.0, 0.0], "std": [0, 0, 0]} with self.assertRaises(ValueError): TestONNXTransfrom.transforms["Normalize"](**args) def testRandomCrop(self): - args = {'size':[50]} + args = {"size": [50]} randomcrop = TestONNXTransfrom.transforms["RandomCrop"](**args) - compose = TestONNXTransfrom.transforms['Compose']([randomcrop]) + compose = TestONNXTransfrom.transforms["Compose"]([randomcrop]) image_result = compose((TestONNXTransfrom.img, None)) - self.assertEqual(image_result[0].shape, (50,50,3)) - args = {'size':[1000, 1000]} + self.assertEqual(image_result[0].shape, (50, 50, 3)) + args = {"size": [1000, 1000]} with self.assertRaises(ValueError): trans = TestONNXTransfrom.transforms["RandomCrop"](**args) trans((TestONNXTransfrom.img, None)) - args = {'size':50} + args = {"size": 50} randomcrop = TestONNXTransfrom.transforms["RandomCrop"](**args) - compose = TestONNXTransfrom.transforms['Compose']([randomcrop]) + compose = TestONNXTransfrom.transforms["Compose"]([randomcrop]) image_result = compose((TestONNXTransfrom.img, None)) - self.assertEqual(image_result[0].shape, (50,50,3)) - - args = {'size':[100,100]} + self.assertEqual(image_result[0].shape, (50, 50, 3)) + + args = {"size": [100, 100]} randomcrop = TestONNXTransfrom.transforms["RandomCrop"](**args) - compose = TestONNXTransfrom.transforms['Compose']([randomcrop]) + compose = TestONNXTransfrom.transforms["Compose"]([randomcrop]) image_result = compose((TestONNXTransfrom.img, None)) - self.assertEqual(image_result[0].shape, (100,100,3)) - + self.assertEqual(image_result[0].shape, (100, 100, 3)) + def testCenterCrop(self): - args = {'size':[100]} + args = {"size": [100]} centercrop = TestONNXTransfrom.transforms["CenterCrop"](**args) - compose = TestONNXTransfrom.transforms['Compose']([centercrop]) + compose = 
TestONNXTransfrom.transforms["Compose"]([centercrop]) image_result = compose((TestONNXTransfrom.img, None)) - self.assertEqual(image_result[0].shape, (100,100,3)) - args = {'size': 5} + self.assertEqual(image_result[0].shape, (100, 100, 3)) + args = {"size": 5} centercrop = TestONNXTransfrom.transforms["CenterCrop"](**args) image_result = centercrop((TestONNXTransfrom.img, None)) - self.assertEqual(image_result[0].shape, (5,5,3)) - args = {'size': [5, 6]} + self.assertEqual(image_result[0].shape, (5, 5, 3)) + args = {"size": [5, 6]} centercrop = TestONNXTransfrom.transforms["CenterCrop"](**args) image_result = centercrop((TestONNXTransfrom.img, None)) - self.assertEqual(image_result[0].shape, (5,6,3)) - args = {'size':[150]} + self.assertEqual(image_result[0].shape, (5, 6, 3)) + args = {"size": [150]} centercrop = TestONNXTransfrom.transforms["CenterCrop"](**args) with self.assertRaises(ValueError): centercrop((TestONNXTransfrom.img, None)) def testRandomResizedCrop(self): - args = {'size':[150]} + args = {"size": [150]} randomresizedcrop = TestONNXTransfrom.transforms["RandomResizedCrop"](**args) - compose = TestONNXTransfrom.transforms['Compose']([randomresizedcrop]) + compose = TestONNXTransfrom.transforms["Compose"]([randomresizedcrop]) image_result = compose((TestONNXTransfrom.img, None)) - self.assertEqual(image_result[0].shape, (150,150,3)) - args = {'size':[150, 150], 'scale':(0.9, 0.3)} + self.assertEqual(image_result[0].shape, (150, 150, 3)) + args = {"size": [150, 150], "scale": (0.9, 0.3)} with self.assertRaises(ValueError): TestONNXTransfrom.transforms["RandomResizedCrop"](**args) - args = {'size':150, 'interpolation':'test'} + args = {"size": 150, "interpolation": "test"} with self.assertRaises(ValueError): TestONNXTransfrom.transforms["RandomResizedCrop"](**args) + class TestImagenetTransform(unittest.TestCase): def testParseDecodeImagenet(self): - random_array = np.random.random_sample([100,100,3]) * 255 + random_array = np.random.random_sample([100, 100, 3]) * 255 random_array = random_array.astype(np.uint8) im = Image.fromarray(random_array) - im.save('test.jpeg') + im.save("test.jpeg") - image = tf.compat.v1.gfile.FastGFile('test.jpeg','rb').read() + image = tf.compat.v1.gfile.FastGFile("test.jpeg", "rb").read() label = 10 - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': tf.train.Feature( - bytes_list=tf.train.BytesList(value=[image])), - 'image/class/label': tf.train.Feature( - int64_list=tf.train.Int64List(value=[label])), - 'image/object/bbox/xmin': tf.train.Feature( - float_list=tf.train.FloatList(value=[10])), - 'image/object/bbox/ymin': tf.train.Feature( - float_list=tf.train.FloatList(value=[20])), - 'image/object/bbox/xmax': tf.train.Feature( - float_list=tf.train.FloatList(value=[100])), - 'image/object/bbox/ymax': tf.train.Feature( - float_list=tf.train.FloatList(value=[200])), - })) - with tf.io.TFRecordWriter('test-0-of-0') as writer: + example = tf.train.Example( + features=tf.train.Features( + feature={ + "image/encoded": tf.train.Feature(bytes_list=tf.train.BytesList(value=[image])), + "image/class/label": tf.train.Feature(int64_list=tf.train.Int64List(value=[label])), + "image/object/bbox/xmin": tf.train.Feature(float_list=tf.train.FloatList(value=[10])), + "image/object/bbox/ymin": tf.train.Feature(float_list=tf.train.FloatList(value=[20])), + "image/object/bbox/xmax": tf.train.Feature(float_list=tf.train.FloatList(value=[100])), + "image/object/bbox/ymax": tf.train.Feature(float_list=tf.train.FloatList(value=[200])), 
+ } + ) + ) + with tf.io.TFRecordWriter("test-0-of-0") as writer: writer.write(example.SerializeToString()) - eval_dataset = create_dataset( - 'tensorflow', {'ImageRecord':{'root':'./'}}, {'ParseDecodeImagenet':{}}, None) - dataloader = DATALOADERS['tensorflow'](dataset=eval_dataset, batch_size=1) - for (inputs, labels) in dataloader: - self.assertEqual(inputs.shape, (1,100,100,3)) + eval_dataset = create_dataset("tensorflow", {"ImageRecord": {"root": "./"}}, {"ParseDecodeImagenet": {}}, None) + dataloader = DATALOADERS["tensorflow"](dataset=eval_dataset, batch_size=1) + for inputs, labels in dataloader: + self.assertEqual(inputs.shape, (1, 100, 100, 3)) self.assertEqual(labels[0][0], 10) break from neural_compressor.experimental.data.transforms.imagenet_transform import ParseDecodeImagenet + func = ParseDecodeImagenet() out = func(example.SerializeToString()) - self.assertEqual(out[0].eval(session=tf.compat.v1.Session()).shape, (100,100,3)) + self.assertEqual(out[0].eval(session=tf.compat.v1.Session()).shape, (100, 100, 3)) from neural_compressor.experimental.data.datasets.dataset import TensorflowTFRecordDataset - ds = TensorflowTFRecordDataset('test-0-of-0', func) - dataloader = DATALOADERS['tensorflow'](dataset=ds, batch_size=1) - for (inputs, labels) in dataloader: - self.assertEqual(inputs.shape, (1,100,100,3)) + + ds = TensorflowTFRecordDataset("test-0-of-0", func) + dataloader = DATALOADERS["tensorflow"](dataset=ds, batch_size=1) + for inputs, labels in dataloader: + self.assertEqual(inputs.shape, (1, 100, 100, 3)) self.assertEqual(labels[0][0], 10) break - os.remove('test-0-of-0') - os.remove('test.jpeg') + os.remove("test-0-of-0") + os.remove("test.jpeg") + class TestCOCOTransform(unittest.TestCase): def testCOCODecode(self): - tf.compat.v1.disable_eager_execution() + tf.compat.v1.disable_eager_execution() - random_array = np.random.random_sample([100,100,3]) * 255 + random_array = np.random.random_sample([100, 100, 3]) * 255 random_array = random_array.astype(np.uint8) im = Image.fromarray(random_array) - im.save('test.jpeg') - - image = tf.compat.v1.gfile.FastGFile('test.jpeg','rb').read() - source_id = '000000397133.jpg'.encode('utf-8') - label = 'person'.encode('utf-8') - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded':tf.train.Feature( - bytes_list=tf.train.BytesList(value=[image])), - 'image/object/class/text':tf.train.Feature( - bytes_list=tf.train.BytesList(value=[label])), - 'image/source_id':tf.train.Feature( - bytes_list=tf.train.BytesList(value=[source_id])), - 'image/object/bbox/xmin':tf.train.Feature( - float_list=tf.train.FloatList(value=[10])), - 'image/object/bbox/ymin':tf.train.Feature( - float_list=tf.train.FloatList(value=[10])), - 'image/object/bbox/xmax':tf.train.Feature( - float_list=tf.train.FloatList(value=[100])), - 'image/object/bbox/ymax':tf.train.Feature( - float_list=tf.train.FloatList(value=[100])), - })) - - with tf.io.TFRecordWriter('test.record') as writer: + im.save("test.jpeg") + + image = tf.compat.v1.gfile.FastGFile("test.jpeg", "rb").read() + source_id = "000000397133.jpg".encode("utf-8") + label = "person".encode("utf-8") + example = tf.train.Example( + features=tf.train.Features( + feature={ + "image/encoded": tf.train.Feature(bytes_list=tf.train.BytesList(value=[image])), + "image/object/class/text": tf.train.Feature(bytes_list=tf.train.BytesList(value=[label])), + "image/source_id": tf.train.Feature(bytes_list=tf.train.BytesList(value=[source_id])), + "image/object/bbox/xmin": 
tf.train.Feature(float_list=tf.train.FloatList(value=[10])), + "image/object/bbox/ymin": tf.train.Feature(float_list=tf.train.FloatList(value=[10])), + "image/object/bbox/xmax": tf.train.Feature(float_list=tf.train.FloatList(value=[100])), + "image/object/bbox/ymax": tf.train.Feature(float_list=tf.train.FloatList(value=[100])), + } + ) + ) + + with tf.io.TFRecordWriter("test.record") as writer: writer.write(example.SerializeToString()) eval_dataset = create_dataset( - 'tensorflow', {'COCORecord':{'root':'test.record'}}, - {'ParseDecodeCoco':{}, 'Resize': {'size': 50}, 'Cast':{'dtype':'int64'}, - 'CropToBoundingBox':{'offset_height':2, 'offset_width':2, 'target_height':5, 'target_width':5}, - 'CenterCrop':{'size':[4,4]}, - 'RandomResizedCrop':{'size':[4,5]}, - }, None) - dataloader = DATALOADERS['tensorflow'](dataset=eval_dataset, batch_size=1) - for (inputs, labels) in dataloader: - self.assertEqual(inputs.shape, (1,4,5,3)) - self.assertEqual(labels[0].shape, (1,1,4)) + "tensorflow", + {"COCORecord": {"root": "test.record"}}, + { + "ParseDecodeCoco": {}, + "Resize": {"size": 50}, + "Cast": {"dtype": "int64"}, + "CropToBoundingBox": {"offset_height": 2, "offset_width": 2, "target_height": 5, "target_width": 5}, + "CenterCrop": {"size": [4, 4]}, + "RandomResizedCrop": {"size": [4, 5]}, + }, + None, + ) + dataloader = DATALOADERS["tensorflow"](dataset=eval_dataset, batch_size=1) + for inputs, labels in dataloader: + self.assertEqual(inputs.shape, (1, 4, 5, 3)) + self.assertEqual(labels[0].shape, (1, 1, 4)) - from neural_compressor.experimental.data.transforms.transform import TensorflowResizeWithRatio from neural_compressor.experimental.data.datasets.coco_dataset import ParseDecodeCoco + from neural_compressor.experimental.data.transforms.transform import TensorflowResizeWithRatio + func = ParseDecodeCoco() out = func(example.SerializeToString()) - self.assertEqual(out[0].eval(session=tf.compat.v1.Session()).shape, (100,100,3)) + self.assertEqual(out[0].eval(session=tf.compat.v1.Session()).shape, (100, 100, 3)) func = ParseDecodeCoco() out = func(example.SerializeToString()) - self.assertEqual(out[0].eval(session=tf.compat.v1.Session()).shape, (100,100,3)) + self.assertEqual(out[0].eval(session=tf.compat.v1.Session()).shape, (100, 100, 3)) - func = TensorflowResizeWithRatio(**{'padding':True}) + func = TensorflowResizeWithRatio(**{"padding": True}) out = func(out) - self.assertEqual(out[0].eval(session=tf.compat.v1.Session()).shape, (1365,1365,3)) - - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded':tf.train.Feature( - bytes_list=tf.train.BytesList(value=[image])), - 'image/source_id':tf.train.Feature( - bytes_list=tf.train.BytesList(value=[source_id])), - 'image/object/bbox/xmin':tf.train.Feature( - float_list=tf.train.FloatList(value=[10])), - 'image/object/bbox/ymin':tf.train.Feature( - float_list=tf.train.FloatList(value=[10])), - 'image/object/bbox/xmax':tf.train.Feature( - float_list=tf.train.FloatList(value=[100])), - 'image/object/bbox/ymax':tf.train.Feature( - float_list=tf.train.FloatList(value=[100])), - })) - - with tf.io.TFRecordWriter('test2.record') as writer: + self.assertEqual(out[0].eval(session=tf.compat.v1.Session()).shape, (1365, 1365, 3)) + + example = tf.train.Example( + features=tf.train.Features( + feature={ + "image/encoded": tf.train.Feature(bytes_list=tf.train.BytesList(value=[image])), + "image/source_id": tf.train.Feature(bytes_list=tf.train.BytesList(value=[source_id])), + "image/object/bbox/xmin": 
tf.train.Feature(float_list=tf.train.FloatList(value=[10])), + "image/object/bbox/ymin": tf.train.Feature(float_list=tf.train.FloatList(value=[10])), + "image/object/bbox/xmax": tf.train.Feature(float_list=tf.train.FloatList(value=[100])), + "image/object/bbox/ymax": tf.train.Feature(float_list=tf.train.FloatList(value=[100])), + } + ) + ) + + with tf.io.TFRecordWriter("test2.record") as writer: writer.write(example.SerializeToString()) - self.assertRaises(ValueError, create_dataset, - 'tensorflow', {'COCORecord':{'root':'test2.record'}}, None, None) + self.assertRaises( + ValueError, create_dataset, "tensorflow", {"COCORecord": {"root": "test2.record"}}, None, None + ) + + os.remove("test2.record") + os.remove("test.record") + os.remove("test.jpeg") - os.remove('test2.record') - os.remove('test.record') - os.remove('test.jpeg') class TestVOCTransform(unittest.TestCase): def testVOCDecode(self): import shutil - tf.compat.v1.disable_eager_execution() + + tf.compat.v1.disable_eager_execution() def _bytes_list_feature(values): import six + def norm2bytes(value): return value.encode() if isinstance(value, str) and six.PY3 else value - return tf.train.Feature( - bytes_list=tf.train.BytesList(value=[norm2bytes(values)])) + + return tf.train.Feature(bytes_list=tf.train.BytesList(value=[norm2bytes(values)])) def _int64_list_feature(values): import collections import collections.abc + if not isinstance(values, collections.abc.Iterable): values = [values] return tf.train.Feature(int64_list=tf.train.Int64List(value=values)) - random_array = np.random.random_sample([100,100,3]) * 255 + random_array = np.random.random_sample([100, 100, 3]) * 255 random_array = random_array.astype(np.uint8) im = Image.fromarray(random_array) - im.save('test.jpg') - random_array = np.random.random_sample([100,100,3]) * 0 + im.save("test.jpg") + random_array = np.random.random_sample([100, 100, 3]) * 0 random_array = random_array.astype(np.uint8) im = Image.fromarray(random_array) - im.save('test.png') - image_data = tf.compat.v1.gfile.GFile('test.jpg', 'rb').read() - seg_data = tf.compat.v1.gfile.GFile('test.png', 'rb').read() - filename = 'test' - - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': _bytes_list_feature(image_data), - 'image/filename': _bytes_list_feature(filename), - 'image/format': _bytes_list_feature('png'), - 'image/height': _int64_list_feature(100), - 'image/width': _int64_list_feature(100), - 'image/channels': _int64_list_feature(3), - 'image/segmentation/class/encoded': ( - _bytes_list_feature(seg_data)), - 'image/segmentation/class/format': _bytes_list_feature('png'), - })) - - if not os.path.exists('./test_record'): - os.mkdir('./test_record') - with tf.io.TFRecordWriter('./test_record/val-test.record') as writer: + im.save("test.png") + image_data = tf.compat.v1.gfile.GFile("test.jpg", "rb").read() + seg_data = tf.compat.v1.gfile.GFile("test.png", "rb").read() + filename = "test" + + example = tf.train.Example( + features=tf.train.Features( + feature={ + "image/encoded": _bytes_list_feature(image_data), + "image/filename": _bytes_list_feature(filename), + "image/format": _bytes_list_feature("png"), + "image/height": _int64_list_feature(100), + "image/width": _int64_list_feature(100), + "image/channels": _int64_list_feature(3), + "image/segmentation/class/encoded": (_bytes_list_feature(seg_data)), + "image/segmentation/class/format": _bytes_list_feature("png"), + } + ) + ) + + if not os.path.exists("./test_record"): + os.mkdir("./test_record") + with 
tf.io.TFRecordWriter("./test_record/val-test.record") as writer: writer.write(example.SerializeToString()) eval_dataset = create_dataset( - 'tensorflow', {'VOCRecord':{'root':'./test_record'}}, {'ParseDecodeVoc':{}}, None) - dataloader = DATALOADERS['tensorflow'](dataset=eval_dataset, batch_size=1) - for (inputs, labels) in dataloader: - self.assertEqual(inputs.shape, (1,100,100,3)) - self.assertEqual(labels[0].shape, (100,100,1)) + "tensorflow", {"VOCRecord": {"root": "./test_record"}}, {"ParseDecodeVoc": {}}, None + ) + dataloader = DATALOADERS["tensorflow"](dataset=eval_dataset, batch_size=1) + for inputs, labels in dataloader: + self.assertEqual(inputs.shape, (1, 100, 100, 3)) + self.assertEqual(labels[0].shape, (100, 100, 1)) from neural_compressor.experimental.data.transforms.transform import ParseDecodeVocTransform + func = ParseDecodeVocTransform() out = func(example.SerializeToString()) - self.assertEqual(out[0].eval(session=tf.compat.v1.Session()).shape, (100,100,3)) + self.assertEqual(out[0].eval(session=tf.compat.v1.Session()).shape, (100, 100, 3)) + + os.remove("./test_record/val-test.record") + os.remove("test.jpg") + os.remove("test.png") + shutil.rmtree("./test_record") - os.remove('./test_record/val-test.record') - os.remove('test.jpg') - os.remove('test.png') - shutil.rmtree('./test_record') if __name__ == "__main__": unittest.main() diff --git a/test/data/test_filter.py b/test/data/test_filter.py index 830f8e7cf1c..71a3a12affe 100644 --- a/test/data/test_filter.py +++ b/test/data/test_filter.py @@ -1,160 +1,153 @@ -import unittest -import numpy as np -import os import json +import os import shutil -from PIL import Image -from neural_compressor.data import FILTERS, TRANSFORMS, Datasets, DATALOADERS -from neural_compressor.utils.create_obj_from_config import create_dataset, get_preprocess, create_dataloader +import unittest +import numpy as np import tensorflow as tf +from PIL import Image + +from neural_compressor.data import DATALOADERS, FILTERS, TRANSFORMS, Datasets +from neural_compressor.utils.create_obj_from_config import create_dataloader, create_dataset, get_preprocess + class TestCOCOFilter(unittest.TestCase): def testLabelBalanceCOCORecord(self): - tf.compat.v1.disable_eager_execution() + tf.compat.v1.disable_eager_execution() - random_array = np.random.random_sample([100,100,3]) * 255 + random_array = np.random.random_sample([100, 100, 3]) * 255 random_array = random_array.astype(np.uint8) im = Image.fromarray(random_array) - im.save('test.jpeg') + im.save("test.jpeg") - image = tf.compat.v1.gfile.FastGFile('test.jpeg','rb').read() - source_id = '000000397133.jpg'.encode('utf-8') - label = 'person'.encode('utf-8') - example1 = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded':tf.train.Feature( - bytes_list=tf.train.BytesList(value=[image])), - 'image/object/class/text':tf.train.Feature( - bytes_list=tf.train.BytesList(value=[label])), - 'image/source_id':tf.train.Feature( - bytes_list=tf.train.BytesList(value=[source_id])), - 'image/object/bbox/xmin':tf.train.Feature( - float_list=tf.train.FloatList(value=[10])), - 'image/object/bbox/ymin':tf.train.Feature( - float_list=tf.train.FloatList(value=[10])), - 'image/object/bbox/xmax':tf.train.Feature( - float_list=tf.train.FloatList(value=[100])), - 'image/object/bbox/ymax':tf.train.Feature( - float_list=tf.train.FloatList(value=[100])), - })) - example2 = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded':tf.train.Feature( - bytes_list=tf.train.BytesList(value=[image])), - 
'image/object/class/text':tf.train.Feature( - bytes_list=tf.train.BytesList(value=[label])), - 'image/source_id':tf.train.Feature( - bytes_list=tf.train.BytesList(value=[source_id])), - 'image/object/bbox/xmin':tf.train.Feature( - float_list=tf.train.FloatList(value=[10, 20])), - 'image/object/bbox/ymin':tf.train.Feature( - float_list=tf.train.FloatList(value=[10, 20])), - 'image/object/bbox/xmax':tf.train.Feature( - float_list=tf.train.FloatList(value=[100, 200])), - 'image/object/bbox/ymax':tf.train.Feature( - float_list=tf.train.FloatList(value=[100, 200])), - })) - with tf.io.TFRecordWriter('test.record') as writer: + image = tf.compat.v1.gfile.FastGFile("test.jpeg", "rb").read() + source_id = "000000397133.jpg".encode("utf-8") + label = "person".encode("utf-8") + example1 = tf.train.Example( + features=tf.train.Features( + feature={ + "image/encoded": tf.train.Feature(bytes_list=tf.train.BytesList(value=[image])), + "image/object/class/text": tf.train.Feature(bytes_list=tf.train.BytesList(value=[label])), + "image/source_id": tf.train.Feature(bytes_list=tf.train.BytesList(value=[source_id])), + "image/object/bbox/xmin": tf.train.Feature(float_list=tf.train.FloatList(value=[10])), + "image/object/bbox/ymin": tf.train.Feature(float_list=tf.train.FloatList(value=[10])), + "image/object/bbox/xmax": tf.train.Feature(float_list=tf.train.FloatList(value=[100])), + "image/object/bbox/ymax": tf.train.Feature(float_list=tf.train.FloatList(value=[100])), + } + ) + ) + example2 = tf.train.Example( + features=tf.train.Features( + feature={ + "image/encoded": tf.train.Feature(bytes_list=tf.train.BytesList(value=[image])), + "image/object/class/text": tf.train.Feature(bytes_list=tf.train.BytesList(value=[label])), + "image/source_id": tf.train.Feature(bytes_list=tf.train.BytesList(value=[source_id])), + "image/object/bbox/xmin": tf.train.Feature(float_list=tf.train.FloatList(value=[10, 20])), + "image/object/bbox/ymin": tf.train.Feature(float_list=tf.train.FloatList(value=[10, 20])), + "image/object/bbox/xmax": tf.train.Feature(float_list=tf.train.FloatList(value=[100, 200])), + "image/object/bbox/ymax": tf.train.Feature(float_list=tf.train.FloatList(value=[100, 200])), + } + ) + ) + with tf.io.TFRecordWriter("test.record") as writer: writer.write(example1.SerializeToString()) writer.write(example2.SerializeToString()) - - preprocesses = TRANSFORMS('tensorflow', 'preprocess') - filters = FILTERS('tensorflow') - filter = filters['LabelBalanceCOCORecord'](2) - datasets = Datasets('tensorflow') - dataset = datasets['COCORecord']('test.record', \ - transform=None, filter=filter) - dataloader = DATALOADERS['tensorflow'](dataset=dataset, batch_size=1) - for (inputs, labels) in dataloader: - self.assertEqual(inputs.shape, (1,100,100,3)) - self.assertEqual(labels[0].shape, (1,2,4)) + + preprocesses = TRANSFORMS("tensorflow", "preprocess") + filters = FILTERS("tensorflow") + filter = filters["LabelBalanceCOCORecord"](2) + datasets = Datasets("tensorflow") + dataset = datasets["COCORecord"]("test.record", transform=None, filter=filter) + dataloader = DATALOADERS["tensorflow"](dataset=dataset, batch_size=1) + for inputs, labels in dataloader: + self.assertEqual(inputs.shape, (1, 100, 100, 3)) + self.assertEqual(labels[0].shape, (1, 2, 4)) dataset2 = create_dataset( - 'tensorflow', {'COCORecord':{'root':'test.record'}}, None, {'LabelBalance':{'size':2}}) - dataloader2 = DATALOADERS['tensorflow'](dataset=dataset2, batch_size=1) - for (inputs, labels) in dataloader2: - self.assertEqual(inputs.shape, 
(1,100,100,3)) - self.assertEqual(labels[0].shape, (1,2,4)) + "tensorflow", {"COCORecord": {"root": "test.record"}}, None, {"LabelBalance": {"size": 2}} + ) + dataloader2 = DATALOADERS["tensorflow"](dataset=dataset2, batch_size=1) + for inputs, labels in dataloader2: + self.assertEqual(inputs.shape, (1, 100, 100, 3)) + self.assertEqual(labels[0].shape, (1, 2, 4)) - dataloader3 = create_dataloader('tensorflow', {'batch_size':1, 'dataset':{'COCORecord':{'root':'test.record'}},\ - 'filter':{'LabelBalance':{'size':2}}, 'transform':None}) - for (inputs, labels) in dataloader3: - self.assertEqual(inputs.shape, (1,100,100,3)) - self.assertEqual(labels[0].shape, (1,2,4)) - os.remove('test.record') - os.remove('test.jpeg') + dataloader3 = create_dataloader( + "tensorflow", + { + "batch_size": 1, + "dataset": {"COCORecord": {"root": "test.record"}}, + "filter": {"LabelBalance": {"size": 2}}, + "transform": None, + }, + ) + for inputs, labels in dataloader3: + self.assertEqual(inputs.shape, (1, 100, 100, 3)) + self.assertEqual(labels[0].shape, (1, 2, 4)) + os.remove("test.record") + os.remove("test.jpeg") def testLabelBalanceCOCORaw(self): - random_array = np.random.random_sample([100,100,3]) * 255 + random_array = np.random.random_sample([100, 100, 3]) * 255 random_array = random_array.astype(np.uint8) im = Image.fromarray(random_array) - os.makedirs('val2017', exist_ok=True) - im.save('./val2017/test_0.jpg') - im.save('./val2017/test_1.jpg') + os.makedirs("val2017", exist_ok=True) + im.save("./val2017/test_0.jpg") + im.save("./val2017/test_1.jpg") fake_dict = { - 'info': { - 'description': 'COCO 2017 Dataset', - 'url': 'http://cocodataset.org', - 'version': '1.0', - 'year': 2017, - 'contributor': 'COCO Consortium', - 'date_created': '2017/09/01' - }, - 'licenses':{}, - 'images':[{ - 'file_name': 'test_0.jpg', - 'height': 100, - 'width': 100, - 'id': 0 - }, - { - 'file_name': 'test_1.jpg', - 'height': 100, - 'width': 100, - 'id': 1 - }], - 'annotations':[{ - 'category_id': 18, - 'id': 1767, - 'iscrowd': 0, - 'image_id': 0, - 'bbox': [473.07, 395.93, 38.65, 28.67], + "info": { + "description": "COCO 2017 Dataset", + "url": "http://cocodataset.org", + "version": "1.0", + "year": 2017, + "contributor": "COCO Consortium", + "date_created": "2017/09/01", }, - { - 'category_id': 18, - 'id': 1768, - 'iscrowd': 0, - 'image_id': 1, - 'bbox': [473.07, 395.93, 38.65, 28.67], - }, - { - 'category_id': 18, - 'id': 1768, - 'iscrowd': 0, - 'image_id': 1, - 'bbox': [473.07, 395.93, 38.65, 28.67], - }], - 'categories':[{ - 'supercategory': 'animal', - 'id': 18, - 'name': 'dog' - }] + "licenses": {}, + "images": [ + {"file_name": "test_0.jpg", "height": 100, "width": 100, "id": 0}, + {"file_name": "test_1.jpg", "height": 100, "width": 100, "id": 1}, + ], + "annotations": [ + { + "category_id": 18, + "id": 1767, + "iscrowd": 0, + "image_id": 0, + "bbox": [473.07, 395.93, 38.65, 28.67], + }, + { + "category_id": 18, + "id": 1768, + "iscrowd": 0, + "image_id": 1, + "bbox": [473.07, 395.93, 38.65, 28.67], + }, + { + "category_id": 18, + "id": 1768, + "iscrowd": 0, + "image_id": 1, + "bbox": [473.07, 395.93, 38.65, 28.67], + }, + ], + "categories": [{"supercategory": "animal", "id": 18, "name": "dog"}], } fake_json = json.dumps(fake_dict) - os.makedirs('annotations', exist_ok=True) - with open('./annotations/instances_val2017.json', 'w') as f: + os.makedirs("annotations", exist_ok=True) + with open("./annotations/instances_val2017.json", "w") as f: f.write(fake_json) - filters = FILTERS('onnxrt_qlinearops') - filter = 
filters['LabelBalanceCOCORaw'](1) - datasets = Datasets('onnxrt_qlinearops') - dataset = datasets['COCORaw']('./', transform=None, filter=filter) - dataloader = DATALOADERS['onnxrt_qlinearops'](dataset=dataset, batch_size=1) - for (inputs, labels) in dataloader: + filters = FILTERS("onnxrt_qlinearops") + filter = filters["LabelBalanceCOCORaw"](1) + datasets = Datasets("onnxrt_qlinearops") + dataset = datasets["COCORaw"]("./", transform=None, filter=filter) + dataloader = DATALOADERS["onnxrt_qlinearops"](dataset=dataset, batch_size=1) + for inputs, labels in dataloader: self.assertEqual(labels[0].shape[1], 1) - shutil.rmtree('annotations') - shutil.rmtree('val2017') + shutil.rmtree("annotations") + shutil.rmtree("val2017") + if __name__ == "__main__": unittest.main() - diff --git a/test/data/test_tokenization.py b/test/data/test_tokenization.py index e809d282923..0a8e1018cdc 100644 --- a/test/data/test_tokenization.py +++ b/test/data/test_tokenization.py @@ -1,27 +1,33 @@ -from neural_compressor.experimental.data.transforms.tokenization import FullTokenizer -import unittest import os import shutil +import unittest + +from neural_compressor.experimental.data.transforms.tokenization import FullTokenizer from neural_compressor.utils.utility import LazyImport -tf = LazyImport('tensorflow') + +tf = LazyImport("tensorflow") basic_text = ["un", "##aff", "##able"] + + class TestFullTokenizer(unittest.TestCase): @classmethod def setUpClass(cls): - os.makedirs('val', exist_ok=True) - vocab_file = 'val/temp.txt' - with tf.io.gfile.GFile(vocab_file,"w+") as f: + os.makedirs("val", exist_ok=True) + vocab_file = "val/temp.txt" + with tf.io.gfile.GFile(vocab_file, "w+") as f: for vocab in basic_text: - f.write(vocab + '\n') + f.write(vocab + "\n") f.close() + @classmethod def tearDownClass(cls): - if os.path.exists('val'): - shutil.rmtree('val') + if os.path.exists("val"): + shutil.rmtree("val") + def test_tokenizer(self): - tokenizer = FullTokenizer('val/temp.txt') - ids = [2,1,0] + tokenizer = FullTokenizer("val/temp.txt") + ids = [2, 1, 0] tokens = basic_text[::-1] tokens_to_ids = tokenizer.convert_tokens_to_ids(tokens) self.assertEqual(tokens_to_ids, ids) diff --git a/test/data/test_transform.py b/test/data/test_transform.py index c7a1c50e43b..aba4f25cc64 100644 --- a/test/data/test_transform.py +++ b/test/data/test_transform.py @@ -1,42 +1,47 @@ """Tests for the transform module.""" -import numpy as np +import os +import platform import random import unittest -import platform -import os -from neural_compressor.data import TRANSFORMS, DATALOADERS -from neural_compressor.utils.create_obj_from_config import get_postprocess, create_dataset -from neural_compressor.utils.utility import LazyImport + +import numpy as np from PIL import Image -mx = LazyImport('mxnet') -tf = LazyImport('tensorflow') -torch = LazyImport('torch') -torchvision = LazyImport('torchvision') + +from neural_compressor.data import DATALOADERS, TRANSFORMS +from neural_compressor.utils.create_obj_from_config import create_dataset, get_postprocess +from neural_compressor.utils.utility import LazyImport + +mx = LazyImport("mxnet") +tf = LazyImport("tensorflow") +torch = LazyImport("torch") +torchvision = LazyImport("torchvision") random.seed(1) np.random.seed(1) + class TestMetrics(unittest.TestCase): def test_tensorflow_2(self): image = np.ones([256, 256, 1]) - resize_kwargs = {"size":[224, 224]} + resize_kwargs = {"size": [224, 224]} transforms = TRANSFORMS(framework="tensorflow", process="preprocess") - resize = 
transforms['Resize'](**resize_kwargs) + resize = transforms["Resize"](**resize_kwargs) random_crop_kwargs = {"size": 128} - random_crop = transforms['RandomCrop'](**random_crop_kwargs) + random_crop = transforms["RandomCrop"](**random_crop_kwargs) transform_list = [resize, random_crop] - compose = transforms['Compose'](transform_list) + compose = transforms["Compose"](transform_list) image_result = compose((image, None)) self.assertEqual(image_result[0].shape, (128, 128)) + class TestONNXQLImagenetTransform(unittest.TestCase): @classmethod def setUpClass(cls): - cls.img = np.random.random_sample([600,600])*255 + cls.img = np.random.random_sample([600, 600]) * 255 def testResizeCropImagenetTransform(self): - transforms = TRANSFORMS('onnxrt_qlinearops', "preprocess") - transform = transforms['ResizeCropImagenet'](height=224, width=224, random_crop=True) + transforms = TRANSFORMS("onnxrt_qlinearops", "preprocess") + transform = transforms["ResizeCropImagenet"](height=224, width=224, random_crop=True) sample = (self.img, 0) result = transform(sample) resized_input = result[0] @@ -44,14 +49,15 @@ def testResizeCropImagenetTransform(self): self.assertEqual(len(resized_input[0]), 224) self.assertEqual(len(resized_input[0][0]), 224) + class TestONNXITImagenetTransform(unittest.TestCase): @classmethod def setUpClass(cls): - cls.img = np.random.random_sample([600,600,3])*255 + cls.img = np.random.random_sample([600, 600, 3]) * 255 def testResizeCropImagenetTransform(self): - transforms = TRANSFORMS('onnxrt_integerops', "preprocess") - transform = transforms['ResizeCropImagenet'](height=224, width=224) + transforms = TRANSFORMS("onnxrt_integerops", "preprocess") + transform = transforms["ResizeCropImagenet"](height=224, width=224) sample = (self.img, 0) result = transform(sample) resized_input = result[0] @@ -60,8 +66,8 @@ def testResizeCropImagenetTransform(self): self.assertEqual(len(resized_input[0][0]), 224) def testResizeWithAspectRatio(self): - transforms = TRANSFORMS('onnxrt_integerops', "preprocess") - transform = transforms['ResizeWithAspectRatio'](height=224, width=224) + transforms = TRANSFORMS("onnxrt_integerops", "preprocess") + transform = transforms["ResizeWithAspectRatio"](height=224, width=224) sample = (self.img, 0) result = transform(sample) resized_input = result[0] @@ -69,12 +75,14 @@ def testResizeWithAspectRatio(self): self.assertEqual(len(resized_input[0]), 256) self.assertEqual(len(resized_input[0][0]), 3) + class TestTensorflowImagenetTransform(unittest.TestCase): tf.compat.v1.disable_v2_behavior() + def testBilinearImagenetTransform(self): - transforms = TRANSFORMS('tensorflow', "preprocess") - transform = transforms['BilinearImagenet'](height=224, width=224) - rand_input = np.random.random_sample([600,600,3]).astype(np.float32) + transforms = TRANSFORMS("tensorflow", "preprocess") + transform = transforms["BilinearImagenet"](height=224, width=224) + rand_input = np.random.random_sample([600, 600, 3]).astype(np.float32) sample = (rand_input, 0) result = transform(sample) resized_input = result[0].eval(session=tf.compat.v1.Session()) @@ -82,54 +90,73 @@ def testBilinearImagenetTransform(self): self.assertEqual(len(resized_input[0]), 224) self.assertEqual(len(resized_input[0][0]), 3) - transforms = TRANSFORMS('onnxrt_qlinearops', "preprocess") - transform = transforms['BilinearImagenet'](height=224, width=224) - rand_input = np.random.random_sample([600,600,3]).astype(np.float32) + transforms = TRANSFORMS("onnxrt_qlinearops", "preprocess") + transform = 
transforms["BilinearImagenet"](height=224, width=224) + rand_input = np.random.random_sample([600, 600, 3]).astype(np.float32) sample = (rand_input, 0) result = transform(sample) self.assertEqual(len(resized_input), 224) self.assertEqual(len(resized_input[0]), 224) self.assertEqual(len(resized_input[0][0]), 3) - + def testResizeCropImagenetTransform1(self): - transforms = TRANSFORMS('tensorflow', "preprocess") - rand_input = np.random.random_sample([600,600,3]).astype(np.float32) + transforms = TRANSFORMS("tensorflow", "preprocess") + rand_input = np.random.random_sample([600, 600, 3]).astype(np.float32) sample = (rand_input, 0) - transform = transforms['ResizeCropImagenet'](height=224, width=224, random_crop=True, - random_flip_left_right=True) + transform = transforms["ResizeCropImagenet"]( + height=224, width=224, random_crop=True, random_flip_left_right=True + ) result = transform(sample) resized_input = result[0].eval(session=tf.compat.v1.Session()) self.assertEqual(len(resized_input), 224) self.assertEqual(len(resized_input[0]), 224) self.assertEqual(len(resized_input[0][0]), 3) - @unittest.skipIf(tf.version.VERSION < '2.5.0', "Skip tf.experimental.numpy.moveaxis") + @unittest.skipIf(tf.version.VERSION < "2.5.0", "Skip tf.experimental.numpy.moveaxis") def testResizeCropImagenetTransform2(self): - transforms = TRANSFORMS('tensorflow', "preprocess") - rand_input = np.random.random_sample([600,600,3]).astype(np.float32) + transforms = TRANSFORMS("tensorflow", "preprocess") + rand_input = np.random.random_sample([600, 600, 3]).astype(np.float32) sample = (rand_input, 0) - transform = transforms['ResizeCropImagenet'](height=224, width=224, random_crop=False, - random_flip_left_right=False, data_format='channels_last', subpixels='RGB') + transform = transforms["ResizeCropImagenet"]( + height=224, + width=224, + random_crop=False, + random_flip_left_right=False, + data_format="channels_last", + subpixels="RGB", + ) result = transform(sample) resized_input1 = result[0].eval(session=tf.compat.v1.Session()) - transform = transforms['ResizeCropImagenet'](height=224, width=224, random_crop=False, - random_flip_left_right=False, data_format='channels_last', subpixels='BGR') - result = transform(sample) + transform = transforms["ResizeCropImagenet"]( + height=224, + width=224, + random_crop=False, + random_flip_left_right=False, + data_format="channels_last", + subpixels="BGR", + ) + result = transform(sample) resized_input2 = result[0].eval(session=tf.compat.v1.Session()) - self.assertTrue((resized_input1[...,0]==resized_input2[...,-1]).all()) - - transform = transforms['ResizeCropImagenet'](height=224, width=224, random_crop=False, - random_flip_left_right=False, data_format='channels_first', subpixels='BGR') + self.assertTrue((resized_input1[..., 0] == resized_input2[..., -1]).all()) + + transform = transforms["ResizeCropImagenet"]( + height=224, + width=224, + random_crop=False, + random_flip_left_right=False, + data_format="channels_first", + subpixels="BGR", + ) rand_input = np.moveaxis(rand_input, -1, 0) sample = (rand_input, 0) - result = transform(sample) + result = transform(sample) resized_input3 = result[0].eval(session=tf.compat.v1.Session()) - self.assertTrue((resized_input1[...,0]==resized_input3[...,-1]).all()) + self.assertTrue((resized_input1[..., 0] == resized_input3[..., -1]).all()) def testLabelShift(self): - transforms = TRANSFORMS('tensorflow', "postprocess") - transform = transforms['LabelShift'](label_shift=1) - rand_input = 
np.random.random_sample([600,600,3]).astype(np.float32) + transforms = TRANSFORMS("tensorflow", "postprocess") + transform = transforms["LabelShift"](label_shift=1) + rand_input = np.random.random_sample([600, 600, 3]).astype(np.float32) sample = (rand_input, 1001) label = transform(sample)[1] self.assertEqual(label, 1000) @@ -137,1024 +164,1040 @@ def testLabelShift(self): self.assertTrue(isinstance(label, np.int64) or isinstance(label, np.int32)) else: self.assertTrue(isinstance(label, np.int32)) - - label = transform((rand_input, [(1,2,3)]))[1] + + label = transform((rand_input, [(1, 2, 3)]))[1] self.assertTrue(isinstance(label, list)) self.assertTrue(isinstance(label[0], tuple)) - label = transform((rand_input, [[1,2,3]]))[1] + label = transform((rand_input, [[1, 2, 3]]))[1] self.assertTrue(isinstance(label, list)) self.assertTrue(isinstance(label[0], list)) - label = transform((rand_input, [np.array([1,2,3])]))[1] + label = transform((rand_input, [np.array([1, 2, 3])]))[1] self.assertTrue(isinstance(label, list)) self.assertTrue(isinstance(label[0], np.ndarray)) - def testQuantizedInput(self): - transforms = TRANSFORMS('tensorflow', "preprocess") - transform = transforms['QuantizedInput'](dtype='uint8', scale=100) - rand_input = np.random.random_sample([600,600,3]).astype(np.float32) + transforms = TRANSFORMS("tensorflow", "preprocess") + transform = transforms["QuantizedInput"](dtype="uint8", scale=100) + rand_input = np.random.random_sample([600, 600, 3]).astype(np.float32) sample = (rand_input, 1001) result = transform(sample) quantized_input = result[0].eval(session=tf.compat.v1.Session()) self.assertLessEqual(quantized_input.max(), 255) self.assertGreaterEqual(quantized_input.min(), 0) - transform = transforms['QuantizedInput'](dtype='uint8') + transform = transforms["QuantizedInput"](dtype="uint8") sample = (rand_input, 1001) result = transform(sample) quantized_input = result[0] self.assertLessEqual(quantized_input.max(), 1) self.assertGreaterEqual(quantized_input.min(), 0) + class TestDataConversion(unittest.TestCase): @classmethod def setUpClass(cls): if platform.system().lower() == "windows": cls.skipTest(cls, "not support mxnet on windows yet") - cls.img = np.random.random_sample([10,10,3])*255 - cls.mx_trans = TRANSFORMS('mxnet', 'preprocess') - cls.pt_trans = TRANSFORMS('pytorch', 'preprocess') - + cls.img = np.random.random_sample([10, 10, 3]) * 255 + cls.mx_trans = TRANSFORMS("mxnet", "preprocess") + cls.pt_trans = TRANSFORMS("pytorch", "preprocess") + def testToPILImage(self): - trans = TestDataConversion.pt_trans['ToPILImage']() + trans = TestDataConversion.pt_trans["ToPILImage"]() image, _ = trans((TestDataConversion.img.astype(np.uint8), None)) self.assertTrue(isinstance(image, Image.Image)) def testToTensor(self): - trans = TestDataConversion.pt_trans['ToTensor']() + trans = TestDataConversion.pt_trans["ToTensor"]() image, _ = trans((TestDataConversion.img.astype(np.uint8), None)) self.assertTrue(isinstance(image, torch.Tensor)) - trans = TestDataConversion.mx_trans['ToTensor']() + trans = TestDataConversion.mx_trans["ToTensor"]() image, _ = trans((mx.nd.array(TestDataConversion.img), None)) - self.assertTrue(isinstance(image, mx.ndarray.NDArray)) # pylint: disable=no-member + self.assertTrue(isinstance(image, mx.ndarray.NDArray)) # pylint: disable=no-member def testToNDArray(self): - trans = TestDataConversion.mx_trans['ToNDArray']() + trans = TestDataConversion.mx_trans["ToNDArray"]() image, _ = trans((TestDataConversion.img.astype(np.uint8), None)) 
self.assertTrue(isinstance(image, mx.ndarray.NDArray)) + class TestSameTransfoms(unittest.TestCase): @classmethod def setUpClass(cls): if platform.system().lower() == "windows": cls.skipTest(cls, "not support mxnet on windows yet") - cls.img = np.random.random_sample([10,10,3])*255 - cls.tf_trans = TRANSFORMS('tensorflow', 'preprocess') - cls.pt_trans = TRANSFORMS('pytorch', 'preprocess') - cls.mx_trans = TRANSFORMS('mxnet', 'preprocess') - cls.ox_trans = TRANSFORMS('onnxrt_qlinearops', 'preprocess') + cls.img = np.random.random_sample([10, 10, 3]) * 255 + cls.tf_trans = TRANSFORMS("tensorflow", "preprocess") + cls.pt_trans = TRANSFORMS("pytorch", "preprocess") + cls.mx_trans = TRANSFORMS("mxnet", "preprocess") + cls.ox_trans = TRANSFORMS("onnxrt_qlinearops", "preprocess") cls.mx_img = mx.nd.array(cls.img.astype(np.uint8)) cls.pt_img = Image.fromarray(cls.img.astype(np.uint8)) cls.tf_img = tf.constant(cls.img) - _ = TRANSFORMS('tensorflow', 'postprocess') - _ = TRANSFORMS('pytorch', 'postprocess') - _ = TRANSFORMS('mxnet', 'postprocess') - _ = TRANSFORMS('onnxrt_qlinearops' , 'postprocess') - _ = TRANSFORMS('onnxrt_integerops', 'postprocess') + _ = TRANSFORMS("tensorflow", "postprocess") + _ = TRANSFORMS("pytorch", "postprocess") + _ = TRANSFORMS("mxnet", "postprocess") + _ = TRANSFORMS("onnxrt_qlinearops", "postprocess") + _ = TRANSFORMS("onnxrt_integerops", "postprocess") def testCast(self): - args = {'dtype': 'int64'} - tf_func = TestSameTransfoms.tf_trans['Cast'](**args) + args = {"dtype": "int64"} + tf_func = TestSameTransfoms.tf_trans["Cast"](**args) tf_result = tf_func((TestSameTransfoms.img, None))[0] - self.assertEqual(tf_result[0][0][0].dtype, 'int64') + self.assertEqual(tf_result[0][0][0].dtype, "int64") tf_result = tf_func((TestSameTransfoms.tf_img, None))[0] tf_result = tf_result.eval(session=tf.compat.v1.Session()) - self.assertEqual(tf_result[0][0][0].dtype, 'int64') - mx_func = TestSameTransfoms.mx_trans['Cast'](**args) + self.assertEqual(tf_result[0][0][0].dtype, "int64") + mx_func = TestSameTransfoms.mx_trans["Cast"](**args) mx_result = mx_func((TestSameTransfoms.mx_img, None)) self.assertEqual(mx_result[0][0][0].dtype, np.int64) - ox_func = TestSameTransfoms.ox_trans['Cast'](**args) + ox_func = TestSameTransfoms.ox_trans["Cast"](**args) ox_result = ox_func((TestSameTransfoms.img, None)) - self.assertEqual(ox_result[0][0][0].dtype, 'int64') + self.assertEqual(ox_result[0][0][0].dtype, "int64") - totensor = TestSameTransfoms.pt_trans['ToTensor']() - cast = TestSameTransfoms.pt_trans['Cast'](**args) - pt_func = TestSameTransfoms.pt_trans['Compose']([totensor, cast]) + totensor = TestSameTransfoms.pt_trans["ToTensor"]() + cast = TestSameTransfoms.pt_trans["Cast"](**args) + pt_func = TestSameTransfoms.pt_trans["Compose"]([totensor, cast]) pt_result = pt_func((TestSameTransfoms.pt_img, None)) self.assertEqual(pt_result[0][0][0].dtype, torch.int64) def testCropToBoundingBox(self): - args = {'offset_height':2, 'offset_width':2, 'target_height':5, 'target_width':5} - pt_func = TestSameTransfoms.pt_trans['CropToBoundingBox'](**args) + args = {"offset_height": 2, "offset_width": 2, "target_height": 5, "target_width": 5} + pt_func = TestSameTransfoms.pt_trans["CropToBoundingBox"](**args) pt_result = pt_func((TestSameTransfoms.pt_img, None))[0] - self.assertEqual(pt_result.size, (5,5)) + self.assertEqual(pt_result.size, (5, 5)) - ox_func = TestSameTransfoms.ox_trans['CropToBoundingBox'](**args) + ox_func = TestSameTransfoms.ox_trans["CropToBoundingBox"](**args) ox_result = 
ox_func((TestSameTransfoms.img, None))[0] - self.assertEqual(ox_result.shape, (5,5,3)) + self.assertEqual(ox_result.shape, (5, 5, 3)) - mx_func = TestSameTransfoms.mx_trans['CropToBoundingBox'](**args) + mx_func = TestSameTransfoms.mx_trans["CropToBoundingBox"](**args) mx_result = mx_func((TestSameTransfoms.mx_img, None))[0] - self.assertEqual(mx_result.shape, (5,5,3)) + self.assertEqual(mx_result.shape, (5, 5, 3)) - tf_func = TestSameTransfoms.tf_trans['CropToBoundingBox'](**args) + tf_func = TestSameTransfoms.tf_trans["CropToBoundingBox"](**args) tf_result = tf_func((TestSameTransfoms.img, None))[0] - self.assertEqual(tf_result.shape, (5,5,3)) + self.assertEqual(tf_result.shape, (5, 5, 3)) tf_result = tf_func((TestSameTransfoms.tf_img, None))[0] tf_result = tf_result.eval(session=tf.compat.v1.Session()) - self.assertEqual(tf_result.shape, (5,5,3)) - + self.assertEqual(tf_result.shape, (5, 5, 3)) + def testNormalize(self): args = {} - normalize = TestSameTransfoms.pt_trans['Normalize'](**args) - totensor = TestSameTransfoms.pt_trans['ToTensor']() - pt_func = TestSameTransfoms.pt_trans['Compose']([totensor, normalize]) + normalize = TestSameTransfoms.pt_trans["Normalize"](**args) + totensor = TestSameTransfoms.pt_trans["ToTensor"]() + pt_func = TestSameTransfoms.pt_trans["Compose"]([totensor, normalize]) pt_result = pt_func((TestSameTransfoms.pt_img, None))[0] - self.assertEqual(TestSameTransfoms.img.astype( - np.uint8)[0][0][0]/255., pt_result[0][0][0]) - args = {'std': [0.]} + self.assertEqual(TestSameTransfoms.img.astype(np.uint8)[0][0][0] / 255.0, pt_result[0][0][0]) + args = {"std": [0.0]} with self.assertRaises(ValueError): - TestSameTransfoms.pt_trans['Normalize'](**args) + TestSameTransfoms.pt_trans["Normalize"](**args) def testRescale(self): - ox_func = TestSameTransfoms.ox_trans['Rescale']() + ox_func = TestSameTransfoms.ox_trans["Rescale"]() ox_result = ox_func((TestSameTransfoms.img, None))[0] - self.assertAlmostEqual(ox_result[1][2][0], TestSameTransfoms.img[1][2][0]/255.) 
+ self.assertAlmostEqual(ox_result[1][2][0], TestSameTransfoms.img[1][2][0] / 255.0) def testTranspose(self): - args = {'perm': [2, 0, 1]} - tf_func = TestSameTransfoms.tf_trans['Transpose'](**args) + args = {"perm": [2, 0, 1]} + tf_func = TestSameTransfoms.tf_trans["Transpose"](**args) tf_result = tf_func((TestSameTransfoms.img, None))[0] - ox_func = TestSameTransfoms.ox_trans['Transpose'](**args) + ox_func = TestSameTransfoms.ox_trans["Transpose"](**args) ox_result = ox_func((TestSameTransfoms.img, None))[0] - mx_func = TestSameTransfoms.mx_trans['Transpose'](**args) + mx_func = TestSameTransfoms.mx_trans["Transpose"](**args) mx_result = mx_func((TestSameTransfoms.mx_img, None))[0] - pt_transpose = TestSameTransfoms.pt_trans['Transpose'](**args) - pt_totensor = TestSameTransfoms.pt_trans['ToTensor']() - pt_compose = TestSameTransfoms.pt_trans['Compose']([pt_totensor, pt_transpose]) + pt_transpose = TestSameTransfoms.pt_trans["Transpose"](**args) + pt_totensor = TestSameTransfoms.pt_trans["ToTensor"]() + pt_compose = TestSameTransfoms.pt_trans["Compose"]([pt_totensor, pt_transpose]) pt_result = pt_compose((TestSameTransfoms.pt_img, None))[0] - - self.assertEqual(tf_result.shape, (3,10,10)) - self.assertEqual(ox_result.shape, (3,10,10)) - self.assertEqual(mx_result.shape, (3,10,10)) - self.assertEqual(pt_result.shape, (10,3,10)) + + self.assertEqual(tf_result.shape, (3, 10, 10)) + self.assertEqual(ox_result.shape, (3, 10, 10)) + self.assertEqual(mx_result.shape, (3, 10, 10)) + self.assertEqual(pt_result.shape, (10, 3, 10)) tf_result = tf_func((TestSameTransfoms.tf_img, None))[0] tf_result = tf_result.eval(session=tf.compat.v1.Session()) - self.assertEqual(tf_result.shape, (3,10,10)) - + self.assertEqual(tf_result.shape, (3, 10, 10)) + def testCenterCrop(self): - args = {'size':[4,4]} - tf_func = TestSameTransfoms.tf_trans['CenterCrop'](**args) + args = {"size": [4, 4]} + tf_func = TestSameTransfoms.tf_trans["CenterCrop"](**args) tf_result = tf_func((TestSameTransfoms.img, None))[0] - pt_func = TestSameTransfoms.pt_trans['CenterCrop'](**args) + pt_func = TestSameTransfoms.pt_trans["CenterCrop"](**args) pt_result = pt_func((TestSameTransfoms.pt_img, None))[0] - mx_func = TestSameTransfoms.mx_trans['CenterCrop'](**args) + mx_func = TestSameTransfoms.mx_trans["CenterCrop"](**args) mx_result = mx_func((TestSameTransfoms.mx_img, None))[0] - self.assertEqual(tf_result.shape, (4,4,3)) - self.assertEqual(pt_result.size, (4,4)) - self.assertEqual(mx_result.shape, (4,4,3)) + self.assertEqual(tf_result.shape, (4, 4, 3)) + self.assertEqual(pt_result.size, (4, 4)) + self.assertEqual(mx_result.shape, (4, 4, 3)) self.assertEqual(np.array(pt_result)[0][0][0], mx_result.asnumpy()[0][0][0]) self.assertEqual(np.array(pt_result)[0][0][0], int(tf_result[0][0][0])) tf_result = tf_func((TestSameTransfoms.tf_img, None))[0] tf_result = tf_result.eval(session=tf.compat.v1.Session()) - self.assertEqual(tf_result.shape, (4,4,3)) + self.assertEqual(tf_result.shape, (4, 4, 3)) - tf_result = tf_func((tf.constant(TestSameTransfoms.img.reshape((1,10,10,3))), None))[0] + tf_result = tf_func((tf.constant(TestSameTransfoms.img.reshape((1, 10, 10, 3))), None))[0] tf_result = tf_result.eval(session=tf.compat.v1.Session()) - self.assertEqual(tf_result.shape, (1,4,4,3)) + self.assertEqual(tf_result.shape, (1, 4, 4, 3)) - args = {'size':4} - tf_func = TestSameTransfoms.tf_trans['CenterCrop'](**args) + args = {"size": 4} + tf_func = TestSameTransfoms.tf_trans["CenterCrop"](**args) tf_result = tf_func((TestSameTransfoms.img, 
None))[0] - pt_func = TestSameTransfoms.pt_trans['CenterCrop'](**args) + pt_func = TestSameTransfoms.pt_trans["CenterCrop"](**args) pt_result = pt_func((TestSameTransfoms.pt_img, None))[0] - mx_func = TestSameTransfoms.mx_trans['CenterCrop'](**args) + mx_func = TestSameTransfoms.mx_trans["CenterCrop"](**args) mx_result = mx_func((TestSameTransfoms.mx_img, None))[0] - self.assertEqual(tf_result.shape, (4,4,3)) - self.assertEqual(pt_result.size, (4,4)) - self.assertEqual(mx_result.shape, (4,4,3)) + self.assertEqual(tf_result.shape, (4, 4, 3)) + self.assertEqual(pt_result.size, (4, 4)) + self.assertEqual(mx_result.shape, (4, 4, 3)) self.assertEqual(np.array(pt_result)[0][0][0], mx_result.asnumpy()[0][0][0]) self.assertEqual(np.array(pt_result)[0][0][0], int(tf_result[0][0][0])) - - args = {'size':[4]} - tf_func = TestSameTransfoms.tf_trans['CenterCrop'](**args) + + args = {"size": [4]} + tf_func = TestSameTransfoms.tf_trans["CenterCrop"](**args) tf_result = tf_func((TestSameTransfoms.img, None))[0] - self.assertEqual(tf_result.shape, (4,4,3)) + self.assertEqual(tf_result.shape, (4, 4, 3)) with self.assertRaises(ValueError): - tf_func = TestSameTransfoms.tf_trans['CenterCrop'](**args) + tf_func = TestSameTransfoms.tf_trans["CenterCrop"](**args) tf_result = tf_func((np.array([[TestSameTransfoms.img]]), None)) with self.assertRaises(ValueError): - tf_func = TestSameTransfoms.tf_trans['CenterCrop'](**args) - tf_result = tf_func((tf.constant(TestSameTransfoms.img.reshape((1,1,10,10,3))), None)) + tf_func = TestSameTransfoms.tf_trans["CenterCrop"](**args) + tf_result = tf_func((tf.constant(TestSameTransfoms.img.reshape((1, 1, 10, 10, 3))), None)) - args = {'size':[20]} + args = {"size": [20]} with self.assertRaises(ValueError): - tf_func = TestSameTransfoms.tf_trans['CenterCrop'](**args) + tf_func = TestSameTransfoms.tf_trans["CenterCrop"](**args) tf_result = tf_func((TestSameTransfoms.img, None)) with self.assertRaises(ValueError): - tf_func = TestSameTransfoms.tf_trans['CenterCrop'](**args) + tf_func = TestSameTransfoms.tf_trans["CenterCrop"](**args) tf_result = tf_func((TestSameTransfoms.tf_img, None)) def testResizeWithRatio(self): - args = {'padding': True} - label = [[0.1,0.1,0.5,0.5], [], [], []] - tf_func = TestSameTransfoms.tf_trans['ResizeWithRatio'](**args) + args = {"padding": True} + label = [[0.1, 0.1, 0.5, 0.5], [], [], []] + tf_func = TestSameTransfoms.tf_trans["ResizeWithRatio"](**args) tf_result = tf_func((TestSameTransfoms.img, label))[0] - self.assertEqual(tf_result.shape, (1365,1365,3)) - - args = {'padding': False} - tf_func = TestSameTransfoms.tf_trans['ResizeWithRatio'](**args) + self.assertEqual(tf_result.shape, (1365, 1365, 3)) + + args = {"padding": False} + tf_func = TestSameTransfoms.tf_trans["ResizeWithRatio"](**args) tf_result = tf_func((TestSameTransfoms.img, label))[0] - self.assertTrue((tf_result.shape[0]==800 or tf_result.shape[1] ==1365)) - + self.assertTrue((tf_result.shape[0] == 800 or tf_result.shape[1] == 1365)) + def testResize(self): - tf_func = TestSameTransfoms.tf_trans['Resize'](**{'size':[4,5]}) + tf_func = TestSameTransfoms.tf_trans["Resize"](**{"size": [4, 5]}) tf_result = tf_func((TestSameTransfoms.img, None))[0] - pt_func = TestSameTransfoms.pt_trans['Resize'](**{'size':[4,5]}) + pt_func = TestSameTransfoms.pt_trans["Resize"](**{"size": [4, 5]}) pt_result = pt_func((TestSameTransfoms.pt_img, None))[0] - mx_func = TestSameTransfoms.mx_trans['Resize'](**{'size':[4,5]}) + mx_func = TestSameTransfoms.mx_trans["Resize"](**{"size": [4, 5]}) mx_result = 
mx_func((TestSameTransfoms.mx_img, None))[0] - self.assertEqual(tf_result.shape, (5,4,3)) - self.assertEqual(pt_result.size, (5,4)) - self.assertEqual(mx_result.shape, (4,5,3)) + self.assertEqual(tf_result.shape, (5, 4, 3)) + self.assertEqual(pt_result.size, (5, 4)) + self.assertEqual(mx_result.shape, (4, 5, 3)) tf_result = tf_func((TestSameTransfoms.tf_img, None))[0] tf_result = tf_result.eval(session=tf.compat.v1.Session()) - self.assertEqual(tf_result.shape, (4,5,3)) + self.assertEqual(tf_result.shape, (4, 5, 3)) - args = {'size': 4} - tf_func = TestSameTransfoms.tf_trans['Resize'](**args) + args = {"size": 4} + tf_func = TestSameTransfoms.tf_trans["Resize"](**args) tf_result = tf_func((TestSameTransfoms.img, None))[0] - pt_func = TestSameTransfoms.pt_trans['Resize'](**args) + pt_func = TestSameTransfoms.pt_trans["Resize"](**args) pt_result = pt_func((TestSameTransfoms.pt_img, None))[0] - mx_func = TestSameTransfoms.mx_trans['Resize'](**args) + mx_func = TestSameTransfoms.mx_trans["Resize"](**args) mx_result = mx_func((TestSameTransfoms.mx_img, None))[0] - self.assertEqual(tf_result.shape, (4,4,3)) - self.assertEqual(pt_result.size, (4,4)) - self.assertEqual(mx_result.shape, (4,4,3)) + self.assertEqual(tf_result.shape, (4, 4, 3)) + self.assertEqual(pt_result.size, (4, 4)) + self.assertEqual(mx_result.shape, (4, 4, 3)) - args = {'size': [4]} - tf_func = TestSameTransfoms.tf_trans['Resize'](**args) + args = {"size": [4]} + tf_func = TestSameTransfoms.tf_trans["Resize"](**args) tf_result = tf_func((TestSameTransfoms.img, None))[0] - mx_func = TestSameTransfoms.mx_trans['Resize'](**args) + mx_func = TestSameTransfoms.mx_trans["Resize"](**args) mx_result = mx_func((TestSameTransfoms.mx_img, None))[0] - self.assertEqual(tf_result.shape, (4,4,3)) - self.assertEqual(mx_result.shape, (4,4,3)) + self.assertEqual(tf_result.shape, (4, 4, 3)) + self.assertEqual(mx_result.shape, (4, 4, 3)) - args = {'size': 4, 'interpolation':'test'} + args = {"size": 4, "interpolation": "test"} with self.assertRaises(ValueError): - TestSameTransfoms.tf_trans['Resize'](**args) + TestSameTransfoms.tf_trans["Resize"](**args) with self.assertRaises(ValueError): - TestSameTransfoms.pt_trans['Resize'](**args) + TestSameTransfoms.pt_trans["Resize"](**args) with self.assertRaises(ValueError): - TestSameTransfoms.mx_trans['Resize'](**args) - + TestSameTransfoms.mx_trans["Resize"](**args) + def testRandomResizedCrop(self): - tf_func = TestSameTransfoms.tf_trans['RandomResizedCrop'](**{'size':[4,5]}) + tf_func = TestSameTransfoms.tf_trans["RandomResizedCrop"](**{"size": [4, 5]}) tf_result = tf_func((TestSameTransfoms.img, None))[0] - pt_func = TestSameTransfoms.pt_trans['RandomResizedCrop'](**{'size':[4,5]}) + pt_func = TestSameTransfoms.pt_trans["RandomResizedCrop"](**{"size": [4, 5]}) pt_result = pt_func((TestSameTransfoms.pt_img, None))[0] - mx_func = TestSameTransfoms.mx_trans['RandomResizedCrop'](**{'size':[4,5]}) + mx_func = TestSameTransfoms.mx_trans["RandomResizedCrop"](**{"size": [4, 5]}) mx_result = mx_func((TestSameTransfoms.mx_img, None))[0] - self.assertEqual(tf_result.shape, (5,4,3)) - self.assertEqual(pt_result.size, (5,4)) - self.assertEqual(mx_result.shape, (4,5,3)) - + self.assertEqual(tf_result.shape, (5, 4, 3)) + self.assertEqual(pt_result.size, (5, 4)) + self.assertEqual(mx_result.shape, (4, 5, 3)) + tf_result = tf_func((TestSameTransfoms.tf_img, None))[0] tf_result = tf_result.eval(session=tf.compat.v1.Session()) - self.assertEqual(tf_result.shape, (4,5,3)) + self.assertEqual(tf_result.shape, (4, 5, 3)) - 
args = {'size': [4]} - tf_func = TestSameTransfoms.tf_trans['RandomResizedCrop'](**args) + args = {"size": [4]} + tf_func = TestSameTransfoms.tf_trans["RandomResizedCrop"](**args) tf_result = tf_func((TestSameTransfoms.img, None))[0] - self.assertEqual(tf_result.shape, (4,4,3)) - mx_func = TestSameTransfoms.mx_trans['RandomResizedCrop'](**args) + self.assertEqual(tf_result.shape, (4, 4, 3)) + mx_func = TestSameTransfoms.mx_trans["RandomResizedCrop"](**args) mx_result = mx_func((TestSameTransfoms.mx_img, None))[0] - self.assertEqual(mx_result.shape, (4,4,3)) + self.assertEqual(mx_result.shape, (4, 4, 3)) - args = {'size': 4} - tf_func = TestSameTransfoms.tf_trans['RandomResizedCrop'](**args) + args = {"size": 4} + tf_func = TestSameTransfoms.tf_trans["RandomResizedCrop"](**args) tf_result = tf_func((TestSameTransfoms.img, None))[0] - pt_func = TestSameTransfoms.pt_trans['RandomResizedCrop'](**args) + pt_func = TestSameTransfoms.pt_trans["RandomResizedCrop"](**args) pt_result = pt_func((TestSameTransfoms.pt_img, None))[0] - mx_func = TestSameTransfoms.mx_trans['RandomResizedCrop'](**args) + mx_func = TestSameTransfoms.mx_trans["RandomResizedCrop"](**args) mx_result = mx_func((TestSameTransfoms.mx_img, None))[0] - self.assertEqual(tf_result.shape, (4,4,3)) - self.assertEqual(pt_result.size, (4,4)) - self.assertEqual(mx_result.shape, (4,4,3)) + self.assertEqual(tf_result.shape, (4, 4, 3)) + self.assertEqual(pt_result.size, (4, 4)) + self.assertEqual(mx_result.shape, (4, 4, 3)) - args = {'size': 4, 'scale':(0.8, 0.2)} + args = {"size": 4, "scale": (0.8, 0.2)} with self.assertRaises(ValueError): - TestSameTransfoms.tf_trans['RandomResizedCrop'](**args) + TestSameTransfoms.tf_trans["RandomResizedCrop"](**args) with self.assertRaises(ValueError): - TestSameTransfoms.pt_trans['RandomResizedCrop'](**args) + TestSameTransfoms.pt_trans["RandomResizedCrop"](**args) with self.assertRaises(ValueError): - TestSameTransfoms.mx_trans['RandomResizedCrop'](**args) - - args = {'size': 4, 'interpolation':'test'} + TestSameTransfoms.mx_trans["RandomResizedCrop"](**args) + + args = {"size": 4, "interpolation": "test"} with self.assertRaises(ValueError): - TestSameTransfoms.tf_trans['RandomResizedCrop'](**args) + TestSameTransfoms.tf_trans["RandomResizedCrop"](**args) with self.assertRaises(ValueError): - TestSameTransfoms.pt_trans['RandomResizedCrop'](**args) + TestSameTransfoms.pt_trans["RandomResizedCrop"](**args) with self.assertRaises(ValueError): - TestSameTransfoms.mx_trans['RandomResizedCrop'](**args) + TestSameTransfoms.mx_trans["RandomResizedCrop"](**args) def testCropResize(self): - args = {'x':0, 'y':0, 'width':10, 'height':10, 'size':[5,5]} - tf_func = TestSameTransfoms.tf_trans['CropResize'](**args) + args = {"x": 0, "y": 0, "width": 10, "height": 10, "size": [5, 5]} + tf_func = TestSameTransfoms.tf_trans["CropResize"](**args) tf_result = tf_func((TestSameTransfoms.img, None))[0] - mx_func = TestSameTransfoms.mx_trans['CropResize'](**args) + mx_func = TestSameTransfoms.mx_trans["CropResize"](**args) mx_result = mx_func((TestSameTransfoms.mx_img, None))[0] - ox_func = TestSameTransfoms.ox_trans['CropResize'](**args) + ox_func = TestSameTransfoms.ox_trans["CropResize"](**args) ox_result = ox_func((TestSameTransfoms.img, None))[0] - pt_func = TestSameTransfoms.pt_trans['CropResize'](**args) + pt_func = TestSameTransfoms.pt_trans["CropResize"](**args) pt_result = pt_func((TestSameTransfoms.pt_img, None))[0] - self.assertEqual(tf_result.shape, (5,5,3)) - self.assertEqual(mx_result.shape, (5,5,3)) - 
self.assertEqual(ox_result.shape, (5,5,3)) - self.assertEqual(pt_result.size, (5,5)) + self.assertEqual(tf_result.shape, (5, 5, 3)) + self.assertEqual(mx_result.shape, (5, 5, 3)) + self.assertEqual(ox_result.shape, (5, 5, 3)) + self.assertEqual(pt_result.size, (5, 5)) tf_result = tf_func((TestSameTransfoms.tf_img, None))[0] tf_result = tf_result.eval(session=tf.compat.v1.Session()) - self.assertEqual(tf_result.shape, (5,5,3)) + self.assertEqual(tf_result.shape, (5, 5, 3)) - args = {'x':0, 'y':0, 'width':10, 'height':10, 'size':5} - tf_func = TestSameTransfoms.tf_trans['CropResize'](**args) + args = {"x": 0, "y": 0, "width": 10, "height": 10, "size": 5} + tf_func = TestSameTransfoms.tf_trans["CropResize"](**args) tf_result = tf_func((TestSameTransfoms.img, None))[0] - mx_func = TestSameTransfoms.mx_trans['CropResize'](**args) + mx_func = TestSameTransfoms.mx_trans["CropResize"](**args) mx_result = mx_func((TestSameTransfoms.mx_img, None))[0] - ox_func = TestSameTransfoms.ox_trans['CropResize'](**args) + ox_func = TestSameTransfoms.ox_trans["CropResize"](**args) ox_result = ox_func((TestSameTransfoms.img, None))[0] - self.assertEqual(tf_result.shape, (5,5,3)) - self.assertEqual(mx_result.shape, (5,5,3)) - self.assertEqual(ox_result.shape, (5,5,3)) + self.assertEqual(tf_result.shape, (5, 5, 3)) + self.assertEqual(mx_result.shape, (5, 5, 3)) + self.assertEqual(ox_result.shape, (5, 5, 3)) - args = {'x':0, 'y':0, 'width':10, 'height':10, 'size':[5]} - tf_func = TestSameTransfoms.tf_trans['CropResize'](**args) + args = {"x": 0, "y": 0, "width": 10, "height": 10, "size": [5]} + tf_func = TestSameTransfoms.tf_trans["CropResize"](**args) tf_result = tf_func((TestSameTransfoms.img, None))[0] - mx_func = TestSameTransfoms.mx_trans['CropResize'](**args) + mx_func = TestSameTransfoms.mx_trans["CropResize"](**args) mx_result = mx_func((TestSameTransfoms.mx_img, None))[0] - ox_func = TestSameTransfoms.ox_trans['CropResize'](**args) + ox_func = TestSameTransfoms.ox_trans["CropResize"](**args) ox_result = ox_func((TestSameTransfoms.img, None))[0] - self.assertEqual(tf_result.shape, (5,5,3)) - self.assertEqual(mx_result.shape, (5,5,3)) - self.assertEqual(ox_result.shape, (5,5,3)) + self.assertEqual(tf_result.shape, (5, 5, 3)) + self.assertEqual(mx_result.shape, (5, 5, 3)) + self.assertEqual(ox_result.shape, (5, 5, 3)) - args = {'x':0, 'y':0, 'width':10, 'height':10, 'size':[5,5]} - tf_func = TestSameTransfoms.tf_trans['CropResize'](**args) + args = {"x": 0, "y": 0, "width": 10, "height": 10, "size": [5, 5]} + tf_func = TestSameTransfoms.tf_trans["CropResize"](**args) tf_result = tf_func((TestSameTransfoms.img, None))[0] - mx_func = TestSameTransfoms.mx_trans['CropResize'](**args) + mx_func = TestSameTransfoms.mx_trans["CropResize"](**args) mx_result = mx_func((TestSameTransfoms.mx_img, None))[0] - ox_func = TestSameTransfoms.ox_trans['CropResize'](**args) + ox_func = TestSameTransfoms.ox_trans["CropResize"](**args) ox_result = ox_func((TestSameTransfoms.img, None))[0] - self.assertEqual(tf_result.shape, (5,5,3)) - self.assertEqual(mx_result.shape, (5,5,3)) - self.assertEqual(ox_result.shape, (5,5,3)) + self.assertEqual(tf_result.shape, (5, 5, 3)) + self.assertEqual(mx_result.shape, (5, 5, 3)) + self.assertEqual(ox_result.shape, (5, 5, 3)) - args = {'x':0, 'y':0, 'width':10, 'height':10, 'size':5, 'interpolation':'test'} + args = {"x": 0, "y": 0, "width": 10, "height": 10, "size": 5, "interpolation": "test"} with self.assertRaises(ValueError): - TestSameTransfoms.ox_trans['CropResize'](**args) + 
TestSameTransfoms.ox_trans["CropResize"](**args) with self.assertRaises(ValueError): - TestSameTransfoms.mx_trans['CropResize'](**args) + TestSameTransfoms.mx_trans["CropResize"](**args) with self.assertRaises(ValueError): - TestSameTransfoms.tf_trans['CropResize'](**args) + TestSameTransfoms.tf_trans["CropResize"](**args) with self.assertRaises(ValueError): - TestSameTransfoms.pt_trans['CropResize'](**args) + TestSameTransfoms.pt_trans["CropResize"](**args) def testRandomHorizontalFlip(self): - tf_func = TestSameTransfoms.tf_trans['RandomHorizontalFlip']() + tf_func = TestSameTransfoms.tf_trans["RandomHorizontalFlip"]() tf_result = tf_func((TestSameTransfoms.img, None))[0] - ox_func = TestSameTransfoms.ox_trans['RandomHorizontalFlip']() + ox_func = TestSameTransfoms.ox_trans["RandomHorizontalFlip"]() ox_result = ox_func((TestSameTransfoms.img, None))[0] - pt_func = TestSameTransfoms.pt_trans['RandomHorizontalFlip']() + pt_func = TestSameTransfoms.pt_trans["RandomHorizontalFlip"]() pt_result = pt_func((TestSameTransfoms.pt_img, None))[0] - mx_func = TestSameTransfoms.mx_trans['RandomHorizontalFlip']() + mx_func = TestSameTransfoms.mx_trans["RandomHorizontalFlip"]() mx_result = mx_func((TestSameTransfoms.mx_img, None))[0] self.assertTrue( - (np.array(TestSameTransfoms.pt_img) == np.array(pt_result)).all() or - (np.fliplr(np.array(TestSameTransfoms.pt_img)) == np.array(pt_result)).all() + (np.array(TestSameTransfoms.pt_img) == np.array(pt_result)).all() + or (np.fliplr(np.array(TestSameTransfoms.pt_img)) == np.array(pt_result)).all() ) self.assertTrue( - (TestSameTransfoms.img == tf_result).all() or - (np.fliplr(TestSameTransfoms.img) == tf_result).all() + (TestSameTransfoms.img == tf_result).all() or (np.fliplr(TestSameTransfoms.img) == tf_result).all() ) self.assertTrue( - (TestSameTransfoms.img == ox_result).all() or - (np.fliplr(TestSameTransfoms.img) == ox_result).all() + (TestSameTransfoms.img == ox_result).all() or (np.fliplr(TestSameTransfoms.img) == ox_result).all() ) self.assertTrue( - (TestSameTransfoms.mx_img.asnumpy() == mx_result.asnumpy()).all() or - (np.fliplr(TestSameTransfoms.mx_img.asnumpy()) == mx_result.asnumpy()).all() + (TestSameTransfoms.mx_img.asnumpy() == mx_result.asnumpy()).all() + or (np.fliplr(TestSameTransfoms.mx_img.asnumpy()) == mx_result.asnumpy()).all() ) - + tf_result = tf_func((TestSameTransfoms.tf_img, None))[0] tf_result = tf_result.eval(session=tf.compat.v1.Session()) self.assertTrue( - (TestSameTransfoms.img == tf_result).all() or - (np.fliplr(TestSameTransfoms.img) == tf_result).all() + (TestSameTransfoms.img == tf_result).all() or (np.fliplr(TestSameTransfoms.img) == tf_result).all() ) def testRandomVerticalFlip(self): - tf_func = TestSameTransfoms.tf_trans['RandomVerticalFlip']() + tf_func = TestSameTransfoms.tf_trans["RandomVerticalFlip"]() tf_result = tf_func((TestSameTransfoms.img, None))[0] - ox_func = TestSameTransfoms.ox_trans['RandomVerticalFlip']() + ox_func = TestSameTransfoms.ox_trans["RandomVerticalFlip"]() ox_result = ox_func((TestSameTransfoms.img, None))[0] - pt_func = TestSameTransfoms.pt_trans['RandomVerticalFlip']() + pt_func = TestSameTransfoms.pt_trans["RandomVerticalFlip"]() pt_result = pt_func((TestSameTransfoms.pt_img, None))[0] - mx_func = TestSameTransfoms.mx_trans['RandomVerticalFlip']() + mx_func = TestSameTransfoms.mx_trans["RandomVerticalFlip"]() mx_result = mx_func((TestSameTransfoms.mx_img, None))[0] self.assertTrue( - (np.array(TestSameTransfoms.pt_img) == np.array(pt_result)).all() or - 
(np.flipud(np.array(TestSameTransfoms.pt_img)) == np.array(pt_result)).all() + (np.array(TestSameTransfoms.pt_img) == np.array(pt_result)).all() + or (np.flipud(np.array(TestSameTransfoms.pt_img)) == np.array(pt_result)).all() ) self.assertTrue( - (TestSameTransfoms.img == tf_result).all() or - (np.flipud(TestSameTransfoms.img) == tf_result).all() + (TestSameTransfoms.img == tf_result).all() or (np.flipud(TestSameTransfoms.img) == tf_result).all() ) self.assertTrue( - (TestSameTransfoms.img == ox_result).all() or - (np.flipud(TestSameTransfoms.img) == ox_result).all() + (TestSameTransfoms.img == ox_result).all() or (np.flipud(TestSameTransfoms.img) == ox_result).all() ) self.assertTrue( - (TestSameTransfoms.mx_img.asnumpy() == mx_result.asnumpy()).all() or - (np.flipud(TestSameTransfoms.mx_img.asnumpy()) == mx_result.asnumpy()).all() + (TestSameTransfoms.mx_img.asnumpy() == mx_result.asnumpy()).all() + or (np.flipud(TestSameTransfoms.mx_img.asnumpy()) == mx_result.asnumpy()).all() ) - + tf_result = tf_func((TestSameTransfoms.tf_img, None))[0] tf_result = tf_result.eval(session=tf.compat.v1.Session()) self.assertTrue( - (TestSameTransfoms.img == tf_result).all() or - (np.flipud(TestSameTransfoms.img) == tf_result).all() + (TestSameTransfoms.img == tf_result).all() or (np.flipud(TestSameTransfoms.img) == tf_result).all() ) + class TestTFTransorm(unittest.TestCase): @classmethod def setUpClass(cls): - cls.img = np.ones([10,10,3]) + cls.img = np.ones([10, 10, 3]) cls.tf_img = tf.constant(cls.img) - cls.transforms = TRANSFORMS('tensorflow', 'preprocess') + cls.transforms = TRANSFORMS("tensorflow", "preprocess") cls.tf_img = tf.constant(cls.img) def testRandomCrop(self): - args = {'size': [50]} - transform = TestTFTransorm.transforms['RandomCrop'](**args) + args = {"size": [50]} + transform = TestTFTransorm.transforms["RandomCrop"](**args) self.assertRaises(ValueError, transform, (TestTFTransorm.img, None)) self.assertRaises(ValueError, transform, (TestTFTransorm.tf_img, None)) - - args = {'size': [5, 5]} - transform = TestTFTransorm.transforms['RandomCrop'](**args) + + args = {"size": [5, 5]} + transform = TestTFTransorm.transforms["RandomCrop"](**args) img_result = transform((TestTFTransorm.img, None))[0] - self.assertEqual(img_result.shape, (5,5,3)) - tf_result = transform((tf.constant(TestTFTransorm.img.reshape((1,10,10,3))), None))[0] + self.assertEqual(img_result.shape, (5, 5, 3)) + tf_result = transform((tf.constant(TestTFTransorm.img.reshape((1, 10, 10, 3))), None))[0] tf_result = tf_result.eval(session=tf.compat.v1.Session()) - self.assertEqual(tf_result.shape, (1,5,5,3)) + self.assertEqual(tf_result.shape, (1, 5, 5, 3)) - args = {'size': [10,10]} - transform = TestTFTransorm.transforms['RandomCrop'](**args) + args = {"size": [10, 10]} + transform = TestTFTransorm.transforms["RandomCrop"](**args) img_result = transform((TestTFTransorm.img, None))[0] - self.assertEqual(img_result.shape, (10,10,3)) + self.assertEqual(img_result.shape, (10, 10, 3)) tf_result = transform((TestTFTransorm.tf_img, None))[0] tf_result = tf_result.eval(session=tf.compat.v1.Session()) - self.assertEqual(tf_result.shape, (10,10,3)) + self.assertEqual(tf_result.shape, (10, 10, 3)) def testPaddedCenterCrop(self): - args = {'size':[4,4]} - tf_func = TestTFTransorm.transforms['PaddedCenterCrop'](**args) + args = {"size": [4, 4]} + tf_func = TestTFTransorm.transforms["PaddedCenterCrop"](**args) tf_result = tf_func((TestTFTransorm.img, None))[0] - self.assertEqual(tf_result.shape, (10,10,3)) + 
self.assertEqual(tf_result.shape, (10, 10, 3)) - args = {'size':[4,4], 'crop_padding': 4} - tf_func = TestTFTransorm.transforms['PaddedCenterCrop'](**args) + args = {"size": [4, 4], "crop_padding": 4} + tf_func = TestTFTransorm.transforms["PaddedCenterCrop"](**args) tf_result = tf_func((TestTFTransorm.img, None))[0] - self.assertEqual(tf_result.shape, (5,5,3)) + self.assertEqual(tf_result.shape, (5, 5, 3)) - args = {'size':4} - tf_func = TestTFTransorm.transforms['PaddedCenterCrop'](**args) + args = {"size": 4} + tf_func = TestTFTransorm.transforms["PaddedCenterCrop"](**args) tf_result = tf_func((TestTFTransorm.img, None))[0] - self.assertEqual(tf_result.shape, (10,10,3)) + self.assertEqual(tf_result.shape, (10, 10, 3)) - args = {'size':4, 'crop_padding':4} - tf_func = TestTFTransorm.transforms['PaddedCenterCrop'](**args) + args = {"size": 4, "crop_padding": 4} + tf_func = TestTFTransorm.transforms["PaddedCenterCrop"](**args) tf_result = tf_func((TestTFTransorm.img, None))[0] - self.assertEqual(tf_result.shape, (5,5,3)) + self.assertEqual(tf_result.shape, (5, 5, 3)) - args = {'size':[4]} - tf_func = TestTFTransorm.transforms['PaddedCenterCrop'](**args) + args = {"size": [4]} + tf_func = TestTFTransorm.transforms["PaddedCenterCrop"](**args) tf_result = tf_func((TestTFTransorm.img, None))[0] - self.assertEqual(tf_result.shape, (10,10,3)) - - args = {'size':[4], 'crop_padding':4} - tf_func = TestTFTransorm.transforms['PaddedCenterCrop'](**args) + self.assertEqual(tf_result.shape, (10, 10, 3)) + + args = {"size": [4], "crop_padding": 4} + tf_func = TestTFTransorm.transforms["PaddedCenterCrop"](**args) tf_result = tf_func((TestTFTransorm.img, None))[0] - self.assertEqual(tf_result.shape, (5,5,3)) + self.assertEqual(tf_result.shape, (5, 5, 3)) - args = {'size':[4,5], 'crop_padding':4} + args = {"size": [4, 5], "crop_padding": 4} with self.assertRaises(ValueError): - tf_func = TestTFTransorm.transforms['PaddedCenterCrop'](**args) + tf_func = TestTFTransorm.transforms["PaddedCenterCrop"](**args) tf_result = tf_func((TestTFTransorm.img, None)) def testRescale(self): - transform = TestTFTransorm.transforms['Rescale']() + transform = TestTFTransorm.transforms["Rescale"]() img_result = transform((TestTFTransorm.img, None))[0] - comp_result = np.array(TestTFTransorm.img)/255. 
+ comp_result = np.array(TestTFTransorm.img) / 255.0 self.assertAlmostEqual(img_result[0][0][0], comp_result[0][0][0], places=5) tf_result = transform((TestTFTransorm.tf_img, None))[0] tf_result = tf_result.eval(session=tf.compat.v1.Session()) - self.assertAlmostEqual(tf_result[0][0][0], comp_result[0][0][0], places=5) + self.assertAlmostEqual(tf_result[0][0][0], comp_result[0][0][0], places=5) def testNormalize(self): - args = {'mean':[0.0,0.0,0.0], 'std':[0.2, 0.5, 0.1]} - normalize = TestTFTransorm.transforms['Normalize'](**args) + args = {"mean": [0.0, 0.0, 0.0], "std": [0.2, 0.5, 0.1]} + normalize = TestTFTransorm.transforms["Normalize"](**args) img_result = normalize((TestTFTransorm.img, None))[0] - comp_result = np.array(TestTFTransorm.img)/[0.2, 0.5, 0.1] + comp_result = np.array(TestTFTransorm.img) / [0.2, 0.5, 0.1] self.assertAlmostEqual(img_result[0][0][0], comp_result[0][0][0], places=5) self.assertAlmostEqual(img_result[0][0][1], comp_result[0][0][1], places=5) self.assertAlmostEqual(img_result[0][0][2], comp_result[0][0][2], places=5) - + tf_result = normalize((TestTFTransorm.tf_img, None))[0] tf_result = tf_result.eval(session=tf.compat.v1.Session()) self.assertAlmostEqual(tf_result[0][0][0], comp_result[0][0][0], places=5) - args = {'mean':[0.0,0.0,0.0], 'std':[0, 0, 0]} + args = {"mean": [0.0, 0.0, 0.0], "std": [0, 0, 0]} with self.assertRaises(ValueError): TestTFTransorm.transforms["Normalize"](**args) def testRandomResizedCrop(self): - args = {'size':[50]} + args = {"size": [50]} randomresizedcrop = TestTFTransorm.transforms["RandomResizedCrop"](**args) - compose = TestTFTransorm.transforms['Compose']([randomresizedcrop]) + compose = TestTFTransorm.transforms["Compose"]([randomresizedcrop]) image_result = compose((TestTFTransorm.img, None))[0] - self.assertEqual(image_result.shape, (50,50,3)) - args = {'size':[100, 100]} + self.assertEqual(image_result.shape, (50, 50, 3)) + args = {"size": [100, 100]} randomresizedcrop = TestTFTransorm.transforms["RandomResizedCrop"](**args) - compose = TestTFTransorm.transforms['Compose']([randomresizedcrop]) + compose = TestTFTransorm.transforms["Compose"]([randomresizedcrop]) image_result = compose((TestTFTransorm.img, None))[0] - self.assertEqual(image_result.shape, (100,100,3)) + self.assertEqual(image_result.shape, (100, 100, 3)) tf_result = randomresizedcrop((TestTFTransorm.tf_img, None))[0] tf_result = tf_result.eval(session=tf.compat.v1.Session()) - self.assertEqual(tf_result.shape, (100,100,3)) - args = {'size':[100, 100], 'scale':(0.8, 0.1)} + self.assertEqual(tf_result.shape, (100, 100, 3)) + args = {"size": [100, 100], "scale": (0.8, 0.1)} with self.assertRaises(ValueError): TestTFTransorm.transforms["RandomResizedCrop"](**args) def testSquadV1(self): - import urllib import json - vocab_url = "https://raw.githubusercontent.com/microsoft/SDNet/master/bert_vocab_files/bert-large-uncased-vocab.txt" + import urllib + + vocab_url = ( + "https://raw.githubusercontent.com/microsoft/SDNet/master/bert_vocab_files/bert-large-uncased-vocab.txt" + ) urllib.request.urlretrieve(vocab_url, "./vocab.txt") - label = [{ - "paragraphs":[ - {'context': - 'Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season.', - 'qas': [{ - 'answers': [ - {'answer_start': 177, 'text': 'Denver Broncos'}, - {'answer_start': 177, 'text': 'Denver Broncos'}, - {'answer_start': 177, 'text': 'Denver Broncos'}], - 'question': 'Which NFL team represented the AFC at Super Bowl 50?', - 'id': 
'56be4db0acb8001400a502ec'}] - } - ] - }] - fake_json = json.dumps({'data': label}) - with open('dev.json', 'w') as f: + label = [ + { + "paragraphs": [ + { + "context": "Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season.", + "qas": [ + { + "answers": [ + {"answer_start": 177, "text": "Denver Broncos"}, + {"answer_start": 177, "text": "Denver Broncos"}, + {"answer_start": 177, "text": "Denver Broncos"}, + ], + "question": "Which NFL team represented the AFC at Super Bowl 50?", + "id": "56be4db0acb8001400a502ec", + } + ], + } + ] + } + ] + fake_json = json.dumps({"data": label}) + with open("dev.json", "w") as f: f.write(fake_json) - args = { - 'label_file': './dev.json', - 'vocab_file': './vocab.txt' - } - post_transforms = TRANSFORMS('tensorflow', 'postprocess') - squadv1 = post_transforms['SquadV1'](**args) - + args = {"label_file": "./dev.json", "vocab_file": "./vocab.txt"} + post_transforms = TRANSFORMS("tensorflow", "postprocess") + squadv1 = post_transforms["SquadV1"](**args) + preds_0 = np.array([1000000000]) - preds_1 = np.random.uniform(low=-12.3, high=6.8, size=(1,384)) - preds_2 = np.random.uniform(low=-10.8, high=7.4, size=(1,384)) + preds_1 = np.random.uniform(low=-12.3, high=6.8, size=(1, 384)) + preds_2 = np.random.uniform(low=-10.8, high=7.4, size=(1, 384)) preds = [preds_0, preds_1, preds_2] result = squadv1((preds, label)) - self.assertTrue(result[1][0]['paragraphs'][0]['qas'][0]['id'] in result[0]) - os.remove('dev.json') - os.remove('vocab.txt') - + self.assertTrue(result[1][0]["paragraphs"][0]["qas"][0]["id"] in result[0]) + os.remove("dev.json") + os.remove("vocab.txt") + + class TestAlignImageChannel(unittest.TestCase): @classmethod def setUpClass(cls): - cls.img1 = np.random.random_sample([100,100,3]) * 255 - cls.img2 = np.random.random_sample([100,100]) * 255 - cls.img3 = np.random.random_sample([100,100,4]) * 255 + cls.img1 = np.random.random_sample([100, 100, 3]) * 255 + cls.img2 = np.random.random_sample([100, 100]) * 255 + cls.img3 = np.random.random_sample([100, 100, 4]) * 255 cls.pt_img1 = Image.fromarray(cls.img1.astype(np.uint8)) cls.pt_img2 = Image.fromarray(cls.img2.astype(np.uint8)) cls.pt_img3 = Image.fromarray(cls.img3.astype(np.uint8)) - + def testTensorflow(self): - transforms = TRANSFORMS('tensorflow', 'preprocess') - align = transforms['AlignImageChannel'](**{'dim':1}) + transforms = TRANSFORMS("tensorflow", "preprocess") + align = transforms["AlignImageChannel"](**{"dim": 1}) image, _ = align((TestAlignImageChannel.img1.astype(np.uint8), None)) self.assertEqual(image.shape[-1], 1) - align = transforms['AlignImageChannel'](**{'dim':1}) + align = transforms["AlignImageChannel"](**{"dim": 1}) image, _ = align((TestAlignImageChannel.img2.astype(np.uint8), None)) self.assertEqual(image.shape[-1], 1) - align = transforms['AlignImageChannel'](**{'dim':3}) + align = transforms["AlignImageChannel"](**{"dim": 3}) image, _ = align((TestAlignImageChannel.img3.astype(np.uint8), None)) self.assertEqual(image.shape[-1], 3) - align = transforms['AlignImageChannel'](**{'dim':2}) - self.assertRaises(ValueError, align, - (TestAlignImageChannel.img1.astype(np.uint8), None)) + align = transforms["AlignImageChannel"](**{"dim": 2}) + self.assertRaises(ValueError, align, (TestAlignImageChannel.img1.astype(np.uint8), None)) with self.assertRaises(ValueError): - transforms['AlignImageChannel'](**{'dim':5}) + transforms["AlignImageChannel"](**{"dim": 5}) def testONNX(self): - transforms = 
TRANSFORMS('onnxrt_qlinearops', 'preprocess') - align = transforms['AlignImageChannel'](**{'dim':1}) + transforms = TRANSFORMS("onnxrt_qlinearops", "preprocess") + align = transforms["AlignImageChannel"](**{"dim": 1}) image, _ = align((TestAlignImageChannel.img1.astype(np.uint8), None)) self.assertEqual(image.shape[-1], 1) - align = transforms['AlignImageChannel'](**{'dim':1}) + align = transforms["AlignImageChannel"](**{"dim": 1}) image, _ = align((TestAlignImageChannel.img2.astype(np.uint8), None)) self.assertEqual(image.shape[-1], 1) - align = transforms['AlignImageChannel'](**{'dim':3}) + align = transforms["AlignImageChannel"](**{"dim": 3}) image, _ = align((TestAlignImageChannel.img3.astype(np.uint8), None)) self.assertEqual(image.shape[-1], 3) - align = transforms['AlignImageChannel'](**{'dim':2}) - self.assertRaises(ValueError, align, - (TestAlignImageChannel.img1.astype(np.uint8), None)) + align = transforms["AlignImageChannel"](**{"dim": 2}) + self.assertRaises(ValueError, align, (TestAlignImageChannel.img1.astype(np.uint8), None)) with self.assertRaises(ValueError): - transforms['AlignImageChannel'](**{'dim':5}) + transforms["AlignImageChannel"](**{"dim": 5}) def testPyTorch(self): - transforms = TRANSFORMS('pytorch', 'preprocess') - align = transforms['AlignImageChannel'](**{'dim':1}) + transforms = TRANSFORMS("pytorch", "preprocess") + align = transforms["AlignImageChannel"](**{"dim": 1}) image, _ = align((TestAlignImageChannel.pt_img1, None)) - self.assertEqual(image.mode, 'L') + self.assertEqual(image.mode, "L") - align = transforms['AlignImageChannel'](**{'dim':1}) + align = transforms["AlignImageChannel"](**{"dim": 1}) image, _ = align((TestAlignImageChannel.pt_img2, None)) - self.assertEqual(image.mode, 'L') + self.assertEqual(image.mode, "L") - align = transforms['AlignImageChannel'](**{'dim':3}) + align = transforms["AlignImageChannel"](**{"dim": 3}) image, _ = align((TestAlignImageChannel.pt_img3, None)) - self.assertEqual(image.mode, 'RGB') + self.assertEqual(image.mode, "RGB") with self.assertRaises(ValueError): - align = transforms['AlignImageChannel'](**{'dim':2}) + align = transforms["AlignImageChannel"](**{"dim": 2}) with self.assertRaises(ValueError): - transforms['AlignImageChannel'](**{'dim':5}) + transforms["AlignImageChannel"](**{"dim": 5}) @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows yet") def testMXNet(self): - transforms = TRANSFORMS('mxnet', 'preprocess') - align = transforms['AlignImageChannel'](**{'dim':1}) + transforms = TRANSFORMS("mxnet", "preprocess") + align = transforms["AlignImageChannel"](**{"dim": 1}) image, _ = align((TestAlignImageChannel.img1.astype(np.uint8), None)) self.assertEqual(image.shape[-1], 1) - align = transforms['AlignImageChannel'](**{'dim':1}) + align = transforms["AlignImageChannel"](**{"dim": 1}) image, _ = align((TestAlignImageChannel.img2.astype(np.uint8), None)) self.assertEqual(image.shape[-1], 1) - align = transforms['AlignImageChannel'](**{'dim':3}) + align = transforms["AlignImageChannel"](**{"dim": 3}) image, _ = align((TestAlignImageChannel.img3.astype(np.uint8), None)) self.assertEqual(image.shape[-1], 3) - align = transforms['AlignImageChannel'](**{'dim':2}) - self.assertRaises(ValueError, align, - (TestAlignImageChannel.img1.astype(np.uint8), None)) + align = transforms["AlignImageChannel"](**{"dim": 2}) + self.assertRaises(ValueError, align, (TestAlignImageChannel.img1.astype(np.uint8), None)) with self.assertRaises(ValueError): - 
transforms['AlignImageChannel'](**{'dim':5})
+            transforms["AlignImageChannel"](**{"dim": 5})
+

 class TestToArray(unittest.TestCase):
     @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows yet")
     def testParse(self):
-        random_array = np.random.random_sample([10,10,3]) * 255
+        random_array = np.random.random_sample([10, 10, 3]) * 255
         random_array = random_array.astype(np.uint8)
         img1 = Image.fromarray(random_array)
-        onnx_transforms = TRANSFORMS('onnxrt_qlinearops', 'preprocess')
-        onnx_parse = onnx_transforms['ToArray']()
+        onnx_transforms = TRANSFORMS("onnxrt_qlinearops", "preprocess")
+        onnx_parse = onnx_transforms["ToArray"]()
         img, _ = onnx_parse((img1, None))
         self.assertTrue(isinstance(img, np.ndarray))

-        mxnet_transforms = TRANSFORMS('mxnet', 'preprocess')
-        mxnet_parse = mxnet_transforms['ToArray']()
+        mxnet_transforms = TRANSFORMS("mxnet", "preprocess")
+        mxnet_parse = mxnet_transforms["ToArray"]()
         img, _ = mxnet_parse((mx.nd.array(random_array), None))
         self.assertTrue(isinstance(img, np.ndarray))
-        self.assertRaises(ValueError, mxnet_parse, ([1,2], None))
+        self.assertRaises(ValueError, mxnet_parse, ([1, 2], None))
+

 class TestMXNetTransform(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         if platform.system().lower() == "windows":
             cls.skipTest(cls, "not support mxnet on windows yet")
-        array = np.random.random_sample([100,100,3]) * 255
+        array = np.random.random_sample([100, 100, 3]) * 255
         cls.img = mx.nd.array(array)
-        cls.transforms = TRANSFORMS('mxnet', 'preprocess')
+        cls.transforms = TRANSFORMS("mxnet", "preprocess")

     def testRandomCrop(self):
-        args = {'size':[50]}
+        args = {"size": [50]}
         randomcrop = TestMXNetTransform.transforms["RandomCrop"](**args)
-        compose = TestMXNetTransform.transforms['Compose']([randomcrop])
+        compose = TestMXNetTransform.transforms["Compose"]([randomcrop])
         image_result = compose((TestMXNetTransform.img, None))
-        self.assertEqual(image_result[0].shape, (50,50,3))
+        self.assertEqual(image_result[0].shape, (50, 50, 3))

     def testNormalize(self):
-        args = {'mean':[0.0,0.0,0.0], 'std':[0.29, 0.24, 0.25]}
-        normalize = TestMXNetTransform.transforms['Normalize'](**args)
+        args = {"mean": [0.0, 0.0, 0.0], "std": [0.29, 0.24, 0.25]}
+        normalize = TestMXNetTransform.transforms["Normalize"](**args)
         image_result = normalize((TestMXNetTransform.img, None))
-        self.assertAlmostEqual(image_result[0].asnumpy()[0][0][0],
-            (TestMXNetTransform.img.asnumpy()/[0.29])[0][0][0], places=3)
+        self.assertAlmostEqual(
+            image_result[0].asnumpy()[0][0][0], (TestMXNetTransform.img.asnumpy() / [0.29])[0][0][0], places=3
+        )
+

 class TestONNXTransfrom(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        cls.img = np.random.random_sample([100,100,3]) * 255
-        cls.transforms = TRANSFORMS('onnxrt_qlinearops', 'preprocess')
+        cls.img = np.random.random_sample([100, 100, 3]) * 255
+        cls.transforms = TRANSFORMS("onnxrt_qlinearops", "preprocess")

     def testResize(self):
-        args = {'size':[224]}
-        resize = TestONNXTransfrom.transforms['Resize'](**args)
-        compose = TestONNXTransfrom.transforms['Compose']([resize])
+        args = {"size": [224]}
+        resize = TestONNXTransfrom.transforms["Resize"](**args)
+        compose = TestONNXTransfrom.transforms["Compose"]([resize])
         image_result = compose((self.img, None))
-        self.assertEqual(image_result[0].shape, (224,224,3))
-        args = {'size':[100, 100], 'interpolation':'test'}
+        self.assertEqual(image_result[0].shape, (224, 224, 3))
+        args = {"size": [100, 100], "interpolation": "test"}
         with self.assertRaises(ValueError):
TestONNXTransfrom.transforms['Resize'](**args) + TestONNXTransfrom.transforms["Resize"](**args) - args = {'size':224} - resize = TestONNXTransfrom.transforms['Resize'](**args) - compose = TestONNXTransfrom.transforms['Compose']([resize]) + args = {"size": 224} + resize = TestONNXTransfrom.transforms["Resize"](**args) + compose = TestONNXTransfrom.transforms["Compose"]([resize]) image_result = compose((self.img, None)) - self.assertEqual(image_result[0].shape, (224,224,3)) - - args = {'size':[224,224]} - resize = TestONNXTransfrom.transforms['Resize'](**args) - compose = TestONNXTransfrom.transforms['Compose']([resize]) + self.assertEqual(image_result[0].shape, (224, 224, 3)) + + args = {"size": [224, 224]} + resize = TestONNXTransfrom.transforms["Resize"](**args) + compose = TestONNXTransfrom.transforms["Compose"]([resize]) image_result = compose((self.img, None)) - self.assertEqual(image_result[0].shape, (224,224,3)) - + self.assertEqual(image_result[0].shape, (224, 224, 3)) + def testNormalize(self): - args = {'mean':[0.0,0.0,0.0], 'std':[0.29, 0.24, 0.25]} - normalize = TestONNXTransfrom.transforms['Normalize'](**args) - compose = TestONNXTransfrom.transforms['Compose']([normalize]) + args = {"mean": [0.0, 0.0, 0.0], "std": [0.29, 0.24, 0.25]} + normalize = TestONNXTransfrom.transforms["Normalize"](**args) + compose = TestONNXTransfrom.transforms["Compose"]([normalize]) image_result = compose((TestONNXTransfrom.img, None)) - self.assertTrue( - (image_result[0] == np.array(TestONNXTransfrom.img)/[0.29, 0.24, 0.25]).all()) + self.assertTrue((image_result[0] == np.array(TestONNXTransfrom.img) / [0.29, 0.24, 0.25]).all()) - args = {'mean':[0.0,0.0,0.0], 'std':[0,0,0]} + args = {"mean": [0.0, 0.0, 0.0], "std": [0, 0, 0]} with self.assertRaises(ValueError): TestONNXTransfrom.transforms["Normalize"](**args) def testRandomCrop(self): - args = {'size':[50]} + args = {"size": [50]} randomcrop = TestONNXTransfrom.transforms["RandomCrop"](**args) - compose = TestONNXTransfrom.transforms['Compose']([randomcrop]) + compose = TestONNXTransfrom.transforms["Compose"]([randomcrop]) image_result = compose((TestONNXTransfrom.img, None)) - self.assertEqual(image_result[0].shape, (50,50,3)) - args = {'size':[1000, 1000]} + self.assertEqual(image_result[0].shape, (50, 50, 3)) + args = {"size": [1000, 1000]} with self.assertRaises(ValueError): trans = TestONNXTransfrom.transforms["RandomCrop"](**args) trans((TestONNXTransfrom.img, None)) - args = {'size':50} + args = {"size": 50} randomcrop = TestONNXTransfrom.transforms["RandomCrop"](**args) - compose = TestONNXTransfrom.transforms['Compose']([randomcrop]) + compose = TestONNXTransfrom.transforms["Compose"]([randomcrop]) image_result = compose((TestONNXTransfrom.img, None)) - self.assertEqual(image_result[0].shape, (50,50,3)) - - args = {'size':[100,100]} + self.assertEqual(image_result[0].shape, (50, 50, 3)) + + args = {"size": [100, 100]} randomcrop = TestONNXTransfrom.transforms["RandomCrop"](**args) - compose = TestONNXTransfrom.transforms['Compose']([randomcrop]) + compose = TestONNXTransfrom.transforms["Compose"]([randomcrop]) image_result = compose((TestONNXTransfrom.img, None)) - self.assertEqual(image_result[0].shape, (100,100,3)) - + self.assertEqual(image_result[0].shape, (100, 100, 3)) + def testCenterCrop(self): - args = {'size':[100]} + args = {"size": [100]} centercrop = TestONNXTransfrom.transforms["CenterCrop"](**args) - compose = TestONNXTransfrom.transforms['Compose']([centercrop]) + compose = 
TestONNXTransfrom.transforms["Compose"]([centercrop]) image_result = compose((TestONNXTransfrom.img, None)) - self.assertEqual(image_result[0].shape, (100,100,3)) - args = {'size': 5} + self.assertEqual(image_result[0].shape, (100, 100, 3)) + args = {"size": 5} centercrop = TestONNXTransfrom.transforms["CenterCrop"](**args) image_result = centercrop((TestONNXTransfrom.img, None)) - self.assertEqual(image_result[0].shape, (5,5,3)) - args = {'size': [5, 6]} + self.assertEqual(image_result[0].shape, (5, 5, 3)) + args = {"size": [5, 6]} centercrop = TestONNXTransfrom.transforms["CenterCrop"](**args) image_result = centercrop((TestONNXTransfrom.img, None)) - self.assertEqual(image_result[0].shape, (5,6,3)) - args = {'size':[150]} + self.assertEqual(image_result[0].shape, (5, 6, 3)) + args = {"size": [150]} centercrop = TestONNXTransfrom.transforms["CenterCrop"](**args) with self.assertRaises(ValueError): centercrop((TestONNXTransfrom.img, None)) def testRandomResizedCrop(self): - args = {'size':[150]} + args = {"size": [150]} randomresizedcrop = TestONNXTransfrom.transforms["RandomResizedCrop"](**args) - compose = TestONNXTransfrom.transforms['Compose']([randomresizedcrop]) + compose = TestONNXTransfrom.transforms["Compose"]([randomresizedcrop]) image_result = compose((TestONNXTransfrom.img, None)) - self.assertEqual(image_result[0].shape, (150,150,3)) - args = {'size':[150, 150], 'scale':(0.9, 0.3)} + self.assertEqual(image_result[0].shape, (150, 150, 3)) + args = {"size": [150, 150], "scale": (0.9, 0.3)} with self.assertRaises(ValueError): TestONNXTransfrom.transforms["RandomResizedCrop"](**args) - args = {'size':150, 'interpolation':'test'} + args = {"size": 150, "interpolation": "test"} with self.assertRaises(ValueError): TestONNXTransfrom.transforms["RandomResizedCrop"](**args) + class TestImagenetTransform(unittest.TestCase): def testParseDecodeImagenet(self): - random_array = np.random.random_sample([100,100,3]) * 255 + random_array = np.random.random_sample([100, 100, 3]) * 255 random_array = random_array.astype(np.uint8) im = Image.fromarray(random_array) - im.save('test.jpeg') + im.save("test.jpeg") - image = tf.compat.v1.gfile.FastGFile('test.jpeg','rb').read() + image = tf.compat.v1.gfile.FastGFile("test.jpeg", "rb").read() label = 10 - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': tf.train.Feature( - bytes_list=tf.train.BytesList(value=[image])), - 'image/class/label': tf.train.Feature( - int64_list=tf.train.Int64List(value=[label])), - 'image/object/bbox/xmin': tf.train.Feature( - float_list=tf.train.FloatList(value=[10])), - 'image/object/bbox/ymin': tf.train.Feature( - float_list=tf.train.FloatList(value=[20])), - 'image/object/bbox/xmax': tf.train.Feature( - float_list=tf.train.FloatList(value=[100])), - 'image/object/bbox/ymax': tf.train.Feature( - float_list=tf.train.FloatList(value=[200])), - })) - with tf.io.TFRecordWriter('test-0-of-0') as writer: + example = tf.train.Example( + features=tf.train.Features( + feature={ + "image/encoded": tf.train.Feature(bytes_list=tf.train.BytesList(value=[image])), + "image/class/label": tf.train.Feature(int64_list=tf.train.Int64List(value=[label])), + "image/object/bbox/xmin": tf.train.Feature(float_list=tf.train.FloatList(value=[10])), + "image/object/bbox/ymin": tf.train.Feature(float_list=tf.train.FloatList(value=[20])), + "image/object/bbox/xmax": tf.train.Feature(float_list=tf.train.FloatList(value=[100])), + "image/object/bbox/ymax": tf.train.Feature(float_list=tf.train.FloatList(value=[200])), 
+ } + ) + ) + with tf.io.TFRecordWriter("test-0-of-0") as writer: writer.write(example.SerializeToString()) - eval_dataset = create_dataset( - 'tensorflow', {'ImageRecord':{'root':'./'}}, {'ParseDecodeImagenet':{}}, None) - dataloader = DATALOADERS['tensorflow'](dataset=eval_dataset, batch_size=1) - for (inputs, labels) in dataloader: - self.assertEqual(inputs.shape, (1,100,100,3)) + eval_dataset = create_dataset("tensorflow", {"ImageRecord": {"root": "./"}}, {"ParseDecodeImagenet": {}}, None) + dataloader = DATALOADERS["tensorflow"](dataset=eval_dataset, batch_size=1) + for inputs, labels in dataloader: + self.assertEqual(inputs.shape, (1, 100, 100, 3)) self.assertEqual(labels[0][0], 10) break from neural_compressor.experimental.data.transforms.imagenet_transform import ParseDecodeImagenet + func = ParseDecodeImagenet() out = func(example.SerializeToString()) - self.assertEqual(out[0].eval(session=tf.compat.v1.Session()).shape, (100,100,3)) + self.assertEqual(out[0].eval(session=tf.compat.v1.Session()).shape, (100, 100, 3)) from neural_compressor.experimental.data.datasets.dataset import TensorflowTFRecordDataset - ds = TensorflowTFRecordDataset('test-0-of-0', func) - dataloader = DATALOADERS['tensorflow'](dataset=ds, batch_size=1) - for (inputs, labels) in dataloader: - self.assertEqual(inputs.shape, (1,100,100,3)) + + ds = TensorflowTFRecordDataset("test-0-of-0", func) + dataloader = DATALOADERS["tensorflow"](dataset=ds, batch_size=1) + for inputs, labels in dataloader: + self.assertEqual(inputs.shape, (1, 100, 100, 3)) self.assertEqual(labels[0][0], 10) break - os.remove('test-0-of-0') - os.remove('test.jpeg') + os.remove("test-0-of-0") + os.remove("test.jpeg") + class TestCOCOTransform(unittest.TestCase): def testCOCODecode(self): - tf.compat.v1.disable_eager_execution() + tf.compat.v1.disable_eager_execution() - random_array = np.random.random_sample([100,100,3]) * 255 + random_array = np.random.random_sample([100, 100, 3]) * 255 random_array = random_array.astype(np.uint8) im = Image.fromarray(random_array) - im.save('test.jpeg') - - image = tf.compat.v1.gfile.FastGFile('test.jpeg','rb').read() - source_id = '000000397133.jpg'.encode('utf-8') - label = 'person'.encode('utf-8') - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded':tf.train.Feature( - bytes_list=tf.train.BytesList(value=[image])), - 'image/object/class/text':tf.train.Feature( - bytes_list=tf.train.BytesList(value=[label])), - 'image/source_id':tf.train.Feature( - bytes_list=tf.train.BytesList(value=[source_id])), - 'image/object/bbox/xmin':tf.train.Feature( - float_list=tf.train.FloatList(value=[10])), - 'image/object/bbox/ymin':tf.train.Feature( - float_list=tf.train.FloatList(value=[10])), - 'image/object/bbox/xmax':tf.train.Feature( - float_list=tf.train.FloatList(value=[100])), - 'image/object/bbox/ymax':tf.train.Feature( - float_list=tf.train.FloatList(value=[100])), - })) - - with tf.io.TFRecordWriter('test.record') as writer: + im.save("test.jpeg") + + image = tf.compat.v1.gfile.FastGFile("test.jpeg", "rb").read() + source_id = "000000397133.jpg".encode("utf-8") + label = "person".encode("utf-8") + example = tf.train.Example( + features=tf.train.Features( + feature={ + "image/encoded": tf.train.Feature(bytes_list=tf.train.BytesList(value=[image])), + "image/object/class/text": tf.train.Feature(bytes_list=tf.train.BytesList(value=[label])), + "image/source_id": tf.train.Feature(bytes_list=tf.train.BytesList(value=[source_id])), + "image/object/bbox/xmin": 
tf.train.Feature(float_list=tf.train.FloatList(value=[10])), + "image/object/bbox/ymin": tf.train.Feature(float_list=tf.train.FloatList(value=[10])), + "image/object/bbox/xmax": tf.train.Feature(float_list=tf.train.FloatList(value=[100])), + "image/object/bbox/ymax": tf.train.Feature(float_list=tf.train.FloatList(value=[100])), + } + ) + ) + + with tf.io.TFRecordWriter("test.record") as writer: writer.write(example.SerializeToString()) eval_dataset = create_dataset( - 'tensorflow', {'COCORecord':{'root':'test.record'}}, - {'ParseDecodeCoco':{}, 'Resize': {'size': 50}, 'Cast':{'dtype':'int64'}, - 'CropToBoundingBox':{'offset_height':2, 'offset_width':2, 'target_height':5, 'target_width':5}, - 'CenterCrop':{'size':[4,4]}, - 'RandomResizedCrop':{'size':[4,5]}, - }, None) - dataloader = DATALOADERS['tensorflow'](dataset=eval_dataset, batch_size=1) - for (inputs, labels) in dataloader: - self.assertEqual(inputs.shape, (1,4,5,3)) - self.assertEqual(labels[0].shape, (1,1,4)) + "tensorflow", + {"COCORecord": {"root": "test.record"}}, + { + "ParseDecodeCoco": {}, + "Resize": {"size": 50}, + "Cast": {"dtype": "int64"}, + "CropToBoundingBox": {"offset_height": 2, "offset_width": 2, "target_height": 5, "target_width": 5}, + "CenterCrop": {"size": [4, 4]}, + "RandomResizedCrop": {"size": [4, 5]}, + }, + None, + ) + dataloader = DATALOADERS["tensorflow"](dataset=eval_dataset, batch_size=1) + for inputs, labels in dataloader: + self.assertEqual(inputs.shape, (1, 4, 5, 3)) + self.assertEqual(labels[0].shape, (1, 1, 4)) - from neural_compressor.experimental.data.transforms.transform import TensorflowResizeWithRatio from neural_compressor.experimental.data.datasets.coco_dataset import ParseDecodeCoco + from neural_compressor.experimental.data.transforms.transform import TensorflowResizeWithRatio + func = ParseDecodeCoco() out = func(example.SerializeToString()) - self.assertEqual(out[0].eval(session=tf.compat.v1.Session()).shape, (100,100,3)) + self.assertEqual(out[0].eval(session=tf.compat.v1.Session()).shape, (100, 100, 3)) func = ParseDecodeCoco() out = func(example.SerializeToString()) - self.assertEqual(out[0].eval(session=tf.compat.v1.Session()).shape, (100,100,3)) + self.assertEqual(out[0].eval(session=tf.compat.v1.Session()).shape, (100, 100, 3)) - func = TensorflowResizeWithRatio(**{'padding':True}) + func = TensorflowResizeWithRatio(**{"padding": True}) out = func(out) - self.assertEqual(out[0].eval(session=tf.compat.v1.Session()).shape, (1365,1365,3)) - - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded':tf.train.Feature( - bytes_list=tf.train.BytesList(value=[image])), - 'image/source_id':tf.train.Feature( - bytes_list=tf.train.BytesList(value=[source_id])), - 'image/object/bbox/xmin':tf.train.Feature( - float_list=tf.train.FloatList(value=[10])), - 'image/object/bbox/ymin':tf.train.Feature( - float_list=tf.train.FloatList(value=[10])), - 'image/object/bbox/xmax':tf.train.Feature( - float_list=tf.train.FloatList(value=[100])), - 'image/object/bbox/ymax':tf.train.Feature( - float_list=tf.train.FloatList(value=[100])), - })) - - with tf.io.TFRecordWriter('test2.record') as writer: + self.assertEqual(out[0].eval(session=tf.compat.v1.Session()).shape, (1365, 1365, 3)) + + example = tf.train.Example( + features=tf.train.Features( + feature={ + "image/encoded": tf.train.Feature(bytes_list=tf.train.BytesList(value=[image])), + "image/source_id": tf.train.Feature(bytes_list=tf.train.BytesList(value=[source_id])), + "image/object/bbox/xmin": 
tf.train.Feature(float_list=tf.train.FloatList(value=[10])), + "image/object/bbox/ymin": tf.train.Feature(float_list=tf.train.FloatList(value=[10])), + "image/object/bbox/xmax": tf.train.Feature(float_list=tf.train.FloatList(value=[100])), + "image/object/bbox/ymax": tf.train.Feature(float_list=tf.train.FloatList(value=[100])), + } + ) + ) + + with tf.io.TFRecordWriter("test2.record") as writer: writer.write(example.SerializeToString()) - self.assertRaises(ValueError, create_dataset, - 'tensorflow', {'COCORecord':{'root':'test2.record'}}, None, None) + self.assertRaises( + ValueError, create_dataset, "tensorflow", {"COCORecord": {"root": "test2.record"}}, None, None + ) + + os.remove("test2.record") + os.remove("test.record") + os.remove("test.jpeg") - os.remove('test2.record') - os.remove('test.record') - os.remove('test.jpeg') class TestVOCTransform(unittest.TestCase): def testVOCDecode(self): import shutil - tf.compat.v1.disable_eager_execution() + + tf.compat.v1.disable_eager_execution() def _bytes_list_feature(values): import six + def norm2bytes(value): return value.encode() if isinstance(value, str) and six.PY3 else value - return tf.train.Feature( - bytes_list=tf.train.BytesList(value=[norm2bytes(values)])) + + return tf.train.Feature(bytes_list=tf.train.BytesList(value=[norm2bytes(values)])) def _int64_list_feature(values): import collections import collections.abc + if not isinstance(values, collections.abc.Iterable): values = [values] return tf.train.Feature(int64_list=tf.train.Int64List(value=values)) - random_array = np.random.random_sample([100,100,3]) * 255 + random_array = np.random.random_sample([100, 100, 3]) * 255 random_array = random_array.astype(np.uint8) im = Image.fromarray(random_array) - im.save('test.jpg') - random_array = np.random.random_sample([100,100,3]) * 0 + im.save("test.jpg") + random_array = np.random.random_sample([100, 100, 3]) * 0 random_array = random_array.astype(np.uint8) im = Image.fromarray(random_array) - im.save('test.png') - image_data = tf.compat.v1.gfile.GFile('test.jpg', 'rb').read() - seg_data = tf.compat.v1.gfile.GFile('test.png', 'rb').read() - filename = 'test' - - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': _bytes_list_feature(image_data), - 'image/filename': _bytes_list_feature(filename), - 'image/format': _bytes_list_feature('png'), - 'image/height': _int64_list_feature(100), - 'image/width': _int64_list_feature(100), - 'image/channels': _int64_list_feature(3), - 'image/segmentation/class/encoded': ( - _bytes_list_feature(seg_data)), - 'image/segmentation/class/format': _bytes_list_feature('png'), - })) - - if not os.path.exists('./test_record'): - os.mkdir('./test_record') - with tf.io.TFRecordWriter('./test_record/val-test.record') as writer: + im.save("test.png") + image_data = tf.compat.v1.gfile.GFile("test.jpg", "rb").read() + seg_data = tf.compat.v1.gfile.GFile("test.png", "rb").read() + filename = "test" + + example = tf.train.Example( + features=tf.train.Features( + feature={ + "image/encoded": _bytes_list_feature(image_data), + "image/filename": _bytes_list_feature(filename), + "image/format": _bytes_list_feature("png"), + "image/height": _int64_list_feature(100), + "image/width": _int64_list_feature(100), + "image/channels": _int64_list_feature(3), + "image/segmentation/class/encoded": (_bytes_list_feature(seg_data)), + "image/segmentation/class/format": _bytes_list_feature("png"), + } + ) + ) + + if not os.path.exists("./test_record"): + os.mkdir("./test_record") + with 
tf.io.TFRecordWriter("./test_record/val-test.record") as writer: writer.write(example.SerializeToString()) eval_dataset = create_dataset( - 'tensorflow', {'VOCRecord':{'root':'./test_record'}}, {'ParseDecodeVoc':{}}, None) - dataloader = DATALOADERS['tensorflow'](dataset=eval_dataset, batch_size=1) - for (inputs, labels) in dataloader: - self.assertEqual(inputs.shape, (1,100,100,3)) - self.assertEqual(labels[0].shape, (100,100,1)) + "tensorflow", {"VOCRecord": {"root": "./test_record"}}, {"ParseDecodeVoc": {}}, None + ) + dataloader = DATALOADERS["tensorflow"](dataset=eval_dataset, batch_size=1) + for inputs, labels in dataloader: + self.assertEqual(inputs.shape, (1, 100, 100, 3)) + self.assertEqual(labels[0].shape, (100, 100, 1)) from neural_compressor.experimental.data.transforms.transform import ParseDecodeVocTransform + func = ParseDecodeVocTransform() out = func(example.SerializeToString()) - self.assertEqual(out[0].eval(session=tf.compat.v1.Session()).shape, (100,100,3)) + self.assertEqual(out[0].eval(session=tf.compat.v1.Session()).shape, (100, 100, 3)) + + os.remove("./test_record/val-test.record") + os.remove("test.jpg") + os.remove("test.png") + shutil.rmtree("./test_record") - os.remove('./test_record/val-test.record') - os.remove('test.jpg') - os.remove('test.png') - shutil.rmtree('./test_record') if __name__ == "__main__": unittest.main() diff --git a/test/distillation/test_distillation_1.x.py b/test/distillation/test_distillation_1.x.py index 32fc28504af..d754b6dea88 100644 --- a/test/distillation/test_distillation_1.x.py +++ b/test/distillation/test_distillation_1.x.py @@ -2,14 +2,17 @@ import os import shutil import unittest + +import tensorflow as tf import torch -import torchvision import torch.nn as nn -import tensorflow as tf -from neural_compressor.data import Datasets +import torchvision + +from neural_compressor.adaptor.tf_utils.util import version1_lt_version2 from neural_compressor.config import DistillationConfig, KnowledgeDistillationLossConfig +from neural_compressor.data import Datasets from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor.adaptor.tf_utils.util import version1_lt_version2 + def build_fake_yaml(): fake_yaml = """ @@ -51,9 +54,10 @@ def build_fake_yaml(): shape: [128, 3, 224, 224] label: True """ - with open('fake.yaml', 'w', encoding="utf-8") as f: + with open("fake.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) + def build_fake_yaml_1(): fake_yaml = """ model: @@ -94,9 +98,10 @@ def build_fake_yaml_1(): shape: [128, 224, 224, 3] label: True """ - with open('fake_1.yaml', 'w', encoding="utf-8") as f: + with open("fake_1.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) + def build_fake_yaml_2(): fake_yaml = """ model: @@ -139,11 +144,11 @@ def build_fake_yaml_2(): shape: [128, 3, 224, 224] label: True """ - with open('fake_2.yaml', 'w', encoding="utf-8") as f: + with open("fake_2.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) -class TestDistillation(unittest.TestCase): +class TestDistillation(unittest.TestCase): student_model = torchvision.models.resnet18() teacher_model = torchvision.models.resnet34() @@ -158,54 +163,57 @@ def setUpClass(cls): @classmethod def tearDownClass(cls): - os.remove('fake.yaml') - os.remove('fake_1.yaml') - os.remove('fake_2.yaml') - shutil.rmtree('./saved', ignore_errors=True) - shutil.rmtree('runs', ignore_errors=True) + os.remove("fake.yaml") + os.remove("fake_1.yaml") + os.remove("fake_2.yaml") + shutil.rmtree("./saved", 
ignore_errors=True)
+        shutil.rmtree("runs", ignore_errors=True)

     def test_distillation(self):
-        from neural_compressor.experimental import Distillation
         from neural_compressor.conf.config import DistillationConf
-        conf = DistillationConf('fake.yaml')
+        from neural_compressor.experimental import Distillation
+
+        conf = DistillationConf("fake.yaml")
         distiller = Distillation(conf)
         distiller = Distillation()
         from neural_compressor.conf.config import conf
-        conf.model.framework = 'pytorch'
+
+        conf.model.framework = "pytorch"
         conf.distillation.train.end_epoch = 3
         conf.distillation.train.iteration = 10
         conf.distillation.train.optimizer = {
-            'SGD': {'learning_rate': 0.001, 'momentum': 0.1, 'nesterov': True, 'weight_decay': 0.001}}
+            "SGD": {"learning_rate": 0.001, "momentum": 0.1, "nesterov": True, "weight_decay": 0.001}
+        }
         conf.distillation.train.dataloader.batch_size = 30
-        conf.distillation.train.dataloader.dataset = {'dummy': {'shape': [128, 3, 224, 224], 'label': True}}
+        conf.distillation.train.dataloader.dataset = {"dummy": {"shape": [128, 3, 224, 224], "label": True}}
         conf.evaluation.accuracy.dataloader.batch_size = 30
-        conf.evaluation.accuracy.dataloader.dataset = {'dummy': {'shape': [128, 3, 224, 224], 'label': True}}
+        conf.evaluation.accuracy.dataloader.dataset = {"dummy": {"shape": [128, 3, 224, 224], "label": True}}
         distiller = Distillation(conf)
         distiller.student_model = self.student_model
         distiller.teacher_model = self.teacher_model
-        print('student model: {}'.format(distiller.student_model))
+        print("student model: {}".format(distiller.student_model))
         distilled_model = distiller.fit()
-        distilled_model.save('./saved')
-        stat = torch.load('./saved/best_model.pt')
+        distilled_model.save("./saved")
+        stat = torch.load("./saved/best_model.pt")
         self.student_model.load_state_dict(stat)

     def test_distillation_intermediate_layers(self):
-        from neural_compressor.experimental import Distillation, common
         from neural_compressor.conf.config import DistillationConf
-        conf = DistillationConf('fake_2.yaml')
-        conf.usr_cfg.distillation.train.criterion.\
-            IntermediateLayersKnowledgeDistillationLoss.layer_mappings[1][1][-1] = \
-            lambda x: x[:, :2,...]
+        from neural_compressor.experimental import Distillation, common
+
+        conf = DistillationConf("fake_2.yaml")
+        conf.usr_cfg.distillation.train.criterion.IntermediateLayersKnowledgeDistillationLoss.layer_mappings[1][1][
+            -1
+        ] = lambda x: x[:, :2, ...]
         distiller = Distillation(conf)
         distiller.student_model = common.Model(self.student_model)
         distiller.teacher_model = common.Model(self.teacher_model)
-        print('student model: {}'.format(distiller.student_model))
+        print("student model: {}".format(distiller.student_model))
         _ = distiller.fit()

     def test_distillation_external(self):
-        from neural_compressor.experimental.common.criterion import \
-            TensorflowKnowledgeDistillationLossExternal
+        from neural_compressor.experimental.common.criterion import TensorflowKnowledgeDistillationLossExternal

         criterion = TensorflowKnowledgeDistillationLossExternal()
         criterion.teacher_model_forward(None)
@@ -216,12 +224,13 @@ def test_distillation_external(self):

     def test_distillation_external_new_API(self):
         from neural_compressor.training import prepare_compression
-        datasets = Datasets('pytorch')
-        dummy_dataset = datasets['dummy'](shape=(100, 3, 224, 224), low=0., high=1., label=True)
+
+        datasets = Datasets("pytorch")
+        dummy_dataset = datasets["dummy"](shape=(100, 3, 224, 224), low=0.0, high=1.0, label=True)
         dummy_dataloader = PyTorchDataLoader(dummy_dataset)

         criterion = nn.CrossEntropyLoss()
-        distillation_criterion = KnowledgeDistillationLossConfig(loss_types=['CE', 'KL'])
+        distillation_criterion = KnowledgeDistillationLossConfig(loss_types=["CE", "KL"])
         optimizer = torch.optim.SGD(self.student_model.parameters(), lr=0.0001)
         conf = DistillationConfig(self.teacher_model, distillation_criterion)
         compression_manager = prepare_compression(copy.deepcopy(self.student_model), conf)
@@ -235,7 +244,7 @@ def test_distillation_external_new_API(self):
             compression_manager.callbacks.on_epoch_begin(nepoch)
             for image, target in dummy_dataloader:
                 compression_manager.callbacks.on_step_begin(cnt)
-                print('.', end='')
+                print(".", end="")
                 cnt += 1
                 output = model(image)
                 loss = criterion(output, target)
@@ -248,21 +257,23 @@ def test_distillation_external_new_API(self):
                 break
             compression_manager.callbacks.on_epoch_end()

-        model.save('./saved')
-        stat = torch.load('./saved/best_model.pt')
+        model.save("./saved")
+        stat = torch.load("./saved/best_model.pt")
         opt_model = self.student_model.load_state_dict(stat)

-    @unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.3.0'), " keras requires higher version than tf-2.3.0")
+    @unittest.skipIf(version1_lt_version2(tf.version.VERSION, "2.3.0"), " keras requires higher version than tf-2.3.0")
     def test_tf_distillation(self):
-        from neural_compressor.experimental import Distillation
         from neural_compressor.conf.config import DistillationConf
-        conf = DistillationConf('fake_1.yaml')
+        from neural_compressor.experimental import Distillation
+
+        conf = DistillationConf("fake_1.yaml")
         distiller = Distillation(conf)
-        distiller = Distillation('fake_1.yaml')
+        distiller = Distillation("fake_1.yaml")
         distiller.student_model = self.student_model_tf
         distiller.teacher_model = self.teacher_model_tf
-        print('student model: {}'.format(distiller.student_model))
+        print("student model: {}".format(distiller.student_model))
         _ = distiller.fit()

+
 if __name__ == "__main__":
     unittest.main()
diff --git a/test/distillation/test_distillation_2.x.py b/test/distillation/test_distillation_2.x.py
index 1a263aa5e07..431baf98193 100644
--- a/test/distillation/test_distillation_2.x.py
+++ b/test/distillation/test_distillation_2.x.py
@@ -1,30 +1,35 @@
 import copy
+import datetime
 import os
 import shutil
 import unittest
+
+import tensorflow as tf
 import torch
-import datetime
-import torchvision
 import torch.nn as nn
-import tensorflow as tf
+import torchvision
+
from neural_compressor.adaptor import FRAMEWORKS -from neural_compressor.conf.dotdict import DotDict -from neural_compressor.utils import create_obj_from_config from neural_compressor.adaptor.tf_utils.util import version1_lt_version2 -from neural_compressor.config import DistillationConfig, \ - KnowledgeDistillationLossConfig, IntermediateLayersKnowledgeDistillationLossConfig +from neural_compressor.conf.dotdict import DotDict +from neural_compressor.config import ( + DistillationConfig, + IntermediateLayersKnowledgeDistillationLossConfig, + KnowledgeDistillationLossConfig, +) from neural_compressor.data import Datasets from neural_compressor.data.dataloaders.pytorch_dataloader import PyTorchDataLoader from neural_compressor.data.dataloaders.tensorflow_dataloader import TensorflowDataLoader from neural_compressor.training import prepare_compression +from neural_compressor.utils import create_obj_from_config -class TestDistillation(unittest.TestCase): +class TestDistillation(unittest.TestCase): student_model = torchvision.models.resnet18() teacher_model = torchvision.models.resnet34() - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(100, 3, 224, 224), low=0., high=1., label=True) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(100, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) @classmethod @@ -33,12 +38,12 @@ def setUpClass(cls): @classmethod def tearDownClass(cls): - shutil.rmtree('./saved', ignore_errors=True) - shutil.rmtree('runs', ignore_errors=True) + shutil.rmtree("./saved", ignore_errors=True) + shutil.rmtree("runs", ignore_errors=True) def test_distillation(self): criterion = nn.CrossEntropyLoss() - distillation_criterion_conf = KnowledgeDistillationLossConfig(loss_types=['CE', 'KL']) + distillation_criterion_conf = KnowledgeDistillationLossConfig(loss_types=["CE", "KL"]) optimizer = torch.optim.SGD(self.student_model.parameters(), lr=0.0001) conf = DistillationConfig(self.teacher_model, distillation_criterion_conf) @@ -53,7 +58,7 @@ def test_distillation(self): cnt = 0 for image, target in self.dummy_dataloader: compression_manager.callbacks.on_step_begin(cnt) - print('.', end='') + print(".", end="") cnt += 1 output = model(image) loss = criterion(output, target) @@ -65,23 +70,29 @@ def test_distillation(self): break compression_manager.callbacks.on_epoch_end() - model.save('./saved') - stat = torch.load('./saved/best_model.pt') + model.save("./saved") + stat = torch.load("./saved/best_model.pt") opt_model = self.student_model.load_state_dict(stat) def test_distillation_intermediate_layers(self): criterion = nn.CrossEntropyLoss() distillation_criterion_conf = IntermediateLayersKnowledgeDistillationLossConfig( layer_mappings=[ - ['', ], - ['layer1.0', ], - [['layer1.1.conv1', ''], ['layer1.1.conv1', '0']], + [ + "", + ], + [ + "layer1.0", + ], + [["layer1.1.conv1", ""], ["layer1.1.conv1", "0"]], ], - loss_types=['L1', 'KL', 'MSE'], - loss_weights=[0.5, 0.2, 0.3]) + loss_types=["L1", "KL", "MSE"], + loss_weights=[0.5, 0.2, 0.3], + ) - distillation_criterion_conf.config.IntermediateLayersKnowledgeDistillationLoss.layer_mappings[2][1][-1] = \ - lambda x: x[:, :2,...] + distillation_criterion_conf.config.IntermediateLayersKnowledgeDistillationLoss.layer_mappings[2][1][ + -1 + ] = lambda x: x[:, :2, ...] 
optimizer = torch.optim.SGD(self.student_model.parameters(), lr=0.0001) conf = DistillationConfig(self.teacher_model, distillation_criterion_conf) compression_manager = prepare_compression(copy.deepcopy(self.student_model), conf) @@ -95,7 +106,7 @@ def test_distillation_intermediate_layers(self): cnt = 0 for image, target in self.dummy_dataloader: compression_manager.callbacks.on_step_begin(cnt) - print('.', end='') + print(".", end="") cnt += 1 output = model(image) loss = criterion(output, target) @@ -107,37 +118,41 @@ def test_distillation_intermediate_layers(self): break compression_manager.callbacks.on_epoch_end() - model.save('./saved') - stat = torch.load('./saved/best_model.pt') + model.save("./saved") + stat = torch.load("./saved/best_model.pt") opt_model = self.student_model.load_state_dict(stat) def test_distillation_tf(self): - tf_datasets = Datasets('tensorflow') - dummy_dataset = tf_datasets['dummy'](shape=(100, 224, 224, 3), low=0., high=1., label=True) - default_workspace = './nc_workspace/{}/'.format( - datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')) - train_dataloader = TensorflowDataLoader(dataset=dummy_dataset , batch_size=100) + tf_datasets = Datasets("tensorflow") + dummy_dataset = tf_datasets["dummy"](shape=(100, 224, 224, 3), low=0.0, high=1.0, label=True) + default_workspace = "./nc_workspace/{}/".format(datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")) + train_dataloader = TensorflowDataLoader(dataset=dummy_dataset, batch_size=100) framework_specific_info = { - 'device': 'cpu', 'random_seed': 9527, - 'workspace_path': default_workspace, - 'format': 'default', 'backend': 'default' + "device": "cpu", + "random_seed": 9527, + "workspace_path": default_workspace, + "format": "default", + "backend": "default", } - adaptor = FRAMEWORKS['tensorflow'](framework_specific_info) + adaptor = FRAMEWORKS["tensorflow"](framework_specific_info) train_cfg = { - 'start_epoch': 0, - 'end_epoch': 2, - 'iteration': 10, - 'frequency': 1, - 'dataloader': train_dataloader, - 'criterion': {'KnowledgeDistillationLoss': {'temperature': 1.0, - 'loss_types': ['CE', 'CE'], - 'loss_weights': [0.5, 0.5]}}, - 'optimizer': {'SGD': {'learning_rate': 0.001, 'momentum': 0.1, - 'weight_decay': 0.001, 'nesterov': True}}, + "start_epoch": 0, + "end_epoch": 2, + "iteration": 10, + "frequency": 1, + "dataloader": train_dataloader, + "criterion": { + "KnowledgeDistillationLoss": { + "temperature": 1.0, + "loss_types": ["CE", "CE"], + "loss_weights": [0.5, 0.5], + } + }, + "optimizer": {"SGD": {"learning_rate": 0.001, "momentum": 0.1, "weight_decay": 0.001, "nesterov": True}}, } train_cfg = DotDict(train_cfg) - model = tf.keras.applications.MobileNet(weights='imagenet') - teacher_model = tf.keras.applications.DenseNet201(weights='imagenet') + model = tf.keras.applications.MobileNet(weights="imagenet") + teacher_model = tf.keras.applications.DenseNet201(weights="imagenet") distil_loss = KnowledgeDistillationLossConfig() conf = DistillationConfig(teacher_model=teacher_model, criterion=distil_loss) compression_manager = prepare_compression(model, conf) @@ -145,14 +160,16 @@ def test_distillation_tf(self): model = compression_manager.model train_func = create_obj_from_config.create_train_func( - 'tensorflow', \ - train_dataloader, \ - adaptor, \ - train_cfg, \ - hooks=compression_manager.callbacks.callbacks_list[0].hooks) + "tensorflow", + train_dataloader, + adaptor, + train_cfg, + hooks=compression_manager.callbacks.callbacks_list[0].hooks, + ) train_func(model) 
compression_manager.callbacks.on_train_end() + if __name__ == "__main__": unittest.main() diff --git a/test/distillation/test_self_distillation_2.x.py b/test/distillation/test_self_distillation_2.x.py index 20a695ac211..d36a986734e 100644 --- a/test/distillation/test_self_distillation_2.x.py +++ b/test/distillation/test_self_distillation_2.x.py @@ -5,9 +5,9 @@ import torch import torch.nn as nn import torchvision + from neural_compressor.data import Datasets -from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import \ - PyTorchDataLoader +from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader def build_fake_yaml(): @@ -65,7 +65,6 @@ def build_fake_yaml(): class TestSelfDistillation(unittest.TestCase): - model = torchvision.models.resnet50() @classmethod @@ -80,27 +79,28 @@ def tearDownClass(cls): def test_self_distillation(self): import copy + + from neural_compressor.config import DistillationConfig, SelfKnowledgeDistillationLossConfig from neural_compressor.training import prepare_compression - from neural_compressor.config import DistillationConfig, \ - SelfKnowledgeDistillationLossConfig datasets = Datasets("pytorch") - dummy_dataset = datasets["dummy"]( - shape=(100, 3, 224, 224), low=0.0, high=1.0, label=True - ) + dummy_dataset = datasets["dummy"](shape=(100, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) distil_loss = SelfKnowledgeDistillationLossConfig( layer_mappings=[ - [['resblock.1.feature.output', 'resblock.deepst.feature.output'], - ['resblock.2.feature.output','resblock.deepst.feature.output']], - [['resblock.2.fc','resblock.deepst.fc'], - ['resblock.3.fc','resblock.deepst.fc']], - [['resblock.1.fc','resblock.deepst.fc'], - ['resblock.2.fc','resblock.deepst.fc'], - ['resblock.3.fc','resblock.deepst.fc']] + [ + ["resblock.1.feature.output", "resblock.deepst.feature.output"], + ["resblock.2.feature.output", "resblock.deepst.feature.output"], + ], + [["resblock.2.fc", "resblock.deepst.fc"], ["resblock.3.fc", "resblock.deepst.fc"]], + [ + ["resblock.1.fc", "resblock.deepst.fc"], + ["resblock.2.fc", "resblock.deepst.fc"], + ["resblock.3.fc", "resblock.deepst.fc"], + ], ], temperature=3.0, - loss_types=['L2', 'KL', 'CE'], + loss_types=["L2", "KL", "CE"], loss_weights=[0.5, 0.05, 0.02], add_origin_loss=True, ) @@ -124,15 +124,9 @@ def training_func_for_nc(model): output = model(image) loss = criterion(output, target) outputs_features = dict() - outputs_features["resblock.deepst.feature.output"] = torch.randn( - 128, 1024 - ) - outputs_features["resblock.2.feature.output"] = torch.randn( - 128, 1024 - ) - outputs_features["resblock.1.feature.output"] = torch.randn( - 128, 1024 - ) + outputs_features["resblock.deepst.feature.output"] = torch.randn(128, 1024) + outputs_features["resblock.2.feature.output"] = torch.randn(128, 1024) + outputs_features["resblock.1.feature.output"] = torch.randn(128, 1024) outputs_features["resblock.deepst.fc"] = torch.randn(128, 100) outputs_features["resblock.3.fc"] = torch.randn(128, 100) outputs_features["resblock.2.fc"] = torch.randn(128, 100) diff --git a/test/distributed/test_distributed_metrics.py b/test/distributed/test_distributed_metrics.py index 4aa6c09c92c..b42b194985a 100644 --- a/test/distributed/test_distributed_metrics.py +++ b/test/distributed/test_distributed_metrics.py @@ -1,16 +1,19 @@ """Tests for the distributed metrics.""" import os -import sys -import cpuinfo -import signal +import re import shutil +import signal 
import subprocess +import sys import unittest -import re + +import cpuinfo import tensorflow as tf -from neural_compressor.adaptor.tf_utils.util import version1_lt_version2, version1_gte_version2 + +from neural_compressor.adaptor.tf_utils.util import version1_gte_version2, version1_lt_version2 from neural_compressor.utils import logger + def build_fake_ut(): fake_ut = """ import numpy as np @@ -68,7 +71,7 @@ def tearDown(self): def test_mIOU(self): metrics = METRICS('tensorflow') miou = metrics['mIOU']() - miou.hvd = hvd + miou.hvd = hvd if hvd.rank() == 0: preds = np.array([0]) labels = np.array([0]) @@ -84,7 +87,7 @@ def test_mIOU(self): labels = np.array([0, 1]) else: preds = np.array([1, 1]) - labels = np.array([1, 1]) + labels = np.array([1, 1]) miou.update(preds, labels) self.assertAlmostEqual(miou.result(), 0.58333333) @@ -128,7 +131,7 @@ def test_tensorflow_F1(self): labels = [0, 1, 1, 1] else: preds = [1, 1, 1, 1, 1, 1] - labels = [1, 1, 1, 1, 1, 1] + labels = [1, 1, 1, 1, 1, 1] F1.update(preds, labels) self.assertEqual(F1.result(), 0.9) @@ -179,7 +182,7 @@ def test_tensorflow_topk(self): top1.hvd = hvd top2.hvd = hvd top3.hvd = hvd - + if hvd.rank() == 0: predicts = [[0, 0.2, 0.9, 0.3]] labels = [[0, 1, 0, 0]] @@ -200,7 +203,7 @@ def test_tensorflow_topk(self): self.assertEqual(top1.result(), 0.0) self.assertEqual(top2.result(), 0.5) self.assertEqual(top3.result(), 1) - + # test functionality of sparse label top1.reset() top2.reset() @@ -250,7 +253,7 @@ def test_tensorflow_mAP(self): ground_truth = [ np.array([[[0.5633255 , 0.34003124, 0.69857144, 0.4009531 ], [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), - np.array([['a', 'b']]), + np.array([['a', 'b']]), np.array([[]]), np.array([b'000000397133.jpg']) ] @@ -265,7 +268,7 @@ def test_tensorflow_mAP(self): ground_truth = [ np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), - np.array([[b'dog', b'dog']]), + np.array([[b'dog', b'dog']]), np.array([[]]), np.array([b'000000397133.jpg']) ] @@ -273,7 +276,7 @@ def test_tensorflow_mAP(self): mAP.result() self.assertEqual(format(mAP.result(), '.5f'), '0.00000') - + detection = [ np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], [0.5589304 , 0. 
, 0.98301625, 0.520178 ], @@ -319,7 +322,7 @@ def test_tensorflow_mAP(self): [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), np.array([[]]), np.array([[44, 67, 1, 49, 51, 51, 79, 1, 47, 47, 51, 51,\\ - 56, 50, 56, 56, 79, 57, 81]]), + 56, 50, 56, 56, 79, 57, 81]]), np.array([b'000000397133.jpg']) ] ground_truth_2 = [ @@ -343,11 +346,11 @@ def test_tensorflow_mAP(self): ] mAP = metrics['mAP']() - + self.assertEqual(mAP.result(), 0) mAP.update(detection, ground_truth) - + mAP.update(detection, ground_truth) self.assertEqual(format(mAP.result(), '.5f'), '0.18182') @@ -434,7 +437,7 @@ def test_tensorflow_VOCmAP(self): ground_truth = [ np.array([[[0.5633255 , 0.34003124, 0.69857144, 0.4009531 ], [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), - np.array([['a', 'b']]), + np.array([['a', 'b']]), np.array([[]]), np.array([b'000000397133.jpg']) ] @@ -486,7 +489,7 @@ def test_tensorflow_VOCmAP(self): [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), np.array([[]]), np.array([[44, 67, 1, 49, 51, 51, 79, 1, 47, 47, 51, 51,\\ - 56, 50, 56, 56, 79, 57, 81]]), + 56, 50, 56, 56, 79, 57, 81]]), np.array([b'000000397133.jpg']) ] ground_truth_2 = [ @@ -508,11 +511,11 @@ def test_tensorflow_VOCmAP(self): np.array([[64, 62, 62, 67, 82, 52, 79, 81, 55, 55, 55, 55, 62, 55]]), np.array([b'000000037777.jpg']) ] - + self.assertEqual(mAP.result(), 0) mAP.update(detection, ground_truth) - + mAP.update(detection, ground_truth) self.assertEqual(format(mAP.result(), '.5f'), '0.18182') @@ -597,7 +600,7 @@ def test_tensorflow_COCOmAP(self): ground_truth = [ np.array([[[0.5633255 , 0.34003124, 0.69857144, 0.4009531 ], [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), - np.array([['a', 'b']]), + np.array([['a', 'b']]), np.array([[]]), np.array([b'000000397133.jpg']) ] @@ -648,7 +651,7 @@ def test_tensorflow_COCOmAP(self): [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), np.array([[]]), np.array([[44, 67, 1, 49, 51, 51, 79, 1, 47, 47, 51, 51,\\ - 56, 50, 56, 56, 79, 57, 81]]), + 56, 50, 56, 56, 79, 57, 81]]), np.array([b'000000397133.jpg']) ] ground_truth_2 = [ @@ -670,11 +673,11 @@ def test_tensorflow_COCOmAP(self): np.array([[64, 62, 62, 67, 82, 52, 79, 81, 55, 55, 55, 55, 62, 55]]), np.array([b'000000037777.jpg']) ] - + self.assertEqual(mAP.result(), 0) mAP.update(detection, ground_truth) - + mAP.update(detection, ground_truth) self.assertEqual(format(mAP.result(), '.5f'), '0.14149') @@ -749,7 +752,7 @@ def test__accuracy(self): labels2 = [[1, 1]] predicts3 = [[[0, 1], [0, 1], [0, 1]]] labels3 = [[[1, 0], [1, 0], [1, 0]]] - predicts4 = [[0.1, 0.9], [0.3, 0.7], [0.4, 0.6]] + predicts4 = [[0.1, 0.9], [0.3, 0.7], [0.4, 0.6]] labels4 = [1, 0, 0] metrics = METRICS('tensorflow') @@ -859,7 +862,7 @@ def test_mae(self): mae.update(predicts2, labels2) mae_result = mae.result() self.assertEqual(mae_result, 0.5) - + self.assertRaises(AssertionError, mae.update, [1], [1, 2]) self.assertRaises(AssertionError, mae.update, 1, [1,2]) self.assertRaises(AssertionError, mae.update, [1, 2], [1]) @@ -913,7 +916,7 @@ def test_loss(self): labels2 = [0] predicts3 = [0, 1] labels3 = [0, 0] - + metrics = METRICS('tensorflow') loss = metrics['Loss']() loss.hvd = hvd @@ -945,9 +948,10 @@ def test_loss(self): unittest.main() """ - with open('fake_ut.py', 'w', encoding="utf-8") as f: + with open("fake_ut.py", "w", encoding="utf-8") as f: f.write(fake_ut) + class TestDistributed(unittest.TestCase): @classmethod def setUpClass(cls): @@ -955,9 +959,9 @@ def setUpClass(cls): @classmethod def tearDownClass(cls): - 
os.remove('fake_ut.py') - shutil.rmtree('./saved', ignore_errors = True) - shutil.rmtree('runs', ignore_errors = True) + os.remove("fake_ut.py") + shutil.rmtree("./saved", ignore_errors=True) + shutil.rmtree("runs", ignore_errors=True) def setUp(self): logger.info(f"CPU: {cpuinfo.get_cpu_info()['brand_raw']}") @@ -966,19 +970,21 @@ def setUp(self): def tearDown(self): logger.info(f"{self._testMethodName} done.\n") - @unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.10.0') or - version1_gte_version2(tf.version.VERSION, '2.12.0'), - "Only test equal or above TF 2.10.0 and less than 2.12.0") + @unittest.skipIf( + version1_lt_version2(tf.version.VERSION, "2.10.0") or version1_gte_version2(tf.version.VERSION, "2.12.0"), + "Only test equal or above TF 2.10.0 and less than 2.12.0", + ) def test_distributed(self): - distributed_cmd = 'horovodrun -np 2 python fake_ut.py' - p = subprocess.Popen(distributed_cmd, preexec_fn = os.setsid, stdout = subprocess.PIPE, - stderr = subprocess.PIPE, shell=True) # nosec + distributed_cmd = "horovodrun -np 2 python fake_ut.py" + p = subprocess.Popen( + distributed_cmd, preexec_fn=os.setsid, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True + ) # nosec try: out, error = p.communicate() - matches = re.findall(r'FAILED', error.decode('utf-8')) + matches = re.findall(r"FAILED", error.decode("utf-8")) self.assertEqual(matches, []) - matches = re.findall(r'OK', error.decode('utf-8')) + matches = re.findall(r"OK", error.decode("utf-8")) self.assertTrue(len(matches) > 0) except KeyboardInterrupt: diff --git a/test/distributed/test_distributed_pt_train.py b/test/distributed/test_distributed_pt_train.py index e10db82eeca..6617ce27723 100644 --- a/test/distributed/test_distributed_pt_train.py +++ b/test/distributed/test_distributed_pt_train.py @@ -1,17 +1,18 @@ import os import shutil -import unittest import signal import subprocess +import unittest +import horovod.torch as hvd import torch -import torchvision import torch.nn as nn -import horovod.torch as hvd +import torchvision from neural_compressor.data import Datasets from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader + def build_fake_py(): fake_py = """ import os @@ -43,9 +44,10 @@ def test_pruning_internal(self): if __name__ == "__main__": unittest.main() """ - with open('fake.py', 'w', encoding="utf-8") as f: + with open("fake.py", "w", encoding="utf-8") as f: f.write(fake_py) + def build_fake_yaml(): fake_yaml = """ model: @@ -103,12 +105,13 @@ def build_fake_yaml(): shape: [128, 3, 224, 224] label: True """ - with open('fake.yaml', 'w', encoding="utf-8") as f: + with open("fake.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) class TestDistributed(unittest.TestCase): model = torchvision.models.resnet18() + @classmethod def setUpClass(cls): build_fake_yaml() @@ -116,34 +119,37 @@ def setUpClass(cls): @classmethod def tearDownClass(cls): - os.remove('fake.yaml') - os.remove('fake.py') - shutil.rmtree('./saved', ignore_errors=True) - shutil.rmtree('runs', ignore_errors=True) - + os.remove("fake.yaml") + os.remove("fake.py") + shutil.rmtree("./saved", ignore_errors=True) + shutil.rmtree("runs", ignore_errors=True) def test_distributed(self): - distributed_cmd = 'horovodrun -np 2 python fake.py' - p = subprocess.Popen(distributed_cmd, preexec_fn=os.setsid, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, shell=True) # nosec + distributed_cmd = "horovodrun -np 2 python fake.py" + p = subprocess.Popen( + distributed_cmd, preexec_fn=os.setsid, 
stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True + ) # nosec try: out, error = p.communicate() import re - matches = re.findall(r'.*rank ([01]) in size 2.*', out.decode('utf-8')) - assert '0' in matches - assert '1' in matches + + matches = re.findall(r".*rank ([01]) in size 2.*", out.decode("utf-8")) + assert "0" in matches + assert "1" in matches except KeyboardInterrupt: os.killpg(os.getpgid(p.pid), signal.SIGKILL) assert 0 def test_single_node(self): from neural_compressor.experimental import Pruning, common - prune = Pruning('fake.yaml') + + prune = Pruning("fake.yaml") prune.model = self.model _ = prune() # assert hvd hook is registered. pruner has 2 on_train_begin hooks: hvd and prune - assert len(prune.hooks_dict['on_train_begin'])==2 + assert len(prune.hooks_dict["on_train_begin"]) == 2 + if __name__ == "__main__": unittest.main() diff --git a/test/distributed/test_distributed_tf_dataloader.py b/test/distributed/test_distributed_tf_dataloader.py index c0a6599e530..52ecabf8a0c 100644 --- a/test/distributed/test_distributed_tf_dataloader.py +++ b/test/distributed/test_distributed_tf_dataloader.py @@ -1,21 +1,23 @@ """Tests for Distributed TensorFlow Dataloader.""" -import numpy as np import collections import json import os +import shutil import sys -import cpuinfo import unittest -import shutil + +import cpuinfo +import numpy as np import tensorflow as tf + from neural_compressor import data -from neural_compressor.utils.create_obj_from_config import create_dataset, create_dataloader -from neural_compressor.data import DataLoader -from neural_compressor.data import Datasets, DATALOADERS, TRANSFORMS -from neural_compressor.utils import logger from neural_compressor.adaptor.tf_utils.util import version1_lt_version2 +from neural_compressor.data import DATALOADERS, TRANSFORMS, DataLoader, Datasets +from neural_compressor.utils import logger +from neural_compressor.utils.create_obj_from_config import create_dataloader, create_dataset + -@unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.10.0'), "Only test TF 2.10.0 or above") +@unittest.skipIf(version1_lt_version2(tf.version.VERSION, "2.10.0"), "Only test TF 2.10.0 or above") class TestDistributedTFDataDataloader(unittest.TestCase): def setUp(self): os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" @@ -30,14 +32,16 @@ def tearDown(self): logger.info(f"{self._testMethodName} done.\n") def check_tf_dataset_with_batch_raise(self, batch_size, last_batch, distributed): - dataset_with_batch = tf.data.Dataset.from_tensors\ - ((tf.ones([3, 224, 224]), tf.ones([1000]))).repeat(600).batch(2) - dataloader = DATALOADERS['tensorflow']\ - (dataset_with_batch, batch_size=batch_size, last_batch=last_batch, distributed=distributed) + dataset_with_batch = ( + tf.data.Dataset.from_tensors((tf.ones([3, 224, 224]), tf.ones([1000]))).repeat(600).batch(2) + ) + dataloader = DATALOADERS["tensorflow"]( + dataset_with_batch, batch_size=batch_size, last_batch=last_batch, distributed=distributed + ) for batch in dataloader: x, y = batch - if self.count < len(dataloader)-1: - self.assertEqual(len(x), batch_size) + if self.count < len(dataloader) - 1: + self.assertEqual(len(x), batch_size) else: self.assertTrue(len(x) == dataloader.dis_dataset_len % batch_size or len(x) == batch_size) self.assertEqual(x[0].shape, (3, 224, 224)) @@ -46,12 +50,13 @@ def check_tf_dataset_with_batch_raise(self, batch_size, last_batch, distributed) self.count += 1 def check_distributed_raise(self, batch_size, last_batch, distributed): - dataloader = 
DATALOADERS['tensorflow']\ - (self.dataset, batch_size=batch_size, last_batch=last_batch, distributed=distributed) + dataloader = DATALOADERS["tensorflow"]( + self.dataset, batch_size=batch_size, last_batch=last_batch, distributed=distributed + ) for batch in dataloader: x, y = batch - if self.count < len(dataloader)-1: - self.assertEqual(len(x), batch_size) + if self.count < len(dataloader) - 1: + self.assertEqual(len(x), batch_size) else: self.assertTrue(len(x) == dataloader.dis_dataset_len % batch_size or len(x) == batch_size) self.assertEqual(x[0].shape, (3, 224, 224)) @@ -60,36 +65,36 @@ def check_distributed_raise(self, batch_size, last_batch, distributed): self.count += 1 def test_dis_tf_data_dataloader_1(self): - self.assertRaises(TypeError, self.check_tf_dataset_with_batch_raise, 32, 'rollover', True) - + self.assertRaises(TypeError, self.check_tf_dataset_with_batch_raise, 32, "rollover", True) + def test_dis_tf_data_dataloader_2(self): - self.assertRaises(TypeError, self.check_tf_dataset_with_batch_raise, 32, 'no_rollover', True) + self.assertRaises(TypeError, self.check_tf_dataset_with_batch_raise, 32, "no_rollover", True) def test_dis_tf_data_dataloader_3(self): - self.assertRaises(TypeError, self.check_tf_dataset_with_batch_raise, 1, 'rollover', True) - + self.assertRaises(TypeError, self.check_tf_dataset_with_batch_raise, 1, "rollover", True) + def test_dis_tf_data_dataloader_4(self): - self.assertRaises(TypeError, self.check_tf_dataset_with_batch_raise, 1, 'no_rollover', True) + self.assertRaises(TypeError, self.check_tf_dataset_with_batch_raise, 1, "no_rollover", True) def test_dis_tf_data_dataloader_5(self): - self.assertRaises(EnvironmentError, self.check_distributed_raise, 32, 'rollover', True) - + self.assertRaises(EnvironmentError, self.check_distributed_raise, 32, "rollover", True) + def test_dis_tf_data_dataloader_6(self): - self.assertRaises(EnvironmentError, self.check_distributed_raise, 32, 'no_rollover', True) + self.assertRaises(EnvironmentError, self.check_distributed_raise, 32, "no_rollover", True) def test_dis_tf_data_dataloader_7(self): - self.assertRaises(EnvironmentError, self.check_distributed_raise, 1, 'rollover', True) - + self.assertRaises(EnvironmentError, self.check_distributed_raise, 1, "rollover", True) + def test_dis_tf_data_dataloader_8(self): - self.assertRaises(EnvironmentError, self.check_distributed_raise, 1, 'no_rollover', True) + self.assertRaises(EnvironmentError, self.check_distributed_raise, 1, "no_rollover", True) def test_dis_tf_data_dataloader_9(self): batch_size = 32 - dataloader = DATALOADERS['tensorflow'](self.dataset, batch_size=batch_size, last_batch='rollover') + dataloader = DATALOADERS["tensorflow"](self.dataset, batch_size=batch_size, last_batch="rollover") for batch in dataloader: x, y = batch - if self.count < len(dataloader)-1: - self.assertEqual(len(x), batch_size) + if self.count < len(dataloader) - 1: + self.assertEqual(len(x), batch_size) else: self.assertTrue(len(x) == dataloader.dis_dataset_len % batch_size or len(x) == batch_size) self.assertEqual(x[0].shape, (3, 224, 224)) @@ -99,11 +104,11 @@ def test_dis_tf_data_dataloader_9(self): def test_dis_tf_data_dataloader_10(self): batch_size = 32 - dataloader = DATALOADERS['tensorflow'](self.dataset, batch_size=batch_size, last_batch='no_rollover') + dataloader = DATALOADERS["tensorflow"](self.dataset, batch_size=batch_size, last_batch="no_rollover") for batch in dataloader: x, y = batch - if self.count < len(dataloader)-1: - self.assertEqual(len(x), batch_size) + if 
self.count < len(dataloader) - 1: + self.assertEqual(len(x), batch_size) else: self.assertTrue(len(x) == dataloader.dis_dataset_len % batch_size or len(x) == batch_size) self.assertEqual(x[0].shape, (3, 224, 224)) @@ -113,11 +118,11 @@ def test_dis_tf_data_dataloader_10(self): def test_dis_tf_data_dataloader_11(self): batch_size = 1 - dataloader = DATALOADERS['tensorflow'](self.dataset, batch_size=batch_size, last_batch='rollover') + dataloader = DATALOADERS["tensorflow"](self.dataset, batch_size=batch_size, last_batch="rollover") for batch in dataloader: x, y = batch - if self.count < len(dataloader)-1: - self.assertEqual(len(x), batch_size) + if self.count < len(dataloader) - 1: + self.assertEqual(len(x), batch_size) else: self.assertTrue(len(x) == dataloader.dis_dataset_len % batch_size or len(x) == batch_size) self.assertEqual(x[0].shape, (3, 224, 224)) @@ -127,11 +132,11 @@ def test_dis_tf_data_dataloader_11(self): def test_dis_tf_data_dataloader_12(self): batch_size = 1 - dataloader = DATALOADERS['tensorflow'](self.dataset, batch_size=batch_size, last_batch='no_rollover') + dataloader = DATALOADERS["tensorflow"](self.dataset, batch_size=batch_size, last_batch="no_rollover") for batch in dataloader: x, y = batch - if self.count < len(dataloader)-1: - self.assertEqual(len(x), batch_size) + if self.count < len(dataloader) - 1: + self.assertEqual(len(x), batch_size) else: self.assertTrue(len(x) == dataloader.dis_dataset_len % batch_size or len(x) == batch_size) self.assertEqual(x[0].shape, (3, 224, 224)) @@ -141,11 +146,11 @@ def test_dis_tf_data_dataloader_12(self): def test_dis_tf_data_dataloader_13(self): batch_size = 600 - dataloader = DATALOADERS['tensorflow'](self.dataset, batch_size=batch_size, last_batch='rollover') + dataloader = DATALOADERS["tensorflow"](self.dataset, batch_size=batch_size, last_batch="rollover") for batch in dataloader: x, y = batch - if self.count < len(dataloader)-1: - self.assertEqual(len(x), batch_size) + if self.count < len(dataloader) - 1: + self.assertEqual(len(x), batch_size) else: self.assertTrue(len(x) == dataloader.dis_dataset_len % batch_size or len(x) == batch_size) self.assertEqual(x[0].shape, (3, 224, 224)) @@ -155,11 +160,11 @@ def test_dis_tf_data_dataloader_13(self): def test_dis_tf_data_dataloader_14(self): batch_size = 600 - dataloader = DATALOADERS['tensorflow'](self.dataset, batch_size=batch_size, last_batch='no_rollover') + dataloader = DATALOADERS["tensorflow"](self.dataset, batch_size=batch_size, last_batch="no_rollover") for batch in dataloader: x, y = batch - if self.count < len(dataloader)-1: - self.assertEqual(len(x), batch_size) + if self.count < len(dataloader) - 1: + self.assertEqual(len(x), batch_size) else: self.assertTrue(len(x) == dataloader.dis_dataset_len % batch_size or len(x) == batch_size) self.assertEqual(x[0].shape, (3, 224, 224)) @@ -167,13 +172,14 @@ def test_dis_tf_data_dataloader_14(self): self.assertIsInstance(x, np.ndarray) self.count += 1 -@unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.10.0'), "Only test TF 2.10.0 or above") + +@unittest.skipIf(version1_lt_version2(tf.version.VERSION, "2.10.0"), "Only test TF 2.10.0 or above") class TestDefaultDataLoaderSequentialSampler(unittest.TestCase): @classmethod def tearDownClass(cls): - if os.path.exists('minist'): - shutil.rmtree('minist') - + if os.path.exists("minist"): + shutil.rmtree("minist") + def setUp(self): self.count = 0 logger.info(f"CPU: {cpuinfo.get_cpu_info()['brand_raw']}") @@ -184,30 +190,30 @@ def tearDown(self): def 
check_get_len_raise(self, batch_size, last_batch, distributed): dataloader_args = { - 'batch_size': batch_size, - 'dataset': {"MNIST": {'root': './minist', 'train':False, 'download':True}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None, - 'last_batch': last_batch, - 'distributed': distributed + "batch_size": batch_size, + "dataset": {"MNIST": {"root": "./minist", "train": False, "download": True}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, + "last_batch": last_batch, + "distributed": distributed, } - dataloader = create_dataloader('tensorflow', dataloader_args) + dataloader = create_dataloader("tensorflow", dataloader_args) len_dataloader = len(dataloader) def check_distributed_raise(self, batch_size, last_batch, distributed): dataloader_args = { - 'batch_size': batch_size, - 'dataset': {"MNIST": {'root': './minist', 'train':False, 'download':True}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None, - 'last_batch': last_batch, - 'distributed': distributed + "batch_size": batch_size, + "dataset": {"MNIST": {"root": "./minist", "train": False, "download": True}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, + "last_batch": last_batch, + "distributed": distributed, } - dataloader = create_dataloader('tensorflow', dataloader_args) + dataloader = create_dataloader("tensorflow", dataloader_args) for batch in dataloader: x, y = batch - if self.count < len(dataloader)-1: - self.assertEqual(len(x), batch_size) + if self.count < len(dataloader) - 1: + self.assertEqual(len(x), batch_size) else: self.assertTrue(len(x) == dataloader.dis_dataset_len % batch_size or len(x) == batch_size) self.assertEqual(x[0].shape, (24, 24)) @@ -216,43 +222,43 @@ def check_distributed_raise(self, batch_size, last_batch, distributed): self.count += 1 def test_sequential_sampler1(self): - self.assertRaises(EnvironmentError, self.check_get_len_raise, 32, 'rollover', True) + self.assertRaises(EnvironmentError, self.check_get_len_raise, 32, "rollover", True) def test_sequential_sampler2(self): - self.assertRaises(EnvironmentError, self.check_get_len_raise, 32, 'no_rollover', True) + self.assertRaises(EnvironmentError, self.check_get_len_raise, 32, "no_rollover", True) def test_sequential_sampler3(self): - self.assertRaises(EnvironmentError, self.check_get_len_raise, 1, 'rollover', True) + self.assertRaises(EnvironmentError, self.check_get_len_raise, 1, "rollover", True) def test_sequential_sampler4(self): - self.assertRaises(EnvironmentError, self.check_get_len_raise, 1, 'no_rollover', True) + self.assertRaises(EnvironmentError, self.check_get_len_raise, 1, "no_rollover", True) def test_sequential_sampler5(self): - self.assertRaises(EnvironmentError, self.check_distributed_raise, 32, 'rollover', True) + self.assertRaises(EnvironmentError, self.check_distributed_raise, 32, "rollover", True) def test_sequential_sampler6(self): - self.assertRaises(EnvironmentError, self.check_distributed_raise, 32, 'no_rollover', True) + self.assertRaises(EnvironmentError, self.check_distributed_raise, 32, "no_rollover", True) def test_sequential_sampler7(self): - self.assertRaises(EnvironmentError, self.check_distributed_raise, 1, 'rollover', True) + self.assertRaises(EnvironmentError, self.check_distributed_raise, 1, "rollover", True) def test_sequential_sampler8(self): - self.assertRaises(EnvironmentError, self.check_distributed_raise, 1, 'no_rollover', True) + self.assertRaises(EnvironmentError, self.check_distributed_raise, 1, "no_rollover", True) def test_sequential_sampler9(self): 
batch_size = 3332 dataloader_args = { - 'batch_size': batch_size, - 'dataset': {"MNIST": {'root': './minist', 'train':False, 'download':True}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None, - 'last_batch': 'rollover' + "batch_size": batch_size, + "dataset": {"MNIST": {"root": "./minist", "train": False, "download": True}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, + "last_batch": "rollover", } - dataloader = create_dataloader('tensorflow', dataloader_args) + dataloader = create_dataloader("tensorflow", dataloader_args) for batch in dataloader: x, y = batch - if self.count < len(dataloader)-1: - self.assertEqual(len(x), batch_size) + if self.count < len(dataloader) - 1: + self.assertEqual(len(x), batch_size) else: self.assertTrue(len(x) == dataloader.dis_dataset_len % batch_size or len(x) == batch_size) self.assertEqual(x[0].shape, (24, 24)) @@ -263,17 +269,17 @@ def test_sequential_sampler9(self): def test_sequential_sampler10(self): batch_size = 3332 dataloader_args = { - 'batch_size': batch_size, - 'dataset': {"MNIST": {'root': './minist', 'train':False, 'download':True}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None, - 'last_batch': 'no_rollover' + "batch_size": batch_size, + "dataset": {"MNIST": {"root": "./minist", "train": False, "download": True}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, + "last_batch": "no_rollover", } - dataloader = create_dataloader('tensorflow', dataloader_args) + dataloader = create_dataloader("tensorflow", dataloader_args) for batch in dataloader: x, y = batch - if self.count < len(dataloader)-1: - self.assertEqual(len(x), batch_size) + if self.count < len(dataloader) - 1: + self.assertEqual(len(x), batch_size) else: self.assertTrue(len(x) == dataloader.dis_dataset_len % batch_size or len(x) == batch_size) self.assertEqual(x[0].shape, (24, 24)) @@ -284,17 +290,17 @@ def test_sequential_sampler10(self): def test_sequential_sampler11(self): batch_size = 1 dataloader_args = { - 'batch_size': batch_size, - 'dataset': {"MNIST": {'root': './minist', 'train':False, 'download':True}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None, - 'last_batch': 'rollover' + "batch_size": batch_size, + "dataset": {"MNIST": {"root": "./minist", "train": False, "download": True}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, + "last_batch": "rollover", } - dataloader = create_dataloader('tensorflow', dataloader_args) + dataloader = create_dataloader("tensorflow", dataloader_args) for batch in dataloader: x, y = batch - if self.count < len(dataloader)-1: - self.assertEqual(len(x), batch_size) + if self.count < len(dataloader) - 1: + self.assertEqual(len(x), batch_size) else: self.assertTrue(len(x) == dataloader.dis_dataset_len % batch_size or len(x) == batch_size) self.assertEqual(x[0].shape, (24, 24)) @@ -305,17 +311,17 @@ def test_sequential_sampler11(self): def test_sequential_sampler12(self): batch_size = 1 dataloader_args = { - 'batch_size': batch_size, - 'dataset': {"MNIST": {'root': './minist', 'train':False, 'download':True}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None, - 'last_batch': 'no_rollover' + "batch_size": batch_size, + "dataset": {"MNIST": {"root": "./minist", "train": False, "download": True}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, + "last_batch": "no_rollover", } - dataloader = create_dataloader('tensorflow', dataloader_args) + dataloader = create_dataloader("tensorflow", dataloader_args) for batch in dataloader: x, y = batch - if self.count < 
len(dataloader)-1: - self.assertEqual(len(x), batch_size) + if self.count < len(dataloader) - 1: + self.assertEqual(len(x), batch_size) else: self.assertTrue(len(x) == dataloader.dis_dataset_len % batch_size or len(x) == batch_size) self.assertEqual(x[0].shape, (24, 24)) @@ -326,17 +332,17 @@ def test_sequential_sampler12(self): def test_sequential_sampler13(self): batch_size = 10000 dataloader_args = { - 'batch_size': batch_size, - 'dataset': {"MNIST": {'root': './minist', 'train':False, 'download':True}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None, - 'last_batch': 'rollover' + "batch_size": batch_size, + "dataset": {"MNIST": {"root": "./minist", "train": False, "download": True}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, + "last_batch": "rollover", } - dataloader = create_dataloader('tensorflow', dataloader_args) + dataloader = create_dataloader("tensorflow", dataloader_args) for batch in dataloader: x, y = batch - if self.count < len(dataloader)-1: - self.assertEqual(len(x), batch_size) + if self.count < len(dataloader) - 1: + self.assertEqual(len(x), batch_size) else: self.assertTrue(len(x) == dataloader.dis_dataset_len % batch_size or len(x) == batch_size) self.assertEqual(x[0].shape, (24, 24)) @@ -347,17 +353,17 @@ def test_sequential_sampler13(self): def test_sequential_sampler14(self): batch_size = 10000 dataloader_args = { - 'batch_size': batch_size, - 'dataset': {"MNIST": {'root': './minist', 'train':False, 'download':True}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None, - 'last_batch': 'no_rollover' + "batch_size": batch_size, + "dataset": {"MNIST": {"root": "./minist", "train": False, "download": True}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, + "last_batch": "no_rollover", } - dataloader = create_dataloader('tensorflow', dataloader_args) + dataloader = create_dataloader("tensorflow", dataloader_args) for batch in dataloader: x, y = batch - if self.count < len(dataloader)-1: - self.assertEqual(len(x), batch_size) + if self.count < len(dataloader) - 1: + self.assertEqual(len(x), batch_size) else: self.assertTrue(len(x) == dataloader.dis_dataset_len % batch_size or len(x) == batch_size) self.assertEqual(x[0].shape, (24, 24)) @@ -365,17 +371,18 @@ def test_sequential_sampler14(self): self.assertIsInstance(x, np.ndarray) self.count += 1 -@unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.10.0'), "Only test TF 2.10.0 or above") + +@unittest.skipIf(version1_lt_version2(tf.version.VERSION, "2.10.0"), "Only test TF 2.10.0 or above") class TestDefaultDataLoaderIterableSampler(unittest.TestCase): class iter_dataset(object): def __iter__(self): sample_size = 250 - for i in range(1, sample_size+1): + for i in range(1, sample_size + 1): yield np.array([i]) def setUp(self): self.rank = 0 - self.size = 1 + self.size = 1 self.count = 1 self.dataset = self.iter_dataset() logger.info(f"CPU: {cpuinfo.get_cpu_info()['brand_raw']}") @@ -385,160 +392,210 @@ def tearDown(self): logger.info(f"{self._testMethodName} done.\n") def check_get_len_raise(self, batch_size, last_batch, distributed): - dataloader = DATALOADERS['tensorflow']\ - (self.dataset, batch_size=batch_size, last_batch=last_batch, distributed=distributed) + dataloader = DATALOADERS["tensorflow"]( + self.dataset, batch_size=batch_size, last_batch=last_batch, distributed=distributed + ) len_dataloader = len(dataloader) def check_distributed_raise(self, batch_size, last_batch, distributed): - dataloader = DATALOADERS['tensorflow']\ - (self.dataset, 
batch_size=batch_size, last_batch=last_batch, distributed=distributed) + dataloader = DATALOADERS["tensorflow"]( + self.dataset, batch_size=batch_size, last_batch=last_batch, distributed=distributed + ) for batch in dataloader: if self.count < len(dataloader): self.assertEqual(len(batch), batch_size) - self.assertEqual(self.count*batch_size*self.size-self.size+self.rank+1, batch[-1][0]) + self.assertEqual(self.count * batch_size * self.size - self.size + self.rank + 1, batch[-1][0]) else: self.assertTrue(len(batch) == dataloader.dis_dataset_len % batch_size or len(batch) == batch_size) - self.assertEqual(((self.count-1)*batch_size+len(batch)-1)*self.size+self.rank+1, batch[-1][0]) + self.assertEqual( + ((self.count - 1) * batch_size + len(batch) - 1) * self.size + self.rank + 1, batch[-1][0] + ) break self.count += 1 - + def test_iterable_sampler1(self): - self.assertRaises(EnvironmentError, self.check_get_len_raise, 32, 'rollover', True) + self.assertRaises(EnvironmentError, self.check_get_len_raise, 32, "rollover", True) def test_iterable_sampler2(self): - self.assertRaises(EnvironmentError, self.check_get_len_raise, 32, 'no_rollover', True) + self.assertRaises(EnvironmentError, self.check_get_len_raise, 32, "no_rollover", True) def test_iterable_sampler3(self): - self.assertRaises(EnvironmentError, self.check_get_len_raise, 1, 'rollover', True) + self.assertRaises(EnvironmentError, self.check_get_len_raise, 1, "rollover", True) def test_iterable_sampler4(self): - self.assertRaises(EnvironmentError, self.check_get_len_raise, 1, 'no_rollover', True) + self.assertRaises(EnvironmentError, self.check_get_len_raise, 1, "no_rollover", True) def test_iterable_sampler5(self): - self.assertRaises(EnvironmentError, self.check_distributed_raise, 32, 'rollover', True) + self.assertRaises(EnvironmentError, self.check_distributed_raise, 32, "rollover", True) def test_iterable_sampler6(self): - self.assertRaises(EnvironmentError, self.check_distributed_raise, 32, 'no_rollover', True) + self.assertRaises(EnvironmentError, self.check_distributed_raise, 32, "no_rollover", True) def test_iterable_sampler7(self): - self.assertRaises(EnvironmentError, self.check_distributed_raise, 1, 'rollover', True) + self.assertRaises(EnvironmentError, self.check_distributed_raise, 1, "rollover", True) def test_iterable_sampler8(self): - self.assertRaises(EnvironmentError, self.check_distributed_raise, 1, 'no_rollover', True) + self.assertRaises(EnvironmentError, self.check_distributed_raise, 1, "no_rollover", True) def test_iterable_sampler9(self): batch_size = 128 - last_batch = 'rollover' - dataloader = DATALOADERS['tensorflow'](self.dataset, batch_size=batch_size, last_batch=last_batch) + last_batch = "rollover" + dataloader = DATALOADERS["tensorflow"](self.dataset, batch_size=batch_size, last_batch=last_batch) for batch in dataloader: if self.count < len(dataloader): self.assertEqual(len(batch), batch_size) - self.assertEqual(self.count*batch_size*self.size-self.size+self.rank+1, batch[-1][0]) + self.assertEqual(self.count * batch_size * self.size - self.size + self.rank + 1, batch[-1][0]) else: self.assertTrue(len(batch) == dataloader.dis_dataset_len % batch_size or len(batch) == batch_size) - self.assertEqual(((self.count-1)*batch_size+len(batch)-1)*self.size+self.rank+1, batch[-1][0]) + self.assertEqual( + ((self.count - 1) * batch_size + len(batch) - 1) * self.size + self.rank + 1, batch[-1][0] + ) break self.count += 1 def test_iterable_sampler10(self): batch_size = 128 - last_batch = 'no_rollover' - dataloader 
= DATALOADERS['tensorflow'](self.dataset, batch_size=batch_size, last_batch=last_batch) + last_batch = "no_rollover" + dataloader = DATALOADERS["tensorflow"](self.dataset, batch_size=batch_size, last_batch=last_batch) for batch in dataloader: if self.count < len(dataloader): self.assertEqual(len(batch), batch_size) - self.assertEqual(self.count*batch_size*self.size-self.size+self.rank+1, batch[-1][0]) + self.assertEqual(self.count * batch_size * self.size - self.size + self.rank + 1, batch[-1][0]) else: self.assertTrue(len(batch) == dataloader.dis_dataset_len % batch_size or len(batch) == batch_size) - self.assertEqual(((self.count-1)*batch_size+len(batch)-1)*self.size+self.rank+1, batch[-1][0]) + self.assertEqual( + ((self.count - 1) * batch_size + len(batch) - 1) * self.size + self.rank + 1, batch[-1][0] + ) break self.count += 1 def test_iterable_sampler11(self): batch_size = 1 - last_batch = 'rollover' - dataloader = DATALOADERS['tensorflow'](self.dataset, batch_size=batch_size, last_batch=last_batch) + last_batch = "rollover" + dataloader = DATALOADERS["tensorflow"](self.dataset, batch_size=batch_size, last_batch=last_batch) for batch in dataloader: if self.count < len(dataloader): self.assertEqual(len(batch), batch_size) - self.assertEqual(self.count*batch_size*self.size-self.size+self.rank+1, batch[-1][0]) + self.assertEqual(self.count * batch_size * self.size - self.size + self.rank + 1, batch[-1][0]) else: self.assertTrue(len(batch) == dataloader.dis_dataset_len % batch_size or len(batch) == batch_size) - self.assertEqual(((self.count-1)*batch_size+len(batch)-1)*self.size+self.rank+1, batch[-1][0]) + self.assertEqual( + ((self.count - 1) * batch_size + len(batch) - 1) * self.size + self.rank + 1, batch[-1][0] + ) break self.count += 1 def test_iterable_sampler12(self): batch_size = 1 - last_batch = 'no_rollover' - dataloader = DATALOADERS['tensorflow'](self.dataset, batch_size=batch_size, last_batch=last_batch) + last_batch = "no_rollover" + dataloader = DATALOADERS["tensorflow"](self.dataset, batch_size=batch_size, last_batch=last_batch) for batch in dataloader: if self.count < len(dataloader): self.assertEqual(len(batch), batch_size) - self.assertEqual(self.count*batch_size*self.size-self.size+self.rank+1, batch[-1][0]) + self.assertEqual(self.count * batch_size * self.size - self.size + self.rank + 1, batch[-1][0]) else: self.assertTrue(len(batch) == dataloader.dis_dataset_len % batch_size or len(batch) == batch_size) - self.assertEqual(((self.count-1)*batch_size+len(batch)-1)*self.size+self.rank+1, batch[-1][0]) + self.assertEqual( + ((self.count - 1) * batch_size + len(batch) - 1) * self.size + self.rank + 1, batch[-1][0] + ) break self.count += 1 def test_iterable_sampler13(self): batch_size = 1000 - last_batch = 'rollover' - dataloader = DATALOADERS['tensorflow'](self.dataset, batch_size=batch_size, last_batch=last_batch) + last_batch = "rollover" + dataloader = DATALOADERS["tensorflow"](self.dataset, batch_size=batch_size, last_batch=last_batch) for batch in dataloader: if self.count < len(dataloader): self.assertEqual(len(batch), batch_size) - self.assertEqual(self.count*batch_size*self.size-self.size+self.rank+1, batch[-1][0]) + self.assertEqual(self.count * batch_size * self.size - self.size + self.rank + 1, batch[-1][0]) else: self.assertTrue(len(batch) == dataloader.dis_dataset_len % batch_size or len(batch) == batch_size) - self.assertEqual(((self.count-1)*batch_size+len(batch)-1)*self.size+self.rank+1, batch[-1][0]) + self.assertEqual( + ((self.count - 1) * 
batch_size + len(batch) - 1) * self.size + self.rank + 1, batch[-1][0] + ) break self.count += 1 def test_iterable_sampler14(self): batch_size = 1000 - last_batch = 'no_rollover' - dataloader = DATALOADERS['tensorflow'](self.dataset, batch_size=batch_size, last_batch=last_batch) + last_batch = "no_rollover" + dataloader = DATALOADERS["tensorflow"](self.dataset, batch_size=batch_size, last_batch=last_batch) for batch in dataloader: if self.count < len(dataloader): self.assertEqual(len(batch), batch_size) - self.assertEqual(self.count*batch_size*self.size-self.size+self.rank+1, batch[-1][0]) + self.assertEqual(self.count * batch_size * self.size - self.size + self.rank + 1, batch[-1][0]) else: self.assertTrue(len(batch) == dataloader.dis_dataset_len % batch_size or len(batch) == batch_size) - self.assertEqual(((self.count-1)*batch_size+len(batch)-1)*self.size+self.rank+1, batch[-1][0]) + self.assertEqual( + ((self.count - 1) * batch_size + len(batch) - 1) * self.size + self.rank + 1, batch[-1][0] + ) break self.count += 1 -@unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.10.0'), "Only test TF 2.10.0 or above") + +@unittest.skipIf(version1_lt_version2(tf.version.VERSION, "2.10.0"), "Only test TF 2.10.0 or above") class TestTensorflowBertDataLoader(unittest.TestCase): - label = [{ - "paragraphs0":[ - {'context': - 'Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season.', - 'qas': [{ - 'answers': [ - {'answer_start': 177, 'text': 'Denver Broncos'}, - {'answer_start': 177, 'text': 'Denver Broncos'}, - {'answer_start': 177, 'text': 'Denver Broncos'}], - 'question': 'Which NFL team represented the AFC at Super Bowl 50?', - 'id': '56be4db0acb8001400a502ec'}] - } - ] - }] + label = [ + { + "paragraphs0": [ + { + "context": "Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season.", + "qas": [ + { + "answers": [ + {"answer_start": 177, "text": "Denver Broncos"}, + {"answer_start": 177, "text": "Denver Broncos"}, + {"answer_start": 177, "text": "Denver Broncos"}, + ], + "question": "Which NFL team represented the AFC at Super Bowl 50?", + "id": "56be4db0acb8001400a502ec", + } + ], + } + ] + } + ] unique_id = 1000000000 - input_ids = [101, 2029, 5088, 2136, 3421, 1996, 10511, 2012, 3565, 4605, 2753, 1029, 102, 3565, 4605, 2753,\ - 1007, 2005, 1996, 2325, 2161, 1012, 1996, 2137, 2374, 3034, 1006] + input_ids = [ + 101, + 2029, + 5088, + 2136, + 3421, + 1996, + 10511, + 2012, + 3565, + 4605, + 2753, + 1029, + 102, + 3565, + 4605, + 2753, + 1007, + 2005, + 1996, + 2325, + 2161, + 1012, + 1996, + 2137, + 2374, + 3034, + 1006, + ] input_mask = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] segment_ids = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] - fake_json = json.dumps({'data': label, 'version': '1.1'}) - with open('dev.json', 'w') as f: - f.write(fake_json) - + fake_json = json.dumps({"data": label, "version": "1.1"}) + with open("dev.json", "w") as f: + f.write(fake_json) + @classmethod def tearDownClass(cls): - os.remove('test.record') - os.remove('dev.json') + os.remove("test.record") + os.remove("dev.json") def setUp(self): logger.info(f"CPU: {cpuinfo.get_cpu_info()['brand_raw']}") @@ -548,79 +605,60 @@ def tearDown(self): logger.info(f"{self._testMethodName} done.\n") def check_not_implement(self, batch_size, distributed): - with tf.io.TFRecordWriter('./test.record') as 
writer: + with tf.io.TFRecordWriter("./test.record") as writer: features = collections.OrderedDict() - features["unique_ids"] = tf.train.Feature( - int64_list=tf.train.Int64List(value=list([self.unique_id]))) - features["input_ids"] = tf.train.Feature( - int64_list=tf.train.Int64List(value=list(self.input_ids))) - features["input_mask"] = tf.train.Feature( - int64_list=tf.train.Int64List(value=list(self.input_mask))) - features["segment_ids"] = tf.train.Feature( - int64_list=tf.train.Int64List(value=list(self.segment_ids))) + features["unique_ids"] = tf.train.Feature(int64_list=tf.train.Int64List(value=list([self.unique_id]))) + features["input_ids"] = tf.train.Feature(int64_list=tf.train.Int64List(value=list(self.input_ids))) + features["input_mask"] = tf.train.Feature(int64_list=tf.train.Int64List(value=list(self.input_mask))) + features["segment_ids"] = tf.train.Feature(int64_list=tf.train.Int64List(value=list(self.segment_ids))) tf_example = tf.train.Example(features=tf.train.Features(feature=features)) writer.write(tf_example.SerializeToString()) eval_dataset = create_dataset( - 'tensorflow', - {'bert':{'root':'test.record', 'label_file': './dev.json'}}, - None, - None) - dataloader = DATALOADERS['tensorflow']\ - (dataset=eval_dataset, batch_size=batch_size, distributed=distributed) + "tensorflow", {"bert": {"root": "test.record", "label_file": "./dev.json"}}, None, None + ) + dataloader = DATALOADERS["tensorflow"](dataset=eval_dataset, batch_size=batch_size, distributed=distributed) def test_tf_bert_dataloader_1(self): self.assertRaises(NotImplementedError, self.check_not_implement, 32, True) - + def test_tf_bert_dataloader_2(self): batch_size = 128 - with tf.io.TFRecordWriter('./test.record') as writer: + with tf.io.TFRecordWriter("./test.record") as writer: features = collections.OrderedDict() - features["unique_ids"] = tf.train.Feature( - int64_list=tf.train.Int64List(value=list([self.unique_id]))) - features["input_ids"] = tf.train.Feature( - int64_list=tf.train.Int64List(value=list(self.input_ids))) - features["input_mask"] = tf.train.Feature( - int64_list=tf.train.Int64List(value=list(self.input_mask))) - features["segment_ids"] = tf.train.Feature( - int64_list=tf.train.Int64List(value=list(self.segment_ids))) + features["unique_ids"] = tf.train.Feature(int64_list=tf.train.Int64List(value=list([self.unique_id]))) + features["input_ids"] = tf.train.Feature(int64_list=tf.train.Int64List(value=list(self.input_ids))) + features["input_mask"] = tf.train.Feature(int64_list=tf.train.Int64List(value=list(self.input_mask))) + features["segment_ids"] = tf.train.Feature(int64_list=tf.train.Int64List(value=list(self.segment_ids))) tf_example = tf.train.Example(features=tf.train.Features(feature=features)) writer.write(tf_example.SerializeToString()) eval_dataset = create_dataset( - 'tensorflow', - {'bert':{'root':'test.record', 'label_file': './dev.json'}}, - None, - None) - dataloader = DATALOADERS['tensorflow'](dataset=eval_dataset, batch_size=batch_size) + "tensorflow", {"bert": {"root": "test.record", "label_file": "./dev.json"}}, None, None + ) + dataloader = DATALOADERS["tensorflow"](dataset=eval_dataset, batch_size=batch_size) for inputs, labels in dataloader: - self.assertEqual(inputs[0], 'test.record') + self.assertEqual(inputs[0], "test.record") self.assertEqual(inputs[1], batch_size) self.assertEqual(len(labels), 1) def test_tf_bert_dataloader_3(self): batch_size = 1 - with tf.io.TFRecordWriter('./test.record') as writer: + with tf.io.TFRecordWriter("./test.record") as writer: 
features = collections.OrderedDict() - features["unique_ids"] = tf.train.Feature( - int64_list=tf.train.Int64List(value=list([self.unique_id]))) - features["input_ids"] = tf.train.Feature( - int64_list=tf.train.Int64List(value=list(self.input_ids))) - features["input_mask"] = tf.train.Feature( - int64_list=tf.train.Int64List(value=list(self.input_mask))) - features["segment_ids"] = tf.train.Feature( - int64_list=tf.train.Int64List(value=list(self.segment_ids))) + features["unique_ids"] = tf.train.Feature(int64_list=tf.train.Int64List(value=list([self.unique_id]))) + features["input_ids"] = tf.train.Feature(int64_list=tf.train.Int64List(value=list(self.input_ids))) + features["input_mask"] = tf.train.Feature(int64_list=tf.train.Int64List(value=list(self.input_mask))) + features["segment_ids"] = tf.train.Feature(int64_list=tf.train.Int64List(value=list(self.segment_ids))) tf_example = tf.train.Example(features=tf.train.Features(feature=features)) writer.write(tf_example.SerializeToString()) eval_dataset = create_dataset( - 'tensorflow', - {'bert':{'root':'test.record', 'label_file': './dev.json'}}, - None, - None) - dataloader = DATALOADERS['tensorflow'](dataset=eval_dataset, batch_size=batch_size) + "tensorflow", {"bert": {"root": "test.record", "label_file": "./dev.json"}}, None, None + ) + dataloader = DATALOADERS["tensorflow"](dataset=eval_dataset, batch_size=batch_size) for inputs, labels in dataloader: - self.assertEqual(inputs[0], 'test.record') + self.assertEqual(inputs[0], "test.record") self.assertEqual(inputs[1], batch_size) self.assertEqual(len(labels), 1) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/export/test_onnx_qlieanr_to_qdq.py b/test/export/test_onnx_qlieanr_to_qdq.py index cb443c0c0b8..92ac0c4f7d2 100644 --- a/test/export/test_onnx_qlieanr_to_qdq.py +++ b/test/export/test_onnx_qlieanr_to_qdq.py @@ -1,79 +1,89 @@ +import copy import os import shutil import unittest -import copy -import onnx + import numpy as np -from onnx import helper, TensorProto, numpy_helper, onnx_pb -from neural_compressor.adaptor.ox_utils.quantizer import Quantizer -from neural_compressor.adaptor.ox_utils.util import QuantizedInitializer, QuantizedValue, QuantizationMode +import onnx import onnxruntime as ort +from onnx import TensorProto, helper, numpy_helper, onnx_pb + +from neural_compressor.adaptor.ox_utils.quantizer import Quantizer +from neural_compressor.adaptor.ox_utils.util import QuantizationMode, QuantizedInitializer, QuantizedValue from neural_compressor.config import ONNXQlinear2QDQConfig from neural_compressor.experimental.common import Model OPSET = onnx.OperatorSetIdProto() OPSET.version = 17 + def build_model(): initializers = [] - input = helper.make_tensor_value_info('input', TensorProto.FLOAT, [1, 3, 15, 15]) - output = helper.make_tensor_value_info('reshape_output', TensorProto.FLOAT, [88, 11]) - - add_node = onnx.helper.make_node('Add', ['input', 'add_init'], ['add_out'], name='add') + input = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 15, 15]) + output = helper.make_tensor_value_info("reshape_output", TensorProto.FLOAT, [88, 11]) + + add_node = onnx.helper.make_node("Add", ["input", "add_init"], ["add_out"], name="add") conv1_weight_initializer = numpy_helper.from_array( - np.random.randint(-1, 2, [3, 3, 3, 3]).astype(np.float32), name='conv1_weight') - conv1_node = helper.make_node('Conv', ['add_out', 'conv1_weight'], ['conv1_output'], name='conv1') + np.random.randint(-1, 2, [3, 3, 3, 3]).astype(np.float32), 
name="conv1_weight" + ) + conv1_node = helper.make_node("Conv", ["add_out", "conv1_weight"], ["conv1_output"], name="conv1") conv2_weight_initializer = numpy_helper.from_array( - np.random.randint(-1, 2, [5, 3, 3, 3]).astype(np.float32), name='conv2_weight') - conv2_node = helper.make_node('Conv', ['add_out', 'conv2_weight'], ['conv2_output'], name='conv2') + np.random.randint(-1, 2, [5, 3, 3, 3]).astype(np.float32), name="conv2_weight" + ) + conv2_node = helper.make_node("Conv", ["add_out", "conv2_weight"], ["conv2_output"], name="conv2") # 1, 8, 13, 13 - concat_node = helper.make_node('Concat', ['conv1_output', 'conv2_output'], [ - 'concat_output'], name='Concat', axis=1) + concat_node = helper.make_node("Concat", ["conv1_output", "conv2_output"], ["concat_output"], name="Concat", axis=1) # 1, 8, 11, 11 - avg_args = {'kernel_shape': [3, 3]} - avgpool_node = helper.make_node('AveragePool', ['concat_output'], ['avg_output'], name='AveragePool', **avg_args) - reshape_node = onnx.helper.make_node('Reshape', ['avg_output', 'shape'], ['reshape_output'], name='Reshape') + avg_args = {"kernel_shape": [3, 3]} + avgpool_node = helper.make_node("AveragePool", ["concat_output"], ["avg_output"], name="AveragePool", **avg_args) + reshape_node = onnx.helper.make_node("Reshape", ["avg_output", "shape"], ["reshape_output"], name="Reshape") initializers = [conv1_weight_initializer, conv2_weight_initializer] - initializers.append(onnx.numpy_helper.from_array(np.array([88, 11], dtype=np.int64), name='shape')) - initializers.append(onnx.numpy_helper.from_array(np.zeros((1, 3, 15, 15), dtype=np.float32), name='add_init')) - graph = helper.make_graph([conv1_node, conv2_node, concat_node, avgpool_node, reshape_node, add_node], - 'test', [input], [output], initializer=initializers) + initializers.append(onnx.numpy_helper.from_array(np.array([88, 11], dtype=np.int64), name="shape")) + initializers.append(onnx.numpy_helper.from_array(np.zeros((1, 3, 15, 15), dtype=np.float32), name="add_init")) + graph = helper.make_graph( + [conv1_node, conv2_node, concat_node, avgpool_node, reshape_node, add_node], + "test", + [input], + [output], + initializer=initializers, + ) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) return model -class TestAdaptorONNXRT(unittest.TestCase): +class TestAdaptorONNXRT(unittest.TestCase): qlinear_backend = QuantizationMode.QLinearOps - qdq_backend = 'qdqops' + qdq_backend = "qdqops" integer_backend = QuantizationMode.IntegerOps - static_q_config = {"weight":{'dtype': 3, - 'algorithm': 'minmax', - 'scheme':'sym', - 'granularity': 'per_tensor'}, - 'activation':{'dtype': 2, - 'algorithm': 'minmax', - 'scheme':'asym', - 'granularity':'per_tensor', - 'quant_mode': 'static'} - } - dynamic_q_config = {"weight":{'dtype': 3, - 'algorithm': 'minmax', - 'scheme':'sym', - 'granularity': 'per_tensor'}, - 'activation':{'dtype': 2, - 'algorithm': 'minmax', - 'scheme':'asym', - 'granularity':'per_tensor', - 'quant_mode': 'dynamic'}} + static_q_config = { + "weight": {"dtype": 3, "algorithm": "minmax", "scheme": "sym", "granularity": "per_tensor"}, + "activation": { + "dtype": 2, + "algorithm": "minmax", + "scheme": "asym", + "granularity": "per_tensor", + "quant_mode": "static", + }, + } + dynamic_q_config = { + "weight": {"dtype": 3, "algorithm": "minmax", "scheme": "sym", "granularity": "per_tensor"}, + "activation": { + "dtype": 2, + "algorithm": "minmax", + "scheme": "asym", + "granularity": "per_tensor", + "quant_mode": "dynamic", + }, + } config = ONNXQlinear2QDQConfig() 
@classmethod def setUpClass(cls): - os.makedirs('./onnxrt_test') + os.makedirs("./onnxrt_test") @classmethod def tearDownClass(cls): @@ -81,26 +91,19 @@ def tearDownClass(cls): os.remove("test.onnx") def qlinear_test(self, model, q_config, quantize_params, quantizable_op_types, **kwargs): - quantizer = Quantizer(copy.deepcopy(model), - q_config, - self.qlinear_backend, - True, - quantize_params, - quantizable_op_types, - **kwargs) + quantizer = Quantizer( + copy.deepcopy(model), q_config, self.qlinear_backend, True, quantize_params, quantizable_op_types, **kwargs + ) model = quantizer.quantize_model() return Model(model) def dynamic_test(self, model, q_config, quantize_params, quantizable_op_types): - quantizer = Quantizer(copy.deepcopy(model), - q_config, - self.integer_backend, - False, - quantize_params, - quantizable_op_types) + quantizer = Quantizer( + copy.deepcopy(model), q_config, self.integer_backend, False, quantize_params, quantizable_op_types + ) quantizer.quantize_model() return Model(model) - + def test_argmax(self): input_name = "input" output_name = "output" @@ -155,15 +158,15 @@ def test_argmax(self): ) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) model.ir_version = 7 # use stable onnx ir version - q_config = {'conv_node': self.static_q_config, - 'argmax_node': self.static_q_config} - quantize_params = {'input': [np.uint8(0), np.float32(10.)], - 'conv_weight': [np.uint8(0), np.float32(10.)], - 'conv_output': [np.uint8(0), np.float32(10.)], - 'output': [np.uint8(0), np.float32(10.)], - } - q_model = self.qlinear_test(model, q_config, quantize_params, ['Conv', 'ArgMax']) - q_model.export('./test.onnx', self.config) + q_config = {"conv_node": self.static_q_config, "argmax_node": self.static_q_config} + quantize_params = { + "input": [np.uint8(0), np.float32(10.0)], + "conv_weight": [np.uint8(0), np.float32(10.0)], + "conv_output": [np.uint8(0), np.float32(10.0)], + "output": [np.uint8(0), np.float32(10.0)], + } + q_model = self.qlinear_test(model, q_config, quantize_params, ["Conv", "ArgMax"]) + q_model.export("./test.onnx", self.config) def test_gemm(self): input_name = "input" @@ -181,14 +184,8 @@ def test_gemm(self): bias_data = np.random.normal(0, 0.1, bias_shape).astype(np.float32) initializers.append(onnx.numpy_helper.from_array(bias_data, name=bias_name)) - gemm1_node = onnx.helper.make_node( - "Gemm", - [input_name, weight_name, bias_name], - [output_name], - alpha=1.0, - beta=1.0, - transB=1, - name=node_name + gemm1_node = onnx.helper.make_node( + "Gemm", [input_name, weight_name, bias_name], [output_name], alpha=1.0, beta=1.0, transB=1, name=node_name ) gemm1_output_name = "gemm1_output" @@ -204,24 +201,19 @@ def test_gemm(self): ) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) model.ir_version = 7 # use stable onnx ir version - q_config = {'gemm': self.static_q_config} - quantize_params = {'input': [np.uint8(0), np.float32(10.)], - 'linear1.weight': [np.uint8(0), np.float32(10.)], - 'linear1.bias': [np.uint8(0), np.float32(10.)], - 'output': [np.uint8(0), np.float32(10.)], - } - q_model = self.qlinear_test(model, q_config, quantize_params, ['Gemm']) - q_model.export('./test.onnx', self.config) + q_config = {"gemm": self.static_q_config} + quantize_params = { + "input": [np.uint8(0), np.float32(10.0)], + "linear1.weight": [np.uint8(0), np.float32(10.0)], + "linear1.bias": [np.uint8(0), np.float32(10.0)], + "output": [np.uint8(0), np.float32(10.0)], + } + q_model = self.qlinear_test(model, q_config, 
quantize_params, ["Gemm"]) + q_model.export("./test.onnx", self.config) bias_tensor = helper.make_tensor_value_info(bias_name, TensorProto.FLOAT, [100]) - gemm2_node = onnx.helper.make_node( - "Gemm", - [input_name, weight_name, bias_name], - [output_name], - alpha=1.0, - beta=1.0, - transB=1, - name=node_name + gemm2_node = onnx.helper.make_node( + "Gemm", [input_name, weight_name, bias_name], [output_name], alpha=1.0, beta=1.0, transB=1, name=node_name ) initializers = [] initializers.append(onnx.numpy_helper.from_array(weight_data, name=weight_name)) @@ -235,416 +227,469 @@ def test_gemm(self): ) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) model.ir_version = 7 - q_model = self.qlinear_test(model, q_config, quantize_params, ['Gemm']) - q_model.export('./test.onnx', self.config) + q_model = self.qlinear_test(model, q_config, quantize_params, ["Gemm"]) + q_model.export("./test.onnx", self.config) def test_embed(self): input_ids_shape = [1, 4] - input_ids_tensor = helper.make_tensor_value_info('input_ids', TensorProto.INT32, input_ids_shape) + input_ids_tensor = helper.make_tensor_value_info("input_ids", TensorProto.INT32, input_ids_shape) segment_ids_shape = [1, 4] - segment_ids_tensor = helper.make_tensor_value_info('segment_ids', TensorProto.INT32, segment_ids_shape) + segment_ids_tensor = helper.make_tensor_value_info("segment_ids", TensorProto.INT32, segment_ids_shape) mask_shape = [1, 4] - mask_tensor = helper.make_tensor_value_info('mask', TensorProto.INT32, input_ids_shape) + mask_tensor = helper.make_tensor_value_info("mask", TensorProto.INT32, input_ids_shape) # EmbedLayerNormalization Node Constants and Weights: word_embed_shape = [32, 4] - word_embed_weights = np.random.random_sample(word_embed_shape).astype(dtype='float32') - word_embed_initializer = onnx.numpy_helper.from_array(word_embed_weights, name='word_embed') + word_embed_weights = np.random.random_sample(word_embed_shape).astype(dtype="float32") + word_embed_initializer = onnx.numpy_helper.from_array(word_embed_weights, name="word_embed") pos_embed_shape = [16, 4] - pos_embed_weights = np.random.random_sample(pos_embed_shape).astype(dtype='float32') - pos_embed_initializer = onnx.numpy_helper.from_array(pos_embed_weights, name='pos_embed') + pos_embed_weights = np.random.random_sample(pos_embed_shape).astype(dtype="float32") + pos_embed_initializer = onnx.numpy_helper.from_array(pos_embed_weights, name="pos_embed") seg_embed_shape = [2, 4] - seg_embed_weights = np.random.random_sample(seg_embed_shape).astype(dtype='float32') - seg_embed_initializer = onnx.numpy_helper.from_array(seg_embed_weights, name='seg_embed') + seg_embed_weights = np.random.random_sample(seg_embed_shape).astype(dtype="float32") + seg_embed_initializer = onnx.numpy_helper.from_array(seg_embed_weights, name="seg_embed") gamma_shape = [4] - gamma = np.random.random_sample(gamma_shape).astype(dtype='float32') - gamma_initializer = onnx.numpy_helper.from_array(gamma, name='gamma') + gamma = np.random.random_sample(gamma_shape).astype(dtype="float32") + gamma_initializer = onnx.numpy_helper.from_array(gamma, name="gamma") beta_shape = [4] - beta = np.random.random_sample(beta_shape).astype(dtype='float32') - beta_initializer = onnx.numpy_helper.from_array(beta, name='beta') + beta = np.random.random_sample(beta_shape).astype(dtype="float32") + beta_initializer = onnx.numpy_helper.from_array(beta, name="beta") # EmbedLayerNormalization Outputs: layernorm_out_shape = [1, 4, 4] - layernorm_out_tensor = 
helper.make_tensor_value_info('layernorm_out', TensorProto.FLOAT, layernorm_out_shape) + layernorm_out_tensor = helper.make_tensor_value_info("layernorm_out", TensorProto.FLOAT, layernorm_out_shape) mask_index_out_shape = [1] - mask_index_out_tensor = helper.make_tensor_value_info('mask_index_out', TensorProto.INT32, mask_index_out_shape) + mask_index_out_tensor = helper.make_tensor_value_info("mask_index_out", TensorProto.INT32, mask_index_out_shape) # EmbedLayerNormalization Node: embed_layer_norm_inputs = [ - 'input_ids', 'segment_ids', 'word_embed', 'pos_embed', 'seg_embed', 'gamma', 'beta', 'mask' + "input_ids", + "segment_ids", + "word_embed", + "pos_embed", + "seg_embed", + "gamma", + "beta", + "mask", ] - embed_layer_norm_outputs = ['layernorm_out', 'mask_index_out'] - embed_layer_norm_node = helper.make_node('EmbedLayerNormalization', - embed_layer_norm_inputs, - embed_layer_norm_outputs, - domain='com.microsoft', - name='Embed') + embed_layer_norm_outputs = ["layernorm_out", "mask_index_out"] + embed_layer_norm_node = helper.make_node( + "EmbedLayerNormalization", + embed_layer_norm_inputs, + embed_layer_norm_outputs, + domain="com.microsoft", + name="Embed", + ) # Construct the Graph and Model: nodes = [embed_layer_norm_node] - graph_name = 'embed_layernorm_graph' + graph_name = "embed_layernorm_graph" inputs = [input_ids_tensor, segment_ids_tensor, mask_tensor] outputs = [layernorm_out_tensor, mask_index_out_tensor] initializers = [ - word_embed_initializer, pos_embed_initializer, seg_embed_initializer, gamma_initializer, beta_initializer + word_embed_initializer, + pos_embed_initializer, + seg_embed_initializer, + gamma_initializer, + beta_initializer, ] graph = helper.make_graph(nodes, graph_name, inputs, outputs, initializer=initializers) - model = helper.make_model(graph, - opset_imports=[helper.make_opsetid("com.microsoft", 1), helper.make_opsetid("ai.onnx", 12)]) - model.ir_version = 7 # use stable onnx ir version - - q_config = {'Embed': self.static_q_config} - quantize_params = {'word_embed': [np.uint8(10.), np.float32(0)], - 'pos_embed': [np.uint8(10.), np.float32(0)], - 'seg_embed': [np.uint8(10.), np.float32(0)], - 'gamma': [np.uint8(10.), np.float32(0)], - 'beta': [np.uint8(10.), np.float32(0)], - 'layernorm_out': [np.uint8(10.), np.float32(0)], - 'mask_index_out': [np.uint8(10.), np.float32(0)], - 'input_ids': [np.uint8(10.), np.float32(0)], - } - q_model = self.qlinear_test(model, q_config, quantize_params, ['EmbedLayerNormalization']) - q_model.export('./test.onnx', self.config) + model = helper.make_model( + graph, opset_imports=[helper.make_opsetid("com.microsoft", 1), helper.make_opsetid("ai.onnx", 12)] + ) + model.ir_version = 7 # use stable onnx ir version + + q_config = {"Embed": self.static_q_config} + quantize_params = { + "word_embed": [np.uint8(10.0), np.float32(0)], + "pos_embed": [np.uint8(10.0), np.float32(0)], + "seg_embed": [np.uint8(10.0), np.float32(0)], + "gamma": [np.uint8(10.0), np.float32(0)], + "beta": [np.uint8(10.0), np.float32(0)], + "layernorm_out": [np.uint8(10.0), np.float32(0)], + "mask_index_out": [np.uint8(10.0), np.float32(0)], + "input_ids": [np.uint8(10.0), np.float32(0)], + } + q_model = self.qlinear_test(model, q_config, quantize_params, ["EmbedLayerNormalization"]) + q_model.export("./test.onnx", self.config) def test_concat_reshape_pooling(self): model = build_model() - q_config = {'Reshape':self.static_q_config, 'conv1':self.static_q_config, 'conv2':self.static_q_config, \ - 'Concat':self.static_q_config, 
'AveragePool':self.static_q_config, 'add':self.static_q_config} - quantize_params = {'input': [np.uint8(10.), np.float32(0)], - 'conv1_weight': [np.uint8(10.), np.float32(0)], - 'conv1_output': [np.uint8(10.), np.float32(0)], - 'conv2_weight': [np.uint8(10.), np.float32(0)], - 'conv2_output': [np.uint8(10.), np.float32(0)], - 'concat_output': [np.uint8(10.), np.float32(0)], - 'avg_output': [np.uint8(10.), np.float32(0)], - 'add_out': [np.uint8(10.), np.float32(0)], - 'add_init': [np.uint8(10.), np.float32(0)], - 'shape': [np.uint8(10.), np.float32(0)], - 'reshape_output': [np.uint8(10.), np.float32(0)]} - quantizable_op_types = ['Reshape', 'Conv', 'Concat', 'AveragePool', 'Add'] - q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types, **{'dedicated_qdq_pair': True}) - q_model.export('./test.onnx', self.config) - - q_config = {'Reshape':self.static_q_config, 'conv1':'fp32', 'conv2':self.static_q_config, \ - 'Concat':self.static_q_config, 'AveragePool':self.static_q_config} + q_config = { + "Reshape": self.static_q_config, + "conv1": self.static_q_config, + "conv2": self.static_q_config, + "Concat": self.static_q_config, + "AveragePool": self.static_q_config, + "add": self.static_q_config, + } + quantize_params = { + "input": [np.uint8(10.0), np.float32(0)], + "conv1_weight": [np.uint8(10.0), np.float32(0)], + "conv1_output": [np.uint8(10.0), np.float32(0)], + "conv2_weight": [np.uint8(10.0), np.float32(0)], + "conv2_output": [np.uint8(10.0), np.float32(0)], + "concat_output": [np.uint8(10.0), np.float32(0)], + "avg_output": [np.uint8(10.0), np.float32(0)], + "add_out": [np.uint8(10.0), np.float32(0)], + "add_init": [np.uint8(10.0), np.float32(0)], + "shape": [np.uint8(10.0), np.float32(0)], + "reshape_output": [np.uint8(10.0), np.float32(0)], + } + quantizable_op_types = ["Reshape", "Conv", "Concat", "AveragePool", "Add"] + q_model = self.qlinear_test( + model, q_config, quantize_params, quantizable_op_types, **{"dedicated_qdq_pair": True} + ) + q_model.export("./test.onnx", self.config) + + q_config = { + "Reshape": self.static_q_config, + "conv1": "fp32", + "conv2": self.static_q_config, + "Concat": self.static_q_config, + "AveragePool": self.static_q_config, + } q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - q_model.export('./test.onnx', self.config) - - q_config = {'Reshape':self.static_q_config, 'conv1':'fp32', 'conv2':'fp32', \ - 'Concat':self.static_q_config, 'AveragePool':self.static_q_config} + q_model.export("./test.onnx", self.config) + + q_config = { + "Reshape": self.static_q_config, + "conv1": "fp32", + "conv2": "fp32", + "Concat": self.static_q_config, + "AveragePool": self.static_q_config, + } q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - q_model.export('./test.onnx', self.config) - - q_config = {'Reshape':self.static_q_config, 'conv1':self.static_q_config, 'conv2':self.static_q_config, \ - 'Concat':self.static_q_config, 'AveragePool':'fp32'} + q_model.export("./test.onnx", self.config) + + q_config = { + "Reshape": self.static_q_config, + "conv1": self.static_q_config, + "conv2": self.static_q_config, + "Concat": self.static_q_config, + "AveragePool": "fp32", + } q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - q_model.export('./test.onnx', self.config) - - quantize_params = {'input': [np.uint8(10.), np.float32(0)], - 'conv1_weight': [np.uint8(10.), np.float32(0)], - 'conv1_output': [np.uint8(10.), np.float32(0)], - 'conv2_weight': 
[np.uint8(10.), np.float32(0)], - 'conv2_output': [np.uint8(10.), np.float32(0)], - 'concat_output': [np.uint8(10.), np.float32(0)], - 'avg_output': [np.uint8(10.), np.float32(0)], - 'shape': [np.uint8(10.), np.float32(0)], - 'add_out': [np.uint8(10.), np.float32(0)], - 'add_init': [np.uint8(10.), np.float32(0)], - 'reshape_output': [np.uint8(10.), np.float32(0)]} - q_config = {'Reshape':self.static_q_config, 'conv1':self.static_q_config, 'conv2':self.static_q_config, \ - 'Concat':self.static_q_config, 'AveragePool':self.static_q_config} + q_model.export("./test.onnx", self.config) + + quantize_params = { + "input": [np.uint8(10.0), np.float32(0)], + "conv1_weight": [np.uint8(10.0), np.float32(0)], + "conv1_output": [np.uint8(10.0), np.float32(0)], + "conv2_weight": [np.uint8(10.0), np.float32(0)], + "conv2_output": [np.uint8(10.0), np.float32(0)], + "concat_output": [np.uint8(10.0), np.float32(0)], + "avg_output": [np.uint8(10.0), np.float32(0)], + "shape": [np.uint8(10.0), np.float32(0)], + "add_out": [np.uint8(10.0), np.float32(0)], + "add_init": [np.uint8(10.0), np.float32(0)], + "reshape_output": [np.uint8(10.0), np.float32(0)], + } + q_config = { + "Reshape": self.static_q_config, + "conv1": self.static_q_config, + "conv2": self.static_q_config, + "Concat": self.static_q_config, + "AveragePool": self.static_q_config, + } q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - q_model.export('./test.onnx', self.config) - + q_model.export("./test.onnx", self.config) + def test_conv(self): - for op in ['Conv']: - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 5, 5, 1]) - B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1, 3, 3, 1]) - C = helper.make_tensor_value_info('C', TensorProto.FLOAT, [1, 5, 5, 1]) - D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [1, 1, 5, 1]) - conv_node = onnx.helper.make_node(op, ['A', 'B', 'C'], ['D'], - name=op, - kernel_shape=[3, 3], - pads=[1, 1, 1, 1]) - graph = helper.make_graph([conv_node], 'test_graph_1', [A, B, C], [D]) + for op in ["Conv"]: + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 5, 5, 1]) + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 3, 3, 1]) + C = helper.make_tensor_value_info("C", TensorProto.FLOAT, [1, 5, 5, 1]) + D = helper.make_tensor_value_info("D", TensorProto.FLOAT, [1, 1, 5, 1]) + conv_node = onnx.helper.make_node( + op, ["A", "B", "C"], ["D"], name=op, kernel_shape=[3, 3], pads=[1, 1, 1, 1] + ) + graph = helper.make_graph([conv_node], "test_graph_1", [A, B, C], [D]) model = helper.make_model(graph, opset_imports=[OPSET]) - q_config = {op: self.static_q_config}, - quantize_params = {"A": [np.uint8(10.), np.float32(0)], - "B": [np.uint8(10.), np.float32(0)], - "C": [np.uint8(10.), np.float32(0)], - "D": [np.uint8(10.), np.float32(0)]} + q_config = ({op: self.static_q_config},) + quantize_params = { + "A": [np.uint8(10.0), np.float32(0)], + "B": [np.uint8(10.0), np.float32(0)], + "C": [np.uint8(10.0), np.float32(0)], + "D": [np.uint8(10.0), np.float32(0)], + } quantizable_op_types = [op] q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - q_model.export('./test.onnx', self.config) + q_model.export("./test.onnx", self.config) def test_matmul(self): - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 1, 5, 5]) - B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1, 1, 5, 1]) - C = helper.make_tensor_value_info('C', TensorProto.FLOAT, [1, 1, 5, 1]) - matmul_node = 
onnx.helper.make_node('MatMul', ['A', 'B'], ['C'], name='Matmul') - graph = helper.make_graph([matmul_node], 'test_graph_1', [A, B], [C]) + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 1, 5, 5]) + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 1, 5, 1]) + C = helper.make_tensor_value_info("C", TensorProto.FLOAT, [1, 1, 5, 1]) + matmul_node = onnx.helper.make_node("MatMul", ["A", "B"], ["C"], name="Matmul") + graph = helper.make_graph([matmul_node], "test_graph_1", [A, B], [C]) model = helper.make_model(graph, opset_imports=[OPSET]) q_config = {"Matmul": self.static_q_config} - quantize_params = {"A": [np.uint8(10.), np.float32(0)], - "B": [np.uint8(10.), np.float32(0)], - "C": [np.uint8(10.), np.float32(0)]} + quantize_params = { + "A": [np.uint8(10.0), np.float32(0)], + "B": [np.uint8(10.0), np.float32(0)], + "C": [np.uint8(10.0), np.float32(0)], + } quantizable_op_types = ["Matmul"] q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - q_model.export('./test.onnx', self.config) + q_model.export("./test.onnx", self.config) q_config = {"Matmul": self.dynamic_q_config} q_model = self.dynamic_test(model, q_config, None, quantizable_op_types) - q_model.export('./test.onnx', self.config) - quantize_params = {"A": [np.float32(10.)], - "B": [np.float32(10.)], - "C": [np.float32(10.)]} - - q_config = {"Matmul": {"weight":{'dtype': 3, - 'algorithm': 'minmax', - 'scheme':'sym', - 'granularity': 'per_tensor'}, - 'activation':{'dtype': 2, - 'algorithm': 'minmax', - 'scheme':'asym', - 'granularity':'per_tensor', - 'quant_mode': 'dynamic'}}} + q_model.export("./test.onnx", self.config) + quantize_params = {"A": [np.float32(10.0)], "B": [np.float32(10.0)], "C": [np.float32(10.0)]} + + q_config = { + "Matmul": { + "weight": {"dtype": 3, "algorithm": "minmax", "scheme": "sym", "granularity": "per_tensor"}, + "activation": { + "dtype": 2, + "algorithm": "minmax", + "scheme": "asym", + "granularity": "per_tensor", + "quant_mode": "dynamic", + }, + } + } quantize_params = {} q_model = self.dynamic_test(model, q_config, quantize_params, quantizable_op_types) - q_model.export('./test.onnx', self.config) + q_model.export("./test.onnx", self.config) def test_attention(self): - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 128, 768]) - B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [768, 2304]) - C = helper.make_tensor_value_info('C', TensorProto.FLOAT, [2304]) - D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [1, 128, 768]) - mask = helper.make_tensor_value_info('mask', TensorProto.INT32, [1, 128]) - - node = onnx.helper.make_node('Attention', ['A', 'B', 'C', 'mask'], ['D'], name='Attention', num_heads=1) - graph = helper.make_graph([node], 'test_graph_1', [A, B, C, mask], [D]) + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 128, 768]) + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [768, 2304]) + C = helper.make_tensor_value_info("C", TensorProto.FLOAT, [2304]) + D = helper.make_tensor_value_info("D", TensorProto.FLOAT, [1, 128, 768]) + mask = helper.make_tensor_value_info("mask", TensorProto.INT32, [1, 128]) + + node = onnx.helper.make_node("Attention", ["A", "B", "C", "mask"], ["D"], name="Attention", num_heads=1) + graph = helper.make_graph([node], "test_graph_1", [A, B, C, mask], [D]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) q_config = {"Attention": self.static_q_config} - quantize_params = {"A": [np.uint8(0), np.float32(0.5)], - "B": 
[np.uint8(0), np.float32(0.5)], - "C": [np.uint8(0), np.float32(0.5)], - "D": [np.uint8(0), np.float32(0.5)]} + quantize_params = { + "A": [np.uint8(0), np.float32(0.5)], + "B": [np.uint8(0), np.float32(0.5)], + "C": [np.uint8(0), np.float32(0.5)], + "D": [np.uint8(0), np.float32(0.5)], + } quantizable_op_types = ["Attention"] q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - q_model.export('./test.onnx', self.config) + q_model.export("./test.onnx", self.config) q_config = {"Attention": self.dynamic_q_config} def test_gather(self): a_value = np.random.randn(100, 4).astype(np.float32) - A_init = helper.make_tensor('A', TensorProto.FLOAT, [100, 4], - a_value.reshape(400).tolist()) + A_init = helper.make_tensor("A", TensorProto.FLOAT, [100, 4], a_value.reshape(400).tolist()) b_value = np.random.randint(2, size=(1, 10)).astype(np.int32) - B_init = helper.make_tensor('B', TensorProto.INT32, [1, 10], - b_value.reshape(10).tolist()) - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [100, 4]) - B = helper.make_tensor_value_info('B', TensorProto.INT32, [1, 10]) - C = helper.make_tensor_value_info('C', TensorProto.FLOAT, [1, 10, 4]) - node = onnx.helper.make_node('Gather', ['A', 'B'], ['C'], name='Gather') - graph = helper.make_graph([node], 'test_graph_1', [A, B], [C], [A_init, B_init]) + B_init = helper.make_tensor("B", TensorProto.INT32, [1, 10], b_value.reshape(10).tolist()) + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [100, 4]) + B = helper.make_tensor_value_info("B", TensorProto.INT32, [1, 10]) + C = helper.make_tensor_value_info("C", TensorProto.FLOAT, [1, 10, 4]) + node = onnx.helper.make_node("Gather", ["A", "B"], ["C"], name="Gather") + graph = helper.make_graph([node], "test_graph_1", [A, B], [C], [A_init, B_init]) model = helper.make_model(graph, opset_imports=[OPSET]) - q_config = {'Gather': {"weight":{'dtype': 2, - 'algorithm': 'minmax', - 'scheme':'asym', - 'granularity': 'per_tensor'}, - 'activation':{'dtype': 2, - 'algorithm': 'minmax', - 'scheme':'asym', - 'granularity':'per_tensor', - 'quant_mode': 'static'} - }} - quantize_params = {"A": [np.uint8(10.), np.float32(0)], - "C": [np.uint8(10.), np.float32(0)]} + q_config = { + "Gather": { + "weight": {"dtype": 2, "algorithm": "minmax", "scheme": "asym", "granularity": "per_tensor"}, + "activation": { + "dtype": 2, + "algorithm": "minmax", + "scheme": "asym", + "granularity": "per_tensor", + "quant_mode": "static", + }, + } + } + quantize_params = {"A": [np.uint8(10.0), np.float32(0)], "C": [np.uint8(10.0), np.float32(0)]} quantizable_op_types = ["Gather"] q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - q_model.export('./test.onnx', self.config) - q_config = {'Gather': {"weight":{'dtype': 3, - 'algorithm': 'minmax', - 'scheme':'sym', - 'granularity': 'per_tensor'}, - 'activation':{'dtype': 2, - 'algorithm': 'minmax', - 'scheme':'asym', - 'granularity':'per_tensor', - 'quant_mode': 'dynamic'} - }} + q_model.export("./test.onnx", self.config) + q_config = { + "Gather": { + "weight": {"dtype": 3, "algorithm": "minmax", "scheme": "sym", "granularity": "per_tensor"}, + "activation": { + "dtype": 2, + "algorithm": "minmax", + "scheme": "asym", + "granularity": "per_tensor", + "quant_mode": "dynamic", + }, + } + } q_model = self.dynamic_test(model, q_config, quantize_params, quantizable_op_types) - q_model.export('./test.onnx', self.config) - graph = helper.make_graph([node], 'test_graph_1', [A, B], [C]) + q_model.export("./test.onnx", 
self.config) + graph = helper.make_graph([node], "test_graph_1", [A, B], [C]) model = helper.make_model(graph, opset_imports=[OPSET]) - q_config = {'Gather': {"weight":{'dtype': 3, - 'algorithm': 'minmax', - 'scheme':'sym', - 'granularity': 'per_tensor'}, - 'activation':{'dtype': 2, - 'algorithm': 'minmax', - 'scheme':'asym', - 'granularity':'per_tensor', - 'quant_mode': 'dynamic'} - }} + q_config = { + "Gather": { + "weight": {"dtype": 3, "algorithm": "minmax", "scheme": "sym", "granularity": "per_tensor"}, + "activation": { + "dtype": 2, + "algorithm": "minmax", + "scheme": "asym", + "granularity": "per_tensor", + "quant_mode": "dynamic", + }, + } + } quantize_params = {} q_model = self.dynamic_test(model, q_config, quantize_params, quantizable_op_types) - q_model.export('./test.onnx', self.config) + q_model.export("./test.onnx", self.config) def test_binary(self): - for op in ['Mul', 'Add']: - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 10]) - B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1]) - C = helper.make_tensor_value_info('C', TensorProto.FLOAT, [1, 10]) - node = onnx.helper.make_node(op, ['A', 'B'], ['C'], name=op) - graph = helper.make_graph([node], 'test_graph_1', [A, B], [C]) + for op in ["Mul", "Add"]: + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 10]) + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [1]) + C = helper.make_tensor_value_info("C", TensorProto.FLOAT, [1, 10]) + node = onnx.helper.make_node(op, ["A", "B"], ["C"], name=op) + graph = helper.make_graph([node], "test_graph_1", [A, B], [C]) model = helper.make_model(graph, opset_imports=[OPSET]) q_config = {op: self.static_q_config} - quantize_params = {"A": [np.uint8(10.), np.float32(0)], - "B": [np.uint8(10.), np.float32(0)], - "C": [np.uint8(10.), np.float32(0)]} + quantize_params = { + "A": [np.uint8(10.0), np.float32(0)], + "B": [np.uint8(10.0), np.float32(0)], + "C": [np.uint8(10.0), np.float32(0)], + } quantizable_op_types = [op] q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - q_model.export('./test.onnx', self.config) + q_model.export("./test.onnx", self.config) q_model = self.qlinear_test(model, q_config, {}, quantizable_op_types) - q_model.export('./test.onnx', self.config) - + q_model.export("./test.onnx", self.config) + def test_activation(self): - config = {"weight":{'dtype': 2, - 'algorithm': 'minmax', - 'scheme':'asym', - 'granularity': 'per_tensor'}, - 'activation':{'dtype': 2, - 'algorithm': 'minmax', - 'scheme':'asym', - 'granularity':'per_tensor', - 'quant_mode': 'static'} - } - + config = { + "weight": {"dtype": 2, "algorithm": "minmax", "scheme": "asym", "granularity": "per_tensor"}, + "activation": { + "dtype": 2, + "algorithm": "minmax", + "scheme": "asym", + "granularity": "per_tensor", + "quant_mode": "static", + }, + } + for op in ["Relu", "LeakyRelu", "Sigmoid"]: - B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1, 10]) - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 10]) - node = onnx.helper.make_node(op, ['A'], ['B'], name=op) - graph = helper.make_graph([node], 'test_graph_1', [A], [B]) + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 10]) + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 10]) + node = onnx.helper.make_node(op, ["A"], ["B"], name=op) + graph = helper.make_graph([node], "test_graph_1", [A], [B]) model = helper.make_model(graph, opset_imports=[OPSET]) q_config = {op: config} - quantize_params = {"A": [np.uint8(10.), 
np.float32(0)], - "B": [np.uint8(10.), np.float32(0)]} + quantize_params = {"A": [np.uint8(10.0), np.float32(0)], "B": [np.uint8(10.0), np.float32(0)]} quantizable_op_types = [op] q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - q_model.export('./test.onnx', self.config) + q_model.export("./test.onnx", self.config) a_value = np.random.randn(1, 10).astype(np.float32) - A_init = helper.make_tensor('A', TensorProto.FLOAT, [1, 10], - a_value.reshape(10).tolist()) - graph = helper.make_graph([node], 'test_graph_1', [A], [B], [A_init]) + A_init = helper.make_tensor("A", TensorProto.FLOAT, [1, 10], a_value.reshape(10).tolist()) + graph = helper.make_graph([node], "test_graph_1", [A], [B], [A_init]) model = helper.make_model(graph, opset_imports=[OPSET]) q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - q_model.export('./test.onnx', self.config) + q_model.export("./test.onnx", self.config) def test_pooling(self): op = "MaxPool" - B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1, 5, 5, 1]) - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 5, 5, 1]) - node = onnx.helper.make_node(op, ['A'], ['B'], - name=op, - kernel_shape=[3, 3], - pads=[1, 1, 1, 1]) - graph = helper.make_graph([node], 'test_graph_1', [A], [B]) + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 5, 5, 1]) + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 5, 5, 1]) + node = onnx.helper.make_node(op, ["A"], ["B"], name=op, kernel_shape=[3, 3], pads=[1, 1, 1, 1]) + graph = helper.make_graph([node], "test_graph_1", [A], [B]) q_config = {op: self.static_q_config} - quantize_params = {"A": [np.uint8(10.), np.float32(0)], - "B": [np.uint8(10.), np.float32(0)]} + quantize_params = {"A": [np.uint8(10.0), np.float32(0)], "B": [np.uint8(10.0), np.float32(0)]} quantizable_op_types = [op] for opset_version in [12, 13]: opset = onnx.OperatorSetIdProto() opset.version = opset_version model = helper.make_model(graph, opset_imports=[opset]) q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - q_model.export('./test.onnx', self.config) - - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 1, 5, 5]) - B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1, 1, 3, 3]) - D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [1, 1, 5, 5]) - conv_node = onnx.helper.make_node('Conv', ['A', 'B'], ['C'], - name='Conv', - kernel_shape=[3, 3], - pads=[1, 1, 1, 1]) - pool_node = onnx.helper.make_node(op, ['C'], ['D'], name=op, kernel_shape=[1, 1]) - graph = helper.make_graph([conv_node, pool_node], 'test_graph_1', [A, B], [D]) + q_model.export("./test.onnx", self.config) + + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 1, 5, 5]) + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 1, 3, 3]) + D = helper.make_tensor_value_info("D", TensorProto.FLOAT, [1, 1, 5, 5]) + conv_node = onnx.helper.make_node( + "Conv", ["A", "B"], ["C"], name="Conv", kernel_shape=[3, 3], pads=[1, 1, 1, 1] + ) + pool_node = onnx.helper.make_node(op, ["C"], ["D"], name=op, kernel_shape=[1, 1]) + graph = helper.make_graph([conv_node, pool_node], "test_graph_1", [A, B], [D]) model = helper.make_model(graph, opset_imports=[OPSET]) - + q_config = {"Conv": self.static_q_config, op: self.static_q_config} - quantize_params = {"A": [np.uint8(10.), np.float32(0)], - "B": [np.uint8(10.), np.float32(0)], - "C": [np.uint8(10.), np.float32(0)], - "D": [np.uint8(10.), np.float32(0)]} + 
quantize_params = { + "A": [np.uint8(10.0), np.float32(0)], + "B": [np.uint8(10.0), np.float32(0)], + "C": [np.uint8(10.0), np.float32(0)], + "D": [np.uint8(10.0), np.float32(0)], + } quantizable_op_types = ["Conv", op] q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - q_model.export('./test.onnx', self.config) + q_model.export("./test.onnx", self.config) op = "GlobalAveragePool" - B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1, 5, 1, 1]) - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 5, 5, 1]) - node = onnx.helper.make_node(op, ['A'], ['B'], - name=op) - graph = helper.make_graph([node], 'test_graph_1', [A], [B]) + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 5, 1, 1]) + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 5, 5, 1]) + node = onnx.helper.make_node(op, ["A"], ["B"], name=op) + graph = helper.make_graph([node], "test_graph_1", [A], [B]) q_config = {op: self.static_q_config} - quantize_params = {"A": [np.uint8(10.), np.float32(0)], - "B": [np.uint8(10.), np.float32(0)]} + quantize_params = {"A": [np.uint8(10.0), np.float32(0)], "B": [np.uint8(10.0), np.float32(0)]} quantizable_op_types = [op] for opset_version in [12, 13]: opset = onnx.OperatorSetIdProto() opset.version = opset_version model = helper.make_model(graph, opset_imports=[opset]) q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - q_model.export('./test.onnx', self.config) - - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 1, 5, 5]) - B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1, 1, 3, 3]) - D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [1, 1, 1, 1]) - conv_node = onnx.helper.make_node('Conv', ['A', 'B'], ['C'], - name='Conv', - kernel_shape=[3, 3], - pads=[1, 1, 1, 1]) - pool_node = onnx.helper.make_node(op, ['C'], ['D'], name=op) - graph = helper.make_graph([conv_node, pool_node], 'test_graph_1', [A, B], [D]) + q_model.export("./test.onnx", self.config) + + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 1, 5, 5]) + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 1, 3, 3]) + D = helper.make_tensor_value_info("D", TensorProto.FLOAT, [1, 1, 1, 1]) + conv_node = onnx.helper.make_node( + "Conv", ["A", "B"], ["C"], name="Conv", kernel_shape=[3, 3], pads=[1, 1, 1, 1] + ) + pool_node = onnx.helper.make_node(op, ["C"], ["D"], name=op) + graph = helper.make_graph([conv_node, pool_node], "test_graph_1", [A, B], [D]) model = helper.make_model(graph, opset_imports=[OPSET]) - + q_config = {"Conv": self.static_q_config, op: self.static_q_config} - quantize_params = {"A": [np.uint8(10.), np.float32(0)], - "B": [np.uint8(10.), np.float32(0)], - "C": [np.uint8(10.), np.float32(0)], - "D": [np.uint8(10.), np.float32(0)]} + quantize_params = { + "A": [np.uint8(10.0), np.float32(0)], + "B": [np.uint8(10.0), np.float32(0)], + "C": [np.uint8(10.0), np.float32(0)], + "D": [np.uint8(10.0), np.float32(0)], + } quantizable_op_types = ["Conv", op] q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types) - q_model.export('./test.onnx', self.config) - + q_model.export("./test.onnx", self.config) def test_exclude_node(self): - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 5, 5, 1]) - B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [3, 3, 1, 1]) - D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [1, 3, 5, 1]) - conv_node = onnx.helper.make_node('Conv', ['A', 'B'], ['C'], - name='Conv', - 
kernel_shape=[3, 3],
-                                          pads=[1, 1, 1, 1])
-        pool_node = onnx.helper.make_node("MaxPool", ['C'], ['D'], name="MaxPool", kernel_shape=[1, 1])
-        graph = helper.make_graph([conv_node, pool_node], 'test_graph_1', [A, B], [D])
+        A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 5, 5, 1])
+        B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [3, 3, 1, 1])
+        D = helper.make_tensor_value_info("D", TensorProto.FLOAT, [1, 3, 5, 1])
+        conv_node = onnx.helper.make_node(
+            "Conv", ["A", "B"], ["C"], name="Conv", kernel_shape=[3, 3], pads=[1, 1, 1, 1]
+        )
+        pool_node = onnx.helper.make_node("MaxPool", ["C"], ["D"], name="MaxPool", kernel_shape=[1, 1])
+        graph = helper.make_graph([conv_node, pool_node], "test_graph_1", [A, B], [D])
         model = helper.make_model(graph, opset_imports=[OPSET])
         q_config = {"Conv": self.static_q_config, "MaxPool": "fp32"}
-        quantize_params = {"A": [np.uint8(10.), np.float32(0)],
-                           "B": [np.uint8(10.), np.float32(0)],
-                           "C": [np.uint8(10.), np.float32(0)],
-                           "D": [np.uint8(10.), np.float32(0)]}
+        quantize_params = {
+            "A": [np.uint8(10.0), np.float32(0)],
+            "B": [np.uint8(10.0), np.float32(0)],
+            "C": [np.uint8(10.0), np.float32(0)],
+            "D": [np.uint8(10.0), np.float32(0)],
+        }
         quantizable_op_types = ["Conv", "MaxPool"]
-        self.config.exclude_output_quantization = ['Conv']
+        self.config.exclude_output_quantization = ["Conv"]
         q_model = self.qlinear_test(model, q_config, quantize_params, quantizable_op_types)
-        q_model.export('./test.onnx', self.config)
+        q_model.export("./test.onnx", self.config)
+

 if __name__ == "__main__":
     unittest.main()
diff --git a/test/export/test_torch2onnx.py b/test/export/test_torch2onnx.py
index 978f3504f8f..554738abaa5 100644
--- a/test/export/test_torch2onnx.py
+++ b/test/export/test_torch2onnx.py
@@ -1,19 +1,19 @@
-import os
 import copy
+import os
 import shutil
-import torch
 import unittest
+
 import numpy as np
-import copy
-from neural_compressor import quantization
-from neural_compressor.experimental.common import Model
+import torch
+import torch.utils.data as data
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+
+from neural_compressor import PostTrainingQuantConfig, QuantizationAwareTrainingConfig, quantization
 from neural_compressor.config import Torch2ONNXConfig
-from neural_compressor import PostTrainingQuantConfig, QuantizationAwareTrainingConfig
+from neural_compressor.data import DATALOADERS, Datasets
+from neural_compressor.experimental.common import Model
 from neural_compressor.training import prepare_compression
-from neural_compressor.data import Datasets, DATALOADERS
-from transformers import AutoModelForSequenceClassification, AutoTokenizer
 from neural_compressor.utils.constant import FP32
-import torch.utils.data as data


 def train_func_cv(compression_manager, model):
@@ -29,6 +29,7 @@ def train_func_cv(compression_manager, model):
     compression_manager.callbacks.on_train_end()
     return model

+
 def train_func_nlp(compression_manager, model, input):
     compression_manager.callbacks.on_train_begin()
     optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)
@@ -41,17 +42,21 @@ def train_func_nlp(compression_manager, model, input):
     compression_manager.callbacks.on_train_end()
     return model

+
 def check_CV_onnx(model_path, dataloader):
     import onnxruntime as ort
+
     ort_session = ort.InferenceSession(model_path)
     it = iter(dataloader)
     input = next(it)
-    input_dict = {'input': input[0].detach().cpu().numpy()}
+    input_dict = {"input": input[0].detach().cpu().numpy()}
     ort_session.run(None, input_dict)
     return True

+
 def check_NLP_onnx(model_path, input):
     import onnxruntime as ort
+
     ort_session = ort.InferenceSession(model_path, None)
     input_dict = {}
     for k, v in input.items():
@@ -59,18 +64,20 @@ def check_NLP_onnx(model_path, input):
     ort_session.run(None, input_dict)
     return True

+
 # This fake eval_func is used to avoid performance_only setting,
 # which will overwrite the fp32 model
 def eval_func(model):
     return 1

+
 class DummyNLPDataloader(object):
     def __init__(self, model_name):
         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
         self.sequence_a = "intel-extension-for-transformers is based in SH"
         self.sequence_b = "Where is intel-extension-for-transformers based? NYC or SH"
-        self.encoded_dict = self.tokenizer(self.sequence_a, self.sequence_b, return_tensors='pt')
-        self.encoded_dict['labels'] = 1
+        self.encoded_dict = self.tokenizer(self.sequence_a, self.sequence_b, return_tensors="pt")
+        self.encoded_dict["labels"] = 1
         self.batch_size = 1

     def __iter__(self):
@@ -79,32 +86,32 @@ def __iter__(self):
     def __next__(self):
         return self.encoded_dict

+
 class TestPytorch2ONNX(unittest.TestCase):
     @classmethod
     def setUpClass(self):
         from torchvision.models.quantization import resnet18
+
         self.cv_model = resnet18()
         self.cv_dataset = Datasets("pytorch")["dummy"]((10, 3, 224, 224))
         self.cv_dataloader = DATALOADERS["pytorch"](self.cv_dataset)
         self.nlp_model = AutoModelForSequenceClassification.from_pretrained(
             "distilbert-base-uncased-finetuned-sst-2-english"
         )
-        self.nlp_dataloader = DummyNLPDataloader(
-            "distilbert-base-uncased-finetuned-sst-2-english"
-        )
+        self.nlp_dataloader = DummyNLPDataloader("distilbert-base-uncased-finetuned-sst-2-english")
         input = next(self.nlp_dataloader)
-        input.pop('labels')
+        input.pop("labels")
         self.nlp_input = input

     @classmethod
     def tearDownClass(self):
-        shutil.rmtree('nc_workspace', ignore_errors=True)
-        os.remove('fp32-cv-model.onnx')
-        os.remove('int8-cv-qdq-model.onnx')
-        os.remove('int8-cv-qlinear-model.onnx')
-        os.remove('fp32-nlp-model.onnx')
-        os.remove('int8-nlp-qdq-model.onnx')
-        os.remove('int8-nlp-qlinear-model.onnx')
+        shutil.rmtree("nc_workspace", ignore_errors=True)
+        os.remove("fp32-cv-model.onnx")
+        os.remove("int8-cv-qdq-model.onnx")
+        os.remove("int8-cv-qlinear-model.onnx")
+        os.remove("fp32-nlp-model.onnx")
+        os.remove("int8-nlp-qdq-model.onnx")
+        os.remove("int8-nlp-qlinear-model.onnx")

     def test_fp32_CV_models(self):
         model = copy.deepcopy(self.cv_model)
@@ -112,13 +119,12 @@ def test_fp32_CV_models(self):
         fp32_onnx_config = Torch2ONNXConfig(
             dtype="fp32",
             example_inputs=torch.randn(1, 3, 224, 224),
-            input_names=['input'],
-            output_names=['output'],
-            dynamic_axes={"input": {0: "batch_size"},
-                          "output": {0: "batch_size"}},
+            input_names=["input"],
+            output_names=["output"],
+            dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}},
         )
-        inc_model.export('fp32-cv-model.onnx', fp32_onnx_config)
-        check_CV_onnx('fp32-cv-model.onnx', self.cv_dataloader)
+        inc_model.export("fp32-cv-model.onnx", fp32_onnx_config)
+        check_CV_onnx("fp32-cv-model.onnx", self.cv_dataloader)

     def test_int8_CV_models(self):
         for fake_yaml in ["static", "qat", "dynamic"]:
@@ -132,12 +138,7 @@ def test_int8_CV_models(self):
                 quant_conf = PostTrainingQuantConfig(approach="dynamic")
             elif fake_yaml == "static":
                 # Random fallback one op to test
-                fallback_op= {
-                    "conv1": {
-                        "activation": {"dtype": ["fp32"]},
-                        "weight": {"dtype": ["fp32"]}
-                    }
-                }
+                fallback_op = {"conv1": {"activation": {"dtype": ["fp32"]}, "weight": {"dtype": ["fp32"]}}}
                 quant_conf =
PostTrainingQuantConfig( approach="static", op_name_dict=fallback_op, @@ -146,37 +147,35 @@ def test_int8_CV_models(self): model, quant_conf, eval_func=eval_func, - calib_dataloader=self.cv_dataloader if fake_yaml == "static" else None) + calib_dataloader=self.cv_dataloader if fake_yaml == "static" else None, + ) int8_onnx_config = Torch2ONNXConfig( dtype="int8", opset_version=14, quant_format="QDQ", example_inputs=torch.randn(1, 3, 224, 224), - input_names=['input'], - output_names=['output'], - dynamic_axes={"input": {0: "batch_size"}, - "output": {0: "batch_size"}}, + input_names=["input"], + output_names=["output"], + dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}}, ) - q_model.export('int8-cv-qdq-model.onnx', int8_onnx_config) - check_CV_onnx('int8-cv-qdq-model.onnx', self.cv_dataloader) + q_model.export("int8-cv-qdq-model.onnx", int8_onnx_config) + check_CV_onnx("int8-cv-qdq-model.onnx", self.cv_dataloader) int8_onnx_config = Torch2ONNXConfig( dtype="int8", opset_version=14, quant_format="QLinear", example_inputs=torch.randn(1, 3, 224, 224), - input_names=['input'], - output_names=['output'], - dynamic_axes={"input": {0: "batch_size"}, - "output": {0: "batch_size"}}, + input_names=["input"], + output_names=["output"], + dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}}, ) - q_model.export('int8-cv-qlinear-model.onnx', int8_onnx_config) - check_CV_onnx('int8-cv-qlinear-model.onnx', self.cv_dataloader) - + q_model.export("int8-cv-qlinear-model.onnx", int8_onnx_config) + check_CV_onnx("int8-cv-qlinear-model.onnx", self.cv_dataloader) def test_fp32_NLP_models(self): - symbolic_names = {0: 'batch_size', 1: 'max_seq_len'} + symbolic_names = {0: "batch_size", 1: "max_seq_len"} dynamic_axes = {k: symbolic_names for k in self.nlp_input.keys()} model = copy.deepcopy(self.nlp_model) @@ -185,50 +184,47 @@ def test_fp32_NLP_models(self): dtype="fp32", example_inputs=self.nlp_input, input_names=list(self.nlp_input.keys()), - output_names=['labels'], + output_names=["labels"], dynamic_axes=dynamic_axes, ) - inc_model.export('fp32-nlp-model.onnx', fp32_onnx_config) - check_NLP_onnx('fp32-nlp-model.onnx', self.nlp_input) + inc_model.export("fp32-nlp-model.onnx", fp32_onnx_config) + check_NLP_onnx("fp32-nlp-model.onnx", self.nlp_input) def test_int8_NLP_models(self): - symbolic_names = {0: 'batch_size', 1: 'max_seq_len'} + symbolic_names = {0: "batch_size", 1: "max_seq_len"} dynamic_axes = {k: symbolic_names for k in self.nlp_input.keys()} for fake_yaml in ["static", "qat", "dynamic"]: model = copy.deepcopy(self.nlp_model) if fake_yaml == "qat": quant_conf = QuantizationAwareTrainingConfig( - op_type_dict={"Embedding":FP32}, + op_type_dict={"Embedding": FP32}, ) compression_manager = prepare_compression(copy.deepcopy(model), quant_conf) - q_model = train_func_nlp( - compression_manager, - compression_manager.model, - self.nlp_input - ) + q_model = train_func_nlp(compression_manager, compression_manager.model, self.nlp_input) else: if fake_yaml == "dynamic": quant_conf = PostTrainingQuantConfig(approach="dynamic") elif fake_yaml == "static": # Random fallback one op to test - fallback_op= { + fallback_op = { "distilbert.transformer.layer.5.ffn.lin2": { - "activation": {"dtype": ["fp32"]}, - "weight": {"dtype": ["fp32"]} + "activation": {"dtype": ["fp32"]}, + "weight": {"dtype": ["fp32"]}, } } quant_conf = PostTrainingQuantConfig( approach="static", op_name_dict=fallback_op, - op_type_dict={"Embedding":FP32}, + op_type_dict={"Embedding": FP32}, ) - + q_model = 
quantization.fit( model, quant_conf, eval_func=eval_func, - calib_dataloader=self.nlp_dataloader if fake_yaml == "static" else None) + calib_dataloader=self.nlp_dataloader if fake_yaml == "static" else None, + ) int8_onnx_config = Torch2ONNXConfig( dtype="int8", @@ -236,11 +232,11 @@ def test_int8_NLP_models(self): quant_format="QDQ", example_inputs=tuple(self.nlp_input.values()), input_names=list(self.nlp_input.keys()), - output_names=['labels'], + output_names=["labels"], dynamic_axes=dynamic_axes, ) - q_model.export('int8-nlp-qdq-model.onnx', int8_onnx_config) - check_NLP_onnx('int8-nlp-qdq-model.onnx', self.nlp_input) + q_model.export("int8-nlp-qdq-model.onnx", int8_onnx_config) + check_NLP_onnx("int8-nlp-qdq-model.onnx", self.nlp_input) int8_onnx_config = Torch2ONNXConfig( dtype="int8", @@ -248,12 +244,12 @@ def test_int8_NLP_models(self): quant_format="QLinear", example_inputs=tuple(self.nlp_input.values()), input_names=list(self.nlp_input.keys()), - output_names=['labels'], + output_names=["labels"], dynamic_axes=dynamic_axes, ) - q_model.export('int8-nlp-qlinear-model.onnx', int8_onnx_config) - check_NLP_onnx('int8-nlp-qlinear-model.onnx', self.nlp_input) + q_model.export("int8-nlp-qlinear-model.onnx", int8_onnx_config) + check_NLP_onnx("int8-nlp-qlinear-model.onnx", self.nlp_input) + if __name__ == "__main__": unittest.main() - diff --git a/test/graph_optimization/test_graph_optimization.py b/test/graph_optimization/test_graph_optimization.py index 447ab7742c9..072b90f8281 100644 --- a/test/graph_optimization/test_graph_optimization.py +++ b/test/graph_optimization/test_graph_optimization.py @@ -1,24 +1,25 @@ # # -*- coding: utf-8 -*- # -import unittest import os -import yaml import platform -import numpy as np -from neural_compressor.adaptor.tf_utils.util import disable_random -from neural_compressor.utils.utility import CpuInfo -from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper -from packaging.version import Version - +import unittest +import numpy as np import tensorflow as tf +import yaml +from packaging.version import Version +from tensorflow.compat.v1 import graph_util from tensorflow.core.framework import graph_pb2 from tensorflow.python.framework import dtypes -from tensorflow.compat.v1 import graph_util + +from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper +from neural_compressor.adaptor.tf_utils.util import disable_random +from neural_compressor.utils.utility import CpuInfo + def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -36,15 +37,15 @@ def build_fake_yaml(): relative: 0.0001 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() def build_fake_yaml_2(): - fake_yaml_2 = ''' + fake_yaml_2 = """ model: name: fake_yaml_2 framework: tensorflow @@ -52,15 +53,15 @@ def build_fake_yaml_2(): outputs: op_to_store graph_optimization: precisions: [bf16] - ''' + """ y = yaml.load(fake_yaml_2, Loader=yaml.SafeLoader) - with open('fake_yaml_2.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml_2.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() def build_fake_yaml_3(): - fake_yaml_3 = ''' + fake_yaml_3 = """ model: name: fake_yaml_3 framework: tensorflow @@ -70,14 +71,15 @@ def build_fake_yaml_3(): precisions: - bf16 - fp32 - ''' + """ y = yaml.load(fake_yaml_3, 
Loader=yaml.SafeLoader) - with open('fake_yaml_3.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml_3.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() + def build_fake_yaml_4(): - fake_yaml_4 = ''' + fake_yaml_4 = """ model: name: fake_yaml_4 framework: pytorch @@ -85,14 +87,15 @@ def build_fake_yaml_4(): outputs: op_to_store graph_optimization: precisions: [bf16] - ''' + """ y = yaml.load(fake_yaml_4, Loader=yaml.SafeLoader) - with open('fake_yaml_4.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml_4.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() + def build_fake_yaml_5(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml_5 framework: tensorflow @@ -116,14 +119,15 @@ def build_fake_yaml_5(): exit_policy: max_trials: 3 timeout: 50 - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml_5.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml_5.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() + def build_fake_yaml_6(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml_6 framework: tensorflow @@ -147,34 +151,39 @@ def build_fake_yaml_6(): exit_policy: max_trials: 3 timeout: 50 - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml_6.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml_6.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() + class MyMetric(object): def __init__(self, *args): self.pred_list = [] self.label_list = [] self.samples = 0 + def update(self, predict, label): self.pred_list.extend(predict) self.label_list.extend(label) self.samples += len(label) + def reset(self): self.pred_list = [] self.label_list = [] self.samples = 0 + def result(self): pred = np.array(self.pred_list) label = np.array(self.label_list) ones = np.ones(pred.ndim, dtype=np.int32) ones[0] = label.shape[0] label = np.array(self.label_list).reshape(ones) - correct_num = np.sum(pred == label) + correct_num = np.sum(pred == label) return correct_num / self.samples + class TestGraphOptimizationOnNonBF16Host(unittest.TestCase): @classmethod def setUpClass(self): @@ -182,7 +191,7 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") @disable_random() @unittest.skipIf(tf.__version__ < "2.0", "does not support on 1.15up3") @@ -191,39 +200,39 @@ def test_bf16_cfg_on_non_bf16_enabled_host(self): top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_weights_2 = tf.compat.v1.get_variable("weight_2", [3, 8, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_weights_2 = tf.compat.v1.get_variable( + "weight_2", [3, 8, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") relu = tf.nn.relu(conv) max_pool = tf.nn.max_pool(relu, ksize=1, strides=[1, 2, 2, 1], padding="SAME") - conv_bias = tf.compat.v1.get_variable("bias", [16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[ - 1, 2, 2, 1], padding="VALID", name='conv1_3') + conv_bias = tf.compat.v1.get_variable("bias", [16], 
initializer=tf.compat.v1.random_normal_initializer()) + conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[1, 2, 2, 1], padding="VALID", name="conv1_3") conv_bias = tf.math.add(conv_1, conv_bias) - relu6 = tf.nn.relu6(conv_bias, name='op_to_store') - out_name = relu6.name.split(':')[0] + relu6 = tf.nn.relu6(conv_bias, name="op_to_store") + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) - from neural_compressor.experimental import GraphOptimization, common + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.conf.config import GraphOptConf - conf = GraphOptConf('fake_yaml.yaml') + from neural_compressor.experimental import GraphOptimization, common + + conf = GraphOptConf("fake_yaml.yaml") graph_optimizer = GraphOptimization(conf) - dataset = graph_optimizer.dataset('dummy', shape=(100, 300, 300, 16), label=True) + dataset = graph_optimizer.dataset("dummy", shape=(100, 300, 300, 16), label=True) graph_optimizer.eval_dataloader = common.DataLoader(dataset) graph_optimizer.model = output_graph_def output_graph = graph_optimizer.fit() found_cast_op = False for i in output_graph.graph_def.node: - if i.op == 'Cast': + if i.op == "Cast": found_cast_op = True break @@ -232,11 +241,12 @@ def test_bf16_cfg_on_non_bf16_enabled_host(self): else: self.assertEqual(found_cast_op, False) + @unittest.skipIf(tf.__version__ < "2.0", "does not support on 1.15up3") class TestGraphOptimization(unittest.TestCase): @classmethod def setUpClass(self): - os.environ['FORCE_BF16'] = '1' + os.environ["FORCE_BF16"] = "1" if platform.system().lower() == "windows": self.skipTest(self, "Graph Optimization NOT Support Windows Yet") build_fake_yaml() @@ -248,27 +258,29 @@ def setUpClass(self): @classmethod def tearDownClass(self): - del os.environ['FORCE_BF16'] - os.remove('fake_yaml.yaml') - os.remove('fake_yaml_2.yaml') - os.remove('fake_yaml_3.yaml') - os.remove('fake_yaml_4.yaml') - os.remove('fake_yaml_5.yaml') - os.remove('fake_yaml_6.yaml') + del os.environ["FORCE_BF16"] + os.remove("fake_yaml.yaml") + os.remove("fake_yaml_2.yaml") + os.remove("fake_yaml_3.yaml") + os.remove("fake_yaml_4.yaml") + os.remove("fake_yaml_5.yaml") + os.remove("fake_yaml_6.yaml") def test_not_supported_model(self): import neural_compressor.adaptor.pytorch as nc_torch + PT_VERSION = nc_torch.get_torch_version() - if PT_VERSION > Version("1.8.0-rc1") \ - and PT_VERSION < Version("1.9.0-rc1"): + if PT_VERSION > Version("1.8.0-rc1") and PT_VERSION < Version("1.9.0-rc1"): pass else: import torchvision + model = torchvision.models.resnet18() from neural_compressor.experimental import Graph_Optimization - graph_optimizer = Graph_Optimization('fake_yaml_4.yaml') - graph_optimizer.input = 'input' - graph_optimizer.output = 'op_to_store' + + graph_optimizer = Graph_Optimization("fake_yaml_4.yaml") + graph_optimizer.input = "input" + graph_optimizer.output = "op_to_store" graph_optimizer.model = model try: output_graph = graph_optimizer.fit() @@ -277,17 +289,19 @@ def test_not_supported_model(self): def test_not_supported_model_without_yaml(self): import neural_compressor.adaptor.pytorch as nc_torch + PT_VERSION = nc_torch.get_torch_version() - if PT_VERSION > Version("1.8.0-rc1") \ - and PT_VERSION < Version("1.9.0-rc1"): + if PT_VERSION > Version("1.8.0-rc1") and PT_VERSION < 
Version("1.9.0-rc1"): pass else: import torchvision + model = torchvision.models.resnet18() from neural_compressor.experimental import Graph_Optimization + graph_optimizer = Graph_Optimization() - graph_optimizer.input = 'input' - graph_optimizer.output = 'op_to_store' + graph_optimizer.input = "input" + graph_optimizer.output = "op_to_store" try: graph_optimizer.model = model except SystemExit: @@ -295,63 +309,64 @@ def test_not_supported_model_without_yaml(self): def test_not_supported_model_with_conf(self): import neural_compressor.adaptor.pytorch as nc_torch + PT_VERSION = nc_torch.get_torch_version() - if PT_VERSION > Version("1.8.0-rc1") \ - and PT_VERSION < Version("1.9.0-rc1"): + if PT_VERSION > Version("1.8.0-rc1") and PT_VERSION < Version("1.9.0-rc1"): pass else: - from neural_compressor.experimental import Graph_Optimization - from neural_compressor.conf.config import conf import torchvision + + from neural_compressor.conf.config import conf + from neural_compressor.experimental import Graph_Optimization + model = torchvision.models.resnet18() - conf.model.inputs = 'input' - conf.model.outputs = 'op_to_store' - conf.graph_optimization.precisions = 'bf16' + conf.model.inputs = "input" + conf.model.outputs = "op_to_store" + conf.graph_optimization.precisions = "bf16" graph_optimizer = Graph_Optimization(conf) try: graph_optimizer.model = model except SystemExit: pass - @disable_random() def test_graph_optimization_with_evaluation(self): x = tf.compat.v1.placeholder(tf.float32, [1, 300, 300, 16], name="input") top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_weights_2 = tf.compat.v1.get_variable("weight_2", [3, 8, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_weights_2 = tf.compat.v1.get_variable( + "weight_2", [3, 8, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") relu = tf.nn.relu(conv) max_pool = tf.nn.max_pool(relu, ksize=1, strides=[1, 2, 2, 1], padding="SAME") - conv_bias = tf.compat.v1.get_variable("bias", [16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[ - 1, 2, 2, 1], padding="VALID", name='conv1_3') + conv_bias = tf.compat.v1.get_variable("bias", [16], initializer=tf.compat.v1.random_normal_initializer()) + conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[1, 2, 2, 1], padding="VALID", name="conv1_3") conv_bias = tf.math.add(conv_1, conv_bias) - relu6 = tf.nn.relu6(conv_bias, name='op_to_store') - out_name = relu6.name.split(':')[0] + relu6 = tf.nn.relu6(conv_bias, name="op_to_store") + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import GraphOptimization, common - graph_optimizer = GraphOptimization('fake_yaml.yaml') - dataset = graph_optimizer.dataset('dummy', shape=(100, 300, 300, 16), label=True) + + 
graph_optimizer = GraphOptimization("fake_yaml.yaml") + dataset = graph_optimizer.dataset("dummy", shape=(100, 300, 300, 16), label=True) graph_optimizer.eval_dataloader = common.DataLoader(dataset) graph_optimizer.model = output_graph_def output_graph = graph_optimizer.fit() found_cast_op = False for i in output_graph.graph_def.node: - if i.op == 'Cast': + if i.op == "Cast": found_cast_op = True break @@ -359,41 +374,40 @@ def test_graph_optimization_with_evaluation(self): @disable_random() def test_graph_optimization_without_evaluation(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_weights_2 = tf.compat.v1.get_variable("weight_2", [3, 8, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_weights_2 = tf.compat.v1.get_variable( + "weight_2", [3, 8, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") relu = tf.nn.relu(conv) max_pool = tf.nn.max_pool(relu, ksize=1, strides=[1, 2, 2, 1], padding="SAME") - conv_bias = tf.compat.v1.get_variable("bias", [16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[ - 1, 2, 2, 1], padding="VALID", name='conv1_3') + conv_bias = tf.compat.v1.get_variable("bias", [16], initializer=tf.compat.v1.random_normal_initializer()) + conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[1, 2, 2, 1], padding="VALID", name="conv1_3") conv_bias = tf.math.add(conv_1, conv_bias) - relu6 = tf.nn.relu6(conv_bias, name='op_to_store') + relu6 = tf.nn.relu6(conv_bias, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Graph_Optimization, common - graph_optimizer = Graph_Optimization('fake_yaml_2.yaml') + + graph_optimizer = Graph_Optimization("fake_yaml_2.yaml") graph_optimizer.model = output_graph_def output_graph = graph_optimizer.fit() found_cast_op = False for i in output_graph.graph_def.node: - if i.op == 'Cast': + if i.op == "Cast": found_cast_op = True break @@ -405,86 +419,87 @@ def test_graph_optimization_without_yaml(self): top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_weights_2 = tf.compat.v1.get_variable("weight_2", [3, 8, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_weights_2 = tf.compat.v1.get_variable( + "weight_2", [3, 8, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = 
tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") relu = tf.nn.relu(conv) max_pool = tf.nn.max_pool(relu, ksize=1, strides=[1, 2, 2, 1], padding="SAME") - conv_bias = tf.compat.v1.get_variable("bias", [16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[ - 1, 2, 2, 1], padding="VALID", name='conv1_3') + conv_bias = tf.compat.v1.get_variable("bias", [16], initializer=tf.compat.v1.random_normal_initializer()) + conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[1, 2, 2, 1], padding="VALID", name="conv1_3") conv_bias = tf.math.add(conv_1, conv_bias) - relu6 = tf.nn.relu6(conv_bias, name='op_to_store') - relu62 = tf.nn.relu6(conv_bias, name='op2_to_store') + relu6 = tf.nn.relu6(conv_bias, name="op_to_store") + relu62 = tf.nn.relu6(conv_bias, name="op2_to_store") with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( sess=sess, input_graph_def=sess.graph_def, - output_node_names=[relu6.name.split(':')[0], relu62.name.split(':')[0]]) + output_node_names=[relu6.name.split(":")[0], relu62.name.split(":")[0]], + ) from neural_compressor.experimental import Graph_Optimization + graph_optimizer = Graph_Optimization() - graph_optimizer.precisions = 'fp32' - graph_optimizer.input = 'input' - graph_optimizer.output = 'op_to_store, op2_to_store' + graph_optimizer.precisions = "fp32" + graph_optimizer.input = "input" + graph_optimizer.output = "op_to_store, op2_to_store" graph_optimizer.model = output_graph_def output_graph = graph_optimizer.fit() found_cast_op = False for i in output_graph.graph_def.node: - if i.op == 'Cast': + if i.op == "Cast": found_cast_op = True break input_name = graph_optimizer.input output_name = graph_optimizer.output self.assertEqual(found_cast_op, False) - self.assertEqual(input_name, 'input') - self.assertEqual(output_name, 'op_to_store, op2_to_store') + self.assertEqual(input_name, "input") + self.assertEqual(output_name, "op_to_store, op2_to_store") @disable_random() def test_graph_optimization_with_yaml(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_weights_2 = tf.compat.v1.get_variable("weight_2", [3, 8, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_weights_2 = tf.compat.v1.get_variable( + "weight_2", [3, 8, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") relu = tf.nn.relu(conv) max_pool = tf.nn.max_pool(relu, ksize=1, strides=[1, 2, 2, 1], padding="SAME") - conv_bias = tf.compat.v1.get_variable("bias", [16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[ - 1, 2, 2, 1], padding="VALID", name='conv1_3') + conv_bias = tf.compat.v1.get_variable("bias", [16], initializer=tf.compat.v1.random_normal_initializer()) + conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[1, 2, 2, 1], padding="VALID", name="conv1_3") conv_bias = tf.math.add(conv_1, conv_bias) - relu6 = 
tf.nn.relu6(conv_bias, name='op_to_store') + relu6 = tf.nn.relu6(conv_bias, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Graph_Optimization - graph_optimizer = Graph_Optimization('fake_yaml_3.yaml') + + graph_optimizer = Graph_Optimization("fake_yaml_3.yaml") graph_optimizer.model = output_graph_def output_graph = graph_optimizer.fit() found_cast_op = False for i in output_graph.graph_def.node: - if i.op == 'Cast': + if i.op == "Cast": found_cast_op = True break @@ -492,47 +507,47 @@ def test_graph_optimization_with_yaml(self): @disable_random() def test_graph_optimization_with_custom_metric_without_postprocess(self): - os.environ['FORCE_BF16'] = '1' + os.environ["FORCE_BF16"] = "1" x = tf.compat.v1.placeholder(tf.float32, [1, 300, 300, 16], name="input") top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_weights_2 = tf.compat.v1.get_variable("weight_2", [3, 8, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_weights_2 = tf.compat.v1.get_variable( + "weight_2", [3, 8, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") relu = tf.nn.relu(conv) max_pool = tf.nn.max_pool(relu, ksize=1, strides=[1, 2, 2, 1], padding="SAME") - conv_bias = tf.compat.v1.get_variable("bias", [16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[ - 1, 2, 2, 1], padding="VALID", name='conv1_3') + conv_bias = tf.compat.v1.get_variable("bias", [16], initializer=tf.compat.v1.random_normal_initializer()) + conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[1, 2, 2, 1], padding="VALID", name="conv1_3") conv_bias = tf.math.add(conv_1, conv_bias) - relu6 = tf.nn.relu6(conv_bias, name='op_to_store') - out_name = relu6.name.split(':')[0] + relu6 = tf.nn.relu6(conv_bias, name="op_to_store") + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Graph_Optimization, common - graph_optimizer = Graph_Optimization('fake_yaml_3.yaml') + + graph_optimizer = Graph_Optimization("fake_yaml_3.yaml") graph_optimizer.metric = common.Metric(MyMetric) - dataset = graph_optimizer.dataset('dummy', shape=(100, 300, 300, 16), label=True) - graph_optimizer.precisions = ['fp32', 'bf16'] + dataset = graph_optimizer.dataset("dummy", shape=(100, 300, 300, 16), label=True) + graph_optimizer.precisions = ["fp32", "bf16"] graph_optimizer.eval_dataloader = common.DataLoader(dataset) graph_optimizer.model = 
output_graph_def output_graph = graph_optimizer.fit() found_cast_op = False self.assertIsNotNone(output_graph.graph_def) - + for i in output_graph.graph_def.node: - if i.op == 'Cast': + if i.op == "Cast": found_cast_op = True break @@ -544,31 +559,31 @@ def test_graph_optimization_without_custom_metric_with_postprocess(self): top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_weights_2 = tf.compat.v1.get_variable("weight_2", [3, 8, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_weights_2 = tf.compat.v1.get_variable( + "weight_2", [3, 8, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") relu = tf.nn.relu(conv) max_pool = tf.nn.max_pool(relu, ksize=1, strides=[1, 2, 2, 1], padding="SAME") - conv_bias = tf.compat.v1.get_variable("bias", [16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[ - 1, 2, 2, 1], padding="VALID", name='conv1_3') + conv_bias = tf.compat.v1.get_variable("bias", [16], initializer=tf.compat.v1.random_normal_initializer()) + conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[1, 2, 2, 1], padding="VALID", name="conv1_3") conv_bias = tf.math.add(conv_1, conv_bias) - relu6 = tf.nn.relu6(conv_bias, name='op_to_store') - out_name = relu6.name.split(':')[0] + relu6 = tf.nn.relu6(conv_bias, name="op_to_store") + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.data import Postprocess from neural_compressor.experimental import Graph_Optimization, common, data - graph_optimizer = Graph_Optimization('fake_yaml.yaml') - dataset = graph_optimizer.dataset('dummy', shape=(100, 300, 300, 16), label=True) + + graph_optimizer = Graph_Optimization("fake_yaml.yaml") + dataset = graph_optimizer.dataset("dummy", shape=(100, 300, 300, 16), label=True) graph_optimizer.eval_dataloader = common.DataLoader(dataset) graph_optimizer.postprocess = Postprocess(data.transforms.transform.TensorflowWrapFunction(np.array)) graph_optimizer.model = output_graph_def @@ -576,7 +591,7 @@ def test_graph_optimization_without_custom_metric_with_postprocess(self): found_cast_op = False for i in output_graph.graph_def.node: - if i.op == 'Cast': + if i.op == "Cast": found_cast_op = True break @@ -584,37 +599,36 @@ def test_graph_optimization_without_custom_metric_with_postprocess(self): @disable_random() def test_graph_optimization_with_eval_func(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 300, 300, 16], name="input") top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_weights_2 = tf.compat.v1.get_variable("weight_2", [3, 8, 16, 16], - 
initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_weights_2 = tf.compat.v1.get_variable( + "weight_2", [3, 8, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") relu = tf.nn.relu(conv) max_pool = tf.nn.max_pool(relu, ksize=1, strides=[1, 2, 2, 1], padding="SAME") - conv_bias = tf.compat.v1.get_variable("bias", [16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[ - 1, 2, 2, 1], padding="VALID", name='conv1_3') + conv_bias = tf.compat.v1.get_variable("bias", [16], initializer=tf.compat.v1.random_normal_initializer()) + conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[1, 2, 2, 1], padding="VALID", name="conv1_3") conv_bias = tf.math.add(conv_1, conv_bias) - relu6 = tf.nn.relu6(conv_bias, name='op_to_store') + relu6 = tf.nn.relu6(conv_bias, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Graph_Optimization, common - graph_optimizer = Graph_Optimization('fake_yaml.yaml') - dataset = graph_optimizer.dataset('dummy', shape=(100, 300, 300, 16), label=True) + graph_optimizer = Graph_Optimization("fake_yaml.yaml") + + dataset = graph_optimizer.dataset("dummy", shape=(100, 300, 300, 16), label=True) graph_optimizer.eval_dataloader = common.DataLoader(dataset) graph_optimizer.model = output_graph_def graph_optimizer.eval_func = None @@ -623,44 +637,43 @@ def test_graph_optimization_with_eval_func(self): found_cast_op = False for i in output_graph.graph_def.node: - if i.op == 'Cast': + if i.op == "Cast": found_cast_op = True break self.assertEqual(found_cast_op, True) @disable_random() def test_graph_optimization_multimetric_noweight(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 300, 300, 16], name="input") top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_weights_2 = tf.compat.v1.get_variable("weight_2", [3, 8, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_weights_2 = tf.compat.v1.get_variable( + "weight_2", [3, 8, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") relu = tf.nn.relu(conv) max_pool = tf.nn.max_pool(relu, ksize=1, strides=[1, 2, 2, 1], padding="SAME") - conv_bias = tf.compat.v1.get_variable("bias", [16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[ - 1, 2, 2, 1], padding="VALID", name='conv1_3') + conv_bias = tf.compat.v1.get_variable("bias", [16], initializer=tf.compat.v1.random_normal_initializer()) + conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, 
strides=[1, 2, 2, 1], padding="VALID", name="conv1_3") conv_bias = tf.math.add(conv_1, conv_bias) - relu6 = tf.nn.relu6(conv_bias, name='op_to_store') + relu6 = tf.nn.relu6(conv_bias, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Graph_Optimization, common - graph_optimizer = Graph_Optimization('fake_yaml_5.yaml') - dataset = graph_optimizer.dataset('dummy', shape=(100, 300, 300, 16), label=True) + graph_optimizer = Graph_Optimization("fake_yaml_5.yaml") + + dataset = graph_optimizer.dataset("dummy", shape=(100, 300, 300, 16), label=True) graph_optimizer.eval_dataloader = common.DataLoader(dataset) graph_optimizer.model = output_graph_def graph_optimizer.eval_func = None @@ -669,44 +682,43 @@ def test_graph_optimization_multimetric_noweight(self): found_cast_op = False for i in output_graph.graph_def.node: - if i.op == 'Cast': + if i.op == "Cast": found_cast_op = True break self.assertEqual(found_cast_op, True) @disable_random() def test_graph_optimization_multimetric_weight(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 300, 300, 16], name="input") top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_weights_2 = tf.compat.v1.get_variable("weight_2", [3, 8, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_weights_2 = tf.compat.v1.get_variable( + "weight_2", [3, 8, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") relu = tf.nn.relu(conv) max_pool = tf.nn.max_pool(relu, ksize=1, strides=[1, 2, 2, 1], padding="SAME") - conv_bias = tf.compat.v1.get_variable("bias", [16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[ - 1, 2, 2, 1], padding="VALID", name='conv1_3') + conv_bias = tf.compat.v1.get_variable("bias", [16], initializer=tf.compat.v1.random_normal_initializer()) + conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[1, 2, 2, 1], padding="VALID", name="conv1_3") conv_bias = tf.math.add(conv_1, conv_bias) - relu6 = tf.nn.relu6(conv_bias, name='op_to_store') + relu6 = tf.nn.relu6(conv_bias, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Graph_Optimization, common - graph_optimizer = Graph_Optimization('fake_yaml_6.yaml') - dataset = graph_optimizer.dataset('dummy', shape=(100, 300, 300, 16), label=True) + graph_optimizer = Graph_Optimization("fake_yaml_6.yaml") + + dataset 
= graph_optimizer.dataset("dummy", shape=(100, 300, 300, 16), label=True) graph_optimizer.eval_dataloader = common.DataLoader(dataset) graph_optimizer.model = output_graph_def graph_optimizer.eval_func = None @@ -715,55 +727,53 @@ def test_graph_optimization_multimetric_weight(self): found_cast_op = False for i in output_graph.graph_def.node: - if i.op == 'Cast': + if i.op == "Cast": found_cast_op = True break self.assertEqual(found_cast_op, True) - - @disable_random() def test_graph_optimization_with_force_bf16(self): - os.environ['FORCE_BF16'] = '1' + os.environ["FORCE_BF16"] = "1" x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_weights_2 = tf.compat.v1.get_variable("weight_2", [3, 8, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_weights_2 = tf.compat.v1.get_variable( + "weight_2", [3, 8, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") relu = tf.nn.relu(conv) max_pool = tf.nn.max_pool(relu, ksize=1, strides=[1, 2, 2, 1], padding="SAME") - conv_bias = tf.compat.v1.get_variable("bias", [16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[ - 1, 2, 2, 1], padding="VALID", name='conv1_3') + conv_bias = tf.compat.v1.get_variable("bias", [16], initializer=tf.compat.v1.random_normal_initializer()) + conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[1, 2, 2, 1], padding="VALID", name="conv1_3") conv_bias = tf.math.add(conv_1, conv_bias) - relu6 = tf.nn.relu6(conv_bias, name='op_to_store') + relu6 = tf.nn.relu6(conv_bias, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Graph_Optimization + graph_optimizer = Graph_Optimization() - graph_optimizer.input = 'input' - graph_optimizer.output = 'op_to_store' + graph_optimizer.input = "input" + graph_optimizer.output = "op_to_store" - graph_optimizer.precisions = 'bf16' + graph_optimizer.precisions = "bf16" graph_optimizer.model = output_graph_def output_graph = graph_optimizer.fit() found_cast_op = False for i in output_graph.graph_def.node: - if i.op == 'Cast': + if i.op == "Cast": found_cast_op = True break @@ -775,30 +785,24 @@ def test_graph_optimization_with_bn(self): relu_name = "relu" float_graph_def = graph_pb2.GraphDef() input_constant = Helper.create_constant_node( - input_constant_name, - value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], - dtype=dtypes.float32, - shape=[1, 2, 6, 6]) + input_constant_name, value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=dtypes.float32, shape=[1, 2, 6, 6] + ) float_graph_def.node.extend([input_constant]) - relu_node = Helper.create_node("Relu", relu_name, - [input_constant_name]) + relu_node = 
Helper.create_node("Relu", relu_name, [input_constant_name]) Helper.set_attr_dtype(relu_node, "T", dtypes.float32) float_graph_def.node.extend([relu_node]) b_constant_name = "b_constant" conv2d_name = "conv2d_1" b_constant = Helper.create_constant_node( - b_constant_name, - value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], - dtype=dtypes.float32, - shape=[1, 2, 6, 6]) + b_constant_name, value=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=dtypes.float32, shape=[1, 2, 6, 6] + ) float_graph_def.node.extend([b_constant]) - conv2d_node = Helper.create_node( - "Conv2D", conv2d_name, [relu_name, b_constant_name]) + conv2d_node = Helper.create_node("Conv2D", conv2d_name, [relu_name, b_constant_name]) Helper.set_attr_dtype(conv2d_node, "T", dtypes.float32) Helper.set_attr_string(conv2d_node, "padding", b"SAME") - Helper.set_attr_int_list(conv2d_node, "strides", [1,1,1,1]) + Helper.set_attr_int_list(conv2d_node, "strides", [1, 1, 1, 1]) float_graph_def.node.extend([conv2d_node]) @@ -806,56 +810,48 @@ def test_graph_optimization_with_bn(self): offset_constant_name = "offset_constant" offset_constant = Helper.create_constant_node( - offset_constant_name, - value=[1, 2, 3, 4, 5, 6], - dtype=dtypes.float32, - shape=[6]) + offset_constant_name, value=[1, 2, 3, 4, 5, 6], dtype=dtypes.float32, shape=[6] + ) float_graph_def.node.extend([offset_constant]) - bias_add_node = Helper.create_node( - "BiasAdd", bias_add_name, [conv2d_name, offset_constant_name]) + bias_add_node = Helper.create_node("BiasAdd", bias_add_name, [conv2d_name, offset_constant_name]) Helper.set_attr_dtype(bias_add_node, "T", dtypes.float32) float_graph_def.node.extend([bias_add_node]) - bn_scale_name = 'bn_scale' + bn_scale_name = "bn_scale" bn_scale_node = Helper.create_constant_node( - bn_scale_name, - value=[1, 2, 3, 4, 5, 6], - dtype=dtypes.float32, - shape=[6]) - bn_offset_name = 'bn_offset' + bn_scale_name, value=[1, 2, 3, 4, 5, 6], dtype=dtypes.float32, shape=[6] + ) + bn_offset_name = "bn_offset" bn_offset_node = Helper.create_constant_node( - bn_offset_name, - value=[1, 2, 3, 4, 5, 6], - dtype=dtypes.float32, - shape=[6]) - bn_mean_name = 'bn_mean' + bn_offset_name, value=[1, 2, 3, 4, 5, 6], dtype=dtypes.float32, shape=[6] + ) + bn_mean_name = "bn_mean" bn_mean_node = Helper.create_constant_node( - bn_mean_name, value=[ + bn_mean_name, + value=[ 1, 2, - ], dtype=dtypes.float32, shape=[ + ], + dtype=dtypes.float32, + shape=[ 2, - ]) - bn_var_name = 'bn_var' - bn_var_node = Helper.create_constant_node( - bn_var_name, value=[], dtype=dtypes.float32, shape=[0]) - fused_bn_node_name = 'bn' + ], + ) + bn_var_name = "bn_var" + bn_var_node = Helper.create_constant_node(bn_var_name, value=[], dtype=dtypes.float32, shape=[0]) + fused_bn_node_name = "bn" fused_bn_node = Helper.create_node( - "FusedBatchNormV3", fused_bn_node_name, [ - bias_add_name, bn_scale_name, bn_offset_name, bn_mean_name, - bn_var_name - ]) + "FusedBatchNormV3", + fused_bn_node_name, + [bias_add_name, bn_scale_name, bn_offset_name, bn_mean_name, bn_var_name], + ) Helper.set_attr_dtype(fused_bn_node, "T", dtypes.float32) Helper.set_attr_dtype(fused_bn_node, "U", dtypes.float32) - float_graph_def.node.extend([ - fused_bn_node, bn_scale_node, bn_offset_node, bn_mean_node, - bn_var_node - ]) + float_graph_def.node.extend([fused_bn_node, bn_scale_node, bn_offset_node, bn_mean_node, bn_var_node]) post_relu_name = "post_relu" - post_relu_node = Helper.create_node( - "Relu", post_relu_name, [fused_bn_node_name]) + post_relu_node = Helper.create_node("Relu", post_relu_name, 
[fused_bn_node_name]) Helper.set_attr_dtype(post_relu_node, "T", dtypes.float32) float_graph_def.node.extend([post_relu_node]) @@ -864,62 +860,63 @@ def test_graph_optimization_with_bn(self): graph_optimizer = Graph_Optimization() - graph_optimizer.precisions = 'bf16' + graph_optimizer.precisions = "bf16" graph_optimizer.model = float_graph_def output_graph = graph_optimizer.fit() bn_bf16 = False for i in output_graph.graph_def.node: - if i.op == 'FusedBatchNormV3' and i.attr['T'].type == dtypes.bfloat16: + if i.op == "FusedBatchNormV3" and i.attr["T"].type == dtypes.bfloat16: bn_bf16 = True - if i.op == 'Conv2D' and i.attr['T'].type == dtypes.bfloat16: + if i.op == "Conv2D" and i.attr["T"].type == dtypes.bfloat16: bn_bf16 = True self.assertEqual(bn_bf16, True) -class TestGraphOptmizationFP32(unittest.TestCase): + +class TestGraphOptmizationFP32(unittest.TestCase): @disable_random() def test_graph_optimization_without_yaml_without_precisions(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_weights_2 = tf.compat.v1.get_variable("weight_2", [3, 8, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_weights_2 = tf.compat.v1.get_variable( + "weight_2", [3, 8, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") relu = tf.nn.relu(conv) max_pool = tf.nn.max_pool(relu, ksize=1, strides=[1, 2, 2, 1], padding="SAME") - conv_bias = tf.compat.v1.get_variable("bias", [16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[ - 1, 2, 2, 1], padding="VALID", name='conv1_3') + conv_bias = tf.compat.v1.get_variable("bias", [16], initializer=tf.compat.v1.random_normal_initializer()) + conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[1, 2, 2, 1], padding="VALID", name="conv1_3") conv_bias = tf.math.add(conv_1, conv_bias) - relu6 = tf.nn.relu6(conv_bias, name='op_to_store') + relu6 = tf.nn.relu6(conv_bias, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Graph_Optimization + graph_optimizer = Graph_Optimization() - graph_optimizer.input = 'input' - graph_optimizer.output = 'op_to_store' + graph_optimizer.input = "input" + graph_optimizer.output = "op_to_store" graph_optimizer.model = output_graph_def output_graph = graph_optimizer.fit() found_cast_op = False for i in output_graph.graph_def.node: - if i.op == 'Cast': + if i.op == "Cast": found_cast_op = True break precision = graph_optimizer.precisions self.assertEqual(found_cast_op, False) - self.assertEqual(precision, 'fp32') + self.assertEqual(precision, "fp32") @disable_random() def test_graph_optimization_without_yaml_with_precisions(self): @@ -927,38 +924,38 @@ def 
test_graph_optimization_without_yaml_with_precisions(self): top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_weights_2 = tf.compat.v1.get_variable("weight_2", [3, 8, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_weights_2 = tf.compat.v1.get_variable( + "weight_2", [3, 8, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") relu = tf.nn.relu(conv) max_pool = tf.nn.max_pool(relu, ksize=1, strides=[1, 2, 2, 1], padding="SAME") - conv_bias = tf.compat.v1.get_variable("bias", [16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[ - 1, 2, 2, 1], padding="VALID", name='conv1_3') + conv_bias = tf.compat.v1.get_variable("bias", [16], initializer=tf.compat.v1.random_normal_initializer()) + conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[1, 2, 2, 1], padding="VALID", name="conv1_3") conv_bias = tf.math.add(conv_1, conv_bias) - relu6 = tf.nn.relu6(conv_bias, name='op_to_store') + relu6 = tf.nn.relu6(conv_bias, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Graph_Optimization + graph_optimizer = Graph_Optimization() - graph_optimizer.precisions = 'fp32' + graph_optimizer.precisions = "fp32" graph_optimizer.model = output_graph_def output_graph = graph_optimizer.fit() found_cast_op = False for i in output_graph.graph_def.node: - if i.op == 'Cast': + if i.op == "Cast": found_cast_op = True break @@ -966,48 +963,49 @@ def test_graph_optimization_without_yaml_with_precisions(self): @disable_random() def test_graph_optimization_fp32_only_with_force_bf16(self): - os.environ['FORCE_BF16'] = '1' + os.environ["FORCE_BF16"] = "1" x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_weights_2 = tf.compat.v1.get_variable("weight_2", [3, 8, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_weights_2 = tf.compat.v1.get_variable( + "weight_2", [3, 8, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") relu = tf.nn.relu(conv) max_pool = tf.nn.max_pool(relu, ksize=1, strides=[1, 2, 2, 1], padding="SAME") - conv_bias = tf.compat.v1.get_variable("bias", [16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[ - 1, 2, 2, 
1], padding="VALID", name='conv1_3') + conv_bias = tf.compat.v1.get_variable("bias", [16], initializer=tf.compat.v1.random_normal_initializer()) + conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[1, 2, 2, 1], padding="VALID", name="conv1_3") conv_bias = tf.math.add(conv_1, conv_bias) - relu6 = tf.nn.relu6(conv_bias, name='op_to_store') + relu6 = tf.nn.relu6(conv_bias, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Graph_Optimization + graph_optimizer = Graph_Optimization() - graph_optimizer.input = 'input' - graph_optimizer.output = 'op_to_store' + graph_optimizer.input = "input" + graph_optimizer.output = "op_to_store" graph_optimizer.model = output_graph_def output_graph = graph_optimizer.fit() found_cast_op = False for i in output_graph.graph_def.node: - if i.op == 'Cast': + if i.op == "Cast": found_cast_op = True break self.assertEqual(found_cast_op, False) + if __name__ == "__main__": unittest.main() diff --git a/test/hpo/test_hpo.py b/test/hpo/test_hpo.py index 5e8d7fc4d70..891e88967c3 100644 --- a/test/hpo/test_hpo.py +++ b/test/hpo/test_hpo.py @@ -1,45 +1,58 @@ +import sys import unittest + import numpy as np -import sys -sys.path.insert(0, './') + +sys.path.insert(0, "./") +from neural_compressor.compression.hpo import ( + ContinuousSearchSpace, + DiscreteSearchSpace, + GridSearcher, + SearchSpace, + SimulatedAnnealingOptimizer, + prepare_hpo, +) from neural_compressor.config import HPOConfig -from neural_compressor.compression.hpo import (GridSearcher, - DiscreteSearchSpace, - ContinuousSearchSpace, - SearchSpace, - prepare_hpo, - SimulatedAnnealingOptimizer) class TestHPO(unittest.TestCase): search_space = { - 'learning_rate': SearchSpace((0.0001, 0.001)), - 'num_train_epochs': SearchSpace(bound=(20, 100), interval=1), - 'weight_decay': SearchSpace((0.0001, 0.001)), - 'cooldown_epochs': SearchSpace(bound=(0, 10), interval=1), - 'sparsity_warm_epochs': SearchSpace(bound=(0, 5), interval=1), - 'per_device_train_batch_size': SearchSpace((5, 20), 1) + "learning_rate": SearchSpace((0.0001, 0.001)), + "num_train_epochs": SearchSpace(bound=(20, 100), interval=1), + "weight_decay": SearchSpace((0.0001, 0.001)), + "cooldown_epochs": SearchSpace(bound=(0, 10), interval=1), + "sparsity_warm_epochs": SearchSpace(bound=(0, 5), interval=1), + "per_device_train_batch_size": SearchSpace((5, 20), 1), } def test_searcher(self): - hpo_config = HPOConfig({'num_train_epochs': self.search_space['num_train_epochs'], - 'cooldown_epochs': self.search_space['cooldown_epochs']}, searcher='grid') - searcher = GridSearcher({'num_train_epochs': self.search_space['num_train_epochs'], - 'cooldown_epochs': self.search_space['cooldown_epochs']}) + hpo_config = HPOConfig( + { + "num_train_epochs": self.search_space["num_train_epochs"], + "cooldown_epochs": self.search_space["cooldown_epochs"], + }, + searcher="grid", + ) + searcher = GridSearcher( + { + "num_train_epochs": self.search_space["num_train_epochs"], + "cooldown_epochs": self.search_space["cooldown_epochs"], + } + ) conf_searcher = prepare_hpo(hpo_config) self.assertEqual(searcher.__class__, conf_searcher.__class__) for _ in range(5): 
self.assertEqual(searcher.suggest(), conf_searcher.suggest()) - hpo_config = HPOConfig(self.search_space, 'random') + hpo_config = HPOConfig(self.search_space, "random") searcher = prepare_hpo(hpo_config) for _ in range(5): searcher.suggest() - hpo_config = HPOConfig(self.search_space, 'bo') + hpo_config = HPOConfig(self.search_space, "bo") searcher = prepare_hpo(hpo_config) for _ in range(10): searcher.suggest() searcher.get_feedback(np.random.random()) - hpo_config = HPOConfig(self.search_space, 'xgb', higher_is_better=True, min_train_samples=3) + hpo_config = HPOConfig(self.search_space, "xgb", higher_is_better=True, min_train_samples=3) searcher = prepare_hpo(hpo_config) for _ in range(5): searcher.suggest() @@ -67,6 +80,7 @@ def test_search_space(self): def test_sa(self): def f(x): return np.mean(np.log(x**2), axis=1) + points = np.random.randn(5, 6) optimizer = SimulatedAnnealingOptimizer(T0=100, Tf=0, alpha=0.9, higher_is_better=True) optimizer.gen_next_params(f, points) diff --git a/test/ipex/test_adaptor_ipex.py b/test/ipex/test_adaptor_ipex.py index e90bdbf4c37..a15234e6866 100644 --- a/test/ipex/test_adaptor_ipex.py +++ b/test/ipex/test_adaptor_ipex.py @@ -1,22 +1,19 @@ -import neural_compressor.adaptor.pytorch as nc_torch import os import shutil +import unittest + import torch import torch.utils.data as data -import unittest -from neural_compressor import set_workspace -from neural_compressor.experimental import common -from neural_compressor.utils.utility import LazyImport +from packaging.version import Version +from transformers import AutoModelForSequenceClassification, AutoTokenizer + +import neural_compressor.adaptor.pytorch as nc_torch +from neural_compressor import mix_precision, set_workspace from neural_compressor.conf.pythonic_config import config +from neural_compressor.config import MixedPrecisionConfig +from neural_compressor.experimental import common from neural_compressor.utils.pytorch import load -from packaging.version import Version -from transformers import ( - AutoModelForSequenceClassification, - AutoTokenizer, -) -from neural_compressor import mix_precision from neural_compressor.utils.utility import LazyImport -from neural_compressor.config import MixedPrecisionConfig torch_utils = LazyImport("neural_compressor.adaptor.torch_utils") @@ -26,6 +23,7 @@ try: import intel_extension_for_pytorch as ipex + TEST_IPEX = True IPEX_VERSION = Version(ipex.__version__) except: @@ -36,6 +34,7 @@ # get torch and IPEX version PT_VERSION = nc_torch.get_torch_version().release + class DummyDataloader(data.DataLoader): def __init__(self): self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) @@ -51,11 +50,12 @@ def __getitem__(self, index): """Returns one data pair (source and target).""" if index < 10: return self.encoded_dict - + def __iter__(self): for _ in range(10): yield self.encoded_dict + class M(torch.nn.Module): def __init__(self): super().__init__() @@ -80,69 +80,77 @@ def calib_func(model): class Dataloader: def __init__(self) -> None: - self.batch_size=1 + self.batch_size = 1 + def __iter__(self): yield torch.randn(1, 3, 224, 224) -@unittest.skipIf(PT_VERSION >= Version("1.12.0").release or PT_VERSION < Version("1.10.0").release, - "Please use Intel extension for Pytorch version 1.10 or 1.11") +@unittest.skipIf( + PT_VERSION >= Version("1.12.0").release or PT_VERSION < Version("1.10.0").release, + "Please use Intel extension for Pytorch version 1.10 or 1.11", +) class TestPytorchIPEX_1_10_Adaptor(unittest.TestCase): @classmethod def setUpClass(self): - 
config.quantization.backend = 'ipex' - config.quantization.approach = 'post_training_static_quant' + config.quantization.backend = "ipex" + config.quantization.approach = "post_training_static_quant" config.quantization.use_bf16 = False set_workspace("./saved") @classmethod def tearDownClass(self): - shutil.rmtree('./saved', ignore_errors=True) - shutil.rmtree('runs', ignore_errors=True) + shutil.rmtree("./saved", ignore_errors=True) + shutil.rmtree("runs", ignore_errors=True) def test_tuning_ipex(self): from neural_compressor.experimental import Quantization + model = M() quantizer = Quantization(config) quantizer.model = model - quantizer.conf.usr_cfg.tuning.exit_policy['performance_only'] = True - dataset = quantizer.dataset('dummy', (100, 3, 224, 224), label=True) + quantizer.conf.usr_cfg.tuning.exit_policy["performance_only"] = True + dataset = quantizer.dataset("dummy", (100, 3, 224, 224), label=True) dataloader = torch.utils.data.DataLoader(dataset) quantizer.calib_dataloader = dataloader quantizer.eval_dataloader = dataloader nc_model = quantizer.fit() - nc_model.save('./saved') - q_model = load('./saved', model, dataloader=dataloader) + nc_model.save("./saved") + q_model = load("./saved", model, dataloader=dataloader) from neural_compressor.experimental import Benchmark + evaluator = Benchmark(config) evaluator.model = q_model evaluator.b_dataloader = dataloader - evaluator.fit('accuracy') + evaluator.fit("accuracy") + -@unittest.skipIf(PT_VERSION < Version("1.12.0").release, - "Please use Intel extension for Pytorch version higher or equal to 1.12") +@unittest.skipIf( + PT_VERSION < Version("1.12.0").release, "Please use Intel extension for Pytorch version higher or equal to 1.12" +) class TestPytorchIPEX_1_12_Adaptor(unittest.TestCase): @classmethod def setUpClass(self): - config.quantization.backend = 'ipex' + config.quantization.backend = "ipex" config.quantization.accuracy_criterion.tolerable_loss = 0.0001 config.quantization.accuracy_criterion.higher_is_better = False - config.quantization.approach = 'post_training_static_quant' + config.quantization.approach = "post_training_static_quant" config.quantization.use_bf16 = False set_workspace("./saved") @classmethod def tearDownClass(self): - shutil.rmtree('./saved', ignore_errors=True) - shutil.rmtree('runs', ignore_errors=True) + shutil.rmtree("./saved", ignore_errors=True) + shutil.rmtree("runs", ignore_errors=True) def test_tuning_ipex(self): from neural_compressor.experimental import Quantization + model = M() quantizer = Quantization(config) quantizer.model = model - quantizer.conf.usr_cfg.tuning.exit_policy['performance_only'] = False - dataset = quantizer.dataset('dummy', (100, 3, 224, 224), label=True) + quantizer.conf.usr_cfg.tuning.exit_policy["performance_only"] = False + dataset = quantizer.dataset("dummy", (100, 3, 224, 224), label=True) dataloader = torch.utils.data.DataLoader(dataset) quantizer.calib_dataloader = dataloader quantizer.calib_func = calib_func @@ -150,27 +158,31 @@ def test_tuning_ipex(self): nc_model = quantizer.fit() sparsity = nc_model.report_sparsity() self.assertTrue(sparsity[-1] >= 0.0) - nc_model.save('./saved') - q_model = load('./saved', model, dataloader=dataloader) + nc_model.save("./saved") + q_model = load("./saved", model, dataloader=dataloader) from neural_compressor.experimental import Benchmark + evaluator = Benchmark(config) evaluator.model = q_model evaluator.b_dataloader = dataloader - evaluator.fit('accuracy') + evaluator.fit("accuracy") def 
test_tuning_ipex_for_ipex_autotune_func(self): from neural_compressor.experimental import Quantization + model = M() if PT_VERSION < Version("2.1").release: qconfig = ipex.quantization.default_static_qconfig else: qconfig = ipex.quantization.default_static_qconfig_mapping - prepared_model = ipex.quantization.prepare(model, qconfig, example_inputs=torch.ones(1, 3, 224, 224), inplace=False) + prepared_model = ipex.quantization.prepare( + model, qconfig, example_inputs=torch.ones(1, 3, 224, 224), inplace=False + ) quantizer = Quantization(config) quantizer.model = prepared_model - quantizer.conf.usr_cfg.tuning.exit_policy['max_trials'] = 5 - quantizer.conf.usr_cfg.tuning.exit_policy['timeout'] = 100 - dataset = quantizer.dataset('dummy', (100, 3, 224, 224), label=True) + quantizer.conf.usr_cfg.tuning.exit_policy["max_trials"] = 5 + quantizer.conf.usr_cfg.tuning.exit_policy["timeout"] = 100 + dataset = quantizer.dataset("dummy", (100, 3, 224, 224), label=True) dataloader = torch.utils.data.DataLoader(dataset) quantizer.calib_dataloader = dataloader quantizer.eval_dataloader = dataloader @@ -182,23 +194,28 @@ def test_copy_prepared_model(self): qconfig = ipex.quantization.default_static_qconfig else: qconfig = ipex.quantization.default_static_qconfig_mapping - prepared_model = ipex.quantization.prepare(model, qconfig, example_inputs=torch.ones(1, 3, 224, 224), inplace=False) + prepared_model = ipex.quantization.prepare( + model, qconfig, example_inputs=torch.ones(1, 3, 224, 224), inplace=False + ) copy_model = torch_utils.util.auto_copy(prepared_model) self.assertTrue(isinstance(copy_model, torch.nn.Module)) - + def test_bf16(self): from neural_compressor.experimental import Quantization + model = M() if PT_VERSION < Version("2.1").release: qconfig = ipex.quantization.default_static_qconfig else: qconfig = ipex.quantization.default_static_qconfig_mapping - prepared_model = ipex.quantization.prepare(model, qconfig, example_inputs=torch.ones(1, 3, 224, 224), inplace=False) + prepared_model = ipex.quantization.prepare( + model, qconfig, example_inputs=torch.ones(1, 3, 224, 224), inplace=False + ) config.quantization.use_bf16 = True config.quantization.performance_only = True quantizer = Quantization(config) quantizer.model = model - dataset = quantizer.dataset('dummy', (100, 3, 224, 224), label=True) + dataset = quantizer.dataset("dummy", (100, 3, 224, 224), label=True) dataloader = torch.utils.data.DataLoader(dataset) quantizer.calib_dataloader = dataloader quantizer.eval_dataloader = dataloader @@ -206,12 +223,13 @@ def test_bf16(self): def test_example_inputs(self): from neural_compressor.experimental import Quantization + model = M() config.quantization.example_inputs = torch.randn([1, 3, 224, 224]) quantizer = Quantization(config) quantizer.model = model - quantizer.conf.usr_cfg.tuning.exit_policy['performance_only'] = False - dataset = quantizer.dataset('dummy', (100, 3, 224, 224), label=True) + quantizer.conf.usr_cfg.tuning.exit_policy["performance_only"] = False + dataset = quantizer.dataset("dummy", (100, 3, 224, 224), label=True) dataloader = torch.utils.data.DataLoader(dataset) quantizer.calib_dataloader = dataloader nc_model = quantizer.fit() @@ -219,6 +237,7 @@ def test_example_inputs(self): def test_new_API(self): model = M() from neural_compressor import PostTrainingQuantConfig, quantization + op_type_dict = { "add": {"weight": {"dtype": ["fp32"]}, "activation": {"dtype": ["fp32"]}}, "linear": { @@ -247,7 +266,7 @@ def test_new_API(self): conf, calib_dataloader=calib_dataloader, ) - 
q_model.save('./saved') + q_model.save("./saved") def test_fallback_fused_op_type(self): class M(torch.nn.Module): @@ -262,9 +281,10 @@ def forward(self, a): x = x.view(1, -1) x = self.linear(x) return x - + model = M() from neural_compressor import PostTrainingQuantConfig, quantization + op_type_dict = { "Conv2d&add": {"weight": {"dtype": ["fp32"]}, "activation": {"dtype": ["fp32"]}}, } @@ -279,7 +299,7 @@ def forward(self, a): conf, calib_dataloader=calib_dataloader, ) - + def test_tune_add(self): class M(torch.nn.Module): def __init__(self): @@ -293,28 +313,19 @@ def forward(self, a): x += x x = self.linear(x) return x - + model = M() from neural_compressor import PostTrainingQuantConfig, quantization - - + acc_lst = [1, 0.8, 1.1, 1.2] + def fake_eval(model): res = acc_lst.pop(0) return res - - conf = PostTrainingQuantConfig( - backend="ipex", - quant_level=0 - ) + conf = PostTrainingQuantConfig(backend="ipex", quant_level=0) calib_dataloader = Dataloader() - q_model = quantization.fit( - model, - conf, - calib_dataloader=calib_dataloader, - eval_func=fake_eval - ) + q_model = quantization.fit(model, conf, calib_dataloader=calib_dataloader, eval_func=fake_eval) def test_tune_add_with_recipe(self): class M(torch.nn.Module): @@ -329,38 +340,28 @@ def forward(self, a): x += x x = self.linear(x) return x - + model = M() from neural_compressor import PostTrainingQuantConfig, quantization - - + acc_lst = [1, 0.8, 1.1, 1.2] + def fake_eval(model): res = acc_lst.pop(0) return res - conf = PostTrainingQuantConfig( - backend="ipex", - quant_level=0, - recipes={'smooth_quant': True, - 'smooth_quant_args': { 'alpha': 0.5} - } - ) - calib_dataloader = Dataloader() - q_model = quantization.fit( - model, - conf, - calib_dataloader=calib_dataloader, - eval_func=fake_eval + backend="ipex", quant_level=0, recipes={"smooth_quant": True, "smooth_quant_args": {"alpha": 0.5}} ) + calib_dataloader = Dataloader() + q_model = quantization.fit(model, conf, calib_dataloader=calib_dataloader, eval_func=fake_eval) - @unittest.skipIf(IPEX_VERSION.release < Version("2.1.0").release, - "Please use Intel extension for Pytorch version higher or equal to 2.1.0") + @unittest.skipIf( + IPEX_VERSION.release < Version("2.1.0").release, + "Please use Intel extension for Pytorch version higher or equal to 2.1.0", + ) def test_dict_inputs_for_model(self): - model = AutoModelForSequenceClassification.from_pretrained( - MODEL_NAME - ) + model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME) dummy_dataloader = DummyDataloader() from neural_compressor import PostTrainingQuantConfig, quantization @@ -372,42 +373,43 @@ def test_dict_inputs_for_model(self): conf, calib_dataloader=dummy_dataloader, ) - q_model.save('./saved') + q_model.save("./saved") - @unittest.skipIf(IPEX_VERSION.release < Version("2.1.0").release, - "Please use Intel extension for Pytorch version higher or equal to 2.1.0") + @unittest.skipIf( + IPEX_VERSION.release < Version("2.1.0").release, + "Please use Intel extension for Pytorch version higher or equal to 2.1.0", + ) def test_dict_inputs_for_model_calib_func(self): - model = AutoModelForSequenceClassification.from_pretrained( - MODEL_NAME - ) + model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME) example_inputs = DummyDataloader()[0] from neural_compressor import PostTrainingQuantConfig, quantization def calib_func(p_model): p_model(**example_inputs) - conf = PostTrainingQuantConfig( - backend="ipex", - example_inputs=example_inputs - ) + conf = 
PostTrainingQuantConfig(backend="ipex", example_inputs=example_inputs) q_model = quantization.fit( model, conf, calib_func=calib_func, ) - q_model.save('./saved') + q_model.save("./saved") + class TestMixedPrecision(unittest.TestCase): @classmethod def setUpClass(self): - os.environ['FORCE_FP16'] = '1' - os.environ['FORCE_BF16'] = '1' + os.environ["FORCE_FP16"] = "1" + os.environ["FORCE_BF16"] = "1" self.pt_model = M() - @unittest.skipIf(IPEX_VERSION.release < Version("1.11.0").release, - "Please use PyTroch 1.11 or higher version for mixed precision.") + @unittest.skipIf( + IPEX_VERSION.release < Version("1.11.0").release, + "Please use PyTroch 1.11 or higher version for mixed precision.", + ) def test_mixed_precision_with_eval_func_ipex(self): torch = LazyImport("torch") + def eval(model): return 0.5 @@ -418,5 +420,7 @@ def eval(model): eval_func=eval, ) self.assertTrue(isinstance(output_model._model, torch.jit.ScriptModule)) + + if __name__ == "__main__": unittest.main() diff --git a/test/itex/test_keras_in_keras_out.py b/test/itex/test_keras_in_keras_out.py index 3222956e953..ff05f433418 100644 --- a/test/itex/test_keras_in_keras_out.py +++ b/test/itex/test_keras_in_keras_out.py @@ -15,16 +15,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest import os -import time import shutil +import time +import unittest + import numpy as np import tensorflow as tf from tensorflow import keras + from neural_compressor.utils import logger -test_mode = 'accuracy' +test_mode = "accuracy" + def build_model(): # Load MNIST dataset @@ -36,19 +39,20 @@ def build_model(): test_images = test_images / 255.0 # Define the model architecture. - model = keras.Sequential([ - keras.layers.InputLayer(input_shape=(28, 28)), - keras.layers.Reshape(target_shape=(28, 28, 1)), - keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'), - keras.layers.MaxPooling2D(pool_size=(2, 2)), - keras.layers.Flatten(), - keras.layers.Dense(10) - ]) + model = keras.Sequential( + [ + keras.layers.InputLayer(input_shape=(28, 28)), + keras.layers.Reshape(target_shape=(28, 28, 1)), + keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation="relu"), + keras.layers.MaxPooling2D(pool_size=(2, 2)), + keras.layers.Flatten(), + keras.layers.Dense(10), + ] + ) # Train the digit classification model - model.compile(optimizer='adam', - loss=tf.keras.losses.SparseCategoricalCrossentropy( - from_logits=True), - metrics=['accuracy']) + model.compile( + optimizer="adam", loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=["accuracy"] + ) model.fit( train_images, @@ -57,12 +61,12 @@ def build_model(): validation_split=0.1, ) - _, baseline_model_accuracy = model.evaluate( - test_images, test_labels, verbose=0) + _, baseline_model_accuracy = model.evaluate(test_images, test_labels, verbose=0) - print('Baseline test accuracy:', baseline_model_accuracy) + print("Baseline test accuracy:", baseline_model_accuracy) model.save("baseline_model") + def build_dataset(): # Load the data and split it between train and test sets (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() @@ -79,6 +83,7 @@ def build_dataset(): y_test = keras.utils.to_categorical(y_test, 10) return x_train, y_train, x_test, y_test + def eval_func(model): x_train, y_train, x_test, y_test = build_dataset() start = time.time() @@ -86,12 +91,13 @@ def eval_func(model): score = model.evaluate(x_test, y_test) end = time.time() - if test_mode == 
'performance': + if test_mode == "performance": latency = end - start print("Latency: {:.3f} ms".format(latency * 1000)) - print("Throughput: {:.3f} data/sec".format(1. / latency)) + print("Throughput: {:.3f} data/sec".format(1.0 / latency)) return score[1] + class Dataset(object): def __init__(self, batch_size=100): mnist = keras.datasets.mnist @@ -113,78 +119,83 @@ def __getitem__(self, idx): class TestKerasInKerasOut(unittest.TestCase): @classmethod def setUpClass(self): - os.environ["ITEX_ONEDNN_GRAPH"] = '1' + os.environ["ITEX_ONEDNN_GRAPH"] = "1" @classmethod def tearDownClass(self): - shutil.rmtree('baseline_model',ignore_errors=True) - shutil.rmtree('itex_qdq_keras_model',ignore_errors=True) + shutil.rmtree("baseline_model", ignore_errors=True) + shutil.rmtree("itex_qdq_keras_model", ignore_errors=True) def test_keras_in_keras_out(self): logger.info("Run test_keras_in_keras_out case...") global test_mode - test_mode = 'accuracy' + test_mode = "accuracy" build_model() - from neural_compressor.quantization import fit - from neural_compressor.config import PostTrainingQuantConfig from neural_compressor import set_random_seed + from neural_compressor.config import PostTrainingQuantConfig from neural_compressor.data.dataloaders.dataloader import DataLoader + from neural_compressor.quantization import fit + set_random_seed(9527) - config = PostTrainingQuantConfig(backend='itex') + config = PostTrainingQuantConfig(backend="itex") logger.info("=================Run Quantization...") - q_model = fit(keras.models.load_model('./baseline_model'), - conf=config, - calib_dataloader=DataLoader(framework="tensorflow", dataset=Dataset()), - eval_func=eval_func) + q_model = fit( + keras.models.load_model("./baseline_model"), + conf=config, + calib_dataloader=DataLoader(framework="tensorflow", dataset=Dataset()), + eval_func=eval_func, + ) q_model.save("itex_qdq_keras_model") - model = keras.models.load_model('./itex_qdq_keras_model') + model = keras.models.load_model("./itex_qdq_keras_model") model.summary() found_quantize = False found_dequantize = False for layer in model.layers: - if 'quantize' in layer.name: + if "quantize" in layer.name: found_quantize = True - if 'dequantize' in layer.name: + if "dequantize" in layer.name: found_dequantize = True self.assertEqual(found_quantize, True) self.assertEqual(found_dequantize, True) from neural_compressor.benchmark import fit from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(backend='itex', iteration=100, cores_per_instance=1, num_of_instance=1) + + conf = BenchmarkConfig(backend="itex", iteration=100, cores_per_instance=1, num_of_instance=1) logger.info("=================Run BenchMark...") - test_mode = 'performance' + test_mode = "performance" fit(model, conf, b_func=eval_func) def test_keras_model_interface(self): logger.info("Run test_keras_model_interface case...") global test_mode - test_mode = 'accuracy' + test_mode = "accuracy" build_model() - from neural_compressor.quantization import fit - from neural_compressor.config import PostTrainingQuantConfig from neural_compressor import set_random_seed + from neural_compressor.config import PostTrainingQuantConfig from neural_compressor.data.dataloaders.dataloader import DataLoader + from neural_compressor.quantization import fit + set_random_seed(9527) - config = PostTrainingQuantConfig(backend='itex') - q_model = fit(keras.models.load_model('./baseline_model'), - conf=config, - calib_dataloader=DataLoader(framework="tensorflow", dataset=Dataset()), - eval_func=eval_func) 
+ config = PostTrainingQuantConfig(backend="itex") + q_model = fit( + keras.models.load_model("./baseline_model"), + conf=config, + calib_dataloader=DataLoader(framework="tensorflow", dataset=Dataset()), + eval_func=eval_func, + ) q_model.save("itex_qdq_keras_model") - self.assertEqual(q_model.framework(), 'keras') + self.assertEqual(q_model.framework(), "keras") - framework_config = { - 'framework': 'keras', - 'approach': 'post_training_static_quant' - } + framework_config = {"framework": "keras", "approach": "post_training_static_quant"} q_model.q_config = framework_config - self.assertEqual(q_model.q_config['framework'], 'keras') + self.assertEqual(q_model.q_config["framework"], "keras") self.assertEqual(q_model.graph_info, None) - self.assertEqual(q_model.framework(), 'keras') + self.assertEqual(q_model.framework(), "keras") self.assertEqual(isinstance(q_model.model, tf.keras.Model), True) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/test/itex/test_smooth_quant_itex.py b/test/itex/test_smooth_quant_itex.py index dd30e31a425..11cf1ed8b9f 100644 --- a/test/itex/test_smooth_quant_itex.py +++ b/test/itex/test_smooth_quant_itex.py @@ -1,12 +1,14 @@ import unittest -import tensorflow as tf + import numpy as np +import tensorflow as tf +from tensorflow.compat.v1 import graph_util + from neural_compressor.adaptor.tf_utils.util import disable_random +from neural_compressor.config import PostTrainingQuantConfig from neural_compressor.data.dataloaders.dataloader import DataLoader from neural_compressor.quantization import fit -from neural_compressor.config import PostTrainingQuantConfig from neural_compressor.utils.utility import set_random_seed -from tensorflow.compat.v1 import graph_util class TestItexSmoothQuantTF(unittest.TestCase): @@ -24,49 +26,54 @@ def test_itex_conv_sq(self): top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) - conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(top_relu, conv_weights2, strides=[1, 2, 2, 1], padding="SAME") normed2 = tf.compat.v1.layers.batch_normalization(conv2) - add = tf.raw_ops.Add(x=normed, y=normed2, name='addv2') + add = tf.raw_ops.Add(x=normed, y=normed2, name="addv2") relu = tf.nn.relu(add) - relu6 = tf.nn.relu6(relu, name='op_to_store') + relu6 = tf.nn.relu6(relu, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) set_random_seed(9527) config = PostTrainingQuantConfig( - backend='itex', + backend="itex", quant_level=1, - recipes={"smooth_quant": True, "smooth_quant_args": 
{'alpha': 0.5}}, - calibration_sampling_size=[500]) + recipes={"smooth_quant": True, "smooth_quant_args": {"alpha": 0.5}}, + calibration_sampling_size=[500], + ) from neural_compressor.data import Datasets - dataset = Datasets('tensorflow')['dummy'](shape=(100, 56, 56, 16), label=True) - dataloader = DataLoader(framework='tensorflow', dataset=dataset, batch_size=1) + + dataset = Datasets("tensorflow")["dummy"](shape=(100, 56, 56, 16), label=True) + dataloader = DataLoader(framework="tensorflow", dataset=dataset, batch_size=1) from neural_compressor import Metric + top1 = Metric(name="topk", k=1) output_graph = fit( model=output_graph_def, conf=config, calib_dataloader=dataloader, eval_dataloader=dataloader, - eval_metric=top1) + eval_metric=top1, + ) mul_count = 0 for i in output_graph.graph_def.node: - if i.op == 'Mul': + if i.op == "Mul": mul_count += 1 self.assertEqual(mul_count, 2) @@ -76,12 +83,13 @@ def test_itex_sq_matmul(self): x_data = np.random.rand(1024, 1024).astype(np.float32) y_data = np.random.rand(1024, 1024).astype(np.float32) import tensorflow.compat.v1 as tf - x = tf.placeholder(tf.float32, shape=[1024, 1024], name='x') + + x = tf.placeholder(tf.float32, shape=[1024, 1024], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[1024, 1024]) z = tf.matmul(x, y) bias = np.random.rand(1024).astype(np.float32) z = tf.nn.bias_add(z, bias) - z = tf.nn.relu(z, name='op_to_store') + z = tf.nn.relu(z, name="op_to_store") with tf.Session() as sess: sess.run(z, feed_dict={x: x_data, y: y_data}) @@ -89,26 +97,30 @@ def test_itex_sq_matmul(self): set_random_seed(9527) config = PostTrainingQuantConfig( - backend='itex', + backend="itex", quant_level=1, - recipes={"smooth_quant": True, "smooth_quant_args": {'alpha': 0.5}}, - calibration_sampling_size=[1024]) + recipes={"smooth_quant": True, "smooth_quant_args": {"alpha": 0.5}}, + calibration_sampling_size=[1024], + ) from neural_compressor.data import Datasets - dataset = Datasets('tensorflow')['dummy'](shape=(1024, 1024), label=True) - dataloader = DataLoader(framework='tensorflow', dataset=dataset, batch_size=1024) + + dataset = Datasets("tensorflow")["dummy"](shape=(1024, 1024), label=True) + dataloader = DataLoader(framework="tensorflow", dataset=dataset, batch_size=1024) from neural_compressor import Metric + top1 = Metric(name="topk", k=1) output_graph = fit( model=output_graph_def, conf=config, calib_dataloader=dataloader, eval_dataloader=dataloader, - eval_metric=top1) + eval_metric=top1, + ) mul_count = 0 for i in output_graph.graph_def.node: - if i.op == 'Mul': + if i.op == "Mul": mul_count += 1 self.assertEqual(mul_count, 1) @@ -119,52 +131,59 @@ def test_itex_sq_conv_matmul(self): top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv1_weights = tf.compat.v1.get_variable("weight_conv1", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv1_weights = tf.compat.v1.get_variable( + "weight_conv1", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv1 = tf.nn.conv2d(x_pad, conv1_weights, strides=[1, 2, 2, 1], padding="VALID") - matmul_weights = tf.compat.v1.get_variable("weight_matmul", [28*28*16, 7*7*32], - initializer=tf.compat.v1.random_normal_initializer()) - conv1_reshaped = tf.reshape(conv1, shape=[-1, 28*28*16]) + matmul_weights = tf.compat.v1.get_variable( + "weight_matmul", [28 * 28 * 16, 7 * 7 * 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv1_reshaped = 
tf.reshape(conv1, shape=[-1, 28 * 28 * 16]) matmul = tf.matmul(conv1_reshaped, matmul_weights) reshape = tf.reshape(matmul, (1, 7, 7, 32)) - conv2_weights = tf.compat.v1.get_variable("weight_conv2", [7, 7, 32, 1], - initializer=tf.compat.v1.random_normal_initializer()) + conv2_weights = tf.compat.v1.get_variable( + "weight_conv2", [7, 7, 32, 1], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(reshape, conv2_weights, strides=[1, 2, 2, 1], padding="VALID") - leaky_relu = tf.nn.leaky_relu(conv2, name='op_to_store') + leaky_relu = tf.nn.leaky_relu(conv2, name="op_to_store") - out_name = leaky_relu.name.split(':')[0] + out_name = leaky_relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) set_random_seed(9527) config = PostTrainingQuantConfig( - backend='itex', + backend="itex", quant_level=1, - recipes={"smooth_quant": True, "smooth_quant_args": {'alpha': 0.6}}, - calibration_sampling_size=[500]) + recipes={"smooth_quant": True, "smooth_quant_args": {"alpha": 0.6}}, + calibration_sampling_size=[500], + ) from neural_compressor.data import Datasets - dataset = Datasets('tensorflow')['dummy'](shape=(100, 56, 56, 16), label=True) - dataloader = DataLoader(framework='tensorflow', dataset=dataset) + + dataset = Datasets("tensorflow")["dummy"](shape=(100, 56, 56, 16), label=True) + dataloader = DataLoader(framework="tensorflow", dataset=dataset) from neural_compressor import Metric + top1 = Metric(name="topk", k=1) output_graph = fit( model=output_graph_def, conf=config, calib_dataloader=dataloader, eval_dataloader=dataloader, - eval_metric=top1) + eval_metric=top1, + ) mul_count = 0 for i in output_graph.graph_def.node: - if i.op == 'Mul': + if i.op == "Mul": mul_count += 1 self.assertEqual(mul_count, 3) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/test/itex/test_tensorflow_itex_2.x.py b/test/itex/test_tensorflow_itex_2.x.py index b8ecde7159a..cd64f711482 100644 --- a/test/itex/test_tensorflow_itex_2.x.py +++ b/test/itex/test_tensorflow_itex_2.x.py @@ -3,15 +3,15 @@ # import unittest -from neural_compressor.adaptor.tf_utils.util import disable_random +import tensorflow as tf +from tensorflow.compat.v1 import graph_util + +from neural_compressor import set_random_seed +from neural_compressor.adaptor.tf_utils.util import disable_random, version1_lt_version2 +from neural_compressor.config import PostTrainingQuantConfig from neural_compressor.data.dataloaders.dataloader import DataLoader from neural_compressor.quantization import fit -from neural_compressor.config import PostTrainingQuantConfig -from neural_compressor import set_random_seed -from neural_compressor.adaptor.tf_utils.util import version1_lt_version2 -import tensorflow as tf -from tensorflow.compat.v1 import graph_util class TestItexNewAPI(unittest.TestCase): @classmethod @@ -23,58 +23,59 @@ def tearDownClass(self): pass @disable_random() - @unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.8.0'), "Only supports tf greater 2.7.0") + @unittest.skipIf(version1_lt_version2(tf.version.VERSION, "2.8.0"), "Only supports tf greater 2.7.0") def test_itex_new_api(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 
0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) # relu = tf.nn.relu(normed) - conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(top_relu, conv_weights2, strides=[1, 2, 2, 1], padding="SAME") normed2 = tf.compat.v1.layers.batch_normalization(conv2) # relu2 = tf.nn.relu(normed2) - add = tf.raw_ops.Add(x=normed, y=normed2, name='addv2') + add = tf.raw_ops.Add(x=normed, y=normed2, name="addv2") relu = tf.nn.relu(add) - relu6 = tf.nn.relu6(relu, name='op_to_store') + relu6 = tf.nn.relu6(relu, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) set_random_seed(9527) - config = PostTrainingQuantConfig( - backend="itex", - quant_format="QDQ", - calibration_sampling_size=[200]) + config = PostTrainingQuantConfig(backend="itex", quant_format="QDQ", calibration_sampling_size=[200]) from neural_compressor.data import Datasets - dataset = Datasets('tensorflow')['dummy'](shape=(100, 56, 56, 16), label=True) + + dataset = Datasets("tensorflow")["dummy"](shape=(100, 56, 56, 16), label=True) output_graph = fit( model=output_graph_def, conf=config, - calib_dataloader=DataLoader(framework="tensorflow_itex", dataset=dataset, batch_size=1)) + calib_dataloader=DataLoader(framework="tensorflow_itex", dataset=dataset, batch_size=1), + ) dequant_count = 0 quantize_count = 0 for i in output_graph.graph_def.node: - if i.op == 'Dequantize': + if i.op == "Dequantize": dequant_count += 1 - if i.op == 'QuantizeV2': + if i.op == "QuantizeV2": quantize_count += 1 self.assertEqual(dequant_count, 5) self.assertEqual(quantize_count, 4) + if __name__ == "__main__": unittest.main() diff --git a/test/itex/test_tensorflow_itex_basic.py b/test/itex/test_tensorflow_itex_basic.py index 87c15dff8e4..7705a4acb09 100644 --- a/test/itex/test_tensorflow_itex_basic.py +++ b/test/itex/test_tensorflow_itex_basic.py @@ -1,30 +1,32 @@ # # -*- coding: utf-8 -*- # -import unittest import os -import shutil -import yaml import platform -from tensorflow.python.platform import gfile -from neural_compressor.adaptor.tf_utils.util import disable_random -from neural_compressor.experimental import Quantization, Benchmark, common -from neural_compressor.adaptor.tf_utils.util import version1_lt_version2, version1_gte_version2 +import shutil +import unittest import tensorflow as tf +import yaml from tensorflow.compat.v1 import graph_util +from tensorflow.python.platform import gfile + +from neural_compressor.adaptor.tf_utils.util import disable_random, version1_gte_version2, version1_lt_version2 +from neural_compressor.experimental import Benchmark, Quantization, common + def 
build_fake_yaml(fake_yaml, save_path, **kwargs): y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open(file=save_path, mode=kwargs['mode'], encoding=kwargs['encoding']) as f: + with open(file=save_path, mode=kwargs["mode"], encoding=kwargs["encoding"]) as f: yaml.dump(y, f) -@unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.8.0'), "Only supports tf greater 2.7.0") + +@unittest.skipIf(version1_lt_version2(tf.version.VERSION, "2.8.0"), "Only supports tf greater 2.7.0") class TestItexEnabling(unittest.TestCase): @classmethod def setUpClass(self): os.system("rm *.log") - fake_yaml_1 = ''' + fake_yaml_1 = """ model: name: fake_model_cpu framework: tensorflow_itex @@ -50,9 +52,9 @@ def setUpClass(self): performance_only: True workspace: path: workspace_1 - ''' + """ - fake_yaml_2 = ''' + fake_yaml_2 = """ model: name: fake_model_gpu framework: tensorflow_itex @@ -84,9 +86,9 @@ def setUpClass(self): performance_only: True workspace: path: workspace_2 - ''' + """ - fake_yaml_3 = ''' + fake_yaml_3 = """ model: name: fake_model_default_device framework: tensorflow_itex @@ -111,20 +113,20 @@ def setUpClass(self): performance_only: True workspace: path: workspace_3 - ''' - build_fake_yaml(fake_yaml_1, 'fake_yaml_1.yaml', mode="w", encoding="utf-8") - build_fake_yaml(fake_yaml_2, 'fake_yaml_2.yaml', mode="w", encoding="utf-8") - build_fake_yaml(fake_yaml_3, 'fake_yaml_3.yaml', mode="w", encoding="utf-8") + """ + build_fake_yaml(fake_yaml_1, "fake_yaml_1.yaml", mode="w", encoding="utf-8") + build_fake_yaml(fake_yaml_2, "fake_yaml_2.yaml", mode="w", encoding="utf-8") + build_fake_yaml(fake_yaml_3, "fake_yaml_3.yaml", mode="w", encoding="utf-8") @classmethod def tearDownClass(self): - os.remove('fake_yaml_1.yaml') - os.remove('fake_yaml_2.yaml') - os.remove('fake_yaml_3.yaml') - if version1_gte_version2(tf.version.VERSION, '2.8.0'): - shutil.rmtree('workspace_1') - shutil.rmtree('workspace_2') - shutil.rmtree('workspace_3') + os.remove("fake_yaml_1.yaml") + os.remove("fake_yaml_2.yaml") + os.remove("fake_yaml_3.yaml") + if version1_gte_version2(tf.version.VERSION, "2.8.0"): + shutil.rmtree("workspace_1") + shutil.rmtree("workspace_2") + shutil.rmtree("workspace_3") @disable_random() def test_itex_convert_basic_default_device(self): @@ -132,27 +134,28 @@ def test_itex_convert_basic_default_device(self): top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) - conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(top_relu, conv_weights2, strides=[1, 2, 2, 1], padding="SAME") normed2 = tf.compat.v1.layers.batch_normalization(conv2) - add = tf.raw_ops.Add(x=normed, y=normed2, name='addv2') + add = tf.raw_ops.Add(x=normed, y=normed2, name="addv2") relu = tf.nn.relu(add) - relu6 = tf.nn.relu6(relu, name='op_to_store') - out_name = relu6.name.split(':')[0] + relu6 = tf.nn.relu6(relu, name="op_to_store") + out_name = 
relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) - quantizer = Quantization('fake_yaml_3.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + quantizer = Quantization("fake_yaml_3.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -161,50 +164,51 @@ def test_itex_convert_basic_default_device(self): dequant_count = 0 quantize_count = 0 for i in output_graph.graph_def.node: - if 'min' in i.name or 'max' in i.name: - self.assertEqual(i.op, 'HostConst') - if i.op == 'HostConst': - self.assertTrue('min' in i.name or 'max' in i.name) - if i.op == 'Dequantize': + if "min" in i.name or "max" in i.name: + self.assertEqual(i.op, "HostConst") + if i.op == "HostConst": + self.assertTrue("min" in i.name or "max" in i.name) + if i.op == "Dequantize": dequant_count += 1 - if i.op == 'QuantizeV2': + if i.op == "QuantizeV2": quantize_count += 1 self.assertEqual(dequant_count, 5) self.assertEqual(quantize_count, 4) @disable_random() - @unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.8.0'), "Only supports tf greater 2.7.0") + @unittest.skipIf(version1_lt_version2(tf.version.VERSION, "2.8.0"), "Only supports tf greater 2.7.0") def test_itex_convert_basic_cpu(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) # relu = tf.nn.relu(normed) - conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(top_relu, conv_weights2, strides=[1, 2, 2, 1], padding="SAME") normed2 = tf.compat.v1.layers.batch_normalization(conv2) # relu2 = tf.nn.relu(normed2) - add = tf.raw_ops.Add(x=normed, y=normed2, name='addv2') + add = tf.raw_ops.Add(x=normed, y=normed2, name="addv2") relu = tf.nn.relu(add) - relu6 = tf.nn.relu6(relu, name='op_to_store') + relu6 = tf.nn.relu6(relu, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) - quantizer = Quantization('fake_yaml_1.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + quantizer = Quantization("fake_yaml_1.yaml") + dataset = quantizer.dataset("dummy", 
shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -213,42 +217,43 @@ def test_itex_convert_basic_cpu(self): dequant_count = 0 quantize_count = 0 for i in output_graph.graph_def.node: - if i.op == 'Dequantize': + if i.op == "Dequantize": dequant_count += 1 - if i.op == 'QuantizeV2': + if i.op == "QuantizeV2": quantize_count += 1 self.assertEqual(dequant_count, 5) self.assertEqual(quantize_count, 4) @disable_random() - @unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.8.0'), "Only supports tf greater 2.7.0") + @unittest.skipIf(version1_lt_version2(tf.version.VERSION, "2.8.0"), "Only supports tf greater 2.7.0") def test_itex_convert_basic_gpu(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) - conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(top_relu, conv_weights2, strides=[1, 2, 2, 1], padding="SAME") normed2 = tf.compat.v1.layers.batch_normalization(conv2) - add = tf.raw_ops.Add(x=normed, y=normed2, name='addv2') + add = tf.raw_ops.Add(x=normed, y=normed2, name="addv2") relu = tf.nn.relu(add) - relu6 = tf.nn.relu6(relu, name='op_to_store') - out_name = relu6.name.split(':')[0] + relu6 = tf.nn.relu6(relu, name="op_to_store") + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) - quantizer = Quantization('fake_yaml_2.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + quantizer = Quantization("fake_yaml_2.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -257,35 +262,36 @@ def test_itex_convert_basic_gpu(self): dequant_count = 0 quantize_count = 0 for i in output_graph.graph_def.node: - if i.op == 'HostConst': - self.assertTrue('min' in i.name or 'max' in i.name) - if i.op == 'Dequantize': + if i.op == "HostConst": + self.assertTrue("min" in i.name or "max" in i.name) + if i.op == "Dequantize": dequant_count += 1 - if i.op == 'QuantizeV2': + if i.op == "QuantizeV2": quantize_count += 1 self.assertEqual(dequant_count, 5) self.assertEqual(quantize_count, 4) @disable_random() - @unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.8.0'), "Only supports tf greater 2.7.0") + @unittest.skipIf(version1_lt_version2(tf.version.VERSION, "2.8.0"), "Only supports tf greater 2.7.0") def 
test_depthwiseconv2d_case(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.depthwise_conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="VALID") - out_name = conv.name.split(':')[0] + out_name = conv.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml_1.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml_1.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -293,56 +299,59 @@ def test_depthwiseconv2d_case(self): reshape_counter = 0 for i in output_graph.graph_def.node: - if i.op == 'Reshape': + if i.op == "Reshape": reshape_counter += 1 self.assertEqual(reshape_counter, 2) @disable_random() - @unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.8.0') or \ - platform.system().lower() == "windows", "Only supports tf greater 2.7.0 and Linux") + @unittest.skipIf( + version1_lt_version2(tf.version.VERSION, "2.8.0") or platform.system().lower() == "windows", + "Only supports tf greater 2.7.0 and Linux", + ) def test_itex_benchmark_gpu(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) - conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(top_relu, conv_weights2, strides=[1, 2, 2, 1], padding="SAME") normed2 = tf.compat.v1.layers.batch_normalization(conv2) - add = tf.raw_ops.Add(x=normed, y=normed2, name='addv2') + add = tf.raw_ops.Add(x=normed, y=normed2, name="addv2") relu = tf.nn.relu(add) - relu6 = tf.nn.relu6(relu, name='op_to_store') - out_name = relu6.name.split(':')[0] + relu6 = tf.nn.relu6(relu, name="op_to_store") + out_name = relu6.name.split(":")[0] num_of_instance = 1 cores_per_instance = 1 - log_file = '' + log_file = "" with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, 
output_node_names=[out_name] + ) - quantizer = Quantization('fake_yaml_2.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + quantizer = Quantization("fake_yaml_2.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def output_graph = quantizer.fit() - evaluator = Benchmark('fake_yaml_2.yaml') + evaluator = Benchmark("fake_yaml_2.yaml") evaluator.b_dataloader = common.DataLoader(dataset) num_of_instance = evaluator.conf.usr_cfg.evaluation.performance.configs.num_of_instance cores_per_instance = evaluator.conf.usr_cfg.evaluation.performance.configs.cores_per_instance - log_file = '{}_{}_{}.log'.format(num_of_instance, cores_per_instance, 0) + log_file = "{}_{}_{}.log".format(num_of_instance, cores_per_instance, 0) if gfile.Exists(log_file): os.remove(log_file) evaluator.model = output_graph - evaluator('performance') + evaluator("performance") found_multi_instance_log = False for file_name in os.listdir(os.getcwd()): @@ -353,30 +362,30 @@ def test_itex_benchmark_gpu(self): self.assertEqual(found_multi_instance_log, False) @disable_random() - @unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.8.0'), "Only supports tf greater 2.7.0") + @unittest.skipIf(version1_lt_version2(tf.version.VERSION, "2.8.0"), "Only supports tf greater 2.7.0") def test_itex_convert_shared_y_pattern_normal_case(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") top_relu = tf.nn.relu(x) - conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(top_relu, conv_weights2, strides=[1, 1, 1, 1], padding="SAME") normed2 = tf.compat.v1.layers.batch_normalization(conv2) - #relu2 = tf.nn.relu(normed2) - add = tf.raw_ops.Add(x=top_relu, y=normed2, name='addv2') + # relu2 = tf.nn.relu(normed2) + add = tf.raw_ops.Add(x=top_relu, y=normed2, name="addv2") relu = tf.nn.relu(add) - relu6 = tf.nn.relu6(relu, name='op_to_store') + relu6 = tf.nn.relu6(relu, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) - quantizer = Quantization('fake_yaml_1.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + quantizer = Quantization("fake_yaml_1.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -385,46 +394,47 @@ def test_itex_convert_shared_y_pattern_normal_case(self): dequant_count = 0 quantize_count = 0 for i in output_graph.graph_def.node: - if i.op == 'Dequantize': + if i.op == "Dequantize": dequant_count += 1 - if i.op == 'QuantizeV2': + if i.op == "QuantizeV2": quantize_count += 1 self.assertEqual(dequant_count, 2) self.assertEqual(quantize_count, 2) @disable_random() - @unittest.skipIf(version1_lt_version2(tf.version.VERSION, 
'2.8.0'), "Only supports tf greater 2.7.0") + @unittest.skipIf(version1_lt_version2(tf.version.VERSION, "2.8.0"), "Only supports tf greater 2.7.0") def test_itex_convert_share_y_pattern_abnormal_case1(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) relu = tf.nn.relu(normed) - conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(top_relu, conv_weights2, strides=[1, 2, 2, 1], padding="SAME") normed2 = tf.compat.v1.layers.batch_normalization(conv2) relu2 = tf.nn.relu(normed2) - add = tf.raw_ops.Add(x=relu, y=relu2, name='addv2') + add = tf.raw_ops.Add(x=relu, y=relu2, name="addv2") relu = tf.nn.relu(add) - relu6 = tf.nn.relu6(relu, name='op_to_store') + relu6 = tf.nn.relu6(relu, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) - quantizer = Quantization('fake_yaml_1.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + quantizer = Quantization("fake_yaml_1.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -433,38 +443,38 @@ def test_itex_convert_share_y_pattern_abnormal_case1(self): dequant_count = 0 quantize_count = 0 for i in output_graph.graph_def.node: - if i.op == 'Dequantize': + if i.op == "Dequantize": dequant_count += 1 - if i.op == 'QuantizeV2': + if i.op == "QuantizeV2": quantize_count += 1 self.assertEqual(dequant_count, 4) self.assertEqual(quantize_count, 3) @disable_random() - @unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.8.0'), "Only supports tf greater 2.7.0") + @unittest.skipIf(version1_lt_version2(tf.version.VERSION, "2.8.0"), "Only supports tf greater 2.7.0") def test_itex_convert_share_y_pattern_abnormal_case2(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) relu = tf.nn.relu(normed) - relu6 = 
tf.nn.relu6(relu, name='op_to_store') + relu6 = tf.nn.relu6(relu, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) - quantizer = Quantization('fake_yaml_1.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + quantizer = Quantization("fake_yaml_1.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -473,13 +483,14 @@ def test_itex_convert_share_y_pattern_abnormal_case2(self): dequant_count = 0 quantize_count = 0 for i in output_graph.graph_def.node: - if i.op == 'Dequantize': + if i.op == "Dequantize": dequant_count += 1 - if i.op == 'QuantizeV2': + if i.op == "QuantizeV2": quantize_count += 1 self.assertEqual(dequant_count, 2) self.assertEqual(quantize_count, 2) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/test/itex/test_tensorflow_qdq_convert_to_onnx_qdq.py b/test/itex/test_tensorflow_qdq_convert_to_onnx_qdq.py index a7f59709de6..229f66bc06e 100644 --- a/test/itex/test_tensorflow_qdq_convert_to_onnx_qdq.py +++ b/test/itex/test_tensorflow_qdq_convert_to_onnx_qdq.py @@ -1,28 +1,28 @@ # # -*- coding: utf-8 -*- # -import unittest import os import shutil -import yaml - -from neural_compressor.adaptor.tf_utils.util import disable_random -from neural_compressor.experimental import Quantization, common, Benchmark - -from neural_compressor.adaptor.tf_utils.util import version1_lt_version2, version1_gte_version2 +import unittest import tensorflow as tf +import yaml from tensorflow.compat.v1 import graph_util +from neural_compressor.adaptor.tf_utils.util import disable_random, version1_gte_version2, version1_lt_version2 +from neural_compressor.experimental import Benchmark, Quantization, common + + def build_fake_yaml(fake_yaml, save_path, **kwargs): y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open(file=save_path, mode=kwargs['mode'], encoding=kwargs['encoding']) as f: + with open(file=save_path, mode=kwargs["mode"], encoding=kwargs["encoding"]) as f: yaml.dump(y, f) + class TestConvertTensorflowQDQToOnnxQDQ(unittest.TestCase): @classmethod def setUpClass(self): - fake_yaml = ''' + fake_yaml = """ model: name: fake_model_cpu framework: tensorflow_itex @@ -48,44 +48,45 @@ def setUpClass(self): performance_only: True workspace: path: workspace - ''' - build_fake_yaml(fake_yaml, 'fake_yaml.yaml', mode="w", encoding="utf-8") + """ + build_fake_yaml(fake_yaml, "fake_yaml.yaml", mode="w", encoding="utf-8") @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') - if version1_gte_version2(tf.version.VERSION, '2.8.0'): - shutil.rmtree('workspace') + os.remove("fake_yaml.yaml") + if version1_gte_version2(tf.version.VERSION, "2.8.0"): + shutil.rmtree("workspace") @disable_random() - @unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.8.0'), "Only supports tf greater 2.7.0") + @unittest.skipIf(version1_lt_version2(tf.version.VERSION, "2.8.0"), "Only supports tf greater 2.7.0") def test_convert_tf_qdq_to_onnx_qdq(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], 
name="input") top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) - conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(top_relu, conv_weights2, strides=[1, 2, 2, 1], padding="SAME") - add = tf.raw_ops.Add(x=normed, y=conv2, name='addv2') + add = tf.raw_ops.Add(x=normed, y=conv2, name="addv2") relu = tf.nn.relu(add) - relu6 = tf.nn.relu6(relu, name='op_to_store') + relu6 = tf.nn.relu6(relu, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) @@ -93,70 +94,80 @@ def test_convert_tf_qdq_to_onnx_qdq(self): output_graph = quantizer.fit() from neural_compressor.config import TF2ONNXConfig + config = TF2ONNXConfig() output_graph.export("workspace/tf_qdq_to_onnx_qdq.onnx", config) import onnx + onnx_model = onnx.load("workspace/tf_qdq_to_onnx_qdq.onnx") onnx.checker.check_model(onnx_model) import onnxruntime as ort - from neural_compressor.data import Datasets, DATALOADERS + + from neural_compressor.data import DATALOADERS, Datasets + ort_session = ort.InferenceSession("workspace/tf_qdq_to_onnx_qdq.onnx") dataset = Datasets("tensorflow")["dummy"]((100, 56, 56, 16)) dataloader = DATALOADERS["tensorflow"](dataset) it = iter(dataloader) input = next(it) - input_dict = {'input:0': input[0]} + input_dict = {"input:0": input[0]} ort_session.run(None, input_dict) @disable_random() - @unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.8.0'), "Only supports tf greater 2.7.0") + @unittest.skipIf(version1_lt_version2(tf.version.VERSION, "2.8.0"), "Only supports tf greater 2.7.0") def test_convert_tf_fp32_to_onnx_fp32(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) - conv_weights2 = 
tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(top_relu, conv_weights2, strides=[1, 2, 2, 1], padding="SAME") normed2 = tf.compat.v1.layers.batch_normalization(conv2) - add = tf.raw_ops.Add(x=normed, y=normed2, name='addv2') + add = tf.raw_ops.Add(x=normed, y=normed2, name="addv2") relu = tf.nn.relu(add) - relu6 = tf.nn.relu6(relu, name='op_to_store') + relu6 = tf.nn.relu6(relu, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) - from neural_compressor.model import Model from neural_compressor.config import TF2ONNXConfig + from neural_compressor.model import Model + inc_model = Model(output_graph_def) config = TF2ONNXConfig(dtype="fp32") inc_model.export("workspace/tf_fp32_to_onnx_fp32.onnx", config) import onnx + onnx_model = onnx.load("workspace/tf_fp32_to_onnx_fp32.onnx") onnx.checker.check_model(onnx_model) import onnxruntime as ort - from neural_compressor.data import Datasets, DATALOADERS + + from neural_compressor.data import DATALOADERS, Datasets + ort_session = ort.InferenceSession("workspace/tf_fp32_to_onnx_fp32.onnx") dataset = Datasets("tensorflow")["dummy"]((100, 56, 56, 16)) dataloader = DATALOADERS["tensorflow"](dataset) it = iter(dataloader) input = next(it) - input_dict = {'input:0': input[0]} + input_dict = {"input:0": input[0]} ort_session.run(None, input_dict) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/test/metric/test_coco_tools.py b/test/metric/test_coco_tools.py index d960dfaa4ba..d56deac94c8 100644 --- a/test/metric/test_coco_tools.py +++ b/test/metric/test_coco_tools.py @@ -1,162 +1,274 @@ -"""Tests for coco_tools. 
""" +"""Tests for coco_tools.""" import unittest + import numpy as np + from neural_compressor.metric.coco_tools import * + class TestCOCO(unittest.TestCase): @classmethod def setUpClass(cls): groundtruth_annotations_list = [ { - 'id': 1, - 'image_id': 1, - 'category_id': 1, - 'bbox': [387.99,97.43,84.99,81.29], - 'area': 2991.9213, - 'iscrowd': 0, - 'segmentation':[ - [387.99,176.5,398.34,164.68,405.733,156.55,412.38,141.77, - 419.77,136.6,424.94,125.51,432.33,116.64,434.55,102.6, - 436.77,97.43,441.944,102.6,453.76,101.12,459.68,109.99, - 457.46,115.9,463.37,124.03,470.76,128.47,472.98,137.34, - 465.559,143.25,447.11,137.34,444.9,142.51,442.68,156.55, - 444.9,163.2,446.37,176.5,444.9,178.72] - ] + "id": 1, + "image_id": 1, + "category_id": 1, + "bbox": [387.99, 97.43, 84.99, 81.29], + "area": 2991.9213, + "iscrowd": 0, + "segmentation": [ + [ + 387.99, + 176.5, + 398.34, + 164.68, + 405.733, + 156.55, + 412.38, + 141.77, + 419.77, + 136.6, + 424.94, + 125.51, + 432.33, + 116.64, + 434.55, + 102.6, + 436.77, + 97.43, + 441.944, + 102.6, + 453.76, + 101.12, + 459.68, + 109.99, + 457.46, + 115.9, + 463.37, + 124.03, + 470.76, + 128.47, + 472.98, + 137.34, + 465.559, + 143.25, + 447.11, + 137.34, + 444.9, + 142.51, + 442.68, + 156.55, + 444.9, + 163.2, + 446.37, + 176.5, + 444.9, + 178.72, + ] + ], } ] - image_list = [{'id': 1}] - category_list = [{'id': 0, 'name': 'person'}, - {'id': 1, 'name': 'cat'}, - {'id': 2, 'name': 'dog'}] + image_list = [{"id": 1}] + category_list = [{"id": 0, "name": "person"}, {"id": 1, "name": "cat"}, {"id": 2, "name": "dog"}] cls.groundtruth_dict = { - 'annotations': groundtruth_annotations_list, - 'images': image_list, - 'categories': category_list + "annotations": groundtruth_annotations_list, + "images": image_list, + "categories": category_list, } cls.detections_list = [ { - 'image_id': 1, - 'category_id': 1, - 'bbox': [387.99,97.43,84.99,81.29], - 'score': .8, - 'segmentation':[ - [387.99,176.5,398.34,164.68,405.733,156.55,412.38,141.77, - 419.77,136.6,424.94,125.51,432.33,116.64,434.55,102.6, - 436.77,97.43,441.944,102.6,453.76,101.12,459.68,109.99, - 457.46,115.9,463.37,124.03,470.76,128.47,472.98,137.34, - 465.559,143.25,447.11,137.34,444.9,142.51,442.68,156.55, - 444.9,163.2,446.37,176.5,444.9,178.72] - ] - + "image_id": 1, + "category_id": 1, + "bbox": [387.99, 97.43, 84.99, 81.29], + "score": 0.8, + "segmentation": [ + [ + 387.99, + 176.5, + 398.34, + 164.68, + 405.733, + 156.55, + 412.38, + 141.77, + 419.77, + 136.6, + 424.94, + 125.51, + 432.33, + 116.64, + 434.55, + 102.6, + 436.77, + 97.43, + 441.944, + 102.6, + 453.76, + 101.12, + 459.68, + 109.99, + 457.46, + 115.9, + 463.37, + 124.03, + 470.76, + 128.47, + 472.98, + 137.34, + 465.559, + 143.25, + 447.11, + 137.34, + 444.9, + 142.51, + 442.68, + 156.55, + 444.9, + 163.2, + 446.37, + 176.5, + 444.9, + 178.72, + ] + ], }, ] def testCOCOWrapper(self): with self.assertRaises(ValueError): - wrap = COCOWrapper(None, 'test') + wrap = COCOWrapper(None, "test") wrap = COCOWrapper(TestCOCO.groundtruth_dict) with self.assertRaises(ValueError): wrap.LoadAnnotations(TestCOCO.groundtruth_dict) - wrong_detection = { - 'image_id': 'test', - 'category_id': 1, - 'bbox': [100., 100., 100., 100.], - 'score': .8 - } + wrong_detection = {"image_id": "test", "category_id": 1, "bbox": [100.0, 100.0, 100.0, 100.0], "score": 0.8} with self.assertRaises(ValueError): wrap.LoadAnnotations(wrong_detection) - wrong_detection = [ - { - 'image_id': 'test', - 'category_id': 1, - 'bbox': [100., 100., 100., 100.], - 'score': .8 - 
} - ] + wrong_detection = [{"image_id": "test", "category_id": 1, "bbox": [100.0, 100.0, 100.0, 100.0], "score": 0.8}] with self.assertRaises(ValueError): wrap.LoadAnnotations(wrong_detection) - + groundtruth = COCOWrapper(TestCOCO.groundtruth_dict) detections = groundtruth.LoadAnnotations(TestCOCO.detections_list) evaluator = COCOEvalWrapper(groundtruth, detections) - self.assertEqual(evaluator.GetCategory(1)['name'], 'cat') + self.assertEqual(evaluator.GetCategory(1)["name"], "cat") self.assertTrue(not evaluator.GetAgnosticMode()) self.assertEqual(evaluator.GetCategoryIdList(), [0, 1, 2]) evaluator = COCOEvalWrapper(groundtruth, detections, agnostic_mode=True) self.assertTrue(evaluator.GetAgnosticMode()) summary_metrics, _ = evaluator.ComputeMetrics() - self.assertAlmostEqual(1.0, summary_metrics['Precision/mAP']) + self.assertAlmostEqual(1.0, summary_metrics["Precision/mAP"]) with self.assertRaises(ValueError): summary_metrics, _ = evaluator.ComputeMetrics(True, True) - iou_thrs = '0.5:0.05:0.95' + iou_thrs = "0.5:0.05:0.95" map_points = 101 evaluator = COCOEvalWrapper(groundtruth, detections, iou_thrs=iou_thrs, map_points=map_points) evaluator.evaluate() evaluator.accumulate() - self.assertEqual(evaluator.eval['counts'], [10, 101, 3, 4, 3]) + self.assertEqual(evaluator.eval["counts"], [10, 101, 3, 4, 3]) iou_thrs = 0.5 map_points = 11 evaluator = COCOEvalWrapper(groundtruth, detections, iou_thrs=iou_thrs, map_points=map_points) evaluator.evaluate() evaluator.accumulate() - self.assertEqual(evaluator.eval['counts'], [1, 11, 3, 4, 3]) + self.assertEqual(evaluator.eval["counts"], [1, 11, 3, 4, 3]) iou_thrs = 0.5 map_points = 0 evaluator = COCOEvalWrapper(groundtruth, detections, iou_thrs=iou_thrs, map_points=map_points) evaluator.evaluate() evaluator.accumulate() - self.assertEqual(evaluator.eval['counts'], [1, 1, 3, 4, 3]) - - + self.assertEqual(evaluator.eval["counts"], [1, 1, 3, 4, 3]) def testExportSingleImageDetectionBoxesToCoco(self): with self.assertRaises(ValueError): - ExportSingleImageDetectionBoxesToCoco(None, None, None, - np.array([0]), np.array([[0,0]])) + ExportSingleImageDetectionBoxesToCoco(None, None, None, np.array([0]), np.array([[0, 0]])) with self.assertRaises(ValueError): - ExportSingleImageDetectionBoxesToCoco(None, None, np.array([0,0]), - np.array([0]), np.array([0])) + ExportSingleImageDetectionBoxesToCoco(None, None, np.array([0, 0]), np.array([0]), np.array([0])) with self.assertRaises(ValueError): - ExportSingleImageDetectionBoxesToCoco(None, None, np.array([[0,0]]), - np.array([0]), np.array([0])) - + ExportSingleImageDetectionBoxesToCoco(None, None, np.array([[0, 0]]), np.array([0]), np.array([0])) + def testExportSingleImageGroundtruthToCoco(self): with self.assertRaises(ValueError): - ExportSingleImageGroundtruthToCoco(None, None, None, - np.array([0,0]), np.array([0])) + ExportSingleImageGroundtruthToCoco(None, None, None, np.array([0, 0]), np.array([0])) with self.assertRaises(ValueError): - ExportSingleImageGroundtruthToCoco(None, None, None, - np.array([[0,0]]), np.array([0])) + ExportSingleImageGroundtruthToCoco(None, None, None, np.array([[0, 0]]), np.array([0])) with self.assertRaises(ValueError): - ExportSingleImageGroundtruthToCoco(None, None, None, - np.array([[1,1,5,5]]), np.array([1]), np.array([[[1]]]), np.array([[1,0]])) - ExportSingleImageGroundtruthToCoco(1, 2, [0,1,2], np.array([[1,1,5,5]]), - np.array([1]), np.array([[[1]]], dtype=np.uint8), np.array([1,0])) - + ExportSingleImageGroundtruthToCoco( + None, None, None, np.array([[1, 1, 5, 
5]]), np.array([1]), np.array([[[1]]]), np.array([[1, 0]]) + ) + ExportSingleImageGroundtruthToCoco( + 1, + 2, + [0, 1, 2], + np.array([[1, 1, 5, 5]]), + np.array([1]), + np.array([[[1]]], dtype=np.uint8), + np.array([1, 0]), + ) def testExportSingleImageDetectionMasksToCoco(self): with self.assertRaises(ValueError): - ExportSingleImageDetectionMasksToCoco(None, None, None, - np.array([0]), np.array([[0,0]])) + ExportSingleImageDetectionMasksToCoco(None, None, None, np.array([0]), np.array([[0, 0]])) with self.assertRaises(ValueError): - ExportSingleImageDetectionMasksToCoco(None, None, np.array([0,0]), - np.array([0]), np.array([0])) - mask=[ - [387.99,176.5,398.34,164.68,405.733,156.55,412.38,141.77, - 419.77,136.6,424.94,125.51,432.33,116.64,434.55,102.6, - 436.77,97.43,441.944,102.6,453.76,101.12,459.68,109.99, - 457.46,115.9,463.37,124.03,470.76,128.47,472.98,137.34, - 465.559,143.25,447.11,137.34,444.9,142.51,442.68,156.55, - 444.9,163.2,446.37,176.5,444.9,178.72] - ] - - result = ExportSingleImageDetectionMasksToCoco( - 1, [0,1,2], mask, np.array([0.8]), np.array([1])) + ExportSingleImageDetectionMasksToCoco(None, None, np.array([0, 0]), np.array([0]), np.array([0])) + mask = [ + [ + 387.99, + 176.5, + 398.34, + 164.68, + 405.733, + 156.55, + 412.38, + 141.77, + 419.77, + 136.6, + 424.94, + 125.51, + 432.33, + 116.64, + 434.55, + 102.6, + 436.77, + 97.43, + 441.944, + 102.6, + 453.76, + 101.12, + 459.68, + 109.99, + 457.46, + 115.9, + 463.37, + 124.03, + 470.76, + 128.47, + 472.98, + 137.34, + 465.559, + 143.25, + 447.11, + 137.34, + 444.9, + 142.51, + 442.68, + 156.55, + 444.9, + 163.2, + 446.37, + 176.5, + 444.9, + 178.72, + ] + ] + + result = ExportSingleImageDetectionMasksToCoco(1, [0, 1, 2], mask, np.array([0.8]), np.array([1])) self.assertEqual(len(result), 1) + if __name__ == "__main__": unittest.main() diff --git a/test/metric/test_exp_metrics.py b/test/metric/test_exp_metrics.py index 3ffdb405cbb..236d693d9f0 100644 --- a/test/metric/test_exp_metrics.py +++ b/test/metric/test_exp_metrics.py @@ -1,15 +1,19 @@ """Tests for the metrics module.""" -import numpy as np -import unittest import platform +import unittest + +import numpy as np + from neural_compressor.experimental.metric import METRICS -from neural_compressor.experimental.metric.f1 import evaluate from neural_compressor.experimental.metric.evaluate_squad import evaluate as evaluate_squad +from neural_compressor.experimental.metric.f1 import evaluate + class InCorrectMetric: def __init__(self): self.item = None + class CorrectMetric: def __init__(self): self.item = [] @@ -23,6 +27,7 @@ def result(self): def reset(self): self.item = [] + class CorrectMetric_v2: def __init__(self): self.item = [] @@ -31,25 +36,26 @@ def update(self, labels, preds): self.item.append(preds) def result(self): - return 'res', 0 + return "res", 0 def reset(self): self.item = [] + class TestMetrics(unittest.TestCase): def testUserMetric(self): - from neural_compressor.experimental import common, Quantization, Benchmark, \ - Graph_Optimization + from neural_compressor.experimental import Benchmark, Graph_Optimization, Quantization, common + for i in [Quantization(), Benchmark(), Graph_Optimization()]: item = i with self.assertRaises(AssertionError): item.metric = InCorrectMetric() - item.framework = 'tensorflow' + item.framework = "tensorflow" item.metric = common.Metric(CorrectMetric, str(i)) def testmIOU(self): - metrics = METRICS('tensorflow') - miou = metrics['mIOU']() + metrics = METRICS("tensorflow") + miou = metrics["mIOU"]() preds = 
np.array([0, 0, 1, 1]) labels = np.array([0, 1, 0, 1]) miou.update(preds, labels) @@ -62,58 +68,66 @@ def testmIOU(self): self.assertAlmostEqual(miou.result(), 0.58333333) def testBLEU(self): - metrics = METRICS('tensorflow') - bleu = metrics['BLEU']() - preds = ['Gutach: Mehr Sicherheit für Fußgänger'] - labels = ('Gutach: Noch mehr Sicherheit für Fußgänger',) + metrics = METRICS("tensorflow") + bleu = metrics["BLEU"]() + preds = ["Gutach: Mehr Sicherheit für Fußgänger"] + labels = ("Gutach: Noch mehr Sicherheit für Fußgänger",) bleu.update(preds, labels) self.assertAlmostEqual(bleu.result(), 51.1507809) bleu.reset() - preds = ['Dies wurde auch von Peter Arnold vom Offenburg District Office bestätigt.'] - labels = ('Dies bestätigt auch Peter Arnold vom Landratsamt Offenburg.',) + preds = ["Dies wurde auch von Peter Arnold vom Offenburg District Office bestätigt."] + labels = ("Dies bestätigt auch Peter Arnold vom Landratsamt Offenburg.",) bleu.update(preds, labels) self.assertAlmostEqual(bleu.result(), 16.108992695) with self.assertRaises(ValueError): - bleu.update(['a','b'], ('c',)) + bleu.update(["a", "b"], ("c",)) def test_onnxrt_GLUE(self): - metrics = METRICS('onnxrt_qlinearops') - glue = metrics['GLUE']('mrpc') - preds = [np.array( - [[-3.2443411, 3.0909934], - [2.0500996, -2.3100944], - [1.870293 , -2.0741048], - [-2.8377204, 2.617834], - [2.008347 , -2.0215416], - [-2.9693947, 2.7782154], - [-2.9949608, 2.7887983], - [-3.0623112, 2.8748074]]) + metrics = METRICS("onnxrt_qlinearops") + glue = metrics["GLUE"]("mrpc") + preds = [ + np.array( + [ + [-3.2443411, 3.0909934], + [2.0500996, -2.3100944], + [1.870293, -2.0741048], + [-2.8377204, 2.617834], + [2.008347, -2.0215416], + [-2.9693947, 2.7782154], + [-2.9949608, 2.7887983], + [-3.0623112, 2.8748074], + ] + ) ] labels = [np.array([1, 0, 0, 1, 0, 1, 0, 1])] glue.update(preds, labels) self.assertEqual(glue.result(), 0.875) - preds_2 = [np.array( - [[-3.1296735, 2.8356276], - [-3.172515 , 2.9173899], - [-3.220131 , 3.0916846], - [2.1452675, -1.9398905], - [1.5475761, -1.9101546], - [-2.9797182, 2.721741], - [-3.2052834, 2.9934788], - [-2.7451005, 2.622343]]) + preds_2 = [ + np.array( + [ + [-3.1296735, 2.8356276], + [-3.172515, 2.9173899], + [-3.220131, 3.0916846], + [2.1452675, -1.9398905], + [1.5475761, -1.9101546], + [-2.9797182, 2.721741], + [-3.2052834, 2.9934788], + [-2.7451005, 2.622343], + ] + ) ] labels_2 = [np.array([1, 1, 1, 0, 0, 1, 1, 1])] glue.update(preds_2, labels_2) - self.assertEqual(glue.result(), 0.9375) - + self.assertEqual(glue.result(), 0.9375) + glue.reset() glue.update(preds, labels) self.assertEqual(glue.result(), 0.875) def test_tensorflow_F1(self): - metrics = METRICS('tensorflow') - F1 = metrics['F1']() + metrics = METRICS("tensorflow") + F1 = metrics["F1"]() preds = [1, 1, 1, 1] labels = [0, 1, 1, 0] @@ -121,30 +135,55 @@ def test_tensorflow_F1(self): self.assertEqual(F1.result(), 0.5) def test_squad_evaluate(self): - label = [{'paragraphs':\ - [{'qas':[{'answers': [{'answer_start': 177, 'text': 'Denver Broncos'}, \ - {'answer_start': 177, 'text': 'Denver Broncos'}, \ - {'answer_start': 177, 'text': 'Denver Broncos'}], \ - 'question': 'Which NFL team represented the AFC at Super Bowl 50?', \ - 'id': '56be4db0acb8001400a502ec'}]}]}] - preds = {'56be4db0acb8001400a502ec': 'Denver Broncos'} + label = [ + { + "paragraphs": [ + { + "qas": [ + { + "answers": [ + {"answer_start": 177, "text": "Denver Broncos"}, + {"answer_start": 177, "text": "Denver Broncos"}, + {"answer_start": 177, "text": "Denver Broncos"}, 
+ ], + "question": "Which NFL team represented the AFC at Super Bowl 50?", + "id": "56be4db0acb8001400a502ec", + } + ] + } + ] + } + ] + preds = {"56be4db0acb8001400a502ec": "Denver Broncos"} f1 = evaluate(preds, label) - self.assertEqual(f1, 100.) - dataset = [{'paragraphs':\ - [{'qas':[{'answers': [{'answer_start': 177, 'text': 'Denver Broncos'}, \ - {'answer_start': 177, 'text': 'Denver Broncos'}, \ - {'answer_start': 177, 'text': 'Denver Broncos'}], \ - 'question': 'Which NFL team represented the AFC at Super Bowl 50?', \ - 'id': '56be4db0acb8001400a502ec'}]}]}] - predictions = {'56be4db0acb8001400a502ec': 'Denver Broncos'} - f1_squad = evaluate_squad(dataset,predictions) - self.assertEqual(f1_squad['f1'], 100.) - self.assertEqual(f1_squad['exact_match'], 100.) - + self.assertEqual(f1, 100.0) + dataset = [ + { + "paragraphs": [ + { + "qas": [ + { + "answers": [ + {"answer_start": 177, "text": "Denver Broncos"}, + {"answer_start": 177, "text": "Denver Broncos"}, + {"answer_start": 177, "text": "Denver Broncos"}, + ], + "question": "Which NFL team represented the AFC at Super Bowl 50?", + "id": "56be4db0acb8001400a502ec", + } + ] + } + ] + } + ] + predictions = {"56be4db0acb8001400a502ec": "Denver Broncos"} + f1_squad = evaluate_squad(dataset, predictions) + self.assertEqual(f1_squad["f1"], 100.0) + self.assertEqual(f1_squad["exact_match"], 100.0) def test_pytorch_F1(self): - metrics = METRICS('pytorch') - F1 = metrics['F1']() + metrics = METRICS("pytorch") + F1 = metrics["F1"]() F1.reset() preds = [1, 1] labels = [2, 1, 1] @@ -154,8 +193,8 @@ def test_pytorch_F1(self): @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows yet") def test_mxnet_F1(self): - metrics = METRICS('mxnet') - F1 = metrics['F1']() + metrics = METRICS("mxnet") + F1 = metrics["F1"]() preds = [0, 1, 1, 1, 1, 0] labels = [0, 1, 1, 1] @@ -163,17 +202,17 @@ def test_mxnet_F1(self): self.assertEqual(F1.result(), 0.8) def test_onnx_topk(self): - metrics = METRICS('onnxrt_qlinearops') - top1 = metrics['topk']() + metrics = METRICS("onnxrt_qlinearops") + top1 = metrics["topk"]() top1.reset() self.assertEqual(top1.result(), 0) self.assertEqual(top1.result(), 0) - top2 = metrics['topk'](k=2) - top3 = metrics['topk'](k=3) + top2 = metrics["topk"](k=2) + top3 = metrics["topk"](k=3) predicts = [[0, 0.2, 0.9, 0.3], [0, 0.9, 0.8, 0]] single_predict = [0, 0.2, 0.9, 0.3] - + labels = [[0, 1, 0, 0], [0, 0, 1, 0]] sparse_labels = [2, 2] single_label = 2 @@ -204,16 +243,16 @@ def test_onnx_topk(self): @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows yet") def test_mxnet_topk(self): - metrics = METRICS('mxnet') - top1 = metrics['topk']() + metrics = METRICS("mxnet") + top1 = metrics["topk"]() top1.reset() self.assertEqual(top1.result(), 0) - top2 = metrics['topk'](k=2) - top3 = metrics['topk'](k=3) + top2 = metrics["topk"](k=2) + top3 = metrics["topk"](k=3) predicts = [[0, 0.2, 0.9, 0.3], [0, 0.9, 0.8, 0]] single_predict = [0, 0.2, 0.9, 0.3] - + labels = [[0, 1, 0, 0], [0, 0, 1, 0]] sparse_labels = [2, 2] single_label = 2 @@ -243,23 +282,41 @@ def test_mxnet_topk(self): self.assertEqual(top3.result(), 1) def test_tensorflow_topk(self): - metrics = METRICS('tensorflow') - top1 = metrics['topk']() + metrics = METRICS("tensorflow") + top1 = metrics["topk"]() top1.reset() self.assertEqual(top1.result(), 0) - top2 = metrics['topk'](k=2) - top3 = metrics['topk'](k=3) + top2 = metrics["topk"](k=2) + top3 = metrics["topk"](k=3) predicts = [[0, 0.2, 0.9, 0.3], [0, 0.9, 0.8, 
0]] single_predict = [0, 0.2, 0.9, 0.3] int_predict = 0 ndarry_predict = np.array([[0, 0.2, 0.9, 0.3], [0, 0.9, 0.8, 0]]) - + labels = [[0, 1, 0, 0], [0, 0, 1, 0]] sparse_labels = [2, 2] single_label = 2 - tuple_label = tuple([[0, 1], [0, 0,]]) - list_tuple_label = [tuple([[0, 1], [0, 0,]])] + tuple_label = tuple( + [ + [0, 1], + [ + 0, + 0, + ], + ] + ) + list_tuple_label = [ + tuple( + [ + [0, 1], + [ + 0, + 0, + ], + ] + ) + ] # test functionality of one-hot label top1.update(predicts, labels) @@ -299,540 +356,593 @@ def test_tensorflow_topk(self): top1.reset() top1.update(ndarry_predict, list_tuple_label) self.assertEqual(top1.result(), 0.5) - + def test_tensorflow_mAP(self): import json import os - metrics = METRICS('tensorflow') - fake_dict = 'dog: 1' - with open('anno.yaml', 'w', encoding="utf-8") as f: + + metrics = METRICS("tensorflow") + fake_dict = "dog: 1" + with open("anno.yaml", "w", encoding="utf-8") as f: f.write(fake_dict) - mAP = metrics['mAP']('anno.yaml') - self.assertEqual(mAP.category_map_reverse['dog'], 1) + mAP = metrics["mAP"]("anno.yaml") + self.assertEqual(mAP.category_map_reverse["dog"], 1) detection = [ np.array([[5]]), np.array([[5]]), - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. , 0.98301625, 0.520178 ], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762], - [0.40032804, 0.01218696, 0.6924763 , 0.30341768], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), - np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array( + [ + [ + [0.16117382, 0.59801614, 0.81511605, 0.7858219], + [0.5589304, 0.0, 0.98301625, 0.520178], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + [0.40032804, 0.01218696, 0.6924763, 0.30341768], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + ] + ] + ), + np.array([[0.9267181, 0.8510787, 0.60418576, 0.35155892, 0.31158054]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] ground_truth = [ - np.array([[[0.5633255 , 0.34003124, 0.69857144, 0.4009531 ], - [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), - np.array([['a', 'b']]), + np.array([[[0.5633255, 0.34003124, 0.69857144, 0.4009531], [0.4763466, 0.7769531, 0.54334897, 0.9675937]]]), + np.array([["a", "b"]]), np.array([[]]), - np.array([b'000000397133.jpg']) + np.array([b"000000397133.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection, ground_truth) detection = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), - np.array([[0.9267181 , 0.8510787]]), - np.array([[ 1., 1.]]) + np.array( + [[[0.16117382, 0.59801614, 0.81511605, 0.7858219], [0.62706745, 0.35748824, 0.6892729, 0.41513762]]] + ), + np.array([[0.9267181, 0.8510787]]), + np.array([[1.0, 1.0]]), ] ground_truth = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), - np.array([[b'dog', b'dog']]), + np.array( + [[[0.16117382, 0.59801614, 0.81511605, 0.7858219], [0.62706745, 0.35748824, 0.6892729, 0.41513762]]] + ), + np.array([[b"dog", b"dog"]]), np.array([[]]), - np.array([b'000000397133.jpg']) + np.array([b"000000397133.jpg"]), ] mAP.update(detection, ground_truth) mAP.result() - self.assertEqual(format(mAP.result(), '.5f'), - '1.00000') - + self.assertEqual(format(mAP.result(), ".5f"), "1.00000") + detection = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. 
, 0.98301625, 0.520178 ], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762], - [0.40032804, 0.01218696, 0.6924763 , 0.30341768], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), - np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array( + [ + [ + [0.16117382, 0.59801614, 0.81511605, 0.7858219], + [0.5589304, 0.0, 0.98301625, 0.520178], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + [0.40032804, 0.01218696, 0.6924763, 0.30341768], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + ] + ] + ), + np.array([[0.9267181, 0.8510787, 0.60418576, 0.35155892, 0.31158054]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] detection_2 = [ np.array([[8]]), - np.array([[[0.82776225, 0.5865939 , 0.8927653 , 0.6302338 ], - [0.8375764 , 0.6424138 , 0.9055594 , 0.6921875 ], - [0.57902956, 0.39394334, 0.8342961 , 0.5577197 ], - [0.7949219 , 0.6513021 , 0.8472295 , 0.68427753], - [0.809729 , 0.5947042 , 0.8539927 , 0.62916476], - [0.7258591 , 0.08907133, 1. , 0.86224866], - [0.43100086, 0.37782395, 0.8384069 , 0.5616918 ], - [0.32005906, 0.84334356, 1. , 1. ]]]), - np.array([[0.86698544, 0.7562499 , 0.66414887, 0.64498234,\ - 0.63083494,0.46618757, 0.3914739 , 0.3094324 ]]), - np.array([[55., 55., 79., 55., 55., 67., 79., 82.]]) + np.array( + [ + [ + [0.82776225, 0.5865939, 0.8927653, 0.6302338], + [0.8375764, 0.6424138, 0.9055594, 0.6921875], + [0.57902956, 0.39394334, 0.8342961, 0.5577197], + [0.7949219, 0.6513021, 0.8472295, 0.68427753], + [0.809729, 0.5947042, 0.8539927, 0.62916476], + [0.7258591, 0.08907133, 1.0, 0.86224866], + [0.43100086, 0.37782395, 0.8384069, 0.5616918], + [0.32005906, 0.84334356, 1.0, 1.0], + ] + ] + ), + np.array([[0.86698544, 0.7562499, 0.66414887, 0.64498234, 0.63083494, 0.46618757, 0.3914739, 0.3094324]]), + np.array([[55.0, 55.0, 79.0, 55.0, 55.0, 67.0, 79.0, 82.0]]), ] ground_truth = [ - np.array([[[0.5633255 , 0.34003124, 0.69857144, 0.4009531 ], - [0.56262296, 0.0015625 , 1. , 0.5431719 ], - [0.16374707, 0.60728127, 0.813911 , 0.77823436], - [0.5841452 , 0.21182813, 0.65156907, 0.24670312], - [0.8056206 , 0.048875 , 0.90124124, 0.1553125 ], - [0.6729742 , 0.09317187, 0.7696956 , 0.21203125], - [0.3848478 , 0.002125 , 0.61522245, 0.303 ], - [0.61548007, 0. , 0.7015925 , 0.097125 ], - [0.6381967 , 0.1865625 , 0.7184075 , 0.22534375], - [0.6274239 , 0.22104688, 0.71140516, 0.27134374], - [0.39566743, 0.24370313, 0.43578455, 0.284375 ], - [0.2673302 , 0.245625 , 0.3043794 , 0.27353126], - [0.7137705 , 0.15429688, 0.726815 , 0.17114063], - [0.6003747 , 0.25942189, 0.6438876 , 0.27320313], - [0.68845433, 0.13501562, 0.714637 , 0.17245312], - [0.69358313, 0.10959375, 0.7043091 , 0.12409375], - [0.493911 , 0. 
, 0.72571427, 0.299 ], + np.array( + [ + [ + [0.5633255, 0.34003124, 0.69857144, 0.4009531], + [0.56262296, 0.0015625, 1.0, 0.5431719], + [0.16374707, 0.60728127, 0.813911, 0.77823436], + [0.5841452, 0.21182813, 0.65156907, 0.24670312], + [0.8056206, 0.048875, 0.90124124, 0.1553125], + [0.6729742, 0.09317187, 0.7696956, 0.21203125], + [0.3848478, 0.002125, 0.61522245, 0.303], + [0.61548007, 0.0, 0.7015925, 0.097125], + [0.6381967, 0.1865625, 0.7184075, 0.22534375], + [0.6274239, 0.22104688, 0.71140516, 0.27134374], + [0.39566743, 0.24370313, 0.43578455, 0.284375], + [0.2673302, 0.245625, 0.3043794, 0.27353126], + [0.7137705, 0.15429688, 0.726815, 0.17114063], + [0.6003747, 0.25942189, 0.6438876, 0.27320313], + [0.68845433, 0.13501562, 0.714637, 0.17245312], + [0.69358313, 0.10959375, 0.7043091, 0.12409375], + [0.493911, 0.0, 0.72571427, 0.299], [0.69576114, 0.15107812, 0.70714283, 0.16332813], - [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), + [0.4763466, 0.7769531, 0.54334897, 0.9675937], + ] + ] + ), np.array([[]]), - np.array([[44, 67, 1, 49, 51, 51, 79, 1, 47, 47, 51, 51,\ - 56, 50, 56, 56, 79, 57, 81]]), - np.array([b'000000397133.jpg']) + np.array([[44, 67, 1, 49, 51, 51, 79, 1, 47, 47, 51, 51, 56, 50, 56, 56, 79, 57, 81]]), + np.array([b"000000397133.jpg"]), ] ground_truth_2 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.9358696 , 0.07528409, 0.99891305, 0.25 ], - [0.8242174 , 0.3309659 , 0.93508697, 0.47301137], - [0.77413046, 0.22599432, 0.9858696 , 0.8179261 ], - [0.32582608, 0.8575 , 0.98426086, 0.9984659 ], - [0.77795655, 0.6268466 , 0.89930433, 0.73434657], - [0.5396087 , 0.39053977, 0.8483913 , 0.5615057 ], - [0.58473915, 0.75661933, 0.5998261 , 0.83579546], - [0.80391306, 0.6129829 , 0.8733478 , 0.66201705], - [0.8737391 , 0.6579546 , 0.943 , 0.7053693 ], - [0.775 , 0.6549716 , 0.8227391 , 0.6882955 ], - [0.8130869 , 0.58292615, 0.90526086, 0.62551135], - [0.7844348 , 0.68735796, 0.98182607, 0.83329546], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array( + [ + [ + [0.51508695, 0.2911648, 0.5903478, 0.31360796], + [0.9358696, 0.07528409, 0.99891305, 0.25], + [0.8242174, 0.3309659, 0.93508697, 0.47301137], + [0.77413046, 0.22599432, 0.9858696, 0.8179261], + [0.32582608, 0.8575, 0.98426086, 0.9984659], + [0.77795655, 0.6268466, 0.89930433, 0.73434657], + [0.5396087, 0.39053977, 0.8483913, 0.5615057], + [0.58473915, 0.75661933, 0.5998261, 0.83579546], + [0.80391306, 0.6129829, 0.8733478, 0.66201705], + [0.8737391, 0.6579546, 0.943, 0.7053693], + [0.775, 0.6549716, 0.8227391, 0.6882955], + [0.8130869, 0.58292615, 0.90526086, 0.62551135], + [0.7844348, 0.68735796, 0.98182607, 0.83329546], + [0.872, 0.6190057, 0.9306522, 0.6591761], + ] + ] + ), np.array([[]]), np.array([[64, 62, 62, 67, 82, 52, 79, 81, 55, 55, 55, 55, 62, 55]]), - np.array([b'000000037777.jpg']) + np.array([b"000000037777.jpg"]), ] - mAP = metrics['mAP']() - + mAP = metrics["mAP"]() + self.assertEqual(mAP.result(), 0) mAP.update(detection, ground_truth) - + mAP.update(detection, ground_truth) - self.assertEqual(format(mAP.result(), '.5f'), - '0.18182') + self.assertEqual(format(mAP.result(), ".5f"), "0.18182") mAP.update(detection_2, ground_truth_2) - self.assertEqual(format(mAP.result(), '.5f'), - '0.20347') + self.assertEqual(format(mAP.result(), ".5f"), "0.20347") mAP.reset() mAP.update(detection, ground_truth) - self.assertEqual(format(mAP.result(), '.5f'), - '0.18182') + self.assertEqual(format(mAP.result(), ".5f"), "0.18182") ground_truth_1 = [ - 
np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[[64, 62]]]), - np.array([b'000000037777.jpg']) + np.array([b"000000037777.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection, ground_truth_1) ground_truth_2 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[64]]), - np.array([b'000000037700.jpg']) + np.array([b"000000037700.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection, ground_truth_2) detection_1 = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. , 0.98301625, 0.520178 ]]]), - np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219], [0.5589304, 0.0, 0.98301625, 0.520178]]]), + np.array([[0.9267181, 0.8510787, 0.60418576, 0.35155892, 0.31158054]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] ground_truth_1 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[64, 62]]), - np.array([b'000000011.jpg']) + np.array([b"000000011.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection_1, ground_truth_1) ground_truth_2 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[64, 62]]), - np.array([b'000000012.jpg']) + np.array([b"000000012.jpg"]), ] detection_2 = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. , 0.98301625, 0.520178 ]]]), - np.array([[0.9267181 , 0.8510787]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219], [0.5589304, 0.0, 0.98301625, 0.520178]]]), + np.array([[0.9267181, 0.8510787]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] self.assertRaises(ValueError, mAP.update, detection_2, ground_truth_2) - os.remove('anno.yaml') + os.remove("anno.yaml") - def test_tensorflow_VOCmAP(self): import os - metrics = METRICS('tensorflow') - fake_dict = 'dog: 1' - with open('anno.yaml', 'w', encoding="utf-8") as f: + + metrics = METRICS("tensorflow") + fake_dict = "dog: 1" + with open("anno.yaml", "w", encoding="utf-8") as f: f.write(fake_dict) - mAP = metrics['VOCmAP']('anno.yaml') + mAP = metrics["VOCmAP"]("anno.yaml") self.assertEqual(mAP.iou_thrs, 0.5) self.assertEqual(mAP.map_points, 0) - self.assertEqual(mAP.category_map_reverse['dog'], 1) + self.assertEqual(mAP.category_map_reverse["dog"], 1) detection = [ np.array([[5]]), np.array([[5]]), - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. 
, 0.98301625, 0.520178 ], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762], - [0.40032804, 0.01218696, 0.6924763 , 0.30341768], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), - np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array( + [ + [ + [0.16117382, 0.59801614, 0.81511605, 0.7858219], + [0.5589304, 0.0, 0.98301625, 0.520178], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + [0.40032804, 0.01218696, 0.6924763, 0.30341768], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + ] + ] + ), + np.array([[0.9267181, 0.8510787, 0.60418576, 0.35155892, 0.31158054]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] ground_truth = [ - np.array([[[0.5633255 , 0.34003124, 0.69857144, 0.4009531 ], - [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), - np.array([['a', 'b']]), + np.array([[[0.5633255, 0.34003124, 0.69857144, 0.4009531], [0.4763466, 0.7769531, 0.54334897, 0.9675937]]]), + np.array([["a", "b"]]), np.array([[]]), - np.array([b'000000397133.jpg']) + np.array([b"000000397133.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection, ground_truth) - os.remove('anno.yaml') + os.remove("anno.yaml") - mAP = metrics['VOCmAP']() + mAP = metrics["VOCmAP"]() detection = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. , 0.98301625, 0.520178 ], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762], - [0.40032804, 0.01218696, 0.6924763 , 0.30341768], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), - np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array( + [ + [ + [0.16117382, 0.59801614, 0.81511605, 0.7858219], + [0.5589304, 0.0, 0.98301625, 0.520178], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + [0.40032804, 0.01218696, 0.6924763, 0.30341768], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + ] + ] + ), + np.array([[0.9267181, 0.8510787, 0.60418576, 0.35155892, 0.31158054]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] detection_2 = [ np.array([[8]]), - np.array([[[0.82776225, 0.5865939 , 0.8927653 , 0.6302338 ], - [0.8375764 , 0.6424138 , 0.9055594 , 0.6921875 ], - [0.57902956, 0.39394334, 0.8342961 , 0.5577197 ], - [0.7949219 , 0.6513021 , 0.8472295 , 0.68427753], - [0.809729 , 0.5947042 , 0.8539927 , 0.62916476], - [0.7258591 , 0.08907133, 1. , 0.86224866], - [0.43100086, 0.37782395, 0.8384069 , 0.5616918 ], - [0.32005906, 0.84334356, 1. , 1. ]]]), - np.array([[0.86698544, 0.7562499 , 0.66414887, 0.64498234,\ - 0.63083494,0.46618757, 0.3914739 , 0.3094324 ]]), - np.array([[55., 55., 79., 55., 55., 67., 79., 82.]]) + np.array( + [ + [ + [0.82776225, 0.5865939, 0.8927653, 0.6302338], + [0.8375764, 0.6424138, 0.9055594, 0.6921875], + [0.57902956, 0.39394334, 0.8342961, 0.5577197], + [0.7949219, 0.6513021, 0.8472295, 0.68427753], + [0.809729, 0.5947042, 0.8539927, 0.62916476], + [0.7258591, 0.08907133, 1.0, 0.86224866], + [0.43100086, 0.37782395, 0.8384069, 0.5616918], + [0.32005906, 0.84334356, 1.0, 1.0], + ] + ] + ), + np.array([[0.86698544, 0.7562499, 0.66414887, 0.64498234, 0.63083494, 0.46618757, 0.3914739, 0.3094324]]), + np.array([[55.0, 55.0, 79.0, 55.0, 55.0, 67.0, 79.0, 82.0]]), ] ground_truth = [ - np.array([[[0.5633255 , 0.34003124, 0.69857144, 0.4009531 ], - [0.56262296, 0.0015625 , 1. 
, 0.5431719 ], - [0.16374707, 0.60728127, 0.813911 , 0.77823436], - [0.5841452 , 0.21182813, 0.65156907, 0.24670312], - [0.8056206 , 0.048875 , 0.90124124, 0.1553125 ], - [0.6729742 , 0.09317187, 0.7696956 , 0.21203125], - [0.3848478 , 0.002125 , 0.61522245, 0.303 ], - [0.61548007, 0. , 0.7015925 , 0.097125 ], - [0.6381967 , 0.1865625 , 0.7184075 , 0.22534375], - [0.6274239 , 0.22104688, 0.71140516, 0.27134374], - [0.39566743, 0.24370313, 0.43578455, 0.284375 ], - [0.2673302 , 0.245625 , 0.3043794 , 0.27353126], - [0.7137705 , 0.15429688, 0.726815 , 0.17114063], - [0.6003747 , 0.25942189, 0.6438876 , 0.27320313], - [0.68845433, 0.13501562, 0.714637 , 0.17245312], - [0.69358313, 0.10959375, 0.7043091 , 0.12409375], - [0.493911 , 0. , 0.72571427, 0.299 ], + np.array( + [ + [ + [0.5633255, 0.34003124, 0.69857144, 0.4009531], + [0.56262296, 0.0015625, 1.0, 0.5431719], + [0.16374707, 0.60728127, 0.813911, 0.77823436], + [0.5841452, 0.21182813, 0.65156907, 0.24670312], + [0.8056206, 0.048875, 0.90124124, 0.1553125], + [0.6729742, 0.09317187, 0.7696956, 0.21203125], + [0.3848478, 0.002125, 0.61522245, 0.303], + [0.61548007, 0.0, 0.7015925, 0.097125], + [0.6381967, 0.1865625, 0.7184075, 0.22534375], + [0.6274239, 0.22104688, 0.71140516, 0.27134374], + [0.39566743, 0.24370313, 0.43578455, 0.284375], + [0.2673302, 0.245625, 0.3043794, 0.27353126], + [0.7137705, 0.15429688, 0.726815, 0.17114063], + [0.6003747, 0.25942189, 0.6438876, 0.27320313], + [0.68845433, 0.13501562, 0.714637, 0.17245312], + [0.69358313, 0.10959375, 0.7043091, 0.12409375], + [0.493911, 0.0, 0.72571427, 0.299], [0.69576114, 0.15107812, 0.70714283, 0.16332813], - [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), + [0.4763466, 0.7769531, 0.54334897, 0.9675937], + ] + ] + ), np.array([[]]), - np.array([[44, 67, 1, 49, 51, 51, 79, 1, 47, 47, 51, 51,\ - 56, 50, 56, 56, 79, 57, 81]]), - np.array([b'000000397133.jpg']) + np.array([[44, 67, 1, 49, 51, 51, 79, 1, 47, 47, 51, 51, 56, 50, 56, 56, 79, 57, 81]]), + np.array([b"000000397133.jpg"]), ] ground_truth_2 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.9358696 , 0.07528409, 0.99891305, 0.25 ], - [0.8242174 , 0.3309659 , 0.93508697, 0.47301137], - [0.77413046, 0.22599432, 0.9858696 , 0.8179261 ], - [0.32582608, 0.8575 , 0.98426086, 0.9984659 ], - [0.77795655, 0.6268466 , 0.89930433, 0.73434657], - [0.5396087 , 0.39053977, 0.8483913 , 0.5615057 ], - [0.58473915, 0.75661933, 0.5998261 , 0.83579546], - [0.80391306, 0.6129829 , 0.8733478 , 0.66201705], - [0.8737391 , 0.6579546 , 0.943 , 0.7053693 ], - [0.775 , 0.6549716 , 0.8227391 , 0.6882955 ], - [0.8130869 , 0.58292615, 0.90526086, 0.62551135], - [0.7844348 , 0.68735796, 0.98182607, 0.83329546], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array( + [ + [ + [0.51508695, 0.2911648, 0.5903478, 0.31360796], + [0.9358696, 0.07528409, 0.99891305, 0.25], + [0.8242174, 0.3309659, 0.93508697, 0.47301137], + [0.77413046, 0.22599432, 0.9858696, 0.8179261], + [0.32582608, 0.8575, 0.98426086, 0.9984659], + [0.77795655, 0.6268466, 0.89930433, 0.73434657], + [0.5396087, 0.39053977, 0.8483913, 0.5615057], + [0.58473915, 0.75661933, 0.5998261, 0.83579546], + [0.80391306, 0.6129829, 0.8733478, 0.66201705], + [0.8737391, 0.6579546, 0.943, 0.7053693], + [0.775, 0.6549716, 0.8227391, 0.6882955], + [0.8130869, 0.58292615, 0.90526086, 0.62551135], + [0.7844348, 0.68735796, 0.98182607, 0.83329546], + [0.872, 0.6190057, 0.9306522, 0.6591761], + ] + ] + ), np.array([[]]), np.array([[64, 62, 62, 67, 82, 52, 79, 81, 55, 55, 
55, 55, 62, 55]]), - np.array([b'000000037777.jpg']) + np.array([b"000000037777.jpg"]), ] - + self.assertEqual(mAP.result(), 0) mAP.update(detection, ground_truth) - + mAP.update(detection, ground_truth) - self.assertEqual(format(mAP.result(), '.5f'), - '0.18182') + self.assertEqual(format(mAP.result(), ".5f"), "0.18182") mAP.update(detection_2, ground_truth_2) - self.assertEqual(format(mAP.result(), '.5f'), - '0.20347') + self.assertEqual(format(mAP.result(), ".5f"), "0.20347") mAP.reset() mAP.update(detection, ground_truth) - self.assertEqual(format(mAP.result(), '.5f'), - '0.18182') + self.assertEqual(format(mAP.result(), ".5f"), "0.18182") ground_truth_1 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[[64, 62]]]), - np.array([b'000000037777.jpg']) + np.array([b"000000037777.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection, ground_truth_1) ground_truth_2 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[64]]), - np.array([b'000000037700.jpg']) + np.array([b"000000037700.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection, ground_truth_2) detection_1 = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. , 0.98301625, 0.520178 ]]]), - np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219], [0.5589304, 0.0, 0.98301625, 0.520178]]]), + np.array([[0.9267181, 0.8510787, 0.60418576, 0.35155892, 0.31158054]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] ground_truth_1 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[64, 62]]), - np.array([b'000000011.jpg']) + np.array([b"000000011.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection_1, ground_truth_1) ground_truth_2 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[64, 62]]), - np.array([b'000000012.jpg']) + np.array([b"000000012.jpg"]), ] detection_2 = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. 
, 0.98301625, 0.520178 ]]]), - np.array([[0.9267181 , 0.8510787]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219], [0.5589304, 0.0, 0.98301625, 0.520178]]]), + np.array([[0.9267181, 0.8510787]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] self.assertRaises(ValueError, mAP.update, detection_2, ground_truth_2) - def test_tensorflow_COCOmAP(self): import os - output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} - metrics = METRICS('tensorflow') - fake_dict = 'dog: 1' - with open('anno.yaml', 'w', encoding="utf-8") as f: + + output_index_mapping = {"num_detections": 0, "boxes": 1, "scores": 2, "classes": 3} + metrics = METRICS("tensorflow") + fake_dict = "dog: 1" + with open("anno.yaml", "w", encoding="utf-8") as f: f.write(fake_dict) - mAP = metrics['COCOmAP']('anno.yaml') - mAP2 = metrics['COCOmAPv2']('anno.yaml', output_index_mapping=output_index_mapping) - self.assertEqual(mAP.category_map_reverse['dog'], 1) - self.assertEqual(mAP2.category_map_reverse['dog'], 1) + mAP = metrics["COCOmAP"]("anno.yaml") + mAP2 = metrics["COCOmAPv2"]("anno.yaml", output_index_mapping=output_index_mapping) + self.assertEqual(mAP.category_map_reverse["dog"], 1) + self.assertEqual(mAP2.category_map_reverse["dog"], 1) detection = [ np.array([[5]]), np.array([[5]]), - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. , 0.98301625, 0.520178 ], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762], - [0.40032804, 0.01218696, 0.6924763 , 0.30341768], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), - np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array( + [ + [ + [0.16117382, 0.59801614, 0.81511605, 0.7858219], + [0.5589304, 0.0, 0.98301625, 0.520178], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + [0.40032804, 0.01218696, 0.6924763, 0.30341768], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + ] + ] + ), + np.array([[0.9267181, 0.8510787, 0.60418576, 0.35155892, 0.31158054]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] ground_truth = [ - np.array([[[0.5633255 , 0.34003124, 0.69857144, 0.4009531 ], - [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), - np.array([['a', 'b']]), + np.array([[[0.5633255, 0.34003124, 0.69857144, 0.4009531], [0.4763466, 0.7769531, 0.54334897, 0.9675937]]]), + np.array([["a", "b"]]), np.array([[]]), - np.array([b'000000397133.jpg']) + np.array([b"000000397133.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection, ground_truth) - os.remove('anno.yaml') + os.remove("anno.yaml") - mAP = metrics['COCOmAP']() - mAP2 = metrics['COCOmAPv2']() + mAP = metrics["COCOmAP"]() + mAP2 = metrics["COCOmAPv2"]() detection = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. 
, 0.98301625, 0.520178 ], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762], - [0.40032804, 0.01218696, 0.6924763 , 0.30341768], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), - np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array( + [ + [ + [0.16117382, 0.59801614, 0.81511605, 0.7858219], + [0.5589304, 0.0, 0.98301625, 0.520178], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + [0.40032804, 0.01218696, 0.6924763, 0.30341768], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + ] + ] + ), + np.array([[0.9267181, 0.8510787, 0.60418576, 0.35155892, 0.31158054]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] detection_2 = [ np.array([[8]]), - np.array([[[0.82776225, 0.5865939 , 0.8927653 , 0.6302338 ], - [0.8375764 , 0.6424138 , 0.9055594 , 0.6921875 ], - [0.57902956, 0.39394334, 0.8342961 , 0.5577197 ], - [0.7949219 , 0.6513021 , 0.8472295 , 0.68427753], - [0.809729 , 0.5947042 , 0.8539927 , 0.62916476], - [0.7258591 , 0.08907133, 1. , 0.86224866], - [0.43100086, 0.37782395, 0.8384069 , 0.5616918 ], - [0.32005906, 0.84334356, 1. , 1. ]]]), - np.array([[0.86698544, 0.7562499 , 0.66414887, 0.64498234,\ - 0.63083494,0.46618757, 0.3914739 , 0.3094324 ]]), - np.array([[55., 55., 79., 55., 55., 67., 79., 82.]]) + np.array( + [ + [ + [0.82776225, 0.5865939, 0.8927653, 0.6302338], + [0.8375764, 0.6424138, 0.9055594, 0.6921875], + [0.57902956, 0.39394334, 0.8342961, 0.5577197], + [0.7949219, 0.6513021, 0.8472295, 0.68427753], + [0.809729, 0.5947042, 0.8539927, 0.62916476], + [0.7258591, 0.08907133, 1.0, 0.86224866], + [0.43100086, 0.37782395, 0.8384069, 0.5616918], + [0.32005906, 0.84334356, 1.0, 1.0], + ] + ] + ), + np.array([[0.86698544, 0.7562499, 0.66414887, 0.64498234, 0.63083494, 0.46618757, 0.3914739, 0.3094324]]), + np.array([[55.0, 55.0, 79.0, 55.0, 55.0, 67.0, 79.0, 82.0]]), ] ground_truth = [ - np.array([[[0.5633255 , 0.34003124, 0.69857144, 0.4009531 ], - [0.56262296, 0.0015625 , 1. , 0.5431719 ], - [0.16374707, 0.60728127, 0.813911 , 0.77823436], - [0.5841452 , 0.21182813, 0.65156907, 0.24670312], - [0.8056206 , 0.048875 , 0.90124124, 0.1553125 ], - [0.6729742 , 0.09317187, 0.7696956 , 0.21203125], - [0.3848478 , 0.002125 , 0.61522245, 0.303 ], - [0.61548007, 0. , 0.7015925 , 0.097125 ], - [0.6381967 , 0.1865625 , 0.7184075 , 0.22534375], - [0.6274239 , 0.22104688, 0.71140516, 0.27134374], - [0.39566743, 0.24370313, 0.43578455, 0.284375 ], - [0.2673302 , 0.245625 , 0.3043794 , 0.27353126], - [0.7137705 , 0.15429688, 0.726815 , 0.17114063], - [0.6003747 , 0.25942189, 0.6438876 , 0.27320313], - [0.68845433, 0.13501562, 0.714637 , 0.17245312], - [0.69358313, 0.10959375, 0.7043091 , 0.12409375], - [0.493911 , 0. 
, 0.72571427, 0.299 ], + np.array( + [ + [ + [0.5633255, 0.34003124, 0.69857144, 0.4009531], + [0.56262296, 0.0015625, 1.0, 0.5431719], + [0.16374707, 0.60728127, 0.813911, 0.77823436], + [0.5841452, 0.21182813, 0.65156907, 0.24670312], + [0.8056206, 0.048875, 0.90124124, 0.1553125], + [0.6729742, 0.09317187, 0.7696956, 0.21203125], + [0.3848478, 0.002125, 0.61522245, 0.303], + [0.61548007, 0.0, 0.7015925, 0.097125], + [0.6381967, 0.1865625, 0.7184075, 0.22534375], + [0.6274239, 0.22104688, 0.71140516, 0.27134374], + [0.39566743, 0.24370313, 0.43578455, 0.284375], + [0.2673302, 0.245625, 0.3043794, 0.27353126], + [0.7137705, 0.15429688, 0.726815, 0.17114063], + [0.6003747, 0.25942189, 0.6438876, 0.27320313], + [0.68845433, 0.13501562, 0.714637, 0.17245312], + [0.69358313, 0.10959375, 0.7043091, 0.12409375], + [0.493911, 0.0, 0.72571427, 0.299], [0.69576114, 0.15107812, 0.70714283, 0.16332813], - [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), + [0.4763466, 0.7769531, 0.54334897, 0.9675937], + ] + ] + ), np.array([[]]), - np.array([[44, 67, 1, 49, 51, 51, 79, 1, 47, 47, 51, 51,\ - 56, 50, 56, 56, 79, 57, 81]]), - np.array([b'000000397133.jpg']) + np.array([[44, 67, 1, 49, 51, 51, 79, 1, 47, 47, 51, 51, 56, 50, 56, 56, 79, 57, 81]]), + np.array([b"000000397133.jpg"]), ] ground_truth_2 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.9358696 , 0.07528409, 0.99891305, 0.25 ], - [0.8242174 , 0.3309659 , 0.93508697, 0.47301137], - [0.77413046, 0.22599432, 0.9858696 , 0.8179261 ], - [0.32582608, 0.8575 , 0.98426086, 0.9984659 ], - [0.77795655, 0.6268466 , 0.89930433, 0.73434657], - [0.5396087 , 0.39053977, 0.8483913 , 0.5615057 ], - [0.58473915, 0.75661933, 0.5998261 , 0.83579546], - [0.80391306, 0.6129829 , 0.8733478 , 0.66201705], - [0.8737391 , 0.6579546 , 0.943 , 0.7053693 ], - [0.775 , 0.6549716 , 0.8227391 , 0.6882955 ], - [0.8130869 , 0.58292615, 0.90526086, 0.62551135], - [0.7844348 , 0.68735796, 0.98182607, 0.83329546], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array( + [ + [ + [0.51508695, 0.2911648, 0.5903478, 0.31360796], + [0.9358696, 0.07528409, 0.99891305, 0.25], + [0.8242174, 0.3309659, 0.93508697, 0.47301137], + [0.77413046, 0.22599432, 0.9858696, 0.8179261], + [0.32582608, 0.8575, 0.98426086, 0.9984659], + [0.77795655, 0.6268466, 0.89930433, 0.73434657], + [0.5396087, 0.39053977, 0.8483913, 0.5615057], + [0.58473915, 0.75661933, 0.5998261, 0.83579546], + [0.80391306, 0.6129829, 0.8733478, 0.66201705], + [0.8737391, 0.6579546, 0.943, 0.7053693], + [0.775, 0.6549716, 0.8227391, 0.6882955], + [0.8130869, 0.58292615, 0.90526086, 0.62551135], + [0.7844348, 0.68735796, 0.98182607, 0.83329546], + [0.872, 0.6190057, 0.9306522, 0.6591761], + ] + ] + ), np.array([[]]), np.array([[64, 62, 62, 67, 82, 52, 79, 81, 55, 55, 55, 55, 62, 55]]), - np.array([b'000000037777.jpg']) + np.array([b"000000037777.jpg"]), ] - + self.assertEqual(mAP.result(), 0) self.assertEqual(mAP2.result(), 0) mAP.update(detection, ground_truth) - + mAP.update(detection, ground_truth) - self.assertEqual(format(mAP.result(), '.5f'), - '0.14149') + self.assertEqual(format(mAP.result(), ".5f"), "0.14149") mAP.update(detection_2, ground_truth_2) - self.assertEqual(format(mAP.result(), '.5f'), - '0.13366') + self.assertEqual(format(mAP.result(), ".5f"), "0.13366") mAP.reset() mAP.update(detection, ground_truth) - self.assertEqual(format(mAP.result(), '.5f'), - '0.14149') + self.assertEqual(format(mAP.result(), ".5f"), "0.14149") mAP2.update(detection, ground_truth) - + 
mAP2.update(detection, ground_truth) - self.assertEqual(format(mAP2.result(), '.5f'), - '0.14149') + self.assertEqual(format(mAP2.result(), ".5f"), "0.14149") + + mAP2 = metrics["COCOmAPv2"](output_index_mapping=output_index_mapping) - mAP2 = metrics['COCOmAPv2'](output_index_mapping=output_index_mapping) - mAP2.update(detection_2, ground_truth_2) - self.assertEqual(format(mAP2.result(), '.5f'), - '0.20520') + self.assertEqual(format(mAP2.result(), ".5f"), "0.20520") mAP2.reset() mAP2.update(detection_2, ground_truth_2) - self.assertEqual(format(mAP2.result(), '.5f'), - '0.20520') - - mAP2 = metrics['COCOmAPv2']() - + self.assertEqual(format(mAP2.result(), ".5f"), "0.20520") + + mAP2 = metrics["COCOmAPv2"]() + ground_truth_1 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[[64, 62]]]), - np.array([b'000000037777.jpg']) + np.array([b"000000037777.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection, ground_truth_1) self.assertRaises(ValueError, mAP2.update, detection, ground_truth_1) - + ground_truth_2 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[64]]), - np.array([b'000000037700.jpg']) + np.array([b"000000037700.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection, ground_truth_2) self.assertRaises(ValueError, mAP2.update, detection, ground_truth_2) - + detection_1 = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. , 0.98301625, 0.520178 ]]]), - np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219], [0.5589304, 0.0, 0.98301625, 0.520178]]]), + np.array([[0.9267181, 0.8510787, 0.60418576, 0.35155892, 0.31158054]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] ground_truth_1 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[64, 62]]), - np.array([b'000000011.jpg']) + np.array([b"000000011.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection_1, ground_truth_1) self.assertRaises(ValueError, mAP2.update, detection_1, ground_truth_1) - + ground_truth_2 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[64, 62]]), - np.array([b'000000012.jpg']) + np.array([b"000000012.jpg"]), ] detection_2 = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. 
, 0.98301625, 0.520178 ]]]), - np.array([[0.9267181 , 0.8510787]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219], [0.5589304, 0.0, 0.98301625, 0.520178]]]), + np.array([[0.9267181, 0.8510787]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] self.assertRaises(ValueError, mAP.update, detection_2, ground_truth_2) self.assertRaises(ValueError, mAP2.update, detection_2, ground_truth_2) - + @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows now") def test__accuracy(self): predicts1 = [1, 0, 1, 1] @@ -840,18 +950,18 @@ def test__accuracy(self): predicts2 = [[0, 0], [0, 0]] labels2 = [[0, 1], [1, 1]] - + predicts3 = [[[0, 1], [0, 0], [0, 1]], [[0, 1], [0, 1], [0, 1]]] labels3 = [[[0, 1], [0, 1], [1, 0]], [[1, 0], [1, 0], [1, 0]]] - predicts4 = [[0.2, 0.8], [0.1, 0.9], [0.3, 0.7], [0.4, 0.6]] #1,1,1,1 + predicts4 = [[0.2, 0.8], [0.1, 0.9], [0.3, 0.7], [0.4, 0.6]] # 1,1,1,1 labels4 = [0, 1, 0, 0] predicts5 = [[0], [0]] labels5 = [0, 1] - metrics = METRICS('pytorch') - acc = metrics['Accuracy']() + metrics = METRICS("pytorch") + acc = metrics["Accuracy"]() acc.update(predicts1, labels1) acc_result = acc.result() self.assertEqual(acc_result, 0.5) @@ -868,8 +978,8 @@ def test__accuracy(self): acc.update(predicts5, labels5) self.assertEqual(acc.result(), 1.0) - metrics = METRICS('mxnet') - acc = metrics['Accuracy']() + metrics = METRICS("mxnet") + acc = metrics["Accuracy"]() acc.update(predicts1, labels1) acc_result = acc.result() self.assertEqual(acc_result, 0.5) @@ -883,8 +993,8 @@ def test__accuracy(self): acc.update(predicts4, labels4) self.assertEqual(acc.result(), 0.25) - metrics = METRICS('onnxrt_qlinearops') - acc = metrics['Accuracy']() + metrics = METRICS("onnxrt_qlinearops") + acc = metrics["Accuracy"]() acc.update(predicts1, labels1) acc_result = acc.result() self.assertEqual(acc_result, 0.5) @@ -901,16 +1011,15 @@ def test__accuracy(self): acc.reset() acc.update(1, 1) self.assertEqual(acc.result(), 1.0) - + wrong_predictions = [1, 0, 0] wrong_labels = [[0, 1, 1]] self.assertRaises(ValueError, acc.update, wrong_predictions, wrong_labels) - @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows yet") def test_mxnet_accuracy(self): - metrics = METRICS('mxnet') - acc = metrics['Accuracy']() + metrics = METRICS("mxnet") + acc = metrics["Accuracy"]() predicts = [1, 0, 1, 1] labels = [0, 1, 1, 1] acc.update(predicts, labels) @@ -924,17 +1033,17 @@ def test_mse(self): predicts2 = [1, 1, 1, 1] labels2 = [0, 1, 1, 0] - metrics = METRICS('onnxrt_qlinearops') - mse = metrics['MSE'](compare_label=False) + metrics = METRICS("onnxrt_qlinearops") + mse = metrics["MSE"](compare_label=False) mse.update(predicts1, labels1) mse_result = mse.result() self.assertEqual(mse_result, 0.75) mse.update(predicts2, labels2) mse_result = mse.result() self.assertEqual(mse_result, 0.625) - - metrics = METRICS('tensorflow') - mse = metrics['MSE'](compare_label=False) + + metrics = METRICS("tensorflow") + mse = metrics["MSE"](compare_label=False) mse.update(predicts1, labels1) mse_result = mse.result() self.assertEqual(mse_result, 0.75) @@ -942,9 +1051,8 @@ def test_mse(self): mse_result = mse.result() self.assertEqual(mse_result, 0.625) - - metrics = METRICS('mxnet') - mse = metrics['MSE']() + metrics = METRICS("mxnet") + mse = metrics["MSE"]() mse.update(predicts1, labels1) mse_result = mse.result() self.assertEqual(mse_result, 0.75) @@ -952,8 +1060,8 @@ def test_mse(self): mse_result = mse.result() 
self.assertEqual(mse_result, 0.625) - metrics = METRICS('pytorch') - mse = metrics['MSE']() + metrics = METRICS("pytorch") + mse = metrics["MSE"]() mse.update(predicts1, labels1) mse_result = mse.result() self.assertEqual(mse_result, 0.75) @@ -968,8 +1076,8 @@ def test_mae(self): predicts2 = [1, 1, 1, 1] labels2 = [1, 1, 1, 0] - metrics = METRICS('tensorflow') - mae = metrics['MAE']() + metrics = METRICS("tensorflow") + mae = metrics["MAE"]() mae.update(predicts1, labels1) mae_result = mae.result() self.assertEqual(mae_result, 0.75) @@ -981,8 +1089,8 @@ def test_mae(self): mae_result = mae.result() self.assertEqual(mae_result, 0.25) - metrics = METRICS('pytorch') - mae = metrics['MAE']() + metrics = METRICS("pytorch") + mae = metrics["MAE"]() mae.update(predicts1, labels1) mae_result = mae.result() self.assertEqual(mae_result, 0.75) @@ -990,8 +1098,8 @@ def test_mae(self): mae_result = mae.result() self.assertEqual(mae_result, 0.5) - metrics = METRICS('mxnet') - mae = metrics['MAE']() + metrics = METRICS("mxnet") + mae = metrics["MAE"]() mae.update(predicts1, labels1) mae_result = mae.result() self.assertEqual(mae_result, 0.75) @@ -999,19 +1107,19 @@ def test_mae(self): mae_result = mae.result() self.assertEqual(mae_result, 0.5) - metrics = METRICS('onnxrt_qlinearops') - mae = metrics['MAE']() + metrics = METRICS("onnxrt_qlinearops") + mae = metrics["MAE"]() mae.update(predicts1, labels1) mae_result = mae.result() self.assertEqual(mae_result, 0.75) mae.update(predicts2, labels2) mae_result = mae.result() self.assertEqual(mae_result, 0.5) - + self.assertRaises(AssertionError, mae.update, [1], [1, 2]) - self.assertRaises(AssertionError, mae.update, 1, [1,2]) + self.assertRaises(AssertionError, mae.update, 1, [1, 2]) self.assertRaises(AssertionError, mae.update, [1, 2], [1]) - self.assertRaises(AssertionError, mae.update, 1, np.array([1,2])) + self.assertRaises(AssertionError, mae.update, 1, np.array([1, 2])) @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows now") def test_rmse(self): @@ -1020,8 +1128,8 @@ def test_rmse(self): predicts2 = [1, 1, 1, 1] labels2 = [1, 0, 0, 0] - metrics = METRICS('tensorflow') - rmse = metrics['RMSE']() + metrics = METRICS("tensorflow") + rmse = metrics["RMSE"]() rmse.update(predicts1, labels1) rmse_result = rmse.result() self.assertEqual(rmse_result, 0.5) @@ -1030,8 +1138,8 @@ def test_rmse(self): rmse_result = rmse.result() self.assertAlmostEqual(rmse_result, np.sqrt(0.75)) - metrics = METRICS('pytorch') - rmse = metrics['RMSE']() + metrics = METRICS("pytorch") + rmse = metrics["RMSE"]() rmse.update(predicts1, labels1) rmse_result = rmse.result() self.assertEqual(rmse_result, 0.5) @@ -1039,8 +1147,8 @@ def test_rmse(self): rmse_result = rmse.result() self.assertAlmostEqual(rmse_result, np.sqrt(0.5)) - metrics = METRICS('mxnet') - rmse = metrics['RMSE']() + metrics = METRICS("mxnet") + rmse = metrics["RMSE"]() rmse.update(predicts1, labels1) rmse_result = rmse.result() self.assertEqual(rmse_result, 0.5) @@ -1048,8 +1156,8 @@ def test_rmse(self): rmse_result = rmse.result() self.assertAlmostEqual(rmse_result, np.sqrt(0.5)) - metrics = METRICS('onnxrt_qlinearops') - rmse = metrics['RMSE']() + metrics = METRICS("onnxrt_qlinearops") + rmse = metrics["RMSE"]() rmse.update(predicts1, labels1) rmse_result = rmse.result() self.assertEqual(rmse_result, 0.5) @@ -1058,8 +1166,8 @@ def test_rmse(self): self.assertAlmostEqual(rmse_result, np.sqrt(0.5)) def test_loss(self): - metrics = METRICS('pytorch') - loss = metrics['Loss']() + 
metrics = METRICS("pytorch") + loss = metrics["Loss"]() predicts = [1, 0, 0, 1] labels = [0, 1, 0, 0] loss.update(predicts, labels) @@ -1076,9 +1184,8 @@ def test_loss(self): loss.update(predicts, labels) self.assertEqual(loss.result(), 0.5) - - metrics = METRICS('onnxrt_qlinearops') - loss = metrics['Loss']() + metrics = METRICS("onnxrt_qlinearops") + loss = metrics["Loss"]() predicts = [1, 0, 0, 1] labels = [0, 1, 0, 0] loss.update(predicts, labels) @@ -1094,10 +1201,10 @@ def test_loss(self): labels = [0, 1, 0, 0] loss.update(predicts, labels) self.assertEqual(loss.result(), 0.5) - + def test_ROC(self): - metrics = METRICS('pytorch') - roc = metrics['ROC']() + metrics = METRICS("pytorch") + roc = metrics["ROC"]() predicts = [[1, 0, 0, 1]] labels = [[0, 1, 0, 0]] roc.update(predicts, labels) @@ -1115,15 +1222,28 @@ def test_ROC(self): self.assertEqual(roc.result(), 0.25) def test_tensorflow_SquadF1(self): - metrics = METRICS('tensorflow') - squad = metrics['SquadF1']() - labels = [{'paragraphs':\ - [{'qas':[{'answers': [{'answer_start': 177, 'text': 'Denver Broncos'}, \ - {'answer_start': 177, 'text': 'Denver Broncos'}, \ - {'answer_start': 177, 'text': 'Denver Broncos'}], \ - 'question': 'Which NFL team represented the AFC at Super Bowl 50?', \ - 'id': '56be4db0acb8001400a502ec'}]}]}] - predicts = {'56be4db0acb8001400a502ec': 'Denver Broncos'} + metrics = METRICS("tensorflow") + squad = metrics["SquadF1"]() + labels = [ + { + "paragraphs": [ + { + "qas": [ + { + "answers": [ + {"answer_start": 177, "text": "Denver Broncos"}, + {"answer_start": 177, "text": "Denver Broncos"}, + {"answer_start": 177, "text": "Denver Broncos"}, + ], + "question": "Which NFL team represented the AFC at Super Bowl 50?", + "id": "56be4db0acb8001400a502ec", + } + ] + } + ] + } + ] + predicts = {"56be4db0acb8001400a502ec": "Denver Broncos"} squad.update(predicts, labels) self.assertEqual(squad.result(), 100.0) squad.reset() @@ -1131,10 +1251,12 @@ def test_tensorflow_SquadF1(self): self.assertEqual(squad.result(), 100.0) def test_PyTorchLoss(self): - from neural_compressor.experimental.metric.metric import PyTorchLoss import torch + + from neural_compressor.experimental.metric.metric import PyTorchLoss + pytorch_loss = PyTorchLoss() - pytorch_loss.update([torch.ones(2,3), torch.ones(2,3)]) + pytorch_loss.update([torch.ones(2, 3), torch.ones(2, 3)]) self.assertEqual(pytorch_loss.compute(), 3) pytorch_loss.reset() self.assertEqual(pytorch_loss._num_examples, 0) @@ -1144,6 +1266,7 @@ def test_WrapMetric(self): metric_v2 = CorrectMetric_v2() from neural_compressor.experimental.metric.metric import WrapPyTorchMetric + pytorch_metric = WrapPyTorchMetric(metirc) self.assertIsInstance(pytorch_metric.metric, CorrectMetric) self.assertIsNone(pytorch_metric.hvd) @@ -1153,6 +1276,7 @@ def test_WrapMetric(self): self.assertEqual(len(pytorch_metric.metric.item), 0) from neural_compressor.experimental.metric.metric import WrapONNXRTMetric + onnx_metric = WrapONNXRTMetric(metric_v2) self.assertIsInstance(onnx_metric.metric, CorrectMetric_v2) self.assertIsNone(onnx_metric.hvd) @@ -1161,5 +1285,6 @@ def test_WrapMetric(self): onnx_metric.reset() self.assertEqual(len(onnx_metric.metric.item), 0) + if __name__ == "__main__": unittest.main() diff --git a/test/metric/test_metrics.py b/test/metric/test_metrics.py index a3c52841c20..b0a69a0128c 100644 --- a/test/metric/test_metrics.py +++ b/test/metric/test_metrics.py @@ -1,15 +1,19 @@ """Tests for the metrics module.""" -import numpy as np -import unittest import platform +import 
unittest + +import numpy as np + from neural_compressor.metric import METRICS -from neural_compressor.metric.f1 import evaluate from neural_compressor.metric.evaluate_squad import evaluate as evaluate_squad +from neural_compressor.metric.f1 import evaluate + class InCorrectMetric: def __init__(self): self.item = None + class CorrectMetric: def __init__(self): self.item = [] @@ -23,20 +27,21 @@ def result(self): def reset(self): self.item = [] + class TestMetrics(unittest.TestCase): def testUserMetric(self): - from neural_compressor.experimental import common, Quantization, Benchmark, \ - Graph_Optimization + from neural_compressor.experimental import Benchmark, Graph_Optimization, Quantization, common + for i in [Quantization(), Benchmark(), Graph_Optimization()]: item = i with self.assertRaises(AssertionError): item.metric = InCorrectMetric() - item.framework = 'tensorflow' + item.framework = "tensorflow" item.metric = common.Metric(CorrectMetric, str(i)) def testmIOU(self): - metrics = METRICS('tensorflow') - miou = metrics['mIOU']() + metrics = METRICS("tensorflow") + miou = metrics["mIOU"]() preds = np.array([0, 0, 1, 1]) labels = np.array([0, 1, 0, 1]) miou.update(preds, labels) @@ -49,58 +54,66 @@ def testmIOU(self): self.assertAlmostEqual(miou.result(), 0.58333333) def testBLEU(self): - metrics = METRICS('tensorflow') - bleu = metrics['BLEU']() - preds = ['Gutach: Mehr Sicherheit für Fußgänger'] - labels = ('Gutach: Noch mehr Sicherheit für Fußgänger',) + metrics = METRICS("tensorflow") + bleu = metrics["BLEU"]() + preds = ["Gutach: Mehr Sicherheit für Fußgänger"] + labels = ("Gutach: Noch mehr Sicherheit für Fußgänger",) bleu.update(preds, labels) self.assertAlmostEqual(bleu.result(), 51.1507809) bleu.reset() - preds = ['Dies wurde auch von Peter Arnold vom Offenburg District Office bestätigt.'] - labels = ('Dies bestätigt auch Peter Arnold vom Landratsamt Offenburg.',) + preds = ["Dies wurde auch von Peter Arnold vom Offenburg District Office bestätigt."] + labels = ("Dies bestätigt auch Peter Arnold vom Landratsamt Offenburg.",) bleu.update(preds, labels) self.assertAlmostEqual(bleu.result(), 16.108992695) with self.assertRaises(ValueError): - bleu.update(['a','b'], ('c',)) + bleu.update(["a", "b"], ("c",)) def test_onnxrt_GLUE(self): - metrics = METRICS('onnxrt_qlinearops') - glue = metrics['GLUE']('mrpc') - preds = [np.array( - [[-3.2443411, 3.0909934], - [2.0500996, -2.3100944], - [1.870293 , -2.0741048], - [-2.8377204, 2.617834], - [2.008347 , -2.0215416], - [-2.9693947, 2.7782154], - [-2.9949608, 2.7887983], - [-3.0623112, 2.8748074]]) + metrics = METRICS("onnxrt_qlinearops") + glue = metrics["GLUE"]("mrpc") + preds = [ + np.array( + [ + [-3.2443411, 3.0909934], + [2.0500996, -2.3100944], + [1.870293, -2.0741048], + [-2.8377204, 2.617834], + [2.008347, -2.0215416], + [-2.9693947, 2.7782154], + [-2.9949608, 2.7887983], + [-3.0623112, 2.8748074], + ] + ) ] labels = [np.array([1, 0, 0, 1, 0, 1, 0, 1])] glue.update(preds, labels) self.assertEqual(glue.result(), 0.875) - preds_2 = [np.array( - [[-3.1296735, 2.8356276], - [-3.172515 , 2.9173899], - [-3.220131 , 3.0916846], - [2.1452675, -1.9398905], - [1.5475761, -1.9101546], - [-2.9797182, 2.721741], - [-3.2052834, 2.9934788], - [-2.7451005, 2.622343]]) + preds_2 = [ + np.array( + [ + [-3.1296735, 2.8356276], + [-3.172515, 2.9173899], + [-3.220131, 3.0916846], + [2.1452675, -1.9398905], + [1.5475761, -1.9101546], + [-2.9797182, 2.721741], + [-3.2052834, 2.9934788], + [-2.7451005, 2.622343], + ] + ) ] labels_2 = [np.array([1, 1, 
1, 0, 0, 1, 1, 1])] glue.update(preds_2, labels_2) - self.assertEqual(glue.result(), 0.9375) - + self.assertEqual(glue.result(), 0.9375) + glue.reset() glue.update(preds, labels) self.assertEqual(glue.result(), 0.875) def test_tensorflow_F1(self): - metrics = METRICS('tensorflow') - F1 = metrics['F1']() + metrics = METRICS("tensorflow") + F1 = metrics["F1"]() preds = [1, 1, 1, 1] labels = [0, 1, 1, 0] @@ -108,30 +121,55 @@ def test_tensorflow_F1(self): self.assertEqual(F1.result(), 0.5) def test_squad_evaluate(self): - label = [{'paragraphs':\ - [{'qas':[{'answers': [{'answer_start': 177, 'text': 'Denver Broncos'}, \ - {'answer_start': 177, 'text': 'Denver Broncos'}, \ - {'answer_start': 177, 'text': 'Denver Broncos'}], \ - 'question': 'Which NFL team represented the AFC at Super Bowl 50?', \ - 'id': '56be4db0acb8001400a502ec'}]}]}] - preds = {'56be4db0acb8001400a502ec': 'Denver Broncos'} + label = [ + { + "paragraphs": [ + { + "qas": [ + { + "answers": [ + {"answer_start": 177, "text": "Denver Broncos"}, + {"answer_start": 177, "text": "Denver Broncos"}, + {"answer_start": 177, "text": "Denver Broncos"}, + ], + "question": "Which NFL team represented the AFC at Super Bowl 50?", + "id": "56be4db0acb8001400a502ec", + } + ] + } + ] + } + ] + preds = {"56be4db0acb8001400a502ec": "Denver Broncos"} f1 = evaluate(preds, label) - self.assertEqual(f1, 100.) - dataset = [{'paragraphs':\ - [{'qas':[{'answers': [{'answer_start': 177, 'text': 'Denver Broncos'}, \ - {'answer_start': 177, 'text': 'Denver Broncos'}, \ - {'answer_start': 177, 'text': 'Denver Broncos'}], \ - 'question': 'Which NFL team represented the AFC at Super Bowl 50?', \ - 'id': '56be4db0acb8001400a502ec'}]}]}] - predictions = {'56be4db0acb8001400a502ec': 'Denver Broncos'} - f1_squad = evaluate_squad(dataset,predictions) - self.assertEqual(f1_squad['f1'], 100.) - self.assertEqual(f1_squad['exact_match'], 100.) 
- + self.assertEqual(f1, 100.0) + dataset = [ + { + "paragraphs": [ + { + "qas": [ + { + "answers": [ + {"answer_start": 177, "text": "Denver Broncos"}, + {"answer_start": 177, "text": "Denver Broncos"}, + {"answer_start": 177, "text": "Denver Broncos"}, + ], + "question": "Which NFL team represented the AFC at Super Bowl 50?", + "id": "56be4db0acb8001400a502ec", + } + ] + } + ] + } + ] + predictions = {"56be4db0acb8001400a502ec": "Denver Broncos"} + f1_squad = evaluate_squad(dataset, predictions) + self.assertEqual(f1_squad["f1"], 100.0) + self.assertEqual(f1_squad["exact_match"], 100.0) def test_pytorch_F1(self): - metrics = METRICS('pytorch') - F1 = metrics['F1']() + metrics = METRICS("pytorch") + F1 = metrics["F1"]() F1.reset() preds = [1, 1] labels = [2, 1, 1] @@ -141,8 +179,8 @@ def test_pytorch_F1(self): @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows yet") def test_mxnet_F1(self): - metrics = METRICS('mxnet') - F1 = metrics['F1']() + metrics = METRICS("mxnet") + F1 = metrics["F1"]() preds = [0, 1, 1, 1, 1, 0] labels = [0, 1, 1, 1] @@ -150,17 +188,17 @@ def test_mxnet_F1(self): self.assertEqual(F1.result(), 0.8) def test_onnx_topk(self): - metrics = METRICS('onnxrt_qlinearops') - top1 = metrics['topk']() + metrics = METRICS("onnxrt_qlinearops") + top1 = metrics["topk"]() top1.reset() self.assertEqual(top1.result(), 0) self.assertEqual(top1.result(), 0) - top2 = metrics['topk'](k=2) - top3 = metrics['topk'](k=3) + top2 = metrics["topk"](k=2) + top3 = metrics["topk"](k=3) predicts = [[0, 0.2, 0.9, 0.3], [0, 0.9, 0.8, 0]] single_predict = [0, 0.2, 0.9, 0.3] - + labels = [[0, 1, 0, 0], [0, 0, 1, 0]] sparse_labels = [2, 2] single_label = 2 @@ -191,16 +229,16 @@ def test_onnx_topk(self): @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows yet") def test_mxnet_topk(self): - metrics = METRICS('mxnet') - top1 = metrics['topk']() + metrics = METRICS("mxnet") + top1 = metrics["topk"]() top1.reset() self.assertEqual(top1.result(), 0) - top2 = metrics['topk'](k=2) - top3 = metrics['topk'](k=3) + top2 = metrics["topk"](k=2) + top3 = metrics["topk"](k=3) predicts = [[0, 0.2, 0.9, 0.3], [0, 0.9, 0.8, 0]] single_predict = [0, 0.2, 0.9, 0.3] - + labels = [[0, 1, 0, 0], [0, 0, 1, 0]] sparse_labels = [2, 2] single_label = 2 @@ -230,16 +268,16 @@ def test_mxnet_topk(self): self.assertEqual(top3.result(), 1) def test_tensorflow_topk(self): - metrics = METRICS('tensorflow') - top1 = metrics['topk']() + metrics = METRICS("tensorflow") + top1 = metrics["topk"]() top1.reset() self.assertEqual(top1.result(), 0) - top2 = metrics['topk'](k=2) - top3 = metrics['topk'](k=3) + top2 = metrics["topk"](k=2) + top3 = metrics["topk"](k=3) predicts = [[0, 0.2, 0.9, 0.3], [0, 0.9, 0.8, 0]] single_predict = [0, 0.2, 0.9, 0.3] - + labels = [[0, 1, 0, 0], [0, 0, 1, 0]] sparse_labels = [2, 2] single_label = 2 @@ -267,540 +305,593 @@ def test_tensorflow_topk(self): self.assertEqual(top1.result(), 0.4) self.assertEqual(top2.result(), 0.8) self.assertEqual(top3.result(), 1) - + def test_tensorflow_mAP(self): import json import os - metrics = METRICS('tensorflow') - fake_dict = 'dog: 1' - with open('anno.yaml', 'w', encoding="utf-8") as f: + + metrics = METRICS("tensorflow") + fake_dict = "dog: 1" + with open("anno.yaml", "w", encoding="utf-8") as f: f.write(fake_dict) - mAP = metrics['mAP']('anno.yaml') - self.assertEqual(mAP.category_map_reverse['dog'], 1) + mAP = metrics["mAP"]("anno.yaml") + self.assertEqual(mAP.category_map_reverse["dog"], 1) 
detection = [ np.array([[5]]), np.array([[5]]), - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. , 0.98301625, 0.520178 ], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762], - [0.40032804, 0.01218696, 0.6924763 , 0.30341768], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), - np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array( + [ + [ + [0.16117382, 0.59801614, 0.81511605, 0.7858219], + [0.5589304, 0.0, 0.98301625, 0.520178], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + [0.40032804, 0.01218696, 0.6924763, 0.30341768], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + ] + ] + ), + np.array([[0.9267181, 0.8510787, 0.60418576, 0.35155892, 0.31158054]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] ground_truth = [ - np.array([[[0.5633255 , 0.34003124, 0.69857144, 0.4009531 ], - [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), - np.array([['a', 'b']]), + np.array([[[0.5633255, 0.34003124, 0.69857144, 0.4009531], [0.4763466, 0.7769531, 0.54334897, 0.9675937]]]), + np.array([["a", "b"]]), np.array([[]]), - np.array([b'000000397133.jpg']) + np.array([b"000000397133.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection, ground_truth) detection = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), - np.array([[0.9267181 , 0.8510787]]), - np.array([[ 1., 1.]]) + np.array( + [[[0.16117382, 0.59801614, 0.81511605, 0.7858219], [0.62706745, 0.35748824, 0.6892729, 0.41513762]]] + ), + np.array([[0.9267181, 0.8510787]]), + np.array([[1.0, 1.0]]), ] ground_truth = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), - np.array([[b'dog', b'dog']]), + np.array( + [[[0.16117382, 0.59801614, 0.81511605, 0.7858219], [0.62706745, 0.35748824, 0.6892729, 0.41513762]]] + ), + np.array([[b"dog", b"dog"]]), np.array([[]]), - np.array([b'000000397133.jpg']) + np.array([b"000000397133.jpg"]), ] mAP.update(detection, ground_truth) mAP.result() - self.assertEqual(format(mAP.result(), '.5f'), - '1.00000') - + self.assertEqual(format(mAP.result(), ".5f"), "1.00000") + detection = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. , 0.98301625, 0.520178 ], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762], - [0.40032804, 0.01218696, 0.6924763 , 0.30341768], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), - np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array( + [ + [ + [0.16117382, 0.59801614, 0.81511605, 0.7858219], + [0.5589304, 0.0, 0.98301625, 0.520178], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + [0.40032804, 0.01218696, 0.6924763, 0.30341768], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + ] + ] + ), + np.array([[0.9267181, 0.8510787, 0.60418576, 0.35155892, 0.31158054]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] detection_2 = [ np.array([[8]]), - np.array([[[0.82776225, 0.5865939 , 0.8927653 , 0.6302338 ], - [0.8375764 , 0.6424138 , 0.9055594 , 0.6921875 ], - [0.57902956, 0.39394334, 0.8342961 , 0.5577197 ], - [0.7949219 , 0.6513021 , 0.8472295 , 0.68427753], - [0.809729 , 0.5947042 , 0.8539927 , 0.62916476], - [0.7258591 , 0.08907133, 1. , 0.86224866], - [0.43100086, 0.37782395, 0.8384069 , 0.5616918 ], - [0.32005906, 0.84334356, 1. , 1. 
]]]), - np.array([[0.86698544, 0.7562499 , 0.66414887, 0.64498234,\ - 0.63083494,0.46618757, 0.3914739 , 0.3094324 ]]), - np.array([[55., 55., 79., 55., 55., 67., 79., 82.]]) + np.array( + [ + [ + [0.82776225, 0.5865939, 0.8927653, 0.6302338], + [0.8375764, 0.6424138, 0.9055594, 0.6921875], + [0.57902956, 0.39394334, 0.8342961, 0.5577197], + [0.7949219, 0.6513021, 0.8472295, 0.68427753], + [0.809729, 0.5947042, 0.8539927, 0.62916476], + [0.7258591, 0.08907133, 1.0, 0.86224866], + [0.43100086, 0.37782395, 0.8384069, 0.5616918], + [0.32005906, 0.84334356, 1.0, 1.0], + ] + ] + ), + np.array([[0.86698544, 0.7562499, 0.66414887, 0.64498234, 0.63083494, 0.46618757, 0.3914739, 0.3094324]]), + np.array([[55.0, 55.0, 79.0, 55.0, 55.0, 67.0, 79.0, 82.0]]), ] ground_truth = [ - np.array([[[0.5633255 , 0.34003124, 0.69857144, 0.4009531 ], - [0.56262296, 0.0015625 , 1. , 0.5431719 ], - [0.16374707, 0.60728127, 0.813911 , 0.77823436], - [0.5841452 , 0.21182813, 0.65156907, 0.24670312], - [0.8056206 , 0.048875 , 0.90124124, 0.1553125 ], - [0.6729742 , 0.09317187, 0.7696956 , 0.21203125], - [0.3848478 , 0.002125 , 0.61522245, 0.303 ], - [0.61548007, 0. , 0.7015925 , 0.097125 ], - [0.6381967 , 0.1865625 , 0.7184075 , 0.22534375], - [0.6274239 , 0.22104688, 0.71140516, 0.27134374], - [0.39566743, 0.24370313, 0.43578455, 0.284375 ], - [0.2673302 , 0.245625 , 0.3043794 , 0.27353126], - [0.7137705 , 0.15429688, 0.726815 , 0.17114063], - [0.6003747 , 0.25942189, 0.6438876 , 0.27320313], - [0.68845433, 0.13501562, 0.714637 , 0.17245312], - [0.69358313, 0.10959375, 0.7043091 , 0.12409375], - [0.493911 , 0. , 0.72571427, 0.299 ], + np.array( + [ + [ + [0.5633255, 0.34003124, 0.69857144, 0.4009531], + [0.56262296, 0.0015625, 1.0, 0.5431719], + [0.16374707, 0.60728127, 0.813911, 0.77823436], + [0.5841452, 0.21182813, 0.65156907, 0.24670312], + [0.8056206, 0.048875, 0.90124124, 0.1553125], + [0.6729742, 0.09317187, 0.7696956, 0.21203125], + [0.3848478, 0.002125, 0.61522245, 0.303], + [0.61548007, 0.0, 0.7015925, 0.097125], + [0.6381967, 0.1865625, 0.7184075, 0.22534375], + [0.6274239, 0.22104688, 0.71140516, 0.27134374], + [0.39566743, 0.24370313, 0.43578455, 0.284375], + [0.2673302, 0.245625, 0.3043794, 0.27353126], + [0.7137705, 0.15429688, 0.726815, 0.17114063], + [0.6003747, 0.25942189, 0.6438876, 0.27320313], + [0.68845433, 0.13501562, 0.714637, 0.17245312], + [0.69358313, 0.10959375, 0.7043091, 0.12409375], + [0.493911, 0.0, 0.72571427, 0.299], [0.69576114, 0.15107812, 0.70714283, 0.16332813], - [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), + [0.4763466, 0.7769531, 0.54334897, 0.9675937], + ] + ] + ), np.array([[]]), - np.array([[44, 67, 1, 49, 51, 51, 79, 1, 47, 47, 51, 51,\ - 56, 50, 56, 56, 79, 57, 81]]), - np.array([b'000000397133.jpg']) + np.array([[44, 67, 1, 49, 51, 51, 79, 1, 47, 47, 51, 51, 56, 50, 56, 56, 79, 57, 81]]), + np.array([b"000000397133.jpg"]), ] ground_truth_2 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.9358696 , 0.07528409, 0.99891305, 0.25 ], - [0.8242174 , 0.3309659 , 0.93508697, 0.47301137], - [0.77413046, 0.22599432, 0.9858696 , 0.8179261 ], - [0.32582608, 0.8575 , 0.98426086, 0.9984659 ], - [0.77795655, 0.6268466 , 0.89930433, 0.73434657], - [0.5396087 , 0.39053977, 0.8483913 , 0.5615057 ], - [0.58473915, 0.75661933, 0.5998261 , 0.83579546], - [0.80391306, 0.6129829 , 0.8733478 , 0.66201705], - [0.8737391 , 0.6579546 , 0.943 , 0.7053693 ], - [0.775 , 0.6549716 , 0.8227391 , 0.6882955 ], - [0.8130869 , 0.58292615, 0.90526086, 0.62551135], - [0.7844348 
, 0.68735796, 0.98182607, 0.83329546], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array( + [ + [ + [0.51508695, 0.2911648, 0.5903478, 0.31360796], + [0.9358696, 0.07528409, 0.99891305, 0.25], + [0.8242174, 0.3309659, 0.93508697, 0.47301137], + [0.77413046, 0.22599432, 0.9858696, 0.8179261], + [0.32582608, 0.8575, 0.98426086, 0.9984659], + [0.77795655, 0.6268466, 0.89930433, 0.73434657], + [0.5396087, 0.39053977, 0.8483913, 0.5615057], + [0.58473915, 0.75661933, 0.5998261, 0.83579546], + [0.80391306, 0.6129829, 0.8733478, 0.66201705], + [0.8737391, 0.6579546, 0.943, 0.7053693], + [0.775, 0.6549716, 0.8227391, 0.6882955], + [0.8130869, 0.58292615, 0.90526086, 0.62551135], + [0.7844348, 0.68735796, 0.98182607, 0.83329546], + [0.872, 0.6190057, 0.9306522, 0.6591761], + ] + ] + ), np.array([[]]), np.array([[64, 62, 62, 67, 82, 52, 79, 81, 55, 55, 55, 55, 62, 55]]), - np.array([b'000000037777.jpg']) + np.array([b"000000037777.jpg"]), ] - mAP = metrics['mAP']() - + mAP = metrics["mAP"]() + self.assertEqual(mAP.result(), 0) mAP.update(detection, ground_truth) - + mAP.update(detection, ground_truth) - self.assertEqual(format(mAP.result(), '.5f'), - '0.18182') + self.assertEqual(format(mAP.result(), ".5f"), "0.18182") mAP.update(detection_2, ground_truth_2) - self.assertEqual(format(mAP.result(), '.5f'), - '0.20347') + self.assertEqual(format(mAP.result(), ".5f"), "0.20347") mAP.reset() mAP.update(detection, ground_truth) - self.assertEqual(format(mAP.result(), '.5f'), - '0.18182') + self.assertEqual(format(mAP.result(), ".5f"), "0.18182") ground_truth_1 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[[64, 62]]]), - np.array([b'000000037777.jpg']) + np.array([b"000000037777.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection, ground_truth_1) ground_truth_2 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[64]]), - np.array([b'000000037700.jpg']) + np.array([b"000000037700.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection, ground_truth_2) detection_1 = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. 
, 0.98301625, 0.520178 ]]]), - np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219], [0.5589304, 0.0, 0.98301625, 0.520178]]]), + np.array([[0.9267181, 0.8510787, 0.60418576, 0.35155892, 0.31158054]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] ground_truth_1 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[64, 62]]), - np.array([b'000000011.jpg']) + np.array([b"000000011.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection_1, ground_truth_1) ground_truth_2 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[64, 62]]), - np.array([b'000000012.jpg']) + np.array([b"000000012.jpg"]), ] detection_2 = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. , 0.98301625, 0.520178 ]]]), - np.array([[0.9267181 , 0.8510787]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219], [0.5589304, 0.0, 0.98301625, 0.520178]]]), + np.array([[0.9267181, 0.8510787]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] self.assertRaises(ValueError, mAP.update, detection_2, ground_truth_2) - os.remove('anno.yaml') + os.remove("anno.yaml") - def test_tensorflow_VOCmAP(self): import os - metrics = METRICS('tensorflow') - fake_dict = 'dog: 1' - with open('anno.yaml', 'w', encoding="utf-8") as f: + + metrics = METRICS("tensorflow") + fake_dict = "dog: 1" + with open("anno.yaml", "w", encoding="utf-8") as f: f.write(fake_dict) - mAP = metrics['VOCmAP']('anno.yaml') + mAP = metrics["VOCmAP"]("anno.yaml") self.assertEqual(mAP.iou_thrs, 0.5) self.assertEqual(mAP.map_points, 0) - self.assertEqual(mAP.category_map_reverse['dog'], 1) + self.assertEqual(mAP.category_map_reverse["dog"], 1) detection = [ np.array([[5]]), np.array([[5]]), - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. 
, 0.98301625, 0.520178 ], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762], - [0.40032804, 0.01218696, 0.6924763 , 0.30341768], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), - np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array( + [ + [ + [0.16117382, 0.59801614, 0.81511605, 0.7858219], + [0.5589304, 0.0, 0.98301625, 0.520178], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + [0.40032804, 0.01218696, 0.6924763, 0.30341768], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + ] + ] + ), + np.array([[0.9267181, 0.8510787, 0.60418576, 0.35155892, 0.31158054]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] ground_truth = [ - np.array([[[0.5633255 , 0.34003124, 0.69857144, 0.4009531 ], - [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), - np.array([['a', 'b']]), + np.array([[[0.5633255, 0.34003124, 0.69857144, 0.4009531], [0.4763466, 0.7769531, 0.54334897, 0.9675937]]]), + np.array([["a", "b"]]), np.array([[]]), - np.array([b'000000397133.jpg']) + np.array([b"000000397133.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection, ground_truth) - os.remove('anno.yaml') + os.remove("anno.yaml") - mAP = metrics['VOCmAP']() + mAP = metrics["VOCmAP"]() detection = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. , 0.98301625, 0.520178 ], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762], - [0.40032804, 0.01218696, 0.6924763 , 0.30341768], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), - np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array( + [ + [ + [0.16117382, 0.59801614, 0.81511605, 0.7858219], + [0.5589304, 0.0, 0.98301625, 0.520178], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + [0.40032804, 0.01218696, 0.6924763, 0.30341768], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + ] + ] + ), + np.array([[0.9267181, 0.8510787, 0.60418576, 0.35155892, 0.31158054]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] detection_2 = [ np.array([[8]]), - np.array([[[0.82776225, 0.5865939 , 0.8927653 , 0.6302338 ], - [0.8375764 , 0.6424138 , 0.9055594 , 0.6921875 ], - [0.57902956, 0.39394334, 0.8342961 , 0.5577197 ], - [0.7949219 , 0.6513021 , 0.8472295 , 0.68427753], - [0.809729 , 0.5947042 , 0.8539927 , 0.62916476], - [0.7258591 , 0.08907133, 1. , 0.86224866], - [0.43100086, 0.37782395, 0.8384069 , 0.5616918 ], - [0.32005906, 0.84334356, 1. , 1. ]]]), - np.array([[0.86698544, 0.7562499 , 0.66414887, 0.64498234,\ - 0.63083494,0.46618757, 0.3914739 , 0.3094324 ]]), - np.array([[55., 55., 79., 55., 55., 67., 79., 82.]]) + np.array( + [ + [ + [0.82776225, 0.5865939, 0.8927653, 0.6302338], + [0.8375764, 0.6424138, 0.9055594, 0.6921875], + [0.57902956, 0.39394334, 0.8342961, 0.5577197], + [0.7949219, 0.6513021, 0.8472295, 0.68427753], + [0.809729, 0.5947042, 0.8539927, 0.62916476], + [0.7258591, 0.08907133, 1.0, 0.86224866], + [0.43100086, 0.37782395, 0.8384069, 0.5616918], + [0.32005906, 0.84334356, 1.0, 1.0], + ] + ] + ), + np.array([[0.86698544, 0.7562499, 0.66414887, 0.64498234, 0.63083494, 0.46618757, 0.3914739, 0.3094324]]), + np.array([[55.0, 55.0, 79.0, 55.0, 55.0, 67.0, 79.0, 82.0]]), ] ground_truth = [ - np.array([[[0.5633255 , 0.34003124, 0.69857144, 0.4009531 ], - [0.56262296, 0.0015625 , 1. 
, 0.5431719 ], - [0.16374707, 0.60728127, 0.813911 , 0.77823436], - [0.5841452 , 0.21182813, 0.65156907, 0.24670312], - [0.8056206 , 0.048875 , 0.90124124, 0.1553125 ], - [0.6729742 , 0.09317187, 0.7696956 , 0.21203125], - [0.3848478 , 0.002125 , 0.61522245, 0.303 ], - [0.61548007, 0. , 0.7015925 , 0.097125 ], - [0.6381967 , 0.1865625 , 0.7184075 , 0.22534375], - [0.6274239 , 0.22104688, 0.71140516, 0.27134374], - [0.39566743, 0.24370313, 0.43578455, 0.284375 ], - [0.2673302 , 0.245625 , 0.3043794 , 0.27353126], - [0.7137705 , 0.15429688, 0.726815 , 0.17114063], - [0.6003747 , 0.25942189, 0.6438876 , 0.27320313], - [0.68845433, 0.13501562, 0.714637 , 0.17245312], - [0.69358313, 0.10959375, 0.7043091 , 0.12409375], - [0.493911 , 0. , 0.72571427, 0.299 ], + np.array( + [ + [ + [0.5633255, 0.34003124, 0.69857144, 0.4009531], + [0.56262296, 0.0015625, 1.0, 0.5431719], + [0.16374707, 0.60728127, 0.813911, 0.77823436], + [0.5841452, 0.21182813, 0.65156907, 0.24670312], + [0.8056206, 0.048875, 0.90124124, 0.1553125], + [0.6729742, 0.09317187, 0.7696956, 0.21203125], + [0.3848478, 0.002125, 0.61522245, 0.303], + [0.61548007, 0.0, 0.7015925, 0.097125], + [0.6381967, 0.1865625, 0.7184075, 0.22534375], + [0.6274239, 0.22104688, 0.71140516, 0.27134374], + [0.39566743, 0.24370313, 0.43578455, 0.284375], + [0.2673302, 0.245625, 0.3043794, 0.27353126], + [0.7137705, 0.15429688, 0.726815, 0.17114063], + [0.6003747, 0.25942189, 0.6438876, 0.27320313], + [0.68845433, 0.13501562, 0.714637, 0.17245312], + [0.69358313, 0.10959375, 0.7043091, 0.12409375], + [0.493911, 0.0, 0.72571427, 0.299], [0.69576114, 0.15107812, 0.70714283, 0.16332813], - [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), + [0.4763466, 0.7769531, 0.54334897, 0.9675937], + ] + ] + ), np.array([[]]), - np.array([[44, 67, 1, 49, 51, 51, 79, 1, 47, 47, 51, 51,\ - 56, 50, 56, 56, 79, 57, 81]]), - np.array([b'000000397133.jpg']) + np.array([[44, 67, 1, 49, 51, 51, 79, 1, 47, 47, 51, 51, 56, 50, 56, 56, 79, 57, 81]]), + np.array([b"000000397133.jpg"]), ] ground_truth_2 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.9358696 , 0.07528409, 0.99891305, 0.25 ], - [0.8242174 , 0.3309659 , 0.93508697, 0.47301137], - [0.77413046, 0.22599432, 0.9858696 , 0.8179261 ], - [0.32582608, 0.8575 , 0.98426086, 0.9984659 ], - [0.77795655, 0.6268466 , 0.89930433, 0.73434657], - [0.5396087 , 0.39053977, 0.8483913 , 0.5615057 ], - [0.58473915, 0.75661933, 0.5998261 , 0.83579546], - [0.80391306, 0.6129829 , 0.8733478 , 0.66201705], - [0.8737391 , 0.6579546 , 0.943 , 0.7053693 ], - [0.775 , 0.6549716 , 0.8227391 , 0.6882955 ], - [0.8130869 , 0.58292615, 0.90526086, 0.62551135], - [0.7844348 , 0.68735796, 0.98182607, 0.83329546], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array( + [ + [ + [0.51508695, 0.2911648, 0.5903478, 0.31360796], + [0.9358696, 0.07528409, 0.99891305, 0.25], + [0.8242174, 0.3309659, 0.93508697, 0.47301137], + [0.77413046, 0.22599432, 0.9858696, 0.8179261], + [0.32582608, 0.8575, 0.98426086, 0.9984659], + [0.77795655, 0.6268466, 0.89930433, 0.73434657], + [0.5396087, 0.39053977, 0.8483913, 0.5615057], + [0.58473915, 0.75661933, 0.5998261, 0.83579546], + [0.80391306, 0.6129829, 0.8733478, 0.66201705], + [0.8737391, 0.6579546, 0.943, 0.7053693], + [0.775, 0.6549716, 0.8227391, 0.6882955], + [0.8130869, 0.58292615, 0.90526086, 0.62551135], + [0.7844348, 0.68735796, 0.98182607, 0.83329546], + [0.872, 0.6190057, 0.9306522, 0.6591761], + ] + ] + ), np.array([[]]), np.array([[64, 62, 62, 67, 82, 52, 79, 81, 55, 55, 
55, 55, 62, 55]]), - np.array([b'000000037777.jpg']) + np.array([b"000000037777.jpg"]), ] - + self.assertEqual(mAP.result(), 0) mAP.update(detection, ground_truth) - + mAP.update(detection, ground_truth) - self.assertEqual(format(mAP.result(), '.5f'), - '0.18182') + self.assertEqual(format(mAP.result(), ".5f"), "0.18182") mAP.update(detection_2, ground_truth_2) - self.assertEqual(format(mAP.result(), '.5f'), - '0.20347') + self.assertEqual(format(mAP.result(), ".5f"), "0.20347") mAP.reset() mAP.update(detection, ground_truth) - self.assertEqual(format(mAP.result(), '.5f'), - '0.18182') + self.assertEqual(format(mAP.result(), ".5f"), "0.18182") ground_truth_1 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[[64, 62]]]), - np.array([b'000000037777.jpg']) + np.array([b"000000037777.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection, ground_truth_1) ground_truth_2 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[64]]), - np.array([b'000000037700.jpg']) + np.array([b"000000037700.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection, ground_truth_2) detection_1 = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. , 0.98301625, 0.520178 ]]]), - np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219], [0.5589304, 0.0, 0.98301625, 0.520178]]]), + np.array([[0.9267181, 0.8510787, 0.60418576, 0.35155892, 0.31158054]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] ground_truth_1 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[64, 62]]), - np.array([b'000000011.jpg']) + np.array([b"000000011.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection_1, ground_truth_1) ground_truth_2 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[64, 62]]), - np.array([b'000000012.jpg']) + np.array([b"000000012.jpg"]), ] detection_2 = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. 
, 0.98301625, 0.520178 ]]]), - np.array([[0.9267181 , 0.8510787]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219], [0.5589304, 0.0, 0.98301625, 0.520178]]]), + np.array([[0.9267181, 0.8510787]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] self.assertRaises(ValueError, mAP.update, detection_2, ground_truth_2) - def test_tensorflow_COCOmAP(self): import os - output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} - metrics = METRICS('tensorflow') - fake_dict = 'dog: 1' - with open('anno.yaml', 'w', encoding="utf-8") as f: + + output_index_mapping = {"num_detections": 0, "boxes": 1, "scores": 2, "classes": 3} + metrics = METRICS("tensorflow") + fake_dict = "dog: 1" + with open("anno.yaml", "w", encoding="utf-8") as f: f.write(fake_dict) - mAP = metrics['COCOmAP']('anno.yaml') - mAP2 = metrics['COCOmAPv2']('anno.yaml', output_index_mapping=output_index_mapping) - self.assertEqual(mAP.category_map_reverse['dog'], 1) - self.assertEqual(mAP2.category_map_reverse['dog'], 1) + mAP = metrics["COCOmAP"]("anno.yaml") + mAP2 = metrics["COCOmAPv2"]("anno.yaml", output_index_mapping=output_index_mapping) + self.assertEqual(mAP.category_map_reverse["dog"], 1) + self.assertEqual(mAP2.category_map_reverse["dog"], 1) detection = [ np.array([[5]]), np.array([[5]]), - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. , 0.98301625, 0.520178 ], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762], - [0.40032804, 0.01218696, 0.6924763 , 0.30341768], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), - np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array( + [ + [ + [0.16117382, 0.59801614, 0.81511605, 0.7858219], + [0.5589304, 0.0, 0.98301625, 0.520178], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + [0.40032804, 0.01218696, 0.6924763, 0.30341768], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + ] + ] + ), + np.array([[0.9267181, 0.8510787, 0.60418576, 0.35155892, 0.31158054]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] ground_truth = [ - np.array([[[0.5633255 , 0.34003124, 0.69857144, 0.4009531 ], - [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), - np.array([['a', 'b']]), + np.array([[[0.5633255, 0.34003124, 0.69857144, 0.4009531], [0.4763466, 0.7769531, 0.54334897, 0.9675937]]]), + np.array([["a", "b"]]), np.array([[]]), - np.array([b'000000397133.jpg']) + np.array([b"000000397133.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection, ground_truth) - os.remove('anno.yaml') + os.remove("anno.yaml") - mAP = metrics['COCOmAP']() - mAP2 = metrics['COCOmAPv2']() + mAP = metrics["COCOmAP"]() + mAP2 = metrics["COCOmAPv2"]() detection = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. 
, 0.98301625, 0.520178 ], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762], - [0.40032804, 0.01218696, 0.6924763 , 0.30341768], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), - np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array( + [ + [ + [0.16117382, 0.59801614, 0.81511605, 0.7858219], + [0.5589304, 0.0, 0.98301625, 0.520178], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + [0.40032804, 0.01218696, 0.6924763, 0.30341768], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + ] + ] + ), + np.array([[0.9267181, 0.8510787, 0.60418576, 0.35155892, 0.31158054]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] detection_2 = [ np.array([[8]]), - np.array([[[0.82776225, 0.5865939 , 0.8927653 , 0.6302338 ], - [0.8375764 , 0.6424138 , 0.9055594 , 0.6921875 ], - [0.57902956, 0.39394334, 0.8342961 , 0.5577197 ], - [0.7949219 , 0.6513021 , 0.8472295 , 0.68427753], - [0.809729 , 0.5947042 , 0.8539927 , 0.62916476], - [0.7258591 , 0.08907133, 1. , 0.86224866], - [0.43100086, 0.37782395, 0.8384069 , 0.5616918 ], - [0.32005906, 0.84334356, 1. , 1. ]]]), - np.array([[0.86698544, 0.7562499 , 0.66414887, 0.64498234,\ - 0.63083494,0.46618757, 0.3914739 , 0.3094324 ]]), - np.array([[55., 55., 79., 55., 55., 67., 79., 82.]]) + np.array( + [ + [ + [0.82776225, 0.5865939, 0.8927653, 0.6302338], + [0.8375764, 0.6424138, 0.9055594, 0.6921875], + [0.57902956, 0.39394334, 0.8342961, 0.5577197], + [0.7949219, 0.6513021, 0.8472295, 0.68427753], + [0.809729, 0.5947042, 0.8539927, 0.62916476], + [0.7258591, 0.08907133, 1.0, 0.86224866], + [0.43100086, 0.37782395, 0.8384069, 0.5616918], + [0.32005906, 0.84334356, 1.0, 1.0], + ] + ] + ), + np.array([[0.86698544, 0.7562499, 0.66414887, 0.64498234, 0.63083494, 0.46618757, 0.3914739, 0.3094324]]), + np.array([[55.0, 55.0, 79.0, 55.0, 55.0, 67.0, 79.0, 82.0]]), ] ground_truth = [ - np.array([[[0.5633255 , 0.34003124, 0.69857144, 0.4009531 ], - [0.56262296, 0.0015625 , 1. , 0.5431719 ], - [0.16374707, 0.60728127, 0.813911 , 0.77823436], - [0.5841452 , 0.21182813, 0.65156907, 0.24670312], - [0.8056206 , 0.048875 , 0.90124124, 0.1553125 ], - [0.6729742 , 0.09317187, 0.7696956 , 0.21203125], - [0.3848478 , 0.002125 , 0.61522245, 0.303 ], - [0.61548007, 0. , 0.7015925 , 0.097125 ], - [0.6381967 , 0.1865625 , 0.7184075 , 0.22534375], - [0.6274239 , 0.22104688, 0.71140516, 0.27134374], - [0.39566743, 0.24370313, 0.43578455, 0.284375 ], - [0.2673302 , 0.245625 , 0.3043794 , 0.27353126], - [0.7137705 , 0.15429688, 0.726815 , 0.17114063], - [0.6003747 , 0.25942189, 0.6438876 , 0.27320313], - [0.68845433, 0.13501562, 0.714637 , 0.17245312], - [0.69358313, 0.10959375, 0.7043091 , 0.12409375], - [0.493911 , 0. 
, 0.72571427, 0.299 ], + np.array( + [ + [ + [0.5633255, 0.34003124, 0.69857144, 0.4009531], + [0.56262296, 0.0015625, 1.0, 0.5431719], + [0.16374707, 0.60728127, 0.813911, 0.77823436], + [0.5841452, 0.21182813, 0.65156907, 0.24670312], + [0.8056206, 0.048875, 0.90124124, 0.1553125], + [0.6729742, 0.09317187, 0.7696956, 0.21203125], + [0.3848478, 0.002125, 0.61522245, 0.303], + [0.61548007, 0.0, 0.7015925, 0.097125], + [0.6381967, 0.1865625, 0.7184075, 0.22534375], + [0.6274239, 0.22104688, 0.71140516, 0.27134374], + [0.39566743, 0.24370313, 0.43578455, 0.284375], + [0.2673302, 0.245625, 0.3043794, 0.27353126], + [0.7137705, 0.15429688, 0.726815, 0.17114063], + [0.6003747, 0.25942189, 0.6438876, 0.27320313], + [0.68845433, 0.13501562, 0.714637, 0.17245312], + [0.69358313, 0.10959375, 0.7043091, 0.12409375], + [0.493911, 0.0, 0.72571427, 0.299], [0.69576114, 0.15107812, 0.70714283, 0.16332813], - [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), + [0.4763466, 0.7769531, 0.54334897, 0.9675937], + ] + ] + ), np.array([[]]), - np.array([[44, 67, 1, 49, 51, 51, 79, 1, 47, 47, 51, 51,\ - 56, 50, 56, 56, 79, 57, 81]]), - np.array([b'000000397133.jpg']) + np.array([[44, 67, 1, 49, 51, 51, 79, 1, 47, 47, 51, 51, 56, 50, 56, 56, 79, 57, 81]]), + np.array([b"000000397133.jpg"]), ] ground_truth_2 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.9358696 , 0.07528409, 0.99891305, 0.25 ], - [0.8242174 , 0.3309659 , 0.93508697, 0.47301137], - [0.77413046, 0.22599432, 0.9858696 , 0.8179261 ], - [0.32582608, 0.8575 , 0.98426086, 0.9984659 ], - [0.77795655, 0.6268466 , 0.89930433, 0.73434657], - [0.5396087 , 0.39053977, 0.8483913 , 0.5615057 ], - [0.58473915, 0.75661933, 0.5998261 , 0.83579546], - [0.80391306, 0.6129829 , 0.8733478 , 0.66201705], - [0.8737391 , 0.6579546 , 0.943 , 0.7053693 ], - [0.775 , 0.6549716 , 0.8227391 , 0.6882955 ], - [0.8130869 , 0.58292615, 0.90526086, 0.62551135], - [0.7844348 , 0.68735796, 0.98182607, 0.83329546], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array( + [ + [ + [0.51508695, 0.2911648, 0.5903478, 0.31360796], + [0.9358696, 0.07528409, 0.99891305, 0.25], + [0.8242174, 0.3309659, 0.93508697, 0.47301137], + [0.77413046, 0.22599432, 0.9858696, 0.8179261], + [0.32582608, 0.8575, 0.98426086, 0.9984659], + [0.77795655, 0.6268466, 0.89930433, 0.73434657], + [0.5396087, 0.39053977, 0.8483913, 0.5615057], + [0.58473915, 0.75661933, 0.5998261, 0.83579546], + [0.80391306, 0.6129829, 0.8733478, 0.66201705], + [0.8737391, 0.6579546, 0.943, 0.7053693], + [0.775, 0.6549716, 0.8227391, 0.6882955], + [0.8130869, 0.58292615, 0.90526086, 0.62551135], + [0.7844348, 0.68735796, 0.98182607, 0.83329546], + [0.872, 0.6190057, 0.9306522, 0.6591761], + ] + ] + ), np.array([[]]), np.array([[64, 62, 62, 67, 82, 52, 79, 81, 55, 55, 55, 55, 62, 55]]), - np.array([b'000000037777.jpg']) + np.array([b"000000037777.jpg"]), ] - + self.assertEqual(mAP.result(), 0) self.assertEqual(mAP2.result(), 0) mAP.update(detection, ground_truth) - + mAP.update(detection, ground_truth) - self.assertEqual(format(mAP.result(), '.5f'), - '0.14149') + self.assertEqual(format(mAP.result(), ".5f"), "0.14149") mAP.update(detection_2, ground_truth_2) - self.assertEqual(format(mAP.result(), '.5f'), - '0.13366') + self.assertEqual(format(mAP.result(), ".5f"), "0.13366") mAP.reset() mAP.update(detection, ground_truth) - self.assertEqual(format(mAP.result(), '.5f'), - '0.14149') + self.assertEqual(format(mAP.result(), ".5f"), "0.14149") mAP2.update(detection, ground_truth) - + 
mAP2.update(detection, ground_truth) - self.assertEqual(format(mAP2.result(), '.5f'), - '0.14149') + self.assertEqual(format(mAP2.result(), ".5f"), "0.14149") + + mAP2 = metrics["COCOmAPv2"](output_index_mapping=output_index_mapping) - mAP2 = metrics['COCOmAPv2'](output_index_mapping=output_index_mapping) - mAP2.update(detection_2, ground_truth_2) - self.assertEqual(format(mAP2.result(), '.5f'), - '0.20520') + self.assertEqual(format(mAP2.result(), ".5f"), "0.20520") mAP2.reset() mAP2.update(detection_2, ground_truth_2) - self.assertEqual(format(mAP2.result(), '.5f'), - '0.20520') - - mAP2 = metrics['COCOmAPv2']() - + self.assertEqual(format(mAP2.result(), ".5f"), "0.20520") + + mAP2 = metrics["COCOmAPv2"]() + ground_truth_1 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[[64, 62]]]), - np.array([b'000000037777.jpg']) + np.array([b"000000037777.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection, ground_truth_1) self.assertRaises(ValueError, mAP2.update, detection, ground_truth_1) - + ground_truth_2 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[64]]), - np.array([b'000000037700.jpg']) + np.array([b"000000037700.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection, ground_truth_2) self.assertRaises(ValueError, mAP2.update, detection, ground_truth_2) - + detection_1 = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. , 0.98301625, 0.520178 ]]]), - np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219], [0.5589304, 0.0, 0.98301625, 0.520178]]]), + np.array([[0.9267181, 0.8510787, 0.60418576, 0.35155892, 0.31158054]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] ground_truth_1 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[64, 62]]), - np.array([b'000000011.jpg']) + np.array([b"000000011.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection_1, ground_truth_1) self.assertRaises(ValueError, mAP2.update, detection_1, ground_truth_1) - + ground_truth_2 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[64, 62]]), - np.array([b'000000012.jpg']) + np.array([b"000000012.jpg"]), ] detection_2 = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. 
, 0.98301625, 0.520178 ]]]), - np.array([[0.9267181 , 0.8510787]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219], [0.5589304, 0.0, 0.98301625, 0.520178]]]), + np.array([[0.9267181, 0.8510787]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] self.assertRaises(ValueError, mAP.update, detection_2, ground_truth_2) self.assertRaises(ValueError, mAP2.update, detection_2, ground_truth_2) - + @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows now") def test__accuracy(self): predicts1 = [1, 0, 1, 1] @@ -808,15 +899,15 @@ def test__accuracy(self): predicts2 = [[0, 0], [0, 0]] labels2 = [[0, 1], [1, 1]] - + predicts3 = [[[0, 1], [0, 0], [0, 1]], [[0, 1], [0, 1], [0, 1]]] labels3 = [[[0, 1], [0, 1], [1, 0]], [[1, 0], [1, 0], [1, 0]]] - predicts4 = [[0.2, 0.8], [0.1, 0.9], [0.3, 0.7], [0.4, 0.6]] #1,1,1,1 + predicts4 = [[0.2, 0.8], [0.1, 0.9], [0.3, 0.7], [0.4, 0.6]] # 1,1,1,1 labels4 = [0, 1, 0, 0] - metrics = METRICS('pytorch') - acc = metrics['Accuracy']() + metrics = METRICS("pytorch") + acc = metrics["Accuracy"]() acc.update(predicts1, labels1) acc_result = acc.result() self.assertEqual(acc_result, 0.5) @@ -830,8 +921,8 @@ def test__accuracy(self): acc.update(predicts4, labels4) self.assertEqual(acc.result(), 0.25) - metrics = METRICS('mxnet') - acc = metrics['Accuracy']() + metrics = METRICS("mxnet") + acc = metrics["Accuracy"]() acc.update(predicts1, labels1) acc_result = acc.result() self.assertEqual(acc_result, 0.5) @@ -845,8 +936,8 @@ def test__accuracy(self): acc.update(predicts4, labels4) self.assertEqual(acc.result(), 0.25) - metrics = METRICS('onnxrt_qlinearops') - acc = metrics['Accuracy']() + metrics = METRICS("onnxrt_qlinearops") + acc = metrics["Accuracy"]() acc.update(predicts1, labels1) acc_result = acc.result() self.assertEqual(acc_result, 0.5) @@ -863,16 +954,15 @@ def test__accuracy(self): acc.reset() acc.update(1, 1) self.assertEqual(acc.result(), 1.0) - + wrong_predictions = [1, 0, 0] wrong_labels = [[0, 1, 1]] self.assertRaises(ValueError, acc.update, wrong_predictions, wrong_labels) - @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows yet") def test_mxnet_accuracy(self): - metrics = METRICS('mxnet') - acc = metrics['Accuracy']() + metrics = METRICS("mxnet") + acc = metrics["Accuracy"]() predicts = [1, 0, 1, 1] labels = [0, 1, 1, 1] acc.update(predicts, labels) @@ -886,17 +976,17 @@ def test_mse(self): predicts2 = [1, 1, 1, 1] labels2 = [0, 1, 1, 0] - metrics = METRICS('onnxrt_qlinearops') - mse = metrics['MSE'](compare_label=False) + metrics = METRICS("onnxrt_qlinearops") + mse = metrics["MSE"](compare_label=False) mse.update(predicts1, labels1) mse_result = mse.result() self.assertEqual(mse_result, 0.75) mse.update(predicts2, labels2) mse_result = mse.result() self.assertEqual(mse_result, 0.625) - - metrics = METRICS('tensorflow') - mse = metrics['MSE'](compare_label=False) + + metrics = METRICS("tensorflow") + mse = metrics["MSE"](compare_label=False) mse.update(predicts1, labels1) mse_result = mse.result() self.assertEqual(mse_result, 0.75) @@ -904,9 +994,8 @@ def test_mse(self): mse_result = mse.result() self.assertEqual(mse_result, 0.625) - - metrics = METRICS('mxnet') - mse = metrics['MSE']() + metrics = METRICS("mxnet") + mse = metrics["MSE"]() mse.update(predicts1, labels1) mse_result = mse.result() self.assertEqual(mse_result, 0.75) @@ -914,8 +1003,8 @@ def test_mse(self): mse_result = mse.result() self.assertEqual(mse_result, 0.625) - 
metrics = METRICS('pytorch') - mse = metrics['MSE']() + metrics = METRICS("pytorch") + mse = metrics["MSE"]() mse.update(predicts1, labels1) mse_result = mse.result() self.assertEqual(mse_result, 0.75) @@ -930,8 +1019,8 @@ def test_mae(self): predicts2 = [1, 1, 1, 1] labels2 = [1, 1, 1, 0] - metrics = METRICS('tensorflow') - mae = metrics['MAE']() + metrics = METRICS("tensorflow") + mae = metrics["MAE"]() mae.update(predicts1, labels1) mae_result = mae.result() self.assertEqual(mae_result, 0.75) @@ -943,8 +1032,8 @@ def test_mae(self): mae_result = mae.result() self.assertEqual(mae_result, 0.25) - metrics = METRICS('pytorch') - mae = metrics['MAE']() + metrics = METRICS("pytorch") + mae = metrics["MAE"]() mae.update(predicts1, labels1) mae_result = mae.result() self.assertEqual(mae_result, 0.75) @@ -952,8 +1041,8 @@ def test_mae(self): mae_result = mae.result() self.assertEqual(mae_result, 0.5) - metrics = METRICS('mxnet') - mae = metrics['MAE']() + metrics = METRICS("mxnet") + mae = metrics["MAE"]() mae.update(predicts1, labels1) mae_result = mae.result() self.assertEqual(mae_result, 0.75) @@ -961,19 +1050,19 @@ def test_mae(self): mae_result = mae.result() self.assertEqual(mae_result, 0.5) - metrics = METRICS('onnxrt_qlinearops') - mae = metrics['MAE']() + metrics = METRICS("onnxrt_qlinearops") + mae = metrics["MAE"]() mae.update(predicts1, labels1) mae_result = mae.result() self.assertEqual(mae_result, 0.75) mae.update(predicts2, labels2) mae_result = mae.result() self.assertEqual(mae_result, 0.5) - + self.assertRaises(AssertionError, mae.update, [1], [1, 2]) - self.assertRaises(AssertionError, mae.update, 1, [1,2]) + self.assertRaises(AssertionError, mae.update, 1, [1, 2]) self.assertRaises(AssertionError, mae.update, [1, 2], [1]) - self.assertRaises(AssertionError, mae.update, 1, np.array([1,2])) + self.assertRaises(AssertionError, mae.update, 1, np.array([1, 2])) @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows now") def test_rmse(self): @@ -982,8 +1071,8 @@ def test_rmse(self): predicts2 = [1, 1, 1, 1] labels2 = [1, 0, 0, 0] - metrics = METRICS('tensorflow') - rmse = metrics['RMSE']() + metrics = METRICS("tensorflow") + rmse = metrics["RMSE"]() rmse.update(predicts1, labels1) rmse_result = rmse.result() self.assertEqual(rmse_result, 0.5) @@ -992,8 +1081,8 @@ def test_rmse(self): rmse_result = rmse.result() self.assertAlmostEqual(rmse_result, np.sqrt(0.75)) - metrics = METRICS('pytorch') - rmse = metrics['RMSE']() + metrics = METRICS("pytorch") + rmse = metrics["RMSE"]() rmse.update(predicts1, labels1) rmse_result = rmse.result() self.assertEqual(rmse_result, 0.5) @@ -1001,8 +1090,8 @@ def test_rmse(self): rmse_result = rmse.result() self.assertAlmostEqual(rmse_result, np.sqrt(0.5)) - metrics = METRICS('mxnet') - rmse = metrics['RMSE']() + metrics = METRICS("mxnet") + rmse = metrics["RMSE"]() rmse.update(predicts1, labels1) rmse_result = rmse.result() self.assertEqual(rmse_result, 0.5) @@ -1010,8 +1099,8 @@ def test_rmse(self): rmse_result = rmse.result() self.assertAlmostEqual(rmse_result, np.sqrt(0.5)) - metrics = METRICS('onnxrt_qlinearops') - rmse = metrics['RMSE']() + metrics = METRICS("onnxrt_qlinearops") + rmse = metrics["RMSE"]() rmse.update(predicts1, labels1) rmse_result = rmse.result() self.assertEqual(rmse_result, 0.5) @@ -1020,8 +1109,8 @@ def test_rmse(self): self.assertAlmostEqual(rmse_result, np.sqrt(0.5)) def test_loss(self): - metrics = METRICS('pytorch') - loss = metrics['Loss']() + metrics = METRICS("pytorch") + loss = 
metrics["Loss"]() predicts = [1, 0, 0, 1] labels = [0, 1, 0, 0] loss.update(predicts, labels) @@ -1038,9 +1127,8 @@ def test_loss(self): loss.update(predicts, labels) self.assertEqual(loss.result(), 0.5) - - metrics = METRICS('onnxrt_qlinearops') - loss = metrics['Loss']() + metrics = METRICS("onnxrt_qlinearops") + loss = metrics["Loss"]() predicts = [1, 0, 0, 1] labels = [0, 1, 0, 0] loss.update(predicts, labels) @@ -1057,5 +1145,6 @@ def test_loss(self): loss.update(predicts, labels) self.assertEqual(loss.result(), 0.5) + if __name__ == "__main__": unittest.main() diff --git a/test/metric/test_metrics_2.x.py b/test/metric/test_metrics_2.x.py index 5515031860c..5848441bda0 100644 --- a/test/metric/test_metrics_2.x.py +++ b/test/metric/test_metrics_2.x.py @@ -1,15 +1,19 @@ """Tests for the metrics module.""" -import numpy as np -import unittest import platform +import unittest + +import numpy as np + from neural_compressor.metric import METRICS -from neural_compressor.metric.f1 import evaluate from neural_compressor.metric.evaluate_squad import evaluate as evaluate_squad +from neural_compressor.metric.f1 import evaluate + class InCorrectMetric: def __init__(self): self.item = None + class CorrectMetric: def __init__(self): self.item = [] @@ -23,10 +27,11 @@ def result(self): def reset(self): self.item = [] + class TestMetrics(unittest.TestCase): def testmIOU(self): - metrics = METRICS('tensorflow') - miou = metrics['mIOU']() + metrics = METRICS("tensorflow") + miou = metrics["mIOU"]() preds = np.array([0, 0, 1, 1]) labels = np.array([0, 1, 0, 1]) miou.update(preds, labels) @@ -39,58 +44,66 @@ def testmIOU(self): self.assertAlmostEqual(miou.result(), 0.58333333) def testBLEU(self): - metrics = METRICS('tensorflow') - bleu = metrics['BLEU']() - preds = ['Gutach: Mehr Sicherheit für Fußgänger'] - labels = ('Gutach: Noch mehr Sicherheit für Fußgänger',) + metrics = METRICS("tensorflow") + bleu = metrics["BLEU"]() + preds = ["Gutach: Mehr Sicherheit für Fußgänger"] + labels = ("Gutach: Noch mehr Sicherheit für Fußgänger",) bleu.update(preds, labels) self.assertAlmostEqual(bleu.result(), 51.1507809) bleu.reset() - preds = ['Dies wurde auch von Peter Arnold vom Offenburg District Office bestätigt.'] - labels = ('Dies bestätigt auch Peter Arnold vom Landratsamt Offenburg.',) + preds = ["Dies wurde auch von Peter Arnold vom Offenburg District Office bestätigt."] + labels = ("Dies bestätigt auch Peter Arnold vom Landratsamt Offenburg.",) bleu.update(preds, labels) self.assertAlmostEqual(bleu.result(), 16.108992695) with self.assertRaises(ValueError): - bleu.update(['a','b'], ('c',)) + bleu.update(["a", "b"], ("c",)) def test_onnxrt_GLUE(self): - metrics = METRICS('onnxrt_qlinearops') - glue = metrics['GLUE']('mrpc') - preds = [np.array( - [[-3.2443411, 3.0909934], - [2.0500996, -2.3100944], - [1.870293 , -2.0741048], - [-2.8377204, 2.617834], - [2.008347 , -2.0215416], - [-2.9693947, 2.7782154], - [-2.9949608, 2.7887983], - [-3.0623112, 2.8748074]]) + metrics = METRICS("onnxrt_qlinearops") + glue = metrics["GLUE"]("mrpc") + preds = [ + np.array( + [ + [-3.2443411, 3.0909934], + [2.0500996, -2.3100944], + [1.870293, -2.0741048], + [-2.8377204, 2.617834], + [2.008347, -2.0215416], + [-2.9693947, 2.7782154], + [-2.9949608, 2.7887983], + [-3.0623112, 2.8748074], + ] + ) ] labels = [np.array([1, 0, 0, 1, 0, 1, 0, 1])] glue.update(preds, labels) self.assertEqual(glue.result(), 0.875) - preds_2 = [np.array( - [[-3.1296735, 2.8356276], - [-3.172515 , 2.9173899], - [-3.220131 , 3.0916846], - [2.1452675, 
-1.9398905], - [1.5475761, -1.9101546], - [-2.9797182, 2.721741], - [-3.2052834, 2.9934788], - [-2.7451005, 2.622343]]) + preds_2 = [ + np.array( + [ + [-3.1296735, 2.8356276], + [-3.172515, 2.9173899], + [-3.220131, 3.0916846], + [2.1452675, -1.9398905], + [1.5475761, -1.9101546], + [-2.9797182, 2.721741], + [-3.2052834, 2.9934788], + [-2.7451005, 2.622343], + ] + ) ] labels_2 = [np.array([1, 1, 1, 0, 0, 1, 1, 1])] glue.update(preds_2, labels_2) - self.assertEqual(glue.result(), 0.9375) + self.assertEqual(glue.result(), 0.9375) glue.reset() glue.update(preds, labels) self.assertEqual(glue.result(), 0.875) def test_tensorflow_F1(self): - metrics = METRICS('tensorflow') - F1 = metrics['F1']() + metrics = METRICS("tensorflow") + F1 = metrics["F1"]() preds = [1, 1, 1, 1] labels = [0, 1, 1, 0] @@ -98,29 +111,55 @@ def test_tensorflow_F1(self): self.assertEqual(F1.result(), 0.5) def test_squad_evaluate(self): - label = [{'paragraphs':\ - [{'qas':[{'answers': [{'answer_start': 177, 'text': 'Denver Broncos'}, \ - {'answer_start': 177, 'text': 'Denver Broncos'}, \ - {'answer_start': 177, 'text': 'Denver Broncos'}], \ - 'question': 'Which NFL team represented the AFC at Super Bowl 50?', \ - 'id': '56be4db0acb8001400a502ec'}]}]}] - preds = {'56be4db0acb8001400a502ec': 'Denver Broncos'} + label = [ + { + "paragraphs": [ + { + "qas": [ + { + "answers": [ + {"answer_start": 177, "text": "Denver Broncos"}, + {"answer_start": 177, "text": "Denver Broncos"}, + {"answer_start": 177, "text": "Denver Broncos"}, + ], + "question": "Which NFL team represented the AFC at Super Bowl 50?", + "id": "56be4db0acb8001400a502ec", + } + ] + } + ] + } + ] + preds = {"56be4db0acb8001400a502ec": "Denver Broncos"} f1 = evaluate(preds, label) - self.assertEqual(f1, 100.) - dataset = [{'paragraphs':\ - [{'qas':[{'answers': [{'answer_start': 177, 'text': 'Denver Broncos'}, \ - {'answer_start': 177, 'text': 'Denver Broncos'}, \ - {'answer_start': 177, 'text': 'Denver Broncos'}], \ - 'question': 'Which NFL team represented the AFC at Super Bowl 50?', \ - 'id': '56be4db0acb8001400a502ec'}]}]}] - predictions = {'56be4db0acb8001400a502ec': 'Denver Broncos'} - f1_squad = evaluate_squad(dataset,predictions) - self.assertEqual(f1_squad['f1'], 100.) - self.assertEqual(f1_squad['exact_match'], 100.) 
+ self.assertEqual(f1, 100.0) + dataset = [ + { + "paragraphs": [ + { + "qas": [ + { + "answers": [ + {"answer_start": 177, "text": "Denver Broncos"}, + {"answer_start": 177, "text": "Denver Broncos"}, + {"answer_start": 177, "text": "Denver Broncos"}, + ], + "question": "Which NFL team represented the AFC at Super Bowl 50?", + "id": "56be4db0acb8001400a502ec", + } + ] + } + ] + } + ] + predictions = {"56be4db0acb8001400a502ec": "Denver Broncos"} + f1_squad = evaluate_squad(dataset, predictions) + self.assertEqual(f1_squad["f1"], 100.0) + self.assertEqual(f1_squad["exact_match"], 100.0) def test_pytorch_F1(self): - metrics = METRICS('pytorch') - F1 = metrics['F1']() + metrics = METRICS("pytorch") + F1 = metrics["F1"]() F1.reset() preds = [1, 1] labels = [2, 1, 1] @@ -130,8 +169,8 @@ def test_pytorch_F1(self): @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows yet") def test_mxnet_F1(self): - metrics = METRICS('mxnet') - F1 = metrics['F1']() + metrics = METRICS("mxnet") + F1 = metrics["F1"]() preds = [0, 1, 1, 1, 1, 0] labels = [0, 1, 1, 1] @@ -139,13 +178,13 @@ def test_mxnet_F1(self): self.assertEqual(F1.result(), 0.8) def test_onnx_topk(self): - metrics = METRICS('onnxrt_qlinearops') - top1 = metrics['topk']() + metrics = METRICS("onnxrt_qlinearops") + top1 = metrics["topk"]() top1.reset() self.assertEqual(top1.result(), 0) self.assertEqual(top1.result(), 0) - top2 = metrics['topk'](k=2) - top3 = metrics['topk'](k=3) + top2 = metrics["topk"](k=2) + top3 = metrics["topk"](k=3) predicts = [[0, 0.2, 0.9, 0.3], [0, 0.9, 0.8, 0]] single_predict = [0, 0.2, 0.9, 0.3] @@ -180,12 +219,12 @@ def test_onnx_topk(self): @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows yet") def test_mxnet_topk(self): - metrics = METRICS('mxnet') - top1 = metrics['topk']() + metrics = METRICS("mxnet") + top1 = metrics["topk"]() top1.reset() self.assertEqual(top1.result(), 0) - top2 = metrics['topk'](k=2) - top3 = metrics['topk'](k=3) + top2 = metrics["topk"](k=2) + top3 = metrics["topk"](k=3) predicts = [[0, 0.2, 0.9, 0.3], [0, 0.9, 0.8, 0]] single_predict = [0, 0.2, 0.9, 0.3] @@ -219,12 +258,12 @@ def test_mxnet_topk(self): self.assertEqual(top3.result(), 1) def test_tensorflow_topk(self): - metrics = METRICS('tensorflow') - top1 = metrics['topk']() + metrics = METRICS("tensorflow") + top1 = metrics["topk"]() top1.reset() self.assertEqual(top1.result(), 0) - top2 = metrics['topk'](k=2) - top3 = metrics['topk'](k=3) + top2 = metrics["topk"](k=2) + top3 = metrics["topk"](k=3) predicts = [[0, 0.2, 0.9, 0.3], [0, 0.9, 0.8, 0]] single_predict = [0, 0.2, 0.9, 0.3] @@ -259,281 +298,329 @@ def test_tensorflow_topk(self): def test_tensorflow_mAP(self): import os - metrics = METRICS('tensorflow') - fake_dict = 'dog: 1' - with open('anno.yaml', 'w', encoding="utf-8") as f: + + metrics = METRICS("tensorflow") + fake_dict = "dog: 1" + with open("anno.yaml", "w", encoding="utf-8") as f: f.write(fake_dict) - mAP = metrics['mAP']('anno.yaml') - self.assertEqual(mAP.category_map_reverse['dog'], 1) + mAP = metrics["mAP"]("anno.yaml") + self.assertEqual(mAP.category_map_reverse["dog"], 1) detection = [ np.array([[5]]), np.array([[5]]), - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. 
, 0.98301625, 0.520178 ], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762], - [0.40032804, 0.01218696, 0.6924763 , 0.30341768], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), - np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array( + [ + [ + [0.16117382, 0.59801614, 0.81511605, 0.7858219], + [0.5589304, 0.0, 0.98301625, 0.520178], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + [0.40032804, 0.01218696, 0.6924763, 0.30341768], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + ] + ] + ), + np.array([[0.9267181, 0.8510787, 0.60418576, 0.35155892, 0.31158054]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] ground_truth = [ - np.array([[[0.5633255 , 0.34003124, 0.69857144, 0.4009531 ], - [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), - np.array([['a', 'b']]), + np.array([[[0.5633255, 0.34003124, 0.69857144, 0.4009531], [0.4763466, 0.7769531, 0.54334897, 0.9675937]]]), + np.array([["a", "b"]]), np.array([[]]), - np.array([b'000000397133.jpg']) + np.array([b"000000397133.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection, ground_truth) detection = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), - np.array([[0.9267181 , 0.8510787]]), - np.array([[ 1., 1.]]) + np.array( + [[[0.16117382, 0.59801614, 0.81511605, 0.7858219], [0.62706745, 0.35748824, 0.6892729, 0.41513762]]] + ), + np.array([[0.9267181, 0.8510787]]), + np.array([[1.0, 1.0]]), ] ground_truth = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), - np.array([[b'dog', b'dog']]), + np.array( + [[[0.16117382, 0.59801614, 0.81511605, 0.7858219], [0.62706745, 0.35748824, 0.6892729, 0.41513762]]] + ), + np.array([[b"dog", b"dog"]]), np.array([[]]), - np.array([b'000000397133.jpg']) + np.array([b"000000397133.jpg"]), ] mAP.update(detection, ground_truth) mAP.result() - self.assertEqual(format(mAP.result(), '.5f'), - '1.00000') - + self.assertEqual(format(mAP.result(), ".5f"), "1.00000") + detection = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. , 0.98301625, 0.520178 ], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762], - [0.40032804, 0.01218696, 0.6924763 , 0.30341768], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), - np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array( + [ + [ + [0.16117382, 0.59801614, 0.81511605, 0.7858219], + [0.5589304, 0.0, 0.98301625, 0.520178], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + [0.40032804, 0.01218696, 0.6924763, 0.30341768], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + ] + ] + ), + np.array([[0.9267181, 0.8510787, 0.60418576, 0.35155892, 0.31158054]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] detection_2 = [ np.array([[8]]), - np.array([[[0.82776225, 0.5865939 , 0.8927653 , 0.6302338 ], - [0.8375764 , 0.6424138 , 0.9055594 , 0.6921875 ], - [0.57902956, 0.39394334, 0.8342961 , 0.5577197 ], - [0.7949219 , 0.6513021 , 0.8472295 , 0.68427753], - [0.809729 , 0.5947042 , 0.8539927 , 0.62916476], - [0.7258591 , 0.08907133, 1. , 0.86224866], - [0.43100086, 0.37782395, 0.8384069 , 0.5616918 ], - [0.32005906, 0.84334356, 1. , 1. 
]]]), - np.array([[0.86698544, 0.7562499 , 0.66414887, 0.64498234,\ - 0.63083494,0.46618757, 0.3914739 , 0.3094324 ]]), - np.array([[55., 55., 79., 55., 55., 67., 79., 82.]]) + np.array( + [ + [ + [0.82776225, 0.5865939, 0.8927653, 0.6302338], + [0.8375764, 0.6424138, 0.9055594, 0.6921875], + [0.57902956, 0.39394334, 0.8342961, 0.5577197], + [0.7949219, 0.6513021, 0.8472295, 0.68427753], + [0.809729, 0.5947042, 0.8539927, 0.62916476], + [0.7258591, 0.08907133, 1.0, 0.86224866], + [0.43100086, 0.37782395, 0.8384069, 0.5616918], + [0.32005906, 0.84334356, 1.0, 1.0], + ] + ] + ), + np.array([[0.86698544, 0.7562499, 0.66414887, 0.64498234, 0.63083494, 0.46618757, 0.3914739, 0.3094324]]), + np.array([[55.0, 55.0, 79.0, 55.0, 55.0, 67.0, 79.0, 82.0]]), ] ground_truth = [ - np.array([[[0.5633255 , 0.34003124, 0.69857144, 0.4009531 ], - [0.56262296, 0.0015625 , 1. , 0.5431719 ], - [0.16374707, 0.60728127, 0.813911 , 0.77823436], - [0.5841452 , 0.21182813, 0.65156907, 0.24670312], - [0.8056206 , 0.048875 , 0.90124124, 0.1553125 ], - [0.6729742 , 0.09317187, 0.7696956 , 0.21203125], - [0.3848478 , 0.002125 , 0.61522245, 0.303 ], - [0.61548007, 0. , 0.7015925 , 0.097125 ], - [0.6381967 , 0.1865625 , 0.7184075 , 0.22534375], - [0.6274239 , 0.22104688, 0.71140516, 0.27134374], - [0.39566743, 0.24370313, 0.43578455, 0.284375 ], - [0.2673302 , 0.245625 , 0.3043794 , 0.27353126], - [0.7137705 , 0.15429688, 0.726815 , 0.17114063], - [0.6003747 , 0.25942189, 0.6438876 , 0.27320313], - [0.68845433, 0.13501562, 0.714637 , 0.17245312], - [0.69358313, 0.10959375, 0.7043091 , 0.12409375], - [0.493911 , 0. , 0.72571427, 0.299 ], + np.array( + [ + [ + [0.5633255, 0.34003124, 0.69857144, 0.4009531], + [0.56262296, 0.0015625, 1.0, 0.5431719], + [0.16374707, 0.60728127, 0.813911, 0.77823436], + [0.5841452, 0.21182813, 0.65156907, 0.24670312], + [0.8056206, 0.048875, 0.90124124, 0.1553125], + [0.6729742, 0.09317187, 0.7696956, 0.21203125], + [0.3848478, 0.002125, 0.61522245, 0.303], + [0.61548007, 0.0, 0.7015925, 0.097125], + [0.6381967, 0.1865625, 0.7184075, 0.22534375], + [0.6274239, 0.22104688, 0.71140516, 0.27134374], + [0.39566743, 0.24370313, 0.43578455, 0.284375], + [0.2673302, 0.245625, 0.3043794, 0.27353126], + [0.7137705, 0.15429688, 0.726815, 0.17114063], + [0.6003747, 0.25942189, 0.6438876, 0.27320313], + [0.68845433, 0.13501562, 0.714637, 0.17245312], + [0.69358313, 0.10959375, 0.7043091, 0.12409375], + [0.493911, 0.0, 0.72571427, 0.299], [0.69576114, 0.15107812, 0.70714283, 0.16332813], - [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), + [0.4763466, 0.7769531, 0.54334897, 0.9675937], + ] + ] + ), np.array([[]]), - np.array([[44, 67, 1, 49, 51, 51, 79, 1, 47, 47, 51, 51,\ - 56, 50, 56, 56, 79, 57, 81]]), - np.array([b'000000397133.jpg']) + np.array([[44, 67, 1, 49, 51, 51, 79, 1, 47, 47, 51, 51, 56, 50, 56, 56, 79, 57, 81]]), + np.array([b"000000397133.jpg"]), ] ground_truth_2 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.9358696 , 0.07528409, 0.99891305, 0.25 ], - [0.8242174 , 0.3309659 , 0.93508697, 0.47301137], - [0.77413046, 0.22599432, 0.9858696 , 0.8179261 ], - [0.32582608, 0.8575 , 0.98426086, 0.9984659 ], - [0.77795655, 0.6268466 , 0.89930433, 0.73434657], - [0.5396087 , 0.39053977, 0.8483913 , 0.5615057 ], - [0.58473915, 0.75661933, 0.5998261 , 0.83579546], - [0.80391306, 0.6129829 , 0.8733478 , 0.66201705], - [0.8737391 , 0.6579546 , 0.943 , 0.7053693 ], - [0.775 , 0.6549716 , 0.8227391 , 0.6882955 ], - [0.8130869 , 0.58292615, 0.90526086, 0.62551135], - [0.7844348 
, 0.68735796, 0.98182607, 0.83329546], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array( + [ + [ + [0.51508695, 0.2911648, 0.5903478, 0.31360796], + [0.9358696, 0.07528409, 0.99891305, 0.25], + [0.8242174, 0.3309659, 0.93508697, 0.47301137], + [0.77413046, 0.22599432, 0.9858696, 0.8179261], + [0.32582608, 0.8575, 0.98426086, 0.9984659], + [0.77795655, 0.6268466, 0.89930433, 0.73434657], + [0.5396087, 0.39053977, 0.8483913, 0.5615057], + [0.58473915, 0.75661933, 0.5998261, 0.83579546], + [0.80391306, 0.6129829, 0.8733478, 0.66201705], + [0.8737391, 0.6579546, 0.943, 0.7053693], + [0.775, 0.6549716, 0.8227391, 0.6882955], + [0.8130869, 0.58292615, 0.90526086, 0.62551135], + [0.7844348, 0.68735796, 0.98182607, 0.83329546], + [0.872, 0.6190057, 0.9306522, 0.6591761], + ] + ] + ), np.array([[]]), np.array([[64, 62, 62, 67, 82, 52, 79, 81, 55, 55, 55, 55, 62, 55]]), - np.array([b'000000037777.jpg']) + np.array([b"000000037777.jpg"]), ] - mAP = metrics['mAP']() + mAP = metrics["mAP"]() self.assertEqual(mAP.result(), 0) mAP.update(detection, ground_truth) mAP.update(detection, ground_truth) - self.assertEqual(format(mAP.result(), '.5f'), - '0.18182') + self.assertEqual(format(mAP.result(), ".5f"), "0.18182") mAP.update(detection_2, ground_truth_2) - self.assertEqual(format(mAP.result(), '.5f'), - '0.20347') + self.assertEqual(format(mAP.result(), ".5f"), "0.20347") mAP.reset() mAP.update(detection, ground_truth) - self.assertEqual(format(mAP.result(), '.5f'), - '0.18182') + self.assertEqual(format(mAP.result(), ".5f"), "0.18182") ground_truth_1 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[[64, 62]]]), - np.array([b'000000037777.jpg']) + np.array([b"000000037777.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection, ground_truth_1) ground_truth_2 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[64]]), - np.array([b'000000037700.jpg']) + np.array([b"000000037700.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection, ground_truth_2) detection_1 = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. 
, 0.98301625, 0.520178 ]]]), - np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219], [0.5589304, 0.0, 0.98301625, 0.520178]]]), + np.array([[0.9267181, 0.8510787, 0.60418576, 0.35155892, 0.31158054]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] ground_truth_1 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[64, 62]]), - np.array([b'000000011.jpg']) + np.array([b"000000011.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection_1, ground_truth_1) ground_truth_2 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[64, 62]]), - np.array([b'000000012.jpg']) + np.array([b"000000012.jpg"]), ] detection_2 = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. , 0.98301625, 0.520178 ]]]), - np.array([[0.9267181 , 0.8510787]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219], [0.5589304, 0.0, 0.98301625, 0.520178]]]), + np.array([[0.9267181, 0.8510787]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] self.assertRaises(ValueError, mAP.update, detection_2, ground_truth_2) - os.remove('anno.yaml') + os.remove("anno.yaml") def test_tensorflow_VOCmAP(self): import os - metrics = METRICS('tensorflow') - fake_dict = 'dog: 1' - with open('anno.yaml', 'w', encoding="utf-8") as f: + + metrics = METRICS("tensorflow") + fake_dict = "dog: 1" + with open("anno.yaml", "w", encoding="utf-8") as f: f.write(fake_dict) - mAP = metrics['VOCmAP']('anno.yaml') + mAP = metrics["VOCmAP"]("anno.yaml") self.assertEqual(mAP.iou_thrs, 0.5) self.assertEqual(mAP.map_points, 0) - self.assertEqual(mAP.category_map_reverse['dog'], 1) + self.assertEqual(mAP.category_map_reverse["dog"], 1) detection = [ np.array([[5]]), np.array([[5]]), - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. 
, 0.98301625, 0.520178 ], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762], - [0.40032804, 0.01218696, 0.6924763 , 0.30341768], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), - np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array( + [ + [ + [0.16117382, 0.59801614, 0.81511605, 0.7858219], + [0.5589304, 0.0, 0.98301625, 0.520178], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + [0.40032804, 0.01218696, 0.6924763, 0.30341768], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + ] + ] + ), + np.array([[0.9267181, 0.8510787, 0.60418576, 0.35155892, 0.31158054]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] ground_truth = [ - np.array([[[0.5633255 , 0.34003124, 0.69857144, 0.4009531 ], - [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), - np.array([['a', 'b']]), + np.array([[[0.5633255, 0.34003124, 0.69857144, 0.4009531], [0.4763466, 0.7769531, 0.54334897, 0.9675937]]]), + np.array([["a", "b"]]), np.array([[]]), - np.array([b'000000397133.jpg']) + np.array([b"000000397133.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection, ground_truth) - os.remove('anno.yaml') + os.remove("anno.yaml") - mAP = metrics['VOCmAP']() + mAP = metrics["VOCmAP"]() detection = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. , 0.98301625, 0.520178 ], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762], - [0.40032804, 0.01218696, 0.6924763 , 0.30341768], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), - np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array( + [ + [ + [0.16117382, 0.59801614, 0.81511605, 0.7858219], + [0.5589304, 0.0, 0.98301625, 0.520178], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + [0.40032804, 0.01218696, 0.6924763, 0.30341768], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + ] + ] + ), + np.array([[0.9267181, 0.8510787, 0.60418576, 0.35155892, 0.31158054]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] detection_2 = [ np.array([[8]]), - np.array([[[0.82776225, 0.5865939 , 0.8927653 , 0.6302338 ], - [0.8375764 , 0.6424138 , 0.9055594 , 0.6921875 ], - [0.57902956, 0.39394334, 0.8342961 , 0.5577197 ], - [0.7949219 , 0.6513021 , 0.8472295 , 0.68427753], - [0.809729 , 0.5947042 , 0.8539927 , 0.62916476], - [0.7258591 , 0.08907133, 1. , 0.86224866], - [0.43100086, 0.37782395, 0.8384069 , 0.5616918 ], - [0.32005906, 0.84334356, 1. , 1. ]]]), - np.array([[0.86698544, 0.7562499 , 0.66414887, 0.64498234,\ - 0.63083494,0.46618757, 0.3914739 , 0.3094324 ]]), - np.array([[55., 55., 79., 55., 55., 67., 79., 82.]]) + np.array( + [ + [ + [0.82776225, 0.5865939, 0.8927653, 0.6302338], + [0.8375764, 0.6424138, 0.9055594, 0.6921875], + [0.57902956, 0.39394334, 0.8342961, 0.5577197], + [0.7949219, 0.6513021, 0.8472295, 0.68427753], + [0.809729, 0.5947042, 0.8539927, 0.62916476], + [0.7258591, 0.08907133, 1.0, 0.86224866], + [0.43100086, 0.37782395, 0.8384069, 0.5616918], + [0.32005906, 0.84334356, 1.0, 1.0], + ] + ] + ), + np.array([[0.86698544, 0.7562499, 0.66414887, 0.64498234, 0.63083494, 0.46618757, 0.3914739, 0.3094324]]), + np.array([[55.0, 55.0, 79.0, 55.0, 55.0, 67.0, 79.0, 82.0]]), ] ground_truth = [ - np.array([[[0.5633255 , 0.34003124, 0.69857144, 0.4009531 ], - [0.56262296, 0.0015625 , 1. 
, 0.5431719 ], - [0.16374707, 0.60728127, 0.813911 , 0.77823436], - [0.5841452 , 0.21182813, 0.65156907, 0.24670312], - [0.8056206 , 0.048875 , 0.90124124, 0.1553125 ], - [0.6729742 , 0.09317187, 0.7696956 , 0.21203125], - [0.3848478 , 0.002125 , 0.61522245, 0.303 ], - [0.61548007, 0. , 0.7015925 , 0.097125 ], - [0.6381967 , 0.1865625 , 0.7184075 , 0.22534375], - [0.6274239 , 0.22104688, 0.71140516, 0.27134374], - [0.39566743, 0.24370313, 0.43578455, 0.284375 ], - [0.2673302 , 0.245625 , 0.3043794 , 0.27353126], - [0.7137705 , 0.15429688, 0.726815 , 0.17114063], - [0.6003747 , 0.25942189, 0.6438876 , 0.27320313], - [0.68845433, 0.13501562, 0.714637 , 0.17245312], - [0.69358313, 0.10959375, 0.7043091 , 0.12409375], - [0.493911 , 0. , 0.72571427, 0.299 ], + np.array( + [ + [ + [0.5633255, 0.34003124, 0.69857144, 0.4009531], + [0.56262296, 0.0015625, 1.0, 0.5431719], + [0.16374707, 0.60728127, 0.813911, 0.77823436], + [0.5841452, 0.21182813, 0.65156907, 0.24670312], + [0.8056206, 0.048875, 0.90124124, 0.1553125], + [0.6729742, 0.09317187, 0.7696956, 0.21203125], + [0.3848478, 0.002125, 0.61522245, 0.303], + [0.61548007, 0.0, 0.7015925, 0.097125], + [0.6381967, 0.1865625, 0.7184075, 0.22534375], + [0.6274239, 0.22104688, 0.71140516, 0.27134374], + [0.39566743, 0.24370313, 0.43578455, 0.284375], + [0.2673302, 0.245625, 0.3043794, 0.27353126], + [0.7137705, 0.15429688, 0.726815, 0.17114063], + [0.6003747, 0.25942189, 0.6438876, 0.27320313], + [0.68845433, 0.13501562, 0.714637, 0.17245312], + [0.69358313, 0.10959375, 0.7043091, 0.12409375], + [0.493911, 0.0, 0.72571427, 0.299], [0.69576114, 0.15107812, 0.70714283, 0.16332813], - [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), + [0.4763466, 0.7769531, 0.54334897, 0.9675937], + ] + ] + ), np.array([[]]), - np.array([[44, 67, 1, 49, 51, 51, 79, 1, 47, 47, 51, 51,\ - 56, 50, 56, 56, 79, 57, 81]]), - np.array([b'000000397133.jpg']) + np.array([[44, 67, 1, 49, 51, 51, 79, 1, 47, 47, 51, 51, 56, 50, 56, 56, 79, 57, 81]]), + np.array([b"000000397133.jpg"]), ] ground_truth_2 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.9358696 , 0.07528409, 0.99891305, 0.25 ], - [0.8242174 , 0.3309659 , 0.93508697, 0.47301137], - [0.77413046, 0.22599432, 0.9858696 , 0.8179261 ], - [0.32582608, 0.8575 , 0.98426086, 0.9984659 ], - [0.77795655, 0.6268466 , 0.89930433, 0.73434657], - [0.5396087 , 0.39053977, 0.8483913 , 0.5615057 ], - [0.58473915, 0.75661933, 0.5998261 , 0.83579546], - [0.80391306, 0.6129829 , 0.8733478 , 0.66201705], - [0.8737391 , 0.6579546 , 0.943 , 0.7053693 ], - [0.775 , 0.6549716 , 0.8227391 , 0.6882955 ], - [0.8130869 , 0.58292615, 0.90526086, 0.62551135], - [0.7844348 , 0.68735796, 0.98182607, 0.83329546], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array( + [ + [ + [0.51508695, 0.2911648, 0.5903478, 0.31360796], + [0.9358696, 0.07528409, 0.99891305, 0.25], + [0.8242174, 0.3309659, 0.93508697, 0.47301137], + [0.77413046, 0.22599432, 0.9858696, 0.8179261], + [0.32582608, 0.8575, 0.98426086, 0.9984659], + [0.77795655, 0.6268466, 0.89930433, 0.73434657], + [0.5396087, 0.39053977, 0.8483913, 0.5615057], + [0.58473915, 0.75661933, 0.5998261, 0.83579546], + [0.80391306, 0.6129829, 0.8733478, 0.66201705], + [0.8737391, 0.6579546, 0.943, 0.7053693], + [0.775, 0.6549716, 0.8227391, 0.6882955], + [0.8130869, 0.58292615, 0.90526086, 0.62551135], + [0.7844348, 0.68735796, 0.98182607, 0.83329546], + [0.872, 0.6190057, 0.9306522, 0.6591761], + ] + ] + ), np.array([[]]), np.array([[64, 62, 62, 67, 82, 52, 79, 81, 55, 55, 
55, 55, 62, 55]]), - np.array([b'000000037777.jpg']) + np.array([b"000000037777.jpg"]), ] self.assertEqual(mAP.result(), 0) @@ -541,253 +628,259 @@ def test_tensorflow_VOCmAP(self): mAP.update(detection, ground_truth) mAP.update(detection, ground_truth) - self.assertEqual(format(mAP.result(), '.5f'), - '0.18182') + self.assertEqual(format(mAP.result(), ".5f"), "0.18182") mAP.update(detection_2, ground_truth_2) - self.assertEqual(format(mAP.result(), '.5f'), - '0.20347') + self.assertEqual(format(mAP.result(), ".5f"), "0.20347") mAP.reset() mAP.update(detection, ground_truth) - self.assertEqual(format(mAP.result(), '.5f'), - '0.18182') + self.assertEqual(format(mAP.result(), ".5f"), "0.18182") ground_truth_1 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[[64, 62]]]), - np.array([b'000000037777.jpg']) + np.array([b"000000037777.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection, ground_truth_1) ground_truth_2 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[64]]), - np.array([b'000000037700.jpg']) + np.array([b"000000037700.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection, ground_truth_2) detection_1 = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. , 0.98301625, 0.520178 ]]]), - np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219], [0.5589304, 0.0, 0.98301625, 0.520178]]]), + np.array([[0.9267181, 0.8510787, 0.60418576, 0.35155892, 0.31158054]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] ground_truth_1 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[64, 62]]), - np.array([b'000000011.jpg']) + np.array([b"000000011.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection_1, ground_truth_1) ground_truth_2 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[64, 62]]), - np.array([b'000000012.jpg']) + np.array([b"000000012.jpg"]), ] detection_2 = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. 
, 0.98301625, 0.520178 ]]]), - np.array([[0.9267181 , 0.8510787]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219], [0.5589304, 0.0, 0.98301625, 0.520178]]]), + np.array([[0.9267181, 0.8510787]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] self.assertRaises(ValueError, mAP.update, detection_2, ground_truth_2) - def test_tensorflow_COCOmAP(self): import os - output_index_mapping = {'num_detections':0, 'boxes':1, 'scores':2, 'classes':3} - metrics = METRICS('tensorflow') - fake_dict = 'dog: 1' - with open('anno.yaml', 'w', encoding="utf-8") as f: + + output_index_mapping = {"num_detections": 0, "boxes": 1, "scores": 2, "classes": 3} + metrics = METRICS("tensorflow") + fake_dict = "dog: 1" + with open("anno.yaml", "w", encoding="utf-8") as f: f.write(fake_dict) - mAP = metrics['COCOmAP']('anno.yaml') - mAP2 = metrics['COCOmAPv2']('anno.yaml', output_index_mapping=output_index_mapping) - self.assertEqual(mAP.category_map_reverse['dog'], 1) - self.assertEqual(mAP2.category_map_reverse['dog'], 1) + mAP = metrics["COCOmAP"]("anno.yaml") + mAP2 = metrics["COCOmAPv2"]("anno.yaml", output_index_mapping=output_index_mapping) + self.assertEqual(mAP.category_map_reverse["dog"], 1) + self.assertEqual(mAP2.category_map_reverse["dog"], 1) detection = [ np.array([[5]]), np.array([[5]]), - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. , 0.98301625, 0.520178 ], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762], - [0.40032804, 0.01218696, 0.6924763 , 0.30341768], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), - np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array( + [ + [ + [0.16117382, 0.59801614, 0.81511605, 0.7858219], + [0.5589304, 0.0, 0.98301625, 0.520178], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + [0.40032804, 0.01218696, 0.6924763, 0.30341768], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + ] + ] + ), + np.array([[0.9267181, 0.8510787, 0.60418576, 0.35155892, 0.31158054]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] ground_truth = [ - np.array([[[0.5633255 , 0.34003124, 0.69857144, 0.4009531 ], - [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), - np.array([['a', 'b']]), + np.array([[[0.5633255, 0.34003124, 0.69857144, 0.4009531], [0.4763466, 0.7769531, 0.54334897, 0.9675937]]]), + np.array([["a", "b"]]), np.array([[]]), - np.array([b'000000397133.jpg']) + np.array([b"000000397133.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection, ground_truth) - os.remove('anno.yaml') + os.remove("anno.yaml") - mAP = metrics['COCOmAP']() - mAP2 = metrics['COCOmAPv2']() + mAP = metrics["COCOmAP"]() + mAP2 = metrics["COCOmAPv2"]() detection = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. 
, 0.98301625, 0.520178 ], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762], - [0.40032804, 0.01218696, 0.6924763 , 0.30341768], - [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), - np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array( + [ + [ + [0.16117382, 0.59801614, 0.81511605, 0.7858219], + [0.5589304, 0.0, 0.98301625, 0.520178], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + [0.40032804, 0.01218696, 0.6924763, 0.30341768], + [0.62706745, 0.35748824, 0.6892729, 0.41513762], + ] + ] + ), + np.array([[0.9267181, 0.8510787, 0.60418576, 0.35155892, 0.31158054]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] detection_2 = [ np.array([[8]]), - np.array([[[0.82776225, 0.5865939 , 0.8927653 , 0.6302338 ], - [0.8375764 , 0.6424138 , 0.9055594 , 0.6921875 ], - [0.57902956, 0.39394334, 0.8342961 , 0.5577197 ], - [0.7949219 , 0.6513021 , 0.8472295 , 0.68427753], - [0.809729 , 0.5947042 , 0.8539927 , 0.62916476], - [0.7258591 , 0.08907133, 1. , 0.86224866], - [0.43100086, 0.37782395, 0.8384069 , 0.5616918 ], - [0.32005906, 0.84334356, 1. , 1. ]]]), - np.array([[0.86698544, 0.7562499 , 0.66414887, 0.64498234,\ - 0.63083494,0.46618757, 0.3914739 , 0.3094324 ]]), - np.array([[55., 55., 79., 55., 55., 67., 79., 82.]]) + np.array( + [ + [ + [0.82776225, 0.5865939, 0.8927653, 0.6302338], + [0.8375764, 0.6424138, 0.9055594, 0.6921875], + [0.57902956, 0.39394334, 0.8342961, 0.5577197], + [0.7949219, 0.6513021, 0.8472295, 0.68427753], + [0.809729, 0.5947042, 0.8539927, 0.62916476], + [0.7258591, 0.08907133, 1.0, 0.86224866], + [0.43100086, 0.37782395, 0.8384069, 0.5616918], + [0.32005906, 0.84334356, 1.0, 1.0], + ] + ] + ), + np.array([[0.86698544, 0.7562499, 0.66414887, 0.64498234, 0.63083494, 0.46618757, 0.3914739, 0.3094324]]), + np.array([[55.0, 55.0, 79.0, 55.0, 55.0, 67.0, 79.0, 82.0]]), ] ground_truth = [ - np.array([[[0.5633255 , 0.34003124, 0.69857144, 0.4009531 ], - [0.56262296, 0.0015625 , 1. , 0.5431719 ], - [0.16374707, 0.60728127, 0.813911 , 0.77823436], - [0.5841452 , 0.21182813, 0.65156907, 0.24670312], - [0.8056206 , 0.048875 , 0.90124124, 0.1553125 ], - [0.6729742 , 0.09317187, 0.7696956 , 0.21203125], - [0.3848478 , 0.002125 , 0.61522245, 0.303 ], - [0.61548007, 0. , 0.7015925 , 0.097125 ], - [0.6381967 , 0.1865625 , 0.7184075 , 0.22534375], - [0.6274239 , 0.22104688, 0.71140516, 0.27134374], - [0.39566743, 0.24370313, 0.43578455, 0.284375 ], - [0.2673302 , 0.245625 , 0.3043794 , 0.27353126], - [0.7137705 , 0.15429688, 0.726815 , 0.17114063], - [0.6003747 , 0.25942189, 0.6438876 , 0.27320313], - [0.68845433, 0.13501562, 0.714637 , 0.17245312], - [0.69358313, 0.10959375, 0.7043091 , 0.12409375], - [0.493911 , 0. 
, 0.72571427, 0.299 ], + np.array( + [ + [ + [0.5633255, 0.34003124, 0.69857144, 0.4009531], + [0.56262296, 0.0015625, 1.0, 0.5431719], + [0.16374707, 0.60728127, 0.813911, 0.77823436], + [0.5841452, 0.21182813, 0.65156907, 0.24670312], + [0.8056206, 0.048875, 0.90124124, 0.1553125], + [0.6729742, 0.09317187, 0.7696956, 0.21203125], + [0.3848478, 0.002125, 0.61522245, 0.303], + [0.61548007, 0.0, 0.7015925, 0.097125], + [0.6381967, 0.1865625, 0.7184075, 0.22534375], + [0.6274239, 0.22104688, 0.71140516, 0.27134374], + [0.39566743, 0.24370313, 0.43578455, 0.284375], + [0.2673302, 0.245625, 0.3043794, 0.27353126], + [0.7137705, 0.15429688, 0.726815, 0.17114063], + [0.6003747, 0.25942189, 0.6438876, 0.27320313], + [0.68845433, 0.13501562, 0.714637, 0.17245312], + [0.69358313, 0.10959375, 0.7043091, 0.12409375], + [0.493911, 0.0, 0.72571427, 0.299], [0.69576114, 0.15107812, 0.70714283, 0.16332813], - [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), + [0.4763466, 0.7769531, 0.54334897, 0.9675937], + ] + ] + ), np.array([[]]), - np.array([[44, 67, 1, 49, 51, 51, 79, 1, 47, 47, 51, 51,\ - 56, 50, 56, 56, 79, 57, 81]]), - np.array([b'000000397133.jpg']) + np.array([[44, 67, 1, 49, 51, 51, 79, 1, 47, 47, 51, 51, 56, 50, 56, 56, 79, 57, 81]]), + np.array([b"000000397133.jpg"]), ] ground_truth_2 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.9358696 , 0.07528409, 0.99891305, 0.25 ], - [0.8242174 , 0.3309659 , 0.93508697, 0.47301137], - [0.77413046, 0.22599432, 0.9858696 , 0.8179261 ], - [0.32582608, 0.8575 , 0.98426086, 0.9984659 ], - [0.77795655, 0.6268466 , 0.89930433, 0.73434657], - [0.5396087 , 0.39053977, 0.8483913 , 0.5615057 ], - [0.58473915, 0.75661933, 0.5998261 , 0.83579546], - [0.80391306, 0.6129829 , 0.8733478 , 0.66201705], - [0.8737391 , 0.6579546 , 0.943 , 0.7053693 ], - [0.775 , 0.6549716 , 0.8227391 , 0.6882955 ], - [0.8130869 , 0.58292615, 0.90526086, 0.62551135], - [0.7844348 , 0.68735796, 0.98182607, 0.83329546], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array( + [ + [ + [0.51508695, 0.2911648, 0.5903478, 0.31360796], + [0.9358696, 0.07528409, 0.99891305, 0.25], + [0.8242174, 0.3309659, 0.93508697, 0.47301137], + [0.77413046, 0.22599432, 0.9858696, 0.8179261], + [0.32582608, 0.8575, 0.98426086, 0.9984659], + [0.77795655, 0.6268466, 0.89930433, 0.73434657], + [0.5396087, 0.39053977, 0.8483913, 0.5615057], + [0.58473915, 0.75661933, 0.5998261, 0.83579546], + [0.80391306, 0.6129829, 0.8733478, 0.66201705], + [0.8737391, 0.6579546, 0.943, 0.7053693], + [0.775, 0.6549716, 0.8227391, 0.6882955], + [0.8130869, 0.58292615, 0.90526086, 0.62551135], + [0.7844348, 0.68735796, 0.98182607, 0.83329546], + [0.872, 0.6190057, 0.9306522, 0.6591761], + ] + ] + ), np.array([[]]), np.array([[64, 62, 62, 67, 82, 52, 79, 81, 55, 55, 55, 55, 62, 55]]), - np.array([b'000000037777.jpg']) + np.array([b"000000037777.jpg"]), ] - + self.assertEqual(mAP.result(), 0) self.assertEqual(mAP2.result(), 0) mAP.update(detection, ground_truth) - + mAP.update(detection, ground_truth) - self.assertEqual(format(mAP.result(), '.5f'), - '0.14149') + self.assertEqual(format(mAP.result(), ".5f"), "0.14149") mAP.update(detection_2, ground_truth_2) - self.assertEqual(format(mAP.result(), '.5f'), - '0.13366') + self.assertEqual(format(mAP.result(), ".5f"), "0.13366") mAP.reset() mAP.update(detection, ground_truth) - self.assertEqual(format(mAP.result(), '.5f'), - '0.14149') + self.assertEqual(format(mAP.result(), ".5f"), "0.14149") mAP2.update(detection, ground_truth) - + 
mAP2.update(detection, ground_truth) - self.assertEqual(format(mAP2.result(), '.5f'), - '0.14149') + self.assertEqual(format(mAP2.result(), ".5f"), "0.14149") + + mAP2 = metrics["COCOmAPv2"](output_index_mapping=output_index_mapping) - mAP2 = metrics['COCOmAPv2'](output_index_mapping=output_index_mapping) - mAP2.update(detection_2, ground_truth_2) - self.assertEqual(format(mAP2.result(), '.5f'), - '0.20520') + self.assertEqual(format(mAP2.result(), ".5f"), "0.20520") mAP2.reset() mAP2.update(detection_2, ground_truth_2) - self.assertEqual(format(mAP2.result(), '.5f'), - '0.20520') - - mAP2 = metrics['COCOmAPv2']() - + self.assertEqual(format(mAP2.result(), ".5f"), "0.20520") + + mAP2 = metrics["COCOmAPv2"]() + ground_truth_1 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[[64, 62]]]), - np.array([b'000000037777.jpg']) + np.array([b"000000037777.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection, ground_truth_1) self.assertRaises(ValueError, mAP2.update, detection, ground_truth_1) - + ground_truth_2 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[64]]), - np.array([b'000000037700.jpg']) + np.array([b"000000037700.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection, ground_truth_2) self.assertRaises(ValueError, mAP2.update, detection, ground_truth_2) - + detection_1 = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. , 0.98301625, 0.520178 ]]]), - np.array([[0.9267181 , 0.8510787 , 0.60418576, 0.35155892, 0.31158054]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219], [0.5589304, 0.0, 0.98301625, 0.520178]]]), + np.array([[0.9267181, 0.8510787, 0.60418576, 0.35155892, 0.31158054]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] ground_truth_1 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[64, 62]]), - np.array([b'000000011.jpg']) + np.array([b"000000011.jpg"]), ] self.assertRaises(ValueError, mAP.update, detection_1, ground_truth_1) self.assertRaises(ValueError, mAP2.update, detection_1, ground_truth_1) - + ground_truth_2 = [ - np.array([[[0.51508695, 0.2911648 , 0.5903478 , 0.31360796], - [0.872 , 0.6190057 , 0.9306522 , 0.6591761 ]]]), + np.array([[[0.51508695, 0.2911648, 0.5903478, 0.31360796], [0.872, 0.6190057, 0.9306522, 0.6591761]]]), np.array([[]]), np.array([[64, 62]]), - np.array([b'000000012.jpg']) + np.array([b"000000012.jpg"]), ] detection_2 = [ - np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], - [0.5589304 , 0. 
, 0.98301625, 0.520178 ]]]), - np.array([[0.9267181 , 0.8510787]]), - np.array([[ 1., 67., 51., 79., 47.]]) + np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219], [0.5589304, 0.0, 0.98301625, 0.520178]]]), + np.array([[0.9267181, 0.8510787]]), + np.array([[1.0, 67.0, 51.0, 79.0, 47.0]]), ] self.assertRaises(ValueError, mAP.update, detection_2, ground_truth_2) self.assertRaises(ValueError, mAP2.update, detection_2, ground_truth_2) - + @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows now") def test__accuracy(self): predicts1 = [1, 0, 1, 1] @@ -795,15 +888,15 @@ def test__accuracy(self): predicts2 = [[0, 0], [0, 0]] labels2 = [[0, 1], [1, 1]] - + predicts3 = [[[0, 1], [0, 0], [0, 1]], [[0, 1], [0, 1], [0, 1]]] labels3 = [[[0, 1], [0, 1], [1, 0]], [[1, 0], [1, 0], [1, 0]]] - predicts4 = [[0.2, 0.8], [0.1, 0.9], [0.3, 0.7], [0.4, 0.6]] #1,1,1,1 + predicts4 = [[0.2, 0.8], [0.1, 0.9], [0.3, 0.7], [0.4, 0.6]] # 1,1,1,1 labels4 = [0, 1, 0, 0] - metrics = METRICS('pytorch') - acc = metrics['Accuracy']() + metrics = METRICS("pytorch") + acc = metrics["Accuracy"]() acc.update(predicts1, labels1) acc_result = acc.result() self.assertEqual(acc_result, 0.5) @@ -817,8 +910,8 @@ def test__accuracy(self): acc.update(predicts4, labels4) self.assertEqual(acc.result(), 0.25) - metrics = METRICS('mxnet') - acc = metrics['Accuracy']() + metrics = METRICS("mxnet") + acc = metrics["Accuracy"]() acc.update(predicts1, labels1) acc_result = acc.result() self.assertEqual(acc_result, 0.5) @@ -832,8 +925,8 @@ def test__accuracy(self): acc.update(predicts4, labels4) self.assertEqual(acc.result(), 0.25) - metrics = METRICS('onnxrt_qlinearops') - acc = metrics['Accuracy']() + metrics = METRICS("onnxrt_qlinearops") + acc = metrics["Accuracy"]() acc.update(predicts1, labels1) acc_result = acc.result() self.assertEqual(acc_result, 0.5) @@ -850,16 +943,15 @@ def test__accuracy(self): acc.reset() acc.update(1, 1) self.assertEqual(acc.result(), 1.0) - + wrong_predictions = [1, 0, 0] wrong_labels = [[0, 1, 1]] self.assertRaises(ValueError, acc.update, wrong_predictions, wrong_labels) - @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows yet") def test_mxnet_accuracy(self): - metrics = METRICS('mxnet') - acc = metrics['Accuracy']() + metrics = METRICS("mxnet") + acc = metrics["Accuracy"]() predicts = [1, 0, 1, 1] labels = [0, 1, 1, 1] acc.update(predicts, labels) @@ -873,17 +965,17 @@ def test_mse(self): predicts2 = [1, 1, 1, 1] labels2 = [0, 1, 1, 0] - metrics = METRICS('onnxrt_qlinearops') - mse = metrics['MSE'](compare_label=False) + metrics = METRICS("onnxrt_qlinearops") + mse = metrics["MSE"](compare_label=False) mse.update(predicts1, labels1) mse_result = mse.result() self.assertEqual(mse_result, 0.75) mse.update(predicts2, labels2) mse_result = mse.result() self.assertEqual(mse_result, 0.625) - - metrics = METRICS('tensorflow') - mse = metrics['MSE'](compare_label=False) + + metrics = METRICS("tensorflow") + mse = metrics["MSE"](compare_label=False) mse.update(predicts1, labels1) mse_result = mse.result() self.assertEqual(mse_result, 0.75) @@ -891,9 +983,8 @@ def test_mse(self): mse_result = mse.result() self.assertEqual(mse_result, 0.625) - - metrics = METRICS('mxnet') - mse = metrics['MSE']() + metrics = METRICS("mxnet") + mse = metrics["MSE"]() mse.update(predicts1, labels1) mse_result = mse.result() self.assertEqual(mse_result, 0.75) @@ -901,8 +992,8 @@ def test_mse(self): mse_result = mse.result() self.assertEqual(mse_result, 0.625) - 
metrics = METRICS('pytorch') - mse = metrics['MSE']() + metrics = METRICS("pytorch") + mse = metrics["MSE"]() mse.update(predicts1, labels1) mse_result = mse.result() self.assertEqual(mse_result, 0.75) @@ -917,8 +1008,8 @@ def test_mae(self): predicts2 = [1, 1, 1, 1] labels2 = [1, 1, 1, 0] - metrics = METRICS('tensorflow') - mae = metrics['MAE']() + metrics = METRICS("tensorflow") + mae = metrics["MAE"]() mae.update(predicts1, labels1) mae_result = mae.result() self.assertEqual(mae_result, 0.75) @@ -930,8 +1021,8 @@ def test_mae(self): mae_result = mae.result() self.assertEqual(mae_result, 0.25) - metrics = METRICS('pytorch') - mae = metrics['MAE']() + metrics = METRICS("pytorch") + mae = metrics["MAE"]() mae.update(predicts1, labels1) mae_result = mae.result() self.assertEqual(mae_result, 0.75) @@ -939,8 +1030,8 @@ def test_mae(self): mae_result = mae.result() self.assertEqual(mae_result, 0.5) - metrics = METRICS('mxnet') - mae = metrics['MAE']() + metrics = METRICS("mxnet") + mae = metrics["MAE"]() mae.update(predicts1, labels1) mae_result = mae.result() self.assertEqual(mae_result, 0.75) @@ -948,19 +1039,19 @@ def test_mae(self): mae_result = mae.result() self.assertEqual(mae_result, 0.5) - metrics = METRICS('onnxrt_qlinearops') - mae = metrics['MAE']() + metrics = METRICS("onnxrt_qlinearops") + mae = metrics["MAE"]() mae.update(predicts1, labels1) mae_result = mae.result() self.assertEqual(mae_result, 0.75) mae.update(predicts2, labels2) mae_result = mae.result() self.assertEqual(mae_result, 0.5) - + self.assertRaises(AssertionError, mae.update, [1], [1, 2]) - self.assertRaises(AssertionError, mae.update, 1, [1,2]) + self.assertRaises(AssertionError, mae.update, 1, [1, 2]) self.assertRaises(AssertionError, mae.update, [1, 2], [1]) - self.assertRaises(AssertionError, mae.update, 1, np.array([1,2])) + self.assertRaises(AssertionError, mae.update, 1, np.array([1, 2])) @unittest.skipIf(platform.system().lower() == "windows", "not support mxnet on windows now") def test_rmse(self): @@ -969,8 +1060,8 @@ def test_rmse(self): predicts2 = [1, 1, 1, 1] labels2 = [1, 0, 0, 0] - metrics = METRICS('tensorflow') - rmse = metrics['RMSE']() + metrics = METRICS("tensorflow") + rmse = metrics["RMSE"]() rmse.update(predicts1, labels1) rmse_result = rmse.result() self.assertEqual(rmse_result, 0.5) @@ -979,8 +1070,8 @@ def test_rmse(self): rmse_result = rmse.result() self.assertAlmostEqual(rmse_result, np.sqrt(0.75)) - metrics = METRICS('pytorch') - rmse = metrics['RMSE']() + metrics = METRICS("pytorch") + rmse = metrics["RMSE"]() rmse.update(predicts1, labels1) rmse_result = rmse.result() self.assertEqual(rmse_result, 0.5) @@ -988,8 +1079,8 @@ def test_rmse(self): rmse_result = rmse.result() self.assertAlmostEqual(rmse_result, np.sqrt(0.5)) - metrics = METRICS('mxnet') - rmse = metrics['RMSE']() + metrics = METRICS("mxnet") + rmse = metrics["RMSE"]() rmse.update(predicts1, labels1) rmse_result = rmse.result() self.assertEqual(rmse_result, 0.5) @@ -997,8 +1088,8 @@ def test_rmse(self): rmse_result = rmse.result() self.assertAlmostEqual(rmse_result, np.sqrt(0.5)) - metrics = METRICS('onnxrt_qlinearops') - rmse = metrics['RMSE']() + metrics = METRICS("onnxrt_qlinearops") + rmse = metrics["RMSE"]() rmse.update(predicts1, labels1) rmse_result = rmse.result() self.assertEqual(rmse_result, 0.5) @@ -1007,8 +1098,8 @@ def test_rmse(self): self.assertAlmostEqual(rmse_result, np.sqrt(0.5)) def test_loss(self): - metrics = METRICS('pytorch') - loss = metrics['Loss']() + metrics = METRICS("pytorch") + loss = 
metrics["Loss"]() predicts = [1, 0, 0, 1] labels = [0, 1, 0, 0] loss.update(predicts, labels) @@ -1025,9 +1116,8 @@ def test_loss(self): loss.update(predicts, labels) self.assertEqual(loss.result(), 0.5) - - metrics = METRICS('onnxrt_qlinearops') - loss = metrics['Loss']() + metrics = METRICS("onnxrt_qlinearops") + loss = metrics["Loss"]() predicts = [1, 0, 0, 1] labels = [0, 1, 0, 0] loss.update(predicts, labels) @@ -1044,5 +1134,6 @@ def test_loss(self): loss.update(predicts, labels) self.assertEqual(loss.result(), 0.5) + if __name__ == "__main__": unittest.main() diff --git a/test/metric/test_mse.py b/test/metric/test_mse.py index 01276b04102..351d7d138af 100644 --- a/test/metric/test_mse.py +++ b/test/metric/test_mse.py @@ -1,14 +1,16 @@ -"""Tests for quantization""" -import numpy as np -import unittest +"""Tests for quantization.""" import os import shutil -import yaml +import unittest + +import numpy as np import torch import torchvision +import yaml + def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -26,14 +28,15 @@ def build_fake_yaml(): relative: 0.01 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: + yaml.dump(y, f) f.close() + def build_fake_yaml2(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -53,14 +56,15 @@ def build_fake_yaml2(): relative: -0.01 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml2.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) + with open("fake_yaml2.yaml", "w", encoding="utf-8") as f: + yaml.dump(y, f) f.close() + def build_fake_yaml3(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -83,14 +87,15 @@ def build_fake_yaml3(): relative: 0.01 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml3.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) + with open("fake_yaml3.yaml", "w", encoding="utf-8") as f: + yaml.dump(y, f) f.close() + def build_fake_yaml4(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -114,14 +119,15 @@ def build_fake_yaml4(): relative: 0.01 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml4.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) + with open("fake_yaml4.yaml", "w", encoding="utf-8") as f: + yaml.dump(y, f) f.close() + def build_ox_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: onnxrt_qlinearops @@ -141,44 +147,49 @@ def build_ox_yaml(): max_trials: 3 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('ox_yaml.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) + with open("ox_yaml.yaml", "w", encoding="utf-8") as f: + yaml.dump(y, f) f.close() + def build_fake_model(): import tensorflow as tf + try: graph = tf.Graph() graph_def = tf.GraphDef() with tf.Session() as sess: - x = tf.placeholder(tf.float64, shape=(1,3,3,1), name='x') - y = tf.constant(np.random.random((2,2,1,1)), name='y') - op = tf.nn.conv2d(input=x, filter=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + x = tf.placeholder(tf.float64, shape=(1, 3, 3, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)), name="y") + op = tf.nn.conv2d(input=x, filter=y, strides=[1, 1, 1, 
1], padding="VALID", name="op_to_store") sess.run(tf.global_variables_initializer()) - constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ["op_to_store"]) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") except: graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float64, shape=(1,3,3,1), name='x') - y = tf.compat.v1.constant(np.random.random((2,2,1,1)), name='y') - op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + x = tf.compat.v1.placeholder(tf.float64, shape=(1, 3, 3, 1), name="x") + y = tf.compat.v1.constant(np.random.random((2, 2, 1, 1)), name="y") + op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") return graph + def build_ox_model(): path = "mb_v2.onnx" model = torchvision.models.mobilenet_v2() @@ -186,16 +197,18 @@ def build_ox_model(): x = torch.randn(100, 3, 224, 224, requires_grad=True) torch_out = model(x) - torch.onnx.export(model, - x, - path, - export_params=True, - opset_version=12, - do_constant_folding=True, - input_names = ["input"], - output_names = ["output"], - dynamic_axes={"input" : {0 : "batch_size"}, - "output" : {0 : "batch_size"}}) + torch.onnx.export( + model, + x, + path, + export_params=True, + opset_version=12, + do_constant_folding=True, + input_names=["input"], + output_names=["output"], + dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}}, + ) + class dataset: def __init__(self): @@ -211,8 +224,8 @@ def __len__(self): def __getitem__(self, index): return self.data[index], self.label[index] -class TestQuantization(unittest.TestCase): +class TestQuantization(unittest.TestCase): @classmethod def setUpClass(self): self.constant_graph = build_fake_model() @@ -222,22 +235,23 @@ def setUpClass(self): build_ox_yaml() build_fake_yaml3() build_fake_yaml4() - + @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') - os.remove('fake_yaml2.yaml') - os.remove('ox_yaml.yaml') - os.remove('mb_v2.onnx') - os.remove('fake_yaml4.yaml') - os.remove('fake_yaml3.yaml') - + os.remove("fake_yaml.yaml") + os.remove("fake_yaml2.yaml") + os.remove("ox_yaml.yaml") + os.remove("mb_v2.onnx") + os.remove("fake_yaml4.yaml") + os.remove("fake_yaml3.yaml") + shutil.rmtree("saved", ignore_errors=True) def test_ru_mse_one_trial(self): from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", (100, 3, 3, 1), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph @@ -245,8 +259,9 @@ def 
test_ru_mse_one_trial(self): def test_ru_mse_max_trials(self): from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml2.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + + quantizer = Quantization("fake_yaml2.yaml") + dataset = quantizer.dataset("dummy", (100, 3, 3, 1), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph @@ -254,8 +269,9 @@ def test_ru_mse_max_trials(self): def test_ru_mse_max_trials_multimetric(self): from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml3.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + + quantizer = Quantization("fake_yaml3.yaml") + dataset = quantizer.dataset("dummy", (100, 3, 3, 1), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph @@ -263,8 +279,9 @@ def test_ru_mse_max_trials_multimetric(self): def test_ru_mse_max_trials_multimetric_weight(self): from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml4.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + + quantizer = Quantization("fake_yaml4.yaml") + dataset = quantizer.dataset("dummy", (100, 3, 3, 1), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph @@ -272,12 +289,14 @@ def test_ru_mse_max_trials_multimetric_weight(self): def test_ox_mse(self): from neural_compressor.experimental import Quantization, common - quantizer = Quantization('ox_yaml.yaml') + + quantizer = Quantization("ox_yaml.yaml") ds = dataset() quantizer.calib_dataloader = common.DataLoader(ds) quantizer.eval_dataloader = common.DataLoader(ds) - quantizer.model = 'mb_v2.onnx' + quantizer.model = "mb_v2.onnx" quantizer.fit() + if __name__ == "__main__": unittest.main() diff --git a/test/metric/test_mse_metric.py b/test/metric/test_mse_metric.py index 21a510121db..acd4a00e4a3 100644 --- a/test/metric/test_mse_metric.py +++ b/test/metric/test_mse_metric.py @@ -1,16 +1,17 @@ +import copy +import os +import shutil +import unittest +import numpy as np import torch import torchvision -import unittest -import os -from neural_compressor.adaptor import FRAMEWORKS -from neural_compressor.model import MODELS +from packaging.version import Version + import neural_compressor.adaptor.pytorch as nc_torch +from neural_compressor.adaptor import FRAMEWORKS from neural_compressor.experimental import Quantization, common -from packaging.version import Version -import shutil -import copy -import numpy as np +from neural_compressor.model import MODELS try: try: @@ -29,7 +30,7 @@ torch.manual_seed(1) -fake_ptq_yaml = ''' +fake_ptq_yaml = """ model: name: imagenet framework: pytorch @@ -52,9 +53,9 @@ random_seed: 9527 workspace: path: saved - ''' + """ -fake_dynamic_yaml = ''' +fake_dynamic_yaml = """ model: name: imagenet framework: pytorch @@ -79,33 +80,33 @@ random_seed: 9527 workspace: path: saved - ''' + """ def build_ptq_yaml(): - with open('ptq_yaml.yaml', 'w', encoding="utf-8") as f: + with open("ptq_yaml.yaml", "w", encoding="utf-8") as f: f.write(fake_ptq_yaml) def build_dynamic_yaml(): - with open('dynamic_yaml.yaml', 'w', encoding="utf-8") as f: + with open("dynamic_yaml.yaml", "w", encoding="utf-8") as f: 
f.write(fake_dynamic_yaml) def build_fx_ptq_yaml(): - fake_fx_ptq_yaml = fake_ptq_yaml.replace('pytorch', 'pytorch_fx') - with open('fx_ptq_yaml.yaml', 'w', encoding="utf-8") as f: + fake_fx_ptq_yaml = fake_ptq_yaml.replace("pytorch", "pytorch_fx") + with open("fx_ptq_yaml.yaml", "w", encoding="utf-8") as f: f.write(fake_fx_ptq_yaml) def build_fx_dynamic_yaml(): - fake_fx_dynamic_yaml = fake_dynamic_yaml.replace('pytorch', 'pytorch_fx') - with open('fx_dynamic_yaml.yaml', 'w', encoding="utf-8") as f: + fake_fx_dynamic_yaml = fake_dynamic_yaml.replace("pytorch", "pytorch_fx") + with open("fx_dynamic_yaml.yaml", "w", encoding="utf-8") as f: f.write(fake_fx_dynamic_yaml) def build_ipex_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: imagenet framework: pytorch_ipex @@ -127,22 +128,24 @@ def build_ipex_yaml(): random_seed: 9527 workspace: path: saved - ''' - with open('ipex_yaml.yaml', 'w', encoding="utf-8") as f: + """ + with open("ipex_yaml.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) @unittest.skipIf(TEST_IPEX, "TODO: Please wait to IPEX + PyTorch1.7 release") class TestPytorchAdaptor(unittest.TestCase): - framework_specific_info = {"device": "cpu", - "approach": "post_training_static_quant", - "random_seed": 1234, - "q_dataloader": None, - "workspace_path": './'} + framework_specific_info = { + "device": "cpu", + "approach": "post_training_static_quant", + "random_seed": 1234, + "q_dataloader": None, + "workspace_path": "./", + } framework = "pytorch" adaptor = FRAMEWORKS[framework](framework_specific_info) model = torchvision.models.quantization.resnet18() - nc_model = MODELS['pytorch'](model) + nc_model = MODELS["pytorch"](model) @classmethod def setUpClass(self): @@ -151,21 +154,21 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('ptq_yaml.yaml') - os.remove('dynamic_yaml.yaml') - shutil.rmtree('./saved', ignore_errors=True) - shutil.rmtree('runs', ignore_errors=True) + os.remove("ptq_yaml.yaml") + os.remove("dynamic_yaml.yaml") + shutil.rmtree("./saved", ignore_errors=True) + shutil.rmtree("runs", ignore_errors=True) def test_quantization_saved(self): - for fake_yaml in ['dynamic_yaml.yaml', 'ptq_yaml.yaml']: - if fake_yaml in ['dynamic_yaml.yaml']: + for fake_yaml in ["dynamic_yaml.yaml", "ptq_yaml.yaml"]: + if fake_yaml in ["dynamic_yaml.yaml"]: model = torchvision.models.quantization.resnet18() else: model = copy.deepcopy(self.model) - if fake_yaml in ['ptq_yaml.yaml']: + if fake_yaml in ["ptq_yaml.yaml"]: model.eval().fuse_model() quantizer = Quantization(fake_yaml) - dataset = quantizer.dataset('dummy', (100, 3, 256, 256), label=True) + dataset = quantizer.dataset("dummy", (100, 3, 256, 256), label=True) quantizer.model = model quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) @@ -175,15 +178,17 @@ def test_quantization_saved(self): @unittest.skipIf(not FX_MODE, "Unsupport Fx Mode with PyTorch Version Below 1.8") class TestPytorchFXAdaptor(unittest.TestCase): - framework_specific_info = {"device": "cpu", - "approach": "post_training_static_quant", - "random_seed": 1234, - "q_dataloader": None, - "workspace_path": './'} + framework_specific_info = { + "device": "cpu", + "approach": "post_training_static_quant", + "random_seed": 1234, + "q_dataloader": None, + "workspace_path": "./", + } framework = "pytorch_fx" adaptor = FRAMEWORKS[framework](framework_specific_info) model = torchvision.models.quantization.resnet18() - nc_model = MODELS['pytorch_fx'](model) + nc_model = 
MODELS["pytorch_fx"](model) @classmethod def setUpClass(self): @@ -192,35 +197,38 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fx_ptq_yaml.yaml') - os.remove('fx_dynamic_yaml.yaml') - shutil.rmtree('./saved', ignore_errors=True) - shutil.rmtree('runs', ignore_errors=True) + os.remove("fx_ptq_yaml.yaml") + os.remove("fx_dynamic_yaml.yaml") + shutil.rmtree("./saved", ignore_errors=True) + shutil.rmtree("runs", ignore_errors=True) def test_fx_static_quantization_saved(self): - fake_yaml = 'fx_ptq_yaml.yaml' + fake_yaml = "fx_ptq_yaml.yaml" model = copy.deepcopy(self.model) model.eval().fuse_model() quantizer = Quantization(fake_yaml) - dataset = quantizer.dataset('dummy', (100, 3, 256, 256), label=True) + dataset = quantizer.dataset("dummy", (100, 3, 256, 256), label=True) quantizer.model = model quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) q_model = quantizer.fit() self.assertTrue(bool(q_model)) - - @unittest.skipIf(PT_VERSION < Version("1.9.0-rc1"), - "Please use PyTroch 1.9 or higher version for dynamic quantization with pytorch_fx backend") + + @unittest.skipIf( + PT_VERSION < Version("1.9.0-rc1"), + "Please use PyTroch 1.9 or higher version for dynamic quantization with pytorch_fx backend", + ) def test_fx_dynamic_quantization_saved(self): - fake_yaml = 'fx_dynamic_yaml.yaml' + fake_yaml = "fx_dynamic_yaml.yaml" model = torchvision.models.resnet18() quantizer = Quantization(fake_yaml) quantizer.model = model - dataset = quantizer.dataset('dummy', (100, 3, 256, 256), label=True) + dataset = quantizer.dataset("dummy", (100, 3, 256, 256), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) q_model = quantizer.fit() self.assertTrue(bool(q_model)) + if __name__ == "__main__": unittest.main() diff --git a/test/metric/test_register_metric_transform.py b/test/metric/test_register_metric_transform.py index eaaf27631c0..81f5be31887 100644 --- a/test/metric/test_register_metric_transform.py +++ b/test/metric/test_register_metric_transform.py @@ -1,33 +1,38 @@ -"""Tests for neural_compressor register metric and postprocess """ -import numpy as np -import unittest -import platform +"""Tests for neural_compressor register metric and postprocess.""" import os +import platform +import unittest + +import numpy as np import yaml + def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: resnet_v1_101 framework: tensorflow inputs: input outputs: resnet_v1_101/predictions/Reshape_1 device: cpu - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: + yaml.dump(y, f) f.close() class TestRegisterMetric(unittest.TestCase): - model_url = 'https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/resnet101_fp32_pretrained_model.pb' - pb_path = '/tmp/.neural_compressor/resnet101_fp32_pretrained_model.pb' - #image_path = 'images/1024px-Doll_face_silver_Persian.jpg' - image_path = 'images/cat.jpg' + model_url = ( + "https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/resnet101_fp32_pretrained_model.pb" + ) + pb_path = "/tmp/.neural_compressor/resnet101_fp32_pretrained_model.pb" + # image_path = 'images/1024px-Doll_face_silver_Persian.jpg' + image_path = "images/cat.jpg" platform = platform.system().lower() if platform == "windows": - pb_path = 
'C:\\tmp\.neural_compressor\\resnet101_fp32_pretrained_model.pb' + pb_path = "C:\\tmp\.neural_compressor\\resnet101_fp32_pretrained_model.pb" + @classmethod def setUpClass(self): build_fake_yaml() @@ -35,31 +40,32 @@ def setUpClass(self): os.system("mkdir -p /tmp/.neural_compressor && wget {} -O {}".format(self.model_url, self.pb_path)) def test_register_metric_postprocess(self): - import PIL.Image + import PIL.Image + image = np.array(PIL.Image.open(self.image_path)) resize_image = np.resize(image, (224, 224, 3)) mean = [123.68, 116.78, 103.94] resize_image = resize_image - mean images = np.expand_dims(resize_image, axis=0) labels = [768] - from neural_compressor.experimental import Benchmark, common - from neural_compressor.experimental.common import Postprocess - from neural_compressor.experimental.common import Metric from neural_compressor.data.transforms.imagenet_transform import LabelShift + from neural_compressor.experimental import Benchmark, common + from neural_compressor.experimental.common import Metric, Postprocess from neural_compressor.metric import TensorflowTopK - os.environ['NC_ENV_CONF'] = 'True' - evaluator = Benchmark('fake_yaml.yaml') + os.environ["NC_ENV_CONF"] = "True" + + evaluator = Benchmark("fake_yaml.yaml") nc_postprocess = Postprocess(LabelShift, "label_benchmark", label_shift=1) evaluator.postprocess = nc_postprocess - nc_metric = Metric(TensorflowTopK, 'topk_benchmark') + nc_metric = Metric(TensorflowTopK, "topk_benchmark") evaluator.metric = nc_metric evaluator.b_dataloader = common.DataLoader(dataset=list(zip(images, labels))) evaluator.model = self.pb_path evaluator.fit() - evaluator = Benchmark('fake_yaml.yaml') - nc_metric = Metric(TensorflowTopK, 'topk_second') + evaluator = Benchmark("fake_yaml.yaml") + nc_metric = Metric(TensorflowTopK, "topk_second") evaluator.metric = nc_metric evaluator.b_dataloader = common.DataLoader(dataset=list(zip(images, labels))) evaluator.model = self.pb_path diff --git a/test/mixed_precision/test_mixed_precision.py b/test/mixed_precision/test_mixed_precision.py index b388df5eaad..1f333dde0fb 100644 --- a/test/mixed_precision/test_mixed_precision.py +++ b/test/mixed_precision/test_mixed_precision.py @@ -1,48 +1,46 @@ # # -*- coding: utf-8 -*- # -import unittest import os -import numpy as np -import neural_compressor.adaptor.pytorch as nc_torch import shutil +import unittest + +import numpy as np import tensorflow as tf +from onnx import TensorProto, helper +from packaging.version import Version +from tensorflow.core.framework import attr_value_pb2, graph_pb2, node_def_pb2 +from tensorflow.python.framework import dtypes, tensor_util + +import neural_compressor.adaptor.pytorch as nc_torch from neural_compressor import mix_precision -from neural_compressor.mix_precision import fit -from neural_compressor.utils.utility import LazyImport, CpuInfo from neural_compressor.adaptor.torch_utils.bf16_convert import BF16ModuleWrapper from neural_compressor.config import MixedPrecisionConfig, TuningCriterion -from onnx import helper, TensorProto -from packaging.version import Version -from tensorflow.core.framework import attr_value_pb2 -from tensorflow.core.framework import graph_pb2 -from tensorflow.core.framework import node_def_pb2 -from tensorflow.python.framework import tensor_util -from tensorflow.python.framework import dtypes +from neural_compressor.mix_precision import fit +from neural_compressor.utils.utility import CpuInfo, LazyImport PT_VERSION = nc_torch.get_torch_version() def build_matmul_model(): - A = 
helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 1, 5, 5]) - B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1, 1, 5, 1]) - C = helper.make_tensor_value_info('C', TensorProto.FLOAT, [1, 1, 5, 1]) - D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [1, 1, 5, 1]) - H = helper.make_tensor_value_info('H', TensorProto.FLOAT, [1, 1, 5, 1]) + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 1, 5, 5]) + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 1, 5, 1]) + C = helper.make_tensor_value_info("C", TensorProto.FLOAT, [1, 1, 5, 1]) + D = helper.make_tensor_value_info("D", TensorProto.FLOAT, [1, 1, 5, 1]) + H = helper.make_tensor_value_info("H", TensorProto.FLOAT, [1, 1, 5, 1]) - matmul_node = helper.make_node('MatMul', ['A', 'B'], ['C'], name='Matmul') + matmul_node = helper.make_node("MatMul", ["A", "B"], ["C"], name="Matmul") e_value = np.random.randint(2, size=(5)).astype(np.float32) - E_init = helper.make_tensor('E', TensorProto.FLOAT, [1, 1, 5, 1], e_value.reshape(5).tolist()) - add = helper.make_node('Add', ['C', 'E'], ['D'], name='add') + E_init = helper.make_tensor("E", TensorProto.FLOAT, [1, 1, 5, 1], e_value.reshape(5).tolist()) + add = helper.make_node("Add", ["C", "E"], ["D"], name="add") f_value = np.random.randint(2, size=(5)).astype(np.float32) - F_init = helper.make_tensor('F', TensorProto.FLOAT, [1, 1, 5, 1], e_value.reshape(5).tolist()) - add2 = helper.make_node('Add', ['D', 'F'], ['H'], name='add2') + F_init = helper.make_tensor("F", TensorProto.FLOAT, [1, 1, 5, 1], e_value.reshape(5).tolist()) + add2 = helper.make_node("Add", ["D", "F"], ["H"], name="add2") - graph = helper.make_graph([matmul_node, add, add2], 'test_graph_1', [A, B], [H], - [E_init, F_init]) + graph = helper.make_graph([matmul_node, add, add2], "test_graph_1", [A, B], [H], [E_init, F_init]) model = helper.make_model(graph) - model = helper.make_model(graph, **{'opset_imports': [helper.make_opsetid('', 16)]}) + model = helper.make_model(graph, **{"opset_imports": [helper.make_opsetid("", 16)]}) return model @@ -50,150 +48,165 @@ def build_tf_graph(): input_node = node_def_pb2.NodeDef() input_node.name = "input" input_node.op = "Placeholder" - input_node.attr["dtype"].CopyFrom( - attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + input_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv1_weight_node = node_def_pb2.NodeDef() conv1_weight_node.name = "conv1_weights" conv1_weight_node.op = "Const" conv1_weight_value = np.float32(np.abs(np.random.randn(3, 3, 3, 32))) - conv1_weight_node.attr['dtype'].CopyFrom( - attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - conv1_weight_node.attr['value'].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - conv1_weight_value, conv1_weight_value.dtype.type, conv1_weight_value.shape))) + conv1_weight_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv1_weight_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv1_weight_value, conv1_weight_value.dtype.type, conv1_weight_value.shape + ) + ) + ) conv1_node = node_def_pb2.NodeDef() conv1_node.name = "conv1" conv1_node.op = "Conv2D" - conv1_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv1_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv1_node.input.extend([input_node.name, conv1_weight_node.name]) 
- conv1_node.attr['strides'].CopyFrom( - attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1]))) - conv1_node.attr['dilations'].CopyFrom( - attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1]))) - conv1_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) - conv1_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + conv1_node.attr["strides"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv1_node.attr["dilations"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv1_node.attr["padding"].CopyFrom(attr_value_pb2.AttrValue(s=b"SAME")) + conv1_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) bias_node = node_def_pb2.NodeDef() bias_node.name = "conv1_bias" bias_node.op = "Const" bias_value = np.float32(np.abs(np.random.randn(32))) - bias_node.attr['dtype'].CopyFrom( - attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - bias_node.attr['value'].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - bias_value, bias_value.dtype.type, bias_value.shape))) + bias_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(bias_value, bias_value.dtype.type, bias_value.shape) + ) + ) bias_add_node = node_def_pb2.NodeDef() bias_add_node.name = "conv1_bias_add" bias_add_node.op = "BiasAdd" - bias_add_node.attr['T'].CopyFrom( - attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_add_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) bias_add_node.input.extend([conv1_node.name, bias_node.name]) - bias_add_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + bias_add_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) relu_node = node_def_pb2.NodeDef() relu_node.op = "Relu" relu_node.name = "relu" - relu_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + relu_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) relu_node.input.extend([bias_add_node.name]) conv2_weight_node = node_def_pb2.NodeDef() conv2_weight_node.name = "conv2_weights" conv2_weight_node.op = "Const" conv2_weight_value = np.float32(np.abs(np.random.randn(3, 3, 32, 32))) - conv2_weight_node.attr['dtype'].CopyFrom( - attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - conv2_weight_node.attr['value'].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - conv2_weight_value, conv2_weight_value.dtype.type, conv2_weight_value.shape))) + conv2_weight_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv2_weight_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv2_weight_value, conv2_weight_value.dtype.type, conv2_weight_value.shape + ) + ) + ) conv2_node = node_def_pb2.NodeDef() conv2_node.name = "conv2" conv2_node.op = "Conv2D" - conv2_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv2_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv2_node.input.extend([relu_node.name, conv2_weight_node.name]) - conv2_node.attr['strides'].CopyFrom( - 
attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1]))) - conv2_node.attr['dilations'].CopyFrom( - attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1]))) - conv2_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) - conv2_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + conv2_node.attr["strides"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv2_node.attr["dilations"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv2_node.attr["padding"].CopyFrom(attr_value_pb2.AttrValue(s=b"SAME")) + conv2_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) bias_node2 = node_def_pb2.NodeDef() bias_node2.name = "conv2_bias" bias_node2.op = "Const" bias_value2 = np.float32(np.abs(np.random.randn(32))) - bias_node2.attr['dtype'].CopyFrom( - attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - bias_node2.attr['value'].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - bias_value2, bias_value2.dtype.type, bias_value2.shape))) + bias_node2.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_node2.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(bias_value2, bias_value2.dtype.type, bias_value2.shape) + ) + ) bias_add_node2 = node_def_pb2.NodeDef() bias_add_node2.name = "conv2_bias_add" bias_add_node2.op = "BiasAdd" - bias_add_node2.attr['T'].CopyFrom( - attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_add_node2.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) bias_add_node2.input.extend([conv2_node.name, bias_node2.name]) - bias_add_node2.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + bias_add_node2.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) relu_node2 = node_def_pb2.NodeDef() relu_node2.op = "Relu" relu_node2.name = "relu2" - relu_node2.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + relu_node2.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) relu_node2.input.extend([bias_add_node2.name]) conv3_weight_node = node_def_pb2.NodeDef() conv3_weight_node.name = "conv3_weights" conv3_weight_node.op = "Const" conv3_weight_value = np.float32(np.abs(np.random.randn(3, 3, 32, 32))) - conv3_weight_node.attr['dtype'].CopyFrom( - attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - conv3_weight_node.attr['value'].CopyFrom( - attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - conv3_weight_value, conv3_weight_value.dtype.type, conv3_weight_value.shape))) + conv3_weight_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv3_weight_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv3_weight_value, conv3_weight_value.dtype.type, conv3_weight_value.shape + ) + ) + ) conv3_node = node_def_pb2.NodeDef() conv3_node.name = "conv3" conv3_node.op = "Conv2D" - conv3_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv3_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv3_node.input.extend([relu_node2.name, conv3_weight_node.name]) - conv3_node.attr['strides'].CopyFrom( - 
attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1]))) - conv3_node.attr['dilations'].CopyFrom( - attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1]))) - conv3_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) - conv3_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + conv3_node.attr["strides"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv3_node.attr["dilations"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv3_node.attr["padding"].CopyFrom(attr_value_pb2.AttrValue(s=b"SAME")) + conv3_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) identity_node = node_def_pb2.NodeDef() identity_node.name = "final" identity_node.op = "Identity" - identity_node.attr['T'].CopyFrom( - attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + identity_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) identity_node.input.extend([conv3_node.name]) test_graph = graph_pb2.GraphDef() - test_graph.node.extend([ - input_node, - conv1_weight_node, - conv1_node, - bias_node, - bias_add_node, - #cast_node, - relu_node, - #cast2_node, - conv2_weight_node, - conv2_node, - bias_node2, - bias_add_node2, - relu_node2, - conv3_weight_node, - conv3_node, - identity_node - ]) + test_graph.node.extend( + [ + input_node, + conv1_weight_node, + conv1_node, + bias_node, + bias_add_node, + # cast_node, + relu_node, + # cast2_node, + conv2_weight_node, + conv2_node, + bias_node2, + bias_add_node2, + relu_node2, + conv3_weight_node, + conv3_node, + identity_node, + ] + ) return test_graph @@ -233,16 +246,16 @@ def __init__(self): self.data = [] self.label = [] for i in range(3): - self.data.append([ - np.random.randn(1, 1, 5, 5).astype('float32'), - np.random.randn(1, 1, 5, 1).astype('float32') - ]) - self.label.append(np.random.randn(1, 1, 5, 1).astype('float32')) + self.data.append( + [np.random.randn(1, 1, 5, 5).astype("float32"), np.random.randn(1, 1, 5, 1).astype("float32")] + ) + self.label.append(np.random.randn(1, 1, 5, 1).astype("float32")) def __iter__(self): for data, label in zip(self.data, self.label): yield data, label + class Metric: def update(self, preds, labels): pass @@ -260,7 +273,7 @@ def setUpClass(self): self.onnx_model = build_matmul_model() self.tf_model = build_tf_graph() - @unittest.skipIf(CpuInfo().bf16, 'skip since hardware support bf16') + @unittest.skipIf(CpuInfo().bf16, "skip since hardware support bf16") def test_on_non_enabled_host_tf(self): conf = MixedPrecisionConfig() with self.assertRaises(SystemExit) as cm: @@ -283,8 +296,8 @@ def test_on_non_enabled_dtype(self): class TestMixedPrecision(unittest.TestCase): @classmethod def setUpClass(self): - os.environ['FORCE_FP16'] = '1' - os.environ['FORCE_BF16'] = '1' + os.environ["FORCE_FP16"] = "1" + os.environ["FORCE_BF16"] = "1" self.onnx_model = build_matmul_model() self.matmul_dataloader = MatmulDataloader() self.tf_model = build_tf_graph() @@ -293,8 +306,8 @@ def setUpClass(self): @classmethod def tearDownClass(self): - del os.environ['FORCE_FP16'] - del os.environ['FORCE_BF16'] + del os.environ["FORCE_FP16"] + del os.environ["FORCE_BF16"] shutil.rmtree("./saved", ignore_errors=True) shutil.rmtree("./nc_workspace", ignore_errors=True) os.remove("test.yaml") @@ -302,34 +315,37 @@ def tearDownClass(self): def test_mixed_precision_with_evaluation(self): from 
neural_compressor.data import DataLoader from neural_compressor.metric.metric import ONNXRT_QL_METRICS + # test onnx - conf = MixedPrecisionConfig(device='gpu', backend='onnxrt_cuda_ep') + conf = MixedPrecisionConfig(device="gpu", backend="onnxrt_cuda_ep") - #output_model = mix_precision.fit(self.onnx_model, conf) - #self.assertTrue(any([i.op_type == 'Cast' for i in output_model.nodes()])) + # output_model = mix_precision.fit(self.onnx_model, conf) + # self.assertTrue(any([i.op_type == 'Cast' for i in output_model.nodes()])) tuning_criterion = TuningCriterion(max_trials=3, timeout=1000000) - conf = MixedPrecisionConfig(device='gpu', tuning_criterion=tuning_criterion, backend='onnxrt_cuda_ep', precisions="fp16") - output_model = mix_precision.fit(self.onnx_model, - conf, - eval_dataloader=self.matmul_dataloader, - eval_metric=ONNXRT_QL_METRICS["MSE"]()) - self.assertTrue(any([i.op_type == 'Cast' for i in output_model.nodes()])) + conf = MixedPrecisionConfig( + device="gpu", tuning_criterion=tuning_criterion, backend="onnxrt_cuda_ep", precisions="fp16" + ) + output_model = mix_precision.fit( + self.onnx_model, conf, eval_dataloader=self.matmul_dataloader, eval_metric=ONNXRT_QL_METRICS["MSE"]() + ) + self.assertTrue(any([i.op_type == "Cast" for i in output_model.nodes()])) def test_mixed_precision_with_evaluation_old_api(self): from neural_compressor.conf.config import MixedPrecision_Conf from neural_compressor.experimental import MixedPrecision - converter = MixedPrecision(MixedPrecision_Conf('test.yaml')) + + converter = MixedPrecision(MixedPrecision_Conf("test.yaml")) converter.model = self.onnx_model output_model = converter.fit() - self.assertTrue(any([i.op_type != 'Cast' for i in output_model.nodes()])) + self.assertTrue(any([i.op_type != "Cast" for i in output_model.nodes()])) def test_mixed_precision_with_eval_func(self): def eval(model): return 0.5 - result = [0., 0.1, 0.102, 0.1003, 0.1005, 0.1004, 0.1002] - perf = [0.1, 0.5, 0.6, 0.7, 0.5, 0.4, 0.5 ] + result = [0.0, 0.1, 0.102, 0.1003, 0.1005, 0.1004, 0.1002] + perf = [0.1, 0.5, 0.6, 0.7, 0.5, 0.4, 0.5] import time def eval2(model): @@ -348,9 +364,9 @@ def eval2(model): conf, eval_func=eval, ) - self.assertTrue(any([i.op == 'Cast' for i in output_model.graph_def.node])) - self.assertEqual(conf.inputs, 'input') - self.assertEqual(conf.outputs, 'final') + self.assertTrue(any([i.op == "Cast" for i in output_model.graph_def.node])) + self.assertEqual(conf.inputs, "input") + self.assertEqual(conf.outputs, "final") tuning_criterion = TuningCriterion(max_trials=4, timeout=500) conf = MixedPrecisionConfig(tuning_criterion=tuning_criterion) @@ -359,26 +375,23 @@ def eval2(model): conf, eval_func=eval2, ) - self.assertTrue(any([i.op == 'Cast' for i in output_model.graph_def.node])) + self.assertTrue(any([i.op == "Cast" for i in output_model.graph_def.node])) tuning_criterion = TuningCriterion(max_trials=1, timeout=100) - conf = MixedPrecisionConfig(inputs="input", - outputs="final, test", - tuning_criterion=tuning_criterion) + conf = MixedPrecisionConfig(inputs="input", outputs="final, test", tuning_criterion=tuning_criterion) output_model = mix_precision.fit( self.tf_model, conf, eval_func=eval, ) - self.assertTrue(any([i.op == 'Cast' for i in output_model.graph_def.node])) + self.assertTrue(any([i.op == "Cast" for i in output_model.graph_def.node])) output_model = fit(self.tf_model, conf, eval) - self.assertTrue(any([i.op == 'Cast' for i in output_model.graph_def.node])) - + self.assertTrue(any([i.op == "Cast" for i in 
output_model.graph_def.node])) def test_mixed_precision_with_quant_level_1(self): + result = [0.0, 0.1, 0.102] - result = [0., 0.1, 0.102] def eval_func(model): del result[0] return result[0] @@ -386,13 +399,13 @@ def eval_func(model): conf = MixedPrecisionConfig(inputs="input", outputs="final", quant_level="auto") output_model = mix_precision.fit(self.tf_model, conf, eval_func=eval_func) - self.assertTrue(any([i.op == 'Cast' for i in output_model.graph_def.node])) - self.assertEqual(conf.inputs, 'input') - self.assertEqual(conf.outputs, 'final') + self.assertTrue(any([i.op == "Cast" for i in output_model.graph_def.node])) + self.assertEqual(conf.inputs, "input") + self.assertEqual(conf.outputs, "final") def test_mixed_precision_with_quant_level_2(self): + result = [0.0, 1, 0.9, 1.1] - result = [0., 1, 0.9, 1.1] # meet acc if fallback all conv def eval_func(model): del result[0] @@ -402,11 +415,11 @@ def eval_func(model): output_model = mix_precision.fit(self.tf_model, conf, eval_func=eval_func) # no cast in output model - self.assertFalse(any([i.op == 'Cast' for i in output_model.graph_def.node])) + self.assertFalse(any([i.op == "Cast" for i in output_model.graph_def.node])) def test_mixed_precision_with_quant_level_3(self): + result = [0.0, 1, 0.9, 0.9, 1.1] - result = [0., 1, 0.9, 0.9, 1.1] # meet acc if fallback 1 conv def eval_func(model): del result[0] @@ -423,8 +436,8 @@ def eval_func(model): self.assertEqual(count_cast, 4) def test_mixed_precision_with_quant_level_4(self): + result = [0.0, 1, 0.9, 0.9, 1.1] - result = [0., 1, 0.9, 0.9, 1.1] # meet acc if fallback the second conv def eval_func(model): del result[0] @@ -441,7 +454,8 @@ def eval_func(model): self.assertEqual(count_cast, 4) def test_mixed_precision_with_quant_level_5(self): - result = [0., 1, 0.9, 0.9, 0.9] + result = [0.0, 1, 0.9, 0.9, 0.9] + # meet not meet def eval_func(model): del result[0] @@ -452,10 +466,12 @@ def eval_func(model): output_model = mix_precision.fit(self.tf_model, conf, eval_func=eval_func) self.assertIsNone(output_model) - @unittest.skipIf(PT_VERSION.release < Version("1.11.0").release, - "Please use PyTroch 1.11 or higher version for mixed precision.") + @unittest.skipIf( + PT_VERSION.release < Version("1.11.0").release, "Please use PyTroch 1.11 or higher version for mixed precision." 
+ ) def test_mixed_precision_with_eval_func_pt(self): torch = LazyImport("torch") + def eval(model): return 0.5 @@ -466,11 +482,12 @@ def eval(model): eval_func=eval, ) self.assertTrue(isinstance(output_model.model.fc, BF16ModuleWrapper)) - op_name_dict = {"fc":{ - "activation": {"dtype": ["fp32"]}, - "weight": {"dtype": ["fp32"]}, - } - } + op_name_dict = { + "fc": { + "activation": {"dtype": ["fp32"]}, + "weight": {"dtype": ["fp32"]}, + } + } conf = MixedPrecisionConfig(op_name_dict=op_name_dict) output_model = mix_precision.fit( self.pt_model, @@ -478,11 +495,12 @@ def eval(model): eval_func=eval, ) self.assertTrue(isinstance(output_model.model.fc.weight.dtype, type(torch.float32))) - op_type_dict = {"Linear":{ - "activation": {"dtype": ["fp32"]}, - "weight": {"dtype": ["fp32"]}, - } - } + op_type_dict = { + "Linear": { + "activation": {"dtype": ["fp32"]}, + "weight": {"dtype": ["fp32"]}, + } + } conf = MixedPrecisionConfig(op_type_dict=op_type_dict) output_model = mix_precision.fit( self.pt_model, @@ -491,5 +509,6 @@ def eval(model): ) self.assertTrue(isinstance(output_model.model.fc.weight.dtype, type(torch.float32))) + if __name__ == "__main__": unittest.main() diff --git a/test/mixed_precision/test_mixed_precision_keras_model.py b/test/mixed_precision/test_mixed_precision_keras_model.py index 29f1803fa91..254a7983efe 100644 --- a/test/mixed_precision/test_mixed_precision_keras_model.py +++ b/test/mixed_precision/test_mixed_precision_keras_model.py @@ -1,43 +1,48 @@ import os import shutil import unittest + import numpy as np from tensorflow import keras + from neural_compressor import mix_precision -from neural_compressor.data import DataLoader, Datasets from neural_compressor.config import MixedPrecisionConfig +from neural_compressor.data import DataLoader, Datasets -def build_sequential_model(): +def build_sequential_model(): # Create Keras model - model = keras.Sequential([ - keras.layers.InputLayer(input_shape=(28, 28), name="input"), - keras.layers.Reshape(target_shape=(28, 28, 1)), - keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'), - keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'), - keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'), - keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'), - keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'), - keras.layers.MaxPooling2D(pool_size=(2, 2)), - keras.layers.Flatten(), - keras.layers.Dense(10, activation="softmax", name="output") - ]) + model = keras.Sequential( + [ + keras.layers.InputLayer(input_shape=(28, 28), name="input"), + keras.layers.Reshape(target_shape=(28, 28, 1)), + keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation="relu"), + keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation="relu"), + keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation="relu"), + keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation="relu"), + keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation="relu"), + keras.layers.MaxPooling2D(pool_size=(2, 2)), + keras.layers.Flatten(), + keras.layers.Dense(10, activation="softmax", name="output"), + ] + ) # Print model architecture model.summary() # Compile model with optimizer opt = keras.optimizers.Adam(learning_rate=0.01) - model.compile(optimizer=opt, - loss="sparse_categorical_crossentropy", - metrics=["accuracy"]) + model.compile(optimizer=opt, loss="sparse_categorical_crossentropy", metrics=["accuracy"]) model.save("./models/saved_model") return 
-# Define a customized Metric function + +# Define a customized Metric function from neural_compressor.metric import BaseMetric + + class MyMetric(BaseMetric): def __init__(self, *args): self.pred_list = [] @@ -47,7 +52,7 @@ def __init__(self, *args): def update(self, predict, label): self.pred_list.extend(np.argmax(predict, axis=1)) self.label_list.extend(label) - self.samples += len(label) + self.samples += len(label) def reset(self): self.pred_list = [] @@ -55,76 +60,75 @@ def reset(self): self.samples = 0 def result(self): - correct_num = np.sum( - np.array(self.pred_list) == np.array(self.label_list)) + correct_num = np.sum(np.array(self.pred_list) == np.array(self.label_list)) return correct_num / self.samples + class MyMetric_keras(MyMetric): def __init__(self, *args): super(MyMetric_keras, self).__init__(*args) + class TestMixedPrecisionWithKerasModel(unittest.TestCase): @classmethod def setUpClass(self): - os.environ['FORCE_FP16'] = '1' - os.environ['FORCE_BF16'] = '1' + os.environ["FORCE_FP16"] = "1" + os.environ["FORCE_BF16"] = "1" build_sequential_model() @classmethod def tearDownClass(self): - del os.environ['FORCE_FP16'] - del os.environ['FORCE_BF16'] + del os.environ["FORCE_FP16"] + del os.environ["FORCE_BF16"] shutil.rmtree("./models", ignore_errors=True) shutil.rmtree("./nc_workspace", ignore_errors=True) def test_mixed_precision_with_keras_model(self): # use dummy dataset for UT test - dataset = Datasets('tensorflow')['dummy'](shape=(10, 28, 28), low=0., high=1., label=True) + dataset = Datasets("tensorflow")["dummy"](shape=(10, 28, 28), low=0.0, high=1.0, label=True) - dataloader = DataLoader(framework='tensorflow', dataset=dataset) + dataloader = DataLoader(framework="tensorflow", dataset=dataset) config = MixedPrecisionConfig() q_model = mix_precision.fit( - model='./models/saved_model', - conf=config, - eval_dataloader=dataloader, - eval_metric=MyMetric()) + model="./models/saved_model", conf=config, eval_dataloader=dataloader, eval_metric=MyMetric() + ) # Optional, run quantized model import tensorflow as tf + with tf.compat.v1.Graph().as_default(), tf.compat.v1.Session() as sess: - tf.compat.v1.import_graph_def(q_model.graph_def, name='') - out = sess.run(['Identity:0'], feed_dict={'input:0':dataset.dataset}) + tf.compat.v1.import_graph_def(q_model.graph_def, name="") + out = sess.run(["Identity:0"], feed_dict={"input:0": dataset.dataset}) print("Inference is done.") found_cast = False for i in q_model.graph_def.node: - if i.op == 'Cast': + if i.op == "Cast": found_cast = True break self.assertEqual(found_cast, True) def test_mixed_precision_with_keras_adaptor(self): # use dummy dataset for UT test - dataset = Datasets('tensorflow')['dummy'](shape=(10, 28, 28), low=0., high=1., label=True) - dataloader = DataLoader(framework='tensorflow', dataset=dataset) + dataset = Datasets("tensorflow")["dummy"](shape=(10, 28, 28), low=0.0, high=1.0, label=True) + dataloader = DataLoader(framework="tensorflow", dataset=dataset) # add backend='itex' to run on keras adaptor - config = MixedPrecisionConfig(backend='itex') + config = MixedPrecisionConfig(backend="itex") bf16_model = mix_precision.fit( - model='./models/saved_model', - config=config, - eval_dataloader=dataloader, - eval_metric=MyMetric_keras()) + model="./models/saved_model", config=config, eval_dataloader=dataloader, eval_metric=MyMetric_keras() + ) - bf16_policy = keras.mixed_precision.Policy('mixed_bfloat16') + bf16_policy = keras.mixed_precision.Policy("mixed_bfloat16") # bf16_model.model is an obj of 
tf.keras.Model model_policy = bf16_model.model.dtype_policy - conv2d_layer_policy = bf16_model.model.get_layer('conv2d').dtype_policy + conv2d_layer_policy = bf16_model.model.get_layer("conv2d").dtype_policy self.assertEqual(model_policy.compute_dtype, bf16_policy.compute_dtype) self.assertEqual(conv2d_layer_policy.compute_dtype, bf16_policy.compute_dtype) + if __name__ == "__main__": unittest.main() diff --git a/test/model/test_model.py b/test/model/test_model.py index 75827c64470..d4b0e0a0c12 100644 --- a/test/model/test_model.py +++ b/test/model/test_model.py @@ -1,175 +1,177 @@ -"""Tests for model""" -import numpy as np -import unittest +"""Tests for model.""" import os import platform -from pkg_resources import parse_version -from neural_compressor.model import MODELS -from neural_compressor.model.onnx_model import ONNXModel -from neural_compressor.model.mxnet_model import MXNetModel -from neural_compressor.model.model import get_model_fwk_name -from neural_compressor.model import Model +import unittest -import torchvision -import torch +import numpy as np import onnx import tensorflow as tf +import torch +import torchvision +from pkg_resources import parse_version + +from neural_compressor.model import MODELS, Model +from neural_compressor.model.model import get_model_fwk_name +from neural_compressor.model.mxnet_model import MXNetModel +from neural_compressor.model.onnx_model import ONNXModel + def build_graph(): try: graph = tf.Graph() graph_def = tf.GraphDef() with tf.Session(graph=graph) as sess: - x = tf.placeholder(tf.float64, shape=(1, 256, 256, 1), name='x') - y = tf.constant(np.random.random((2, 2, 1, 1)), name='y') - op = tf.nn.conv2d(input=x, filter=y, strides=[1, 1, 1, 1], \ - padding='VALID', name='op_to_store') + x = tf.placeholder(tf.float64, shape=(1, 256, 256, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)), name="y") + op = tf.nn.conv2d(input=x, filter=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.global_variables_initializer()) - constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ["op_to_store"]) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") except: graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session(graph=graph) as sess: - x = tf.compat.v1.placeholder(tf.float64, shape=(1, 256, 256, 1), name='x') - y = tf.compat.v1.constant(np.random.random((3, 3, 1, 1)), name='y') - op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], \ - padding='VALID', name='op_to_store') + x = tf.compat.v1.placeholder(tf.float64, shape=(1, 256, 256, 1), name="x") + y = tf.compat.v1.constant(np.random.random((3, 3, 1, 1)), name="y") + op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") return graph - + + def build_estimator(): def model_fn(features, labels, mode): 
logits = tf.keras.layers.Dense(12)(features) logits = tf.keras.layers.Dense(56)(logits) logits = tf.keras.layers.Dense(4)(logits) - - output_spec = tf.estimator.EstimatorSpec( - mode=tf.estimator.ModeKeys.PREDICT, predictions=logits) + + output_spec = tf.estimator.EstimatorSpec(mode=tf.estimator.ModeKeys.PREDICT, predictions=logits) return output_spec + return model_fn + def build_input_fn(): def input_fun(): tf.compat.v1.disable_eager_execution() - raw_dataset = np.ones([100,224, 224, 3], dtype=np.float32) + raw_dataset = np.ones([100, 224, 224, 3], dtype=np.float32) tf_dataset = tf.compat.v1.data.Dataset.from_tensor_slices(raw_dataset) tf_dataset = tf_dataset.batch(1) ds_iterator = tf_dataset.make_initializable_iterator() iter_tensors = ds_iterator.get_next() return iter_tensors + return input_fun + def build_keras(): from tensorflow import keras - (train_images, train_labels), (test_images, - test_labels) = keras.datasets.fashion_mnist.load_data() + + (train_images, train_labels), (test_images, test_labels) = keras.datasets.fashion_mnist.load_data() train_images = train_images.astype(np.float32) / 255.0 # Create Keras model - model = keras.Sequential([ - keras.layers.InputLayer(input_shape=(28, 28), name="input"), - keras.layers.Reshape(target_shape=(28, 28, 1)), - keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'), - keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'), - keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'), - keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'), - keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'), - keras.layers.MaxPooling2D(pool_size=(2, 2)), - keras.layers.Flatten(), - keras.layers.Dense(10, activation="softmax", name="output") - ]) - + model = keras.Sequential( + [ + keras.layers.InputLayer(input_shape=(28, 28), name="input"), + keras.layers.Reshape(target_shape=(28, 28, 1)), + keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation="relu"), + keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation="relu"), + keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation="relu"), + keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation="relu"), + keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation="relu"), + keras.layers.MaxPooling2D(pool_size=(2, 2)), + keras.layers.Flatten(), + keras.layers.Dense(10, activation="softmax", name="output"), + ] + ) + # Compile model with optimizer opt = keras.optimizers.Adam(learning_rate=0.01) - model.compile(optimizer=opt, - loss="sparse_categorical_crossentropy", - metrics=["accuracy"]) + model.compile(optimizer=opt, loss="sparse_categorical_crossentropy", metrics=["accuracy"]) # # Train model - model.fit(\ - x={"input": train_images[0:100]}, y={"output": train_labels[0:100]}, epochs=1) + model.fit(x={"input": train_images[0:100]}, y={"output": train_labels[0:100]}, epochs=1) return model -class TestTensorflowModel(unittest.TestCase): +class TestTensorflowModel(unittest.TestCase): @classmethod def tearDownClass(self): - os.remove('model_test.pb') + os.remove("model_test.pb") def test_graph(self): graph = build_graph() model = Model(graph) - model.input_tensor_names = ['x'] - model.output_tensor_names = ['op_to_store'] + model.input_tensor_names = ["x"] + model.output_tensor_names = ["op_to_store"] self.assertEqual(True, isinstance(model.graph_def, tf.compat.v1.GraphDef)) - self.assertEqual(model.input_node_names[0], 'x') - self.assertEqual(model.output_node_names[0], 'op_to_store') - 
model.save('model_test.pb') + self.assertEqual(model.input_node_names[0], "x") + self.assertEqual(model.output_node_names[0], "op_to_store") + model.save("model_test.pb") - model = Model('model_test.pb') - self.assertEqual(model.input_tensor_names[0], 'x') - self.assertEqual(model.output_tensor_names[0], 'op_to_store') - self.assertEqual(model.input_tensor[0].name, 'x:0') - self.assertEqual(model.output_tensor[0].name, 'op_to_store:0') + model = Model("model_test.pb") + self.assertEqual(model.input_tensor_names[0], "x") + self.assertEqual(model.output_tensor_names[0], "op_to_store") + self.assertEqual(model.input_tensor[0].name, "x:0") + self.assertEqual(model.output_tensor[0].name, "op_to_store:0") # test wrong input tensor names can't set with self.assertRaises(AssertionError): - model.input_tensor_names = ['wrong_input'] + model.input_tensor_names = ["wrong_input"] with self.assertRaises(AssertionError): - model.output_tensor_names = ['wrong_output'] + model.output_tensor_names = ["wrong_output"] # test right tensor - model.input_tensor_names = ['x_1'] - model.output_tensor_names = ['op_to_store_1'] + model.input_tensor_names = ["x_1"] + model.output_tensor_names = ["op_to_store_1"] self.assertEqual(True, isinstance(model.graph_def, tf.compat.v1.GraphDef)) def test_validate_graph_node(self): from neural_compressor.model.tensorflow_model import validate_graph_node + graph = build_graph() self.assertEqual(False, validate_graph_node(graph.as_graph_def(), [])) - self.assertEqual(False, validate_graph_node(graph.as_graph_def(), ['test'])) - self.assertEqual(True, validate_graph_node(graph.as_graph_def(), ['x'])) + self.assertEqual(False, validate_graph_node(graph.as_graph_def(), ["test"])) + self.assertEqual(True, validate_graph_node(graph.as_graph_def(), ["x"])) def test_estimator(self): from neural_compressor.adaptor.tf_utils.util import get_estimator_graph + model_fn = build_estimator() - input_fn = build_input_fn() - estimator = tf.estimator.Estimator( - model_fn, model_dir=None, config=None, params=None, warm_start_from=None - ) + input_fn = build_input_fn() + estimator = tf.estimator.Estimator(model_fn, model_dir=None, config=None, params=None, warm_start_from=None) with self.assertRaises(AssertionError): graph_def = Model(estimator).graph_def model = Model(estimator, input_fn=input_fn) - self.assertEqual(model.output_tensor_names[0], 'dense_2/BiasAdd:0') + self.assertEqual(model.output_tensor_names[0], "dense_2/BiasAdd:0") def test_ckpt(self): - mobilenet_ckpt_url = \ - 'http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_1.0_224.tgz' - dst_path = '/tmp/.neural_compressor/mobilenet_v1_1.0_224.tgz' + mobilenet_ckpt_url = "http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_1.0_224.tgz" + dst_path = "/tmp/.neural_compressor/mobilenet_v1_1.0_224.tgz" if platform.system().lower() == "windows": - model_path = 'C:\\tmp\.neural_compressor\\mobilenet_v1_1.0_224' + model_path = "C:\\tmp\.neural_compressor\\mobilenet_v1_1.0_224" else: - model_path = './ckpt' + model_path = "./ckpt" if not os.path.exists(dst_path): - os.system("mkdir -p /tmp/.neural_compressor && wget {} -O {}".format( - mobilenet_ckpt_url, dst_path)) + os.system("mkdir -p /tmp/.neural_compressor && wget {} -O {}".format(mobilenet_ckpt_url, dst_path)) if not os.path.getsize(dst_path): os.system("rm -fr {0} && wget {1} -O {0}".format(dst_path, mobilenet_ckpt_url)) os.system("mkdir -p ckpt && tar xvf {0} -C {1}".format(dst_path, model_path)) model = Model(model_path) - 
model.output_tensor_names = ['MobilenetV1/Predictions/Reshape_1'] + model.output_tensor_names = ["MobilenetV1/Predictions/Reshape_1"] self.assertEqual(model_path, model.model_path) self.assertGreaterEqual(len(model.input_tensor_names), 1) @@ -178,16 +180,15 @@ def test_ckpt(self): self.assertEqual(True, isinstance(graph_def, tf.compat.v1.GraphDef)) model.graph_def = graph_def - os.system('rm -rf ckpt') + os.system("rm -rf ckpt") def test_slim(self): tf.compat.v1.reset_default_graph() - inception_ckpt_url = \ - 'http://download.tensorflow.org/models/inception_v1_2016_08_28.tar.gz' + inception_ckpt_url = "http://download.tensorflow.org/models/inception_v1_2016_08_28.tar.gz" if platform.system().lower() == "windows": dst_path = "C:\\tmp\\.neural_compressor\\inception_v1_2016_08_28.tar.g" elif platform.system().lower() == "linux": - dst_path = '/tmp/.neural_compressor/slim/inception_v1_2016_08_28.tar.gz' + dst_path = "/tmp/.neural_compressor/slim/inception_v1_2016_08_28.tar.gz" if platform.system().lower() == "linux": if not os.path.exists(dst_path): os.system("mkdir -p /tmp/.neural_compressor/slim") @@ -195,202 +196,222 @@ def test_slim(self): if not os.path.getsize(dst_path): os.system("rm -fr {0} && wget {1} -O {0}".format(dst_path, inception_ckpt_url)) os.system("mkdir -p slim_ckpt && tar xvf {} -C slim_ckpt".format(dst_path)) - if parse_version(tf.version.VERSION) > parse_version('2.0.0'): + if parse_version(tf.version.VERSION) > parse_version("2.0.0"): return - model = Model('./slim_ckpt/inception_v1.ckpt') - model.name = 'inception_v1' + model = Model("./slim_ckpt/inception_v1.ckpt") + model.name = "inception_v1" graph_def = model.graph_def self.assertGreaterEqual(len(model.output_node_names), 1) self.assertGreaterEqual(len(model.input_node_names), 1) - self.assertEqual(model.model_path, './slim_ckpt/inception_v1.ckpt') + self.assertEqual(model.model_path, "./slim_ckpt/inception_v1.ckpt") # test net factory from neural_compressor.model.nets_factory import TFSlimNetsFactory + factory = TFSlimNetsFactory() from tf_slim.nets import inception - input_shape = [None, 224, 224, 3] + + input_shape = [None, 224, 224, 3] model_func = inception.inception_v1 arg_scope = inception.inception_v1_arg_scope num_classes = 1001 - factory.register('inceptionv1', model_func, input_shape, \ - arg_scope, num_classes=num_classes) - os.system('rm -rf slim_ckpt') - + factory.register("inceptionv1", model_func, input_shape, arg_scope, num_classes=num_classes) + os.system("rm -rf slim_ckpt") + def test_keras_h5_model(self): - if parse_version(tf.version.VERSION) < parse_version('2.3.0'): + if parse_version(tf.version.VERSION) < parse_version("2.3.0"): return keras_model = build_keras() - self.assertEqual('tensorflow', get_model_fwk_name(keras_model)) - keras_model.save('./simple_model.h5') - #load from path - model = Model('./simple_model.h5') + self.assertEqual("tensorflow", get_model_fwk_name(keras_model)) + keras_model.save("./simple_model.h5") + # load from path + model = Model("./simple_model.h5") - self.assertEqual(model.model_path, './simple_model.h5') + self.assertEqual(model.model_path, "./simple_model.h5") self.assertGreaterEqual(len(model.output_node_names), 1) self.assertGreaterEqual(len(model.input_node_names), 1) - os.makedirs('./keras_model', exist_ok=True) - model.save('./keras_model') - os.system('rm -rf simple_model.h5') - os.system('rm -rf keras_model') - - + os.makedirs("./keras_model", exist_ok=True) + model.save("./keras_model") + os.system("rm -rf simple_model.h5") + os.system("rm -rf 
keras_model") + def test_keras_saved_model(self): - if parse_version(tf.version.VERSION) < parse_version('2.3.0'): + if parse_version(tf.version.VERSION) < parse_version("2.3.0"): return keras_model = build_keras() - self.assertEqual('tensorflow', get_model_fwk_name(keras_model)) + self.assertEqual("tensorflow", get_model_fwk_name(keras_model)) model = Model(keras_model) self.assertEqual(model.model_path, None) self.assertGreaterEqual(len(model.output_node_names), 1) self.assertGreaterEqual(len(model.input_node_names), 1) - keras_model.save('./simple_model') + keras_model.save("./simple_model") # load from path - model = Model('./simple_model') - self.assertEqual(model.model_path, './simple_model') + model = Model("./simple_model") + self.assertEqual(model.model_path, "./simple_model") self.assertGreaterEqual(len(model.output_node_names), 1) self.assertGreaterEqual(len(model.input_node_names), 1) - os.makedirs('./keras_model', exist_ok=True) - model.save('./keras_model') - os.system('rm -rf simple_model') - os.system('rm -rf keras_model') + os.makedirs("./keras_model", exist_ok=True) + model.save("./keras_model") + os.system("rm -rf simple_model") + os.system("rm -rf keras_model") def test_tf_qat_model(self): - if parse_version(tf.version.VERSION) < parse_version('2.3.0'): + if parse_version(tf.version.VERSION) < parse_version("2.3.0"): return keras_model = build_keras() - self.assertEqual('tensorflow', get_model_fwk_name(keras_model)) + self.assertEqual("tensorflow", get_model_fwk_name(keras_model)) from neural_compressor.model.tensorflow_model import TensorflowQATModel + model = TensorflowQATModel(keras_model) assert isinstance(model.model, tf.keras.Model) self.assertEqual(model.model_path, None) - keras_model.save('./simple_model') + keras_model.save("./simple_model") # load from path - model = TensorflowQATModel('./simple_model') + model = TensorflowQATModel("./simple_model") assert isinstance(model.model, tf.keras.Model) - self.assertEqual(model.model_path, './simple_model') + self.assertEqual(model.model_path, "./simple_model") - model.save('./keras_model') - loaded_model = tf.keras.models.load_model('./keras_model') + model.save("./keras_model") + loaded_model = tf.keras.models.load_model("./keras_model") assert isinstance(loaded_model, tf.keras.Model) - model.save('keras_model.h5') - loaded_model = tf.keras.models.load_model('keras_model.h5') + model.save("keras_model.h5") + loaded_model = tf.keras.models.load_model("keras_model.h5") assert isinstance(loaded_model, tf.keras.Model) root = model.save() loaded_model = tf.keras.models.load_model(root) assert isinstance(loaded_model, tf.keras.Model) - os.system('rm -rf simple_model') - os.system('rm -rf keras_model') - os.remove('keras_model.h5') - os.system('rm -rf '+root) + os.system("rm -rf simple_model") + os.system("rm -rf keras_model") + os.remove("keras_model.h5") + os.system("rm -rf " + root) - @unittest.skipIf(parse_version(tf.version.VERSION) < parse_version('2.4.0') or platform.system().lower() == "windows", "Only supports tf 2.4.0 or above") + @unittest.skipIf( + parse_version(tf.version.VERSION) < parse_version("2.4.0") or platform.system().lower() == "windows", + "Only supports tf 2.4.0 or above", + ) def test_saved_model(self): - ssd_resnet50_ckpt_url = 'http://download.tensorflow.org/models/object_detection/ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03.tar.gz' - center_resnet50_saved_model_url = 'https://tfhub.dev/tensorflow/centernet/resnet50v1_fpn_512x512/1?tf-hub-format=compressed' - dst_path 
= '/tmp/.neural_compressor/saved_model.tar.gz' - center_dst_path = '/tmp/.neural_compressor/center_saved_model.tar.gz' + ssd_resnet50_ckpt_url = "http://download.tensorflow.org/models/object_detection/ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03.tar.gz" + center_resnet50_saved_model_url = ( + "https://tfhub.dev/tensorflow/centernet/resnet50v1_fpn_512x512/1?tf-hub-format=compressed" + ) + dst_path = "/tmp/.neural_compressor/saved_model.tar.gz" + center_dst_path = "/tmp/.neural_compressor/center_saved_model.tar.gz" if not os.path.exists(dst_path): - os.system("mkdir -p /tmp/.neural_compressor && wget {} -O {}".format(ssd_resnet50_ckpt_url, dst_path)) + os.system("mkdir -p /tmp/.neural_compressor && wget {} -O {}".format(ssd_resnet50_ckpt_url, dst_path)) if not os.path.getsize(dst_path): os.system("rm -fr {0} && wget {1} -O {0}".format(dst_path, ssd_resnet50_ckpt_url)) if not os.path.exists(center_dst_path): - os.system("mkdir -p /tmp/.neural_compressor && wget {} -O {}".format(center_resnet50_saved_model_url, center_dst_path)) + os.system( + "mkdir -p /tmp/.neural_compressor && wget {} -O {}".format( + center_resnet50_saved_model_url, center_dst_path + ) + ) if not os.path.getsize(center_dst_path): os.system("rm -fr {0} && wget {1} -O {0}".format(center_dst_path, center_resnet50_saved_model_url)) os.system("tar -xvf {}".format(dst_path)) - unzip_center_model = 'unzip_center_model' + unzip_center_model = "unzip_center_model" os.system("mkdir -p {} ".format(unzip_center_model)) - os.system("tar -xvf {} -C {}".format(center_dst_path,unzip_center_model)) + os.system("tar -xvf {} -C {}".format(center_dst_path, unzip_center_model)) + + model = Model("ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03/saved_model") + center_model = Model("unzip_center_model") - model = Model('ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03/saved_model') - center_model = Model('unzip_center_model') + from tensorflow.python.training.tracking.tracking import AutoTrackable - from tensorflow.python.training.tracking.tracking import AutoTrackable - assert isinstance(model.model, AutoTrackable), "The model getter of TensorflowSavedModelModel is not correctly run." + assert isinstance( + model.model, AutoTrackable + ), "The model getter of TensorflowSavedModelModel is not correctly run." 
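[Editor's note: the hunk above only reflows an existing SavedModel test. As an illustrative sketch, not part of this patch, the freeze-and-save flow it exercises looks roughly like the following; the path "./my_saved_model" is a hypothetical placeholder, and every call is taken from the test code shown in this hunk.]

from neural_compressor.model import Model
from tensorflow.compat.v1 import graph_util

inc_model = Model("./my_saved_model")  # hypothetical SavedModel directory
frozen_graph_def = graph_util.convert_variables_to_constants(
    sess=inc_model.sess,
    input_graph_def=inc_model.graph_def,
    output_node_names=inc_model.output_node_names,
)
inc_model.graph_def = frozen_graph_def  # keep the frozen GraphDef on the wrapper
inc_model.save("./frozen_saved_model")  # re-export; Model("./frozen_saved_model") can reload it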
+ + from tensorflow.compat.v1 import graph_util - from tensorflow.compat.v1 import graph_util graph_def = graph_util.convert_variables_to_constants( - sess=model.sess, - input_graph_def=model.graph_def, - output_node_names=model.output_node_names) - + sess=model.sess, input_graph_def=model.graph_def, output_node_names=model.output_node_names + ) + model.graph_def = graph_def - tmp_saved_model_path = './tmp_saved_model' + tmp_saved_model_path = "./tmp_saved_model" if os.path.exists(tmp_saved_model_path): - os.system('rm -rf {}'.format(tmp_saved_model_path)) - os.system('mkdir -p {}'.format(tmp_saved_model_path)) - + os.system("rm -rf {}".format(tmp_saved_model_path)) + os.system("mkdir -p {}".format(tmp_saved_model_path)) + self.assertTrue(isinstance(model.graph_def, tf.compat.v1.GraphDef)) self.assertTrue(isinstance(model.graph, tf.compat.v1.Graph)) model.save(tmp_saved_model_path) # load again to make sure model can be loaded model = Model(tmp_saved_model_path) - os.system('rm -rf ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03') - os.system('rm -rf temp_saved_model') - os.system('rm -rf {}'.format(tmp_saved_model_path)) - + os.system("rm -rf ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03") + os.system("rm -rf temp_saved_model") + os.system("rm -rf {}".format(tmp_saved_model_path)) + center_graph_def = graph_util.convert_variables_to_constants( sess=center_model.sess, input_graph_def=center_model.graph_def, - output_node_names=center_model.output_node_names) - + output_node_names=center_model.output_node_names, + ) + center_model.graph_def = center_graph_def - + self.assertTrue(isinstance(center_model.graph_def, tf.compat.v1.GraphDef)) self.assertTrue(isinstance(center_model.graph, tf.compat.v1.Graph)) from neural_compressor.model.tensorflow_model import _get_graph_from_saved_model_v1 + graph_def, input_names, output_names = _get_graph_from_saved_model_v1(unzip_center_model) - assert graph_def is not None, 'Can not parse the saved model...' + assert graph_def is not None, "Can not parse the saved model..." 
from tensorflow.python.saved_model.loader_impl import parse_saved_model_with_debug_info + from neural_compressor.model.tensorflow_model import _contains_function_with_implements_attr + saved_model_proto, _ = parse_saved_model_with_debug_info(unzip_center_model) self.assertEqual(False, _contains_function_with_implements_attr(saved_model_proto)) - os.system('rm -rf unzip_center_model') - + os.system("rm -rf unzip_center_model") def test_tensorflow(self): from neural_compressor.model.tensorflow_model import TensorflowBaseModel + ori_model = build_graph() - self.assertEqual('tensorflow', get_model_fwk_name(ori_model)) - self.assertEqual('tensorflow', get_model_fwk_name(TensorflowBaseModel(ori_model))) + self.assertEqual("tensorflow", get_model_fwk_name(ori_model)) + self.assertEqual("tensorflow", get_model_fwk_name(TensorflowBaseModel(ori_model))) try: get_model_fwk_name([]) except AssertionError: pass try: - get_model_fwk_name('./model.pb') + get_model_fwk_name("./model.pb") except AssertionError: pass + def export_onnx_model(model, path): x = torch.randn(100, 3, 224, 224, requires_grad=True) torch_out = model(x) - torch.onnx.export(model, - x, - path, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names = ["input"], - output_names = ["output"], - dynamic_axes={"input" : {0 : "batch_size"}, - "output" : {0 : "batch_size"}}) + torch.onnx.export( + model, + x, + path, + export_params=True, + opset_version=11, + do_constant_folding=True, + input_names=["input"], + output_names=["output"], + dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}}, + ) + class TestONNXModel(unittest.TestCase): cnn_export_path = "cnn.onnx" cnn_model = torchvision.models.quantization.resnet18() - + @classmethod def setUpClass(self): cnn_model = torchvision.models.quantization.resnet18() @@ -402,92 +423,101 @@ def tearDownClass(self): os.remove(self.cnn_export_path) def test_model(self): - self.assertEqual('onnxruntime', get_model_fwk_name(self.cnn_export_path)) - model = MODELS['onnxruntime'](self.cnn_model) + self.assertEqual("onnxruntime", get_model_fwk_name(self.cnn_export_path)) + model = MODELS["onnxruntime"](self.cnn_model) self.assertEqual(True, isinstance(model, ONNXModel)) self.assertEqual(True, isinstance(model.model, onnx.ModelProto)) - model.save('test.onnx') - self.assertEqual(True, os.path.exists('test.onnx')) - os.remove('test.onnx') + model.save("test.onnx") + self.assertEqual(True, os.path.exists("test.onnx")) + os.remove("test.onnx") + class TestPyTorchModel(unittest.TestCase): def testPyTorch(self): import torchvision - from neural_compressor.model.torch_model import PyTorchModel, IPEXModel, PyTorchFXModel + + from neural_compressor.model.torch_model import IPEXModel, PyTorchFXModel, PyTorchModel + ori_model = torchvision.models.mobilenet_v2() - self.assertEqual('pytorch', get_model_fwk_name(ori_model)) + self.assertEqual("pytorch", get_model_fwk_name(ori_model)) pt_model = PyTorchModel(ori_model) pt_model.model = ori_model pt_model = PyTorchModel(torchvision.models.mobilenet_v2()) with self.assertRaises(AssertionError): - pt_model.workspace_path = './pytorch' - + pt_model.workspace_path = "./pytorch" + ipex_model = IPEXModel(ori_model) self.assertTrue(ipex_model.model) ipex_model.model = ori_model ipex_model = PyTorchModel(torchvision.models.mobilenet_v2()) with self.assertRaises(AssertionError): - ipex_model.workspace_path = './pytorch' - ipex_model.save('./') + ipex_model.workspace_path = "./pytorch" + ipex_model.save("./") + + 
self.assertEqual("pytorch", get_model_fwk_name(PyTorchModel(ori_model))) + self.assertEqual("pytorch", get_model_fwk_name(IPEXModel(ori_model))) + self.assertEqual("pytorch", get_model_fwk_name(PyTorchFXModel(ori_model))) - self.assertEqual('pytorch', get_model_fwk_name(PyTorchModel(ori_model))) - self.assertEqual('pytorch', get_model_fwk_name(IPEXModel(ori_model))) - self.assertEqual('pytorch', get_model_fwk_name(PyTorchFXModel(ori_model))) def load_mxnet_model(symbol_file, param_file): import mxnet as mx + symbol = mx.sym.load(symbol_file) save_dict = mx.nd.load(param_file) arg_params = {} aux_params = {} for k, v in save_dict.items(): - tp, name = k.split(':', 1) - if tp == 'arg': + tp, name = k.split(":", 1) + if tp == "arg": arg_params[name] = v return symbol, arg_params, aux_params + class TestMXNetModel(unittest.TestCase): @classmethod def setUpClass(self): if platform.system().lower() == "windows": self.skipTest(self, "not support mxnet on windows yet") - import mxnet.gluon.nn as nn import mxnet as mx + import mxnet.gluon.nn as nn + net = nn.HybridSequential() net.add(nn.Dense(128, activation="relu")) net.add(nn.Dense(64, activation="relu")) net.add(nn.Dense(10)) net.initialize() net.hybridize() - fake_data = mx.random.uniform(shape=(1,128,128)) + fake_data = mx.random.uniform(shape=(1, 128, 128)) net(fake_data) self.net = net @classmethod def tearDownClass(self): - os.remove('test-symbol.json') - os.remove('test-0000.params') - os.remove('test2-symbol.json') - os.remove('test2-0000.params') + os.remove("test-symbol.json") + os.remove("test-0000.params") + os.remove("test2-symbol.json") + os.remove("test2-0000.params") def test_model(self): import mxnet as mx - self.assertEqual('mxnet', get_model_fwk_name(self.net)) - model = MODELS['mxnet'](self.net) + + self.assertEqual("mxnet", get_model_fwk_name(self.net)) + model = MODELS["mxnet"](self.net) self.assertEqual(True, isinstance(model, MXNetModel)) self.assertEqual(True, isinstance(model.model, mx.gluon.HybridBlock)) - model.save('./test') - self.assertEqual(True, os.path.exists('test-symbol.json')) - self.assertEqual(True, os.path.exists('test-0000.params')) + model.save("./test") + self.assertEqual(True, os.path.exists("test-symbol.json")) + self.assertEqual(True, os.path.exists("test-0000.params")) - net = load_mxnet_model('test-symbol.json', 'test-0000.params') + net = load_mxnet_model("test-symbol.json", "test-0000.params") model.model = net self.assertEqual(True, isinstance(model.model[0], mx.symbol.Symbol)) - model.save('./test2') - self.assertEqual(True, os.path.exists('test2-symbol.json')) - self.assertEqual(True, os.path.exists('test2-0000.params')) + model.save("./test2") + self.assertEqual(True, os.path.exists("test2-symbol.json")) + self.assertEqual(True, os.path.exists("test2-0000.params")) + if __name__ == "__main__": unittest.main() diff --git a/test/model/test_model_pytorch.py b/test/model/test_model_pytorch.py index 42762367217..7b42ef63729 100644 --- a/test/model/test_model_pytorch.py +++ b/test/model/test_model_pytorch.py @@ -1,23 +1,26 @@ import os +import unittest + import torch import torchvision -import unittest +from packaging.version import Version + import neural_compressor.adaptor.pytorch as nc_torch +from neural_compressor import PostTrainingQuantConfig, quantization +from neural_compressor.adaptor.torch_utils.model_wrapper import WeightOnlyLinear from neural_compressor.model import MODELS from neural_compressor.model import Model as INCModel from neural_compressor.model.torch_model import PyTorchModel 
-from packaging.version import Version -from neural_compressor import quantization, PostTrainingQuantConfig -from neural_compressor.adaptor.torch_utils.model_wrapper import WeightOnlyLinear try: import intel_pytorch_extension as ipex + TEST_IPEX = True except: TEST_IPEX = False PT_VERSION = nc_torch.get_torch_version() -if PT_VERSION >= Version("1.8.0-rc1"): +if PT_VERSION >= Version("1.8.0-rc1"): FX_MODE = True else: FX_MODE = False @@ -40,7 +43,7 @@ def forward(self, x): class TestPytorchModel(unittest.TestCase): framework = "pytorch" model = torchvision.models.quantization.resnet18() - lpot_model = MODELS['pytorch'](model) + lpot_model = MODELS["pytorch"](model) def test_Model(self): model = torchvision.models.quantization.resnet18() @@ -57,42 +60,38 @@ def test_get_weight(self): if name == "fc.bias": param.data.fill_(0.1) assert int(torch.sum(self.lpot_model.get_weight("layer4.1.conv2.weight"))) == 0 - assert torch.allclose( - torch.sum( - torch.tensor(self.lpot_model.get_weight("fc.bias"))), - torch.tensor(100.)) + assert torch.allclose(torch.sum(torch.tensor(self.lpot_model.get_weight("fc.bias"))), torch.tensor(100.0)) def test_get_input(self): - model = MODELS['pytorch'](torchvision.models.quantization.resnet18()) + model = MODELS["pytorch"](torchvision.models.quantization.resnet18()) model.model.eval().fuse_model() model.register_forward_pre_hook() rand_input = torch.rand(100, 3, 256, 256).float() model.model(rand_input) - assert torch.equal(model.get_inputs('x'), rand_input) + assert torch.equal(model.get_inputs("x"), rand_input) model.remove_hooks() def test_update_weights(self): - self.lpot_model.update_weights('fc.bias', torch.zeros([1000])) + self.lpot_model.update_weights("fc.bias", torch.zeros([1000])) assert int(torch.sum(self.lpot_model.get_weight("fc.bias"))) == 0 def test_gradient(self): with self.assertRaises(AssertionError): - self.lpot_model.get_gradient('fc.bias') + self.lpot_model.get_gradient("fc.bias") shape = None for name, tensor in self.lpot_model._model.named_parameters(): - if name == 'fc.bias': + if name == "fc.bias": shape = tensor.shape tensor.grad = torch.randn(shape) break new_grad = torch.zeros(shape) - self.lpot_model.update_gradient('fc.bias', new_grad) - assert torch.equal(torch.tensor(self.lpot_model.get_gradient('fc.bias')), torch.zeros(shape)) + self.lpot_model.update_gradient("fc.bias", new_grad) + assert torch.equal(torch.tensor(self.lpot_model.get_gradient("fc.bias")), torch.zeros(shape)) rand_input = torch.rand(100, 3, 256, 256).float() rand_input.grad = torch.ones_like(rand_input) - assert torch.equal(torch.tensor(self.lpot_model.get_gradient(rand_input)), - torch.ones_like(rand_input)) + assert torch.equal(torch.tensor(self.lpot_model.get_gradient(rand_input)), torch.ones_like(rand_input)) def test_report_sparsity(self): df, total_sparsity = self.lpot_model.report_sparsity() @@ -103,29 +102,29 @@ def test_WeightOnlyLinear(self): model = Model() input = torch.randn(1, 30) conf = PostTrainingQuantConfig( - approach='weight_only', + approach="weight_only", ) q_model = quantization.fit(model, conf) out1 = q_model(input) - q_model.save('saved') - model_size1 = os.path.getsize('saved/best_model.pt')/1024 + q_model.save("saved") + model_size1 = os.path.getsize("saved/best_model.pt") / 1024 print("FP32 Model size:{:.3f}M".format(model_size1)) # test compress_bits = [8, 16, 32, 64] - compression_dtype = [torch.int8, torch.int16, torch.int32, torch.int64] + compression_dtype = [torch.int8, torch.int16, torch.int32, torch.int64] for dtype in 
compression_dtype: new_model = Model() inc_model = INCModel(new_model) inc_model.export_compressed_model( - qweight_config_path='saved/qconfig.json', + qweight_config_path="saved/qconfig.json", compression_dtype=dtype, ) out2 = q_model(input) - torch.save(inc_model.state_dict(), 'saved/tmp.pt') - model_size2 = os.path.getsize('saved/tmp.pt')/1024 + torch.save(inc_model.state_dict(), "saved/tmp.pt") + model_size2 = os.path.getsize("saved/tmp.pt") / 1024 print("WeightOnlyLinear Model size:{:.3f}M".format(model_size2)) self.assertTrue(isinstance(inc_model.model.fc1, WeightOnlyLinear)) - self.assertTrue(inc_model.model.fc1.packed_weight.dtype==dtype) - self.assertTrue(inc_model.model.fc1.scale.dtype==torch.float32) + self.assertTrue(inc_model.model.fc1.packed_weight.dtype == dtype) + self.assertTrue(inc_model.model.fc1.scale.dtype == torch.float32) self.assertTrue(model_size1 / model_size2 > 2) self.assertTrue(torch.all(torch.isclose(out1, out2, atol=5e-1))) @@ -135,22 +134,18 @@ def test_WeightOnlyLinear(self): new_model = Model() inc_model = INCModel(new_model) inc_model.export_compressed_model( - qweight_config_path='saved/qconfig.json', + qweight_config_path="saved/qconfig.json", compression_dim=dim, ) out2 = q_model(input) - torch.save(inc_model.state_dict(), 'saved/tmp.pt') - model_size2 = os.path.getsize('saved/tmp.pt')/1024 + torch.save(inc_model.state_dict(), "saved/tmp.pt") + model_size2 = os.path.getsize("saved/tmp.pt") / 1024 print("WeightOnlyLinear Model size:{:.3f}M".format(model_size2)) self.assertTrue(isinstance(inc_model.model.fc1, WeightOnlyLinear)) if dim == 1: - self.assertTrue( - inc_model.model.fc1.packed_weight.shape[0] == inc_model.model.fc1.out_features - ) + self.assertTrue(inc_model.model.fc1.packed_weight.shape[0] == inc_model.model.fc1.out_features) else: - self.assertTrue( - inc_model.model.fc1.packed_weight.shape[1] == inc_model.model.fc1.in_features - ) + self.assertTrue(inc_model.model.fc1.packed_weight.shape[1] == inc_model.model.fc1.in_features) self.assertTrue(model_size1 / model_size2 > 2) self.assertTrue(torch.all(torch.isclose(out1, out2, atol=5e-1))) @@ -158,15 +153,15 @@ def test_WeightOnlyLinear(self): new_model = Model() inc_model = INCModel(new_model) inc_model.export_compressed_model( - qweight_config_path='saved/qconfig.json', + qweight_config_path="saved/qconfig.json", scale_dtype=torch.float16, ) out2 = q_model(input) - torch.save(inc_model.state_dict(), 'saved/tmp.pt') - model_size2 = os.path.getsize('saved/tmp.pt')/1024 + torch.save(inc_model.state_dict(), "saved/tmp.pt") + model_size2 = os.path.getsize("saved/tmp.pt") / 1024 print("WeightOnlyLinear Model size:{:.3f}M".format(model_size2)) self.assertTrue(isinstance(inc_model.model.fc1, WeightOnlyLinear)) - self.assertTrue(inc_model.model.fc1.scale.dtype==torch.float16) + self.assertTrue(inc_model.model.fc1.scale.dtype == torch.float16) self.assertTrue(model_size1 / model_size2 > 2) self.assertTrue(torch.all(torch.isclose(out1, out2, atol=5e-1))) diff --git a/test/model/test_onnx_model.py b/test/model/test_onnx_model.py index 6c2813b681a..97a784f7315 100644 --- a/test/model/test_onnx_model.py +++ b/test/model/test_onnx_model.py @@ -1,15 +1,16 @@ -import sys import os -import onnx -from onnx import helper, TensorProto, numpy_helper import shutil import subprocess +import sys import unittest + import numpy as np +import onnx +from onnx import TensorProto, helper, numpy_helper +from neural_compressor import PostTrainingQuantConfig, quantization +from neural_compressor.data import DATALOADERS, 
Datasets from neural_compressor.model.onnx_model import ONNXModel -from neural_compressor.data import Datasets, DATALOADERS -from neural_compressor import quantization, PostTrainingQuantConfig def get_onnx_model(): @@ -23,13 +24,12 @@ def get_onnx_model(): def generate_input_initializer(tensor_shape, tensor_dtype, input_name): - ''' - Helper function to generate initializers for test inputs - ''' + """Helper function to generate initializers for test inputs.""" tensor = np.random.ranf(tensor_shape).astype(tensor_dtype) init = numpy_helper.from_array(tensor, input_name) return init + class TestOnnxModel(unittest.TestCase): def setUp(self): # Relu @@ -43,24 +43,29 @@ def setUp(self): # | # Add - input0 = helper.make_tensor_value_info('input0', TensorProto.FLOAT, [1, 3, 1, 3]) - output = helper.make_tensor_value_info('output', TensorProto.FLOAT, [1, 3, 1, 3]) - - X1_weight = generate_input_initializer([3, 3, 1, 1], np.float32, 'X1_weight') - X1_bias = generate_input_initializer([3], np.float32, 'X1_bias') - X3_weight = generate_input_initializer([3, 3, 1, 1], np.float32, 'X3_weight') - X3_bias = generate_input_initializer([3],np.float32, 'X3_bias') - X5_weight = generate_input_initializer([3, 3, 1, 1], np.float32, 'X5_weight') - X5_bias = generate_input_initializer([3],np.float32,'X5_bias') - - relu_node_1 = onnx.helper.make_node('Relu', ['input0'], ['X1'], name='Relu1') - conv_node_1 = onnx.helper.make_node('Conv', ['X1', 'X1_weight', 'X1_bias'], ['X2'], name='Conv1') - relu_node_2 = onnx.helper.make_node('Relu', ['X2'], ['X3'], name= 'Relu2') - conv_node_2 = onnx.helper.make_node('Conv', ['X3', 'X3_weight', 'X3_bias'], ['X4'], name='Conv2') - conv_node_3 = onnx.helper.make_node('Conv', ['X1', 'X5_weight', 'X5_bias'], ['X5'], name='Conv3') - add_node = onnx.helper.make_node('Add', ['X4', 'X5'], ['output'], name='Add') - - graph = helper.make_graph([relu_node_1, conv_node_1, relu_node_2, conv_node_2, conv_node_3, add_node], 'test_graph_6', [input0], [output]) + input0 = helper.make_tensor_value_info("input0", TensorProto.FLOAT, [1, 3, 1, 3]) + output = helper.make_tensor_value_info("output", TensorProto.FLOAT, [1, 3, 1, 3]) + + X1_weight = generate_input_initializer([3, 3, 1, 1], np.float32, "X1_weight") + X1_bias = generate_input_initializer([3], np.float32, "X1_bias") + X3_weight = generate_input_initializer([3, 3, 1, 1], np.float32, "X3_weight") + X3_bias = generate_input_initializer([3], np.float32, "X3_bias") + X5_weight = generate_input_initializer([3, 3, 1, 1], np.float32, "X5_weight") + X5_bias = generate_input_initializer([3], np.float32, "X5_bias") + + relu_node_1 = onnx.helper.make_node("Relu", ["input0"], ["X1"], name="Relu1") + conv_node_1 = onnx.helper.make_node("Conv", ["X1", "X1_weight", "X1_bias"], ["X2"], name="Conv1") + relu_node_2 = onnx.helper.make_node("Relu", ["X2"], ["X3"], name="Relu2") + conv_node_2 = onnx.helper.make_node("Conv", ["X3", "X3_weight", "X3_bias"], ["X4"], name="Conv2") + conv_node_3 = onnx.helper.make_node("Conv", ["X1", "X5_weight", "X5_bias"], ["X5"], name="Conv3") + add_node = onnx.helper.make_node("Add", ["X4", "X5"], ["output"], name="Add") + + graph = helper.make_graph( + [relu_node_1, conv_node_1, relu_node_2, conv_node_2, conv_node_3, add_node], + "test_graph_6", + [input0], + [output], + ) graph.initializer.add().CopyFrom(X1_weight) graph.initializer.add().CopyFrom(X1_bias) graph.initializer.add().CopyFrom(X3_weight) @@ -69,7 +74,7 @@ def setUp(self): graph.initializer.add().CopyFrom(X5_bias) model = helper.make_model(graph) - test_model_path 
= './test_model_6.onnx' + test_model_path = "./test_model_6.onnx" onnx.save(model, test_model_path) model = onnx.load(test_model_path) self.model = ONNXModel(model) @@ -79,32 +84,58 @@ def setUp(self): # QLinearConv # | # DequantizeLinear - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 1, 5, 5]) - A_scale = helper.make_tensor_value_info('A_scale', TensorProto.FLOAT, [1]) - a_scale = generate_input_initializer([1], np.float32, 'A_scale') - A_zero = helper.make_tensor_value_info('A_zero_point', TensorProto.INT8, [1]) - a_zero_point = generate_input_initializer([1], np.int8, 'A_zero_point') - B_scale = helper.make_tensor_value_info('B_scale', TensorProto.FLOAT, [1]) - b_scale = generate_input_initializer([1], np.float32, 'B_scale') - B_zero = helper.make_tensor_value_info('B_zero_point', TensorProto.INT8, [1]) - b_zero_point = generate_input_initializer([1], np.int8, 'B_zero_point') - C = helper.make_tensor_value_info('C', TensorProto.INT8, [1, 1, 5, 5]) - c = generate_input_initializer([1, 1, 5, 5], np.int8, 'C') - C_scale = helper.make_tensor_value_info('C_scale', TensorProto.FLOAT, [1]) - c_scale = generate_input_initializer([1], np.float32, 'C_scale') - C_zero = helper.make_tensor_value_info('C_zero_point', TensorProto.INT8, [1]) - c_zero_point = generate_input_initializer([1], np.int8, 'C_zero_point') - E = helper.make_tensor_value_info('E', TensorProto.INT32, [1]) - e = generate_input_initializer([1], np.int32, 'E') - D_scale = helper.make_tensor_value_info('D_scale', TensorProto.FLOAT, [1]) - d_scale = generate_input_initializer([1], np.float32, 'D_scale') - D_zero = helper.make_tensor_value_info('D_zero_point', TensorProto.INT8, [1]) - d_zero_point = generate_input_initializer([1], np.int8, 'D_zero_point') - D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [1, 1, 5, 5]) - quantize_node = onnx.helper.make_node('QuantizeLinear', ['A', 'A_scale', 'A_zero_point'], ['B_quantized'], name='A_QuantizeLinear') - conv_node = onnx.helper.make_node('QLinearConv', ['B_quantized', 'B_scale', 'B_zero_point', 'C_quantized', 'C_scale', 'C_zero_point', 'D_scale', 'D_zero_point', 'E'], ['D_quantized'], name='conv_quant', kernel_shape=[3, 3], pads=[1, 1, 1, 1]) - dequantize_node = onnx.helper.make_node('DequantizeLinear', ['D_quantized', 'D_scale', 'D_zero_point'], ['D'], name='D_DequantizeLinear') - graph = helper.make_graph([quantize_node, conv_node, dequantize_node], 'test_graph_7', [A, A_scale, A_zero, C, C_scale, C_zero, E, D_scale, D_zero], [D]) + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 1, 5, 5]) + A_scale = helper.make_tensor_value_info("A_scale", TensorProto.FLOAT, [1]) + a_scale = generate_input_initializer([1], np.float32, "A_scale") + A_zero = helper.make_tensor_value_info("A_zero_point", TensorProto.INT8, [1]) + a_zero_point = generate_input_initializer([1], np.int8, "A_zero_point") + B_scale = helper.make_tensor_value_info("B_scale", TensorProto.FLOAT, [1]) + b_scale = generate_input_initializer([1], np.float32, "B_scale") + B_zero = helper.make_tensor_value_info("B_zero_point", TensorProto.INT8, [1]) + b_zero_point = generate_input_initializer([1], np.int8, "B_zero_point") + C = helper.make_tensor_value_info("C", TensorProto.INT8, [1, 1, 5, 5]) + c = generate_input_initializer([1, 1, 5, 5], np.int8, "C") + C_scale = helper.make_tensor_value_info("C_scale", TensorProto.FLOAT, [1]) + c_scale = generate_input_initializer([1], np.float32, "C_scale") + C_zero = helper.make_tensor_value_info("C_zero_point", TensorProto.INT8, [1]) + c_zero_point = 
generate_input_initializer([1], np.int8, "C_zero_point") + E = helper.make_tensor_value_info("E", TensorProto.INT32, [1]) + e = generate_input_initializer([1], np.int32, "E") + D_scale = helper.make_tensor_value_info("D_scale", TensorProto.FLOAT, [1]) + d_scale = generate_input_initializer([1], np.float32, "D_scale") + D_zero = helper.make_tensor_value_info("D_zero_point", TensorProto.INT8, [1]) + d_zero_point = generate_input_initializer([1], np.int8, "D_zero_point") + D = helper.make_tensor_value_info("D", TensorProto.FLOAT, [1, 1, 5, 5]) + quantize_node = onnx.helper.make_node( + "QuantizeLinear", ["A", "A_scale", "A_zero_point"], ["B_quantized"], name="A_QuantizeLinear" + ) + conv_node = onnx.helper.make_node( + "QLinearConv", + [ + "B_quantized", + "B_scale", + "B_zero_point", + "C_quantized", + "C_scale", + "C_zero_point", + "D_scale", + "D_zero_point", + "E", + ], + ["D_quantized"], + name="conv_quant", + kernel_shape=[3, 3], + pads=[1, 1, 1, 1], + ) + dequantize_node = onnx.helper.make_node( + "DequantizeLinear", ["D_quantized", "D_scale", "D_zero_point"], ["D"], name="D_DequantizeLinear" + ) + graph = helper.make_graph( + [quantize_node, conv_node, dequantize_node], + "test_graph_7", + [A, A_scale, A_zero, C, C_scale, C_zero, E, D_scale, D_zero], + [D], + ) graph.initializer.add().CopyFrom(a_scale) graph.initializer.add().CopyFrom(a_zero_point) graph.initializer.add().CopyFrom(b_scale) @@ -130,38 +161,42 @@ def setUp(self): # | # Add - input = onnx.helper.make_tensor_value_info('input', onnx.TensorProto.FLOAT, [2, 4]) - - W1 = onnx.helper.make_tensor_value_info('W1', onnx.TensorProto.FLOAT, [4, 5]) - w1 = generate_input_initializer([4, 5], np.float32, 'W1') - B1 = onnx.helper.make_tensor_value_info('b1', onnx.TensorProto.FLOAT, [5]) - b1 = generate_input_initializer([5], np.float32, 'b1') - shape = numpy_helper.from_array(np.array((2, 5)).astype(np.int64), name='shape') - W2 = onnx.helper.make_tensor_value_info('W2', onnx.TensorProto.FLOAT, [5, 6]) - w2 = generate_input_initializer([5, 6], np.float32, 'W2') - B2 = onnx.helper.make_tensor_value_info('b2', onnx.TensorProto.FLOAT, [6]) - b2 = generate_input_initializer([6], np.float32, 'b2') - output = onnx.helper.make_tensor_value_info('output', onnx.TensorProto.FLOAT, [2, 6]) - - node1 = onnx.helper.make_node('MatMul', inputs=['input', 'W1'], outputs=['y1']) - node2 = onnx.helper.make_node('Add', inputs=['y1', 'b1'], outputs=['y1_add_b1']) - node3 = onnx.helper.make_node('Reshape', inputs=['y1_add_b1', 'shape'], outputs=['y2']) - node4 = onnx.helper.make_node('Reshape', inputs=['y2', 'shape'], outputs=['y3']) - node5 = onnx.helper.make_node('MatMul', inputs=['y3', 'W2'], outputs=['y4']) - node6 = onnx.helper.make_node('Add', inputs=['y4', 'b2'], outputs=['output']) - - graph = onnx.helper.make_graph([node1, node2, node3, node4, node5, node6], 'test_matmul_reshape_graph', [input, W1, B1, W2, B2], [output]) + input = onnx.helper.make_tensor_value_info("input", onnx.TensorProto.FLOAT, [2, 4]) + + W1 = onnx.helper.make_tensor_value_info("W1", onnx.TensorProto.FLOAT, [4, 5]) + w1 = generate_input_initializer([4, 5], np.float32, "W1") + B1 = onnx.helper.make_tensor_value_info("b1", onnx.TensorProto.FLOAT, [5]) + b1 = generate_input_initializer([5], np.float32, "b1") + shape = numpy_helper.from_array(np.array((2, 5)).astype(np.int64), name="shape") + W2 = onnx.helper.make_tensor_value_info("W2", onnx.TensorProto.FLOAT, [5, 6]) + w2 = generate_input_initializer([5, 6], np.float32, "W2") + B2 = onnx.helper.make_tensor_value_info("b2", 
onnx.TensorProto.FLOAT, [6]) + b2 = generate_input_initializer([6], np.float32, "b2") + output = onnx.helper.make_tensor_value_info("output", onnx.TensorProto.FLOAT, [2, 6]) + + node1 = onnx.helper.make_node("MatMul", inputs=["input", "W1"], outputs=["y1"]) + node2 = onnx.helper.make_node("Add", inputs=["y1", "b1"], outputs=["y1_add_b1"]) + node3 = onnx.helper.make_node("Reshape", inputs=["y1_add_b1", "shape"], outputs=["y2"]) + node4 = onnx.helper.make_node("Reshape", inputs=["y2", "shape"], outputs=["y3"]) + node5 = onnx.helper.make_node("MatMul", inputs=["y3", "W2"], outputs=["y4"]) + node6 = onnx.helper.make_node("Add", inputs=["y4", "b2"], outputs=["output"]) + + graph = onnx.helper.make_graph( + [node1, node2, node3, node4, node5, node6], "test_matmul_reshape_graph", [input, W1, B1, W2, B2], [output] + ) graph.initializer.add().CopyFrom(w1) graph.initializer.add().CopyFrom(b1) graph.initializer.add().CopyFrom(w2) graph.initializer.add().CopyFrom(b2) graph.initializer.add().CopyFrom(shape) - model = onnx.helper.make_model(graph, **{'opset_imports': [onnx.helper.make_opsetid('', 14)]}) + model = onnx.helper.make_model(graph, **{"opset_imports": [onnx.helper.make_opsetid("", 14)]}) self.matmul_reshape_model = model - cmd = 'optimum-cli export onnx --model hf-internal-testing/tiny-random-gptj --task text-generation gptj/' - p = subprocess.Popen(cmd, preexec_fn=os.setsid, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) # nosec + cmd = "optimum-cli export onnx --model hf-internal-testing/tiny-random-gptj --task text-generation gptj/" + p = subprocess.Popen( + cmd, preexec_fn=os.setsid, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True + ) # nosec p.communicate() @classmethod @@ -172,19 +207,15 @@ def tearDownClass(self): def test_hf_model(self): from optimum.onnxruntime import ORTModelForCausalLM from transformers import AutoConfig, AutoTokenizer - os.mkdir('hf_test') - model = ONNXModel('gptj/decoder_model.onnx') - model.save('./hf_test/decoder_model.onnx') - self.assertTrue(os.path.exists('hf_test/config.json')) - - config = AutoConfig.from_pretrained('hf_test') - sessions = ORTModelForCausalLM.load_model('hf_test/decoder_model.onnx') - model = ORTModelForCausalLM( - sessions[0], - config, - 'hf_test', - use_cache=False, - use_io_binding=False) + + os.mkdir("hf_test") + model = ONNXModel("gptj/decoder_model.onnx") + model.save("./hf_test/decoder_model.onnx") + self.assertTrue(os.path.exists("hf_test/config.json")) + + config = AutoConfig.from_pretrained("hf_test") + sessions = ORTModelForCausalLM.load_model("hf_test/decoder_model.onnx") + model = ORTModelForCausalLM(sessions[0], config, "hf_test", use_cache=False, use_io_binding=False) self.assertNotEqual(model, None) def test_nodes(self): @@ -197,11 +228,10 @@ def test_nodes(self): def test_initializer(self): self.assertEqual(len(self.model.initializer()), 6) inits_name = [init.name for init in self.model.initializer()] - inits = ['X1_weight', 'X1_bias', 'X3_weight', 'X3_bias', 'X5_weight', 'X5_bias'] + inits = ["X1_weight", "X1_bias", "X3_weight", "X3_bias", "X5_weight", "X5_bias"] for init in inits: self.assertTrue(init in inits_name) - def test_remove_node(self): for node in self.model.nodes(): if node.op_type == "Add": @@ -225,24 +255,26 @@ def test_remove_nodes(self): self.assertTrue(node in nodes_name) def test_add_node(self): - node_to_add = onnx.helper.make_node('Relu', ['output'], ['output1'], keepdims=0) + node_to_add = onnx.helper.make_node("Relu", ["output"], ["output1"], keepdims=0) 
self.model.add_node(node_to_add) last_node = self.model.nodes()[-1] - self.assertEqual(last_node.op_type, 'Relu') + self.assertEqual(last_node.op_type, "Relu") def test_add_nodes(self): nodes_to_add = [] for i in range(2): - node_to_add = onnx.helper.make_node('Relu', ["add_node{}_input".format(str(i))], ["add_node{}_output".format(str(i))], keepdims=0) + node_to_add = onnx.helper.make_node( + "Relu", ["add_node{}_input".format(str(i))], ["add_node{}_output".format(str(i))], keepdims=0 + ) nodes_to_add.append(node_to_add) self.model.add_nodes(nodes_to_add) - self.assertEqual(self.model.nodes()[-1].input, ['add_node1_input']) - self.assertEqual(self.model.nodes()[-2].input, ['add_node0_input']) - self.assertEqual(self.model.nodes()[-1].output, ['add_node1_output']) - self.assertEqual(self.model.nodes()[-2].output, ['add_node0_output']) + self.assertEqual(self.model.nodes()[-1].input, ["add_node1_input"]) + self.assertEqual(self.model.nodes()[-2].input, ["add_node0_input"]) + self.assertEqual(self.model.nodes()[-1].output, ["add_node1_output"]) + self.assertEqual(self.model.nodes()[-2].output, ["add_node0_output"]) def test_get_initializer(self): - inits = ['X1_weight', 'X1_bias', 'X3_weight', 'X3_bias', 'X5_weight', 'X5_bias'] + inits = ["X1_weight", "X1_bias", "X3_weight", "X3_bias", "X5_weight", "X5_bias"] for init in inits: self.assertIsNotNone(self.model.get_initializer(init)) @@ -252,7 +284,7 @@ def test_remove_initializer(self): self.model.remove_initializer(init) self.assertEqual(len(self.model.initializer()), 5) inits_name = [init.name for init in self.model.initializer()] - inits = ['X1_bias', 'X3_weight', 'X3_bias', 'X5_weight', 'X5_bias'] + inits = ["X1_bias", "X3_weight", "X3_bias", "X5_weight", "X5_bias"] for init in inits: self.assertTrue(init in inits_name) @@ -264,21 +296,21 @@ def test_remove_initializers(self): self.model.remove_initializers(init_to_remove) self.assertEqual(len(self.model.initializer()), 3) inits_name = [init.name for init in self.model.initializer()] - inits = ['X1_weight', 'X3_weight', 'X5_weight'] + inits = ["X1_weight", "X3_weight", "X5_weight"] for init in inits: self.assertTrue(init in inits_name) def test_input_name_to_nodes(self): self.assertEqual(len(self.model.input_name_to_nodes), 12) ipts_name = [name for name in self.model.input_name_to_nodes] - ipts = ['input0', 'X1', 'X2', 'X3', 'X3_weight', 'X3_bias','X5_weight', 'X5_bias', 'X4', 'X5'] + ipts = ["input0", "X1", "X2", "X3", "X3_weight", "X3_bias", "X5_weight", "X5_bias", "X4", "X5"] for ipt in ipts: self.assertTrue(ipt in ipts_name) def test_output_name_to_node(self): self.assertEqual(len(self.model.output_name_to_node), 6) opts_name = [name for name in self.model.output_name_to_node] - opts = ['X1', 'X2', 'X3', 'X4', 'X5', 'output'] + opts = ["X1", "X2", "X3", "X4", "X5", "output"] for opt in opts: self.assertTrue(opt in opts_name) @@ -333,7 +365,9 @@ def test_find_nodes_by_initializer(self): def test_get_scale_zero(self): import time + result = [0.1] + def sub_eval(model, result): time.sleep(0.001 * len(result)) return result[0] @@ -341,40 +375,38 @@ def sub_eval(model, result): def eval(model): return sub_eval(model, result) - dataset = Datasets("onnxrt_qdq")["dummy"]((4, 4), low=0., high=0., dtype='float32') + dataset = Datasets("onnxrt_qdq")["dummy"]((4, 4), low=0.0, high=0.0, dtype="float32") dataloader = DATALOADERS["onnxrt_qdq"](dataset, 2) config = PostTrainingQuantConfig() - q_model = quantization.fit(self.matmul_reshape_model, config, - calib_dataloader=dataloader, eval_func=eval) 
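[Editor's note: an illustrative sketch, not part of this patch. It mirrors the post-training quantization flow of the surrounding test_get_scale_zero hunk; my_onnx_model is a hypothetical onnx.ModelProto standing in for self.matmul_reshape_model, and the constant eval_func simply mimics the dummy evaluator used in the test.]

from neural_compressor import PostTrainingQuantConfig, quantization
from neural_compressor.data import DATALOADERS, Datasets

dataset = Datasets("onnxrt_qdq")["dummy"]((4, 4), low=0.0, high=0.0, dtype="float32")
dataloader = DATALOADERS["onnxrt_qdq"](dataset, 2)  # dummy calibration data, batch size 2
config = PostTrainingQuantConfig()
q_model = quantization.fit(
    my_onnx_model, config, calib_dataloader=dataloader, eval_func=lambda model: 0.1  # constant "accuracy"
)
q_model.save("quantized.onnx")
scale, zero_point = q_model.get_scale_zero("input_quantized")  # inspect quantization parameters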
- q_model.save('test.onnx') - scale, zp = q_model.get_scale_zero('y3_QuantizeInput_quantized') - self.assertEqual(scale.name, 'y1_add_b1_scale') - self.assertEqual(zp.name, 'y1_add_b1_zero_point') - - scale, zp = q_model.get_scale_zero('input_quantized') - self.assertEqual(scale.name, 'input_scale') - self.assertEqual(zp.name, 'input_zero_point') + q_model = quantization.fit(self.matmul_reshape_model, config, calib_dataloader=dataloader, eval_func=eval) + q_model.save("test.onnx") + scale, zp = q_model.get_scale_zero("y3_QuantizeInput_quantized") + self.assertEqual(scale.name, "y1_add_b1_scale") + self.assertEqual(zp.name, "y1_add_b1_zero_point") + scale, zp = q_model.get_scale_zero("input_quantized") + self.assertEqual(scale.name, "input_scale") + self.assertEqual(zp.name, "input_zero_point") def test_save(self): - self.model.save_model_to_file('./test_model_6.onnx', use_external_data_format=True) + self.model.save_model_to_file("./test_model_6.onnx", use_external_data_format=True) def test_find_by_name(self): - from neural_compressor.adaptor.ox_utils.util import find_by_name, dtype_to_name, dtype_mapping - initializer = find_by_name('X1_weight', self.model.initializer()) + from neural_compressor.adaptor.ox_utils.util import dtype_mapping, dtype_to_name, find_by_name + + initializer = find_by_name("X1_weight", self.model.initializer()) self.assertIsNotNone(initializer) - initializer = find_by_name('X1', self.model.initializer()) + initializer = find_by_name("X1", self.model.initializer()) self.assertIsNone(initializer) def test_remove_unused_nodes(self): self.assertEqual(len(self.model.nodes()), 6) - node_to_add = onnx.helper.make_node('Relu', ['output1'], ['output2'], keepdims=0, name='added_relu') + node_to_add = onnx.helper.make_node("Relu", ["output1"], ["output2"], keepdims=0, name="added_relu") self.model.add_node(node_to_add) self.assertEqual(len(self.model.nodes()), 7) self.model.remove_unused_nodes() self.assertEqual(len(self.model.nodes()), 6) - if __name__ == "__main__": unittest.main() diff --git a/test/model/test_tensorflow_auto_input_output.py b/test/model/test_tensorflow_auto_input_output.py index 935242d6d05..d0ba38be92f 100644 --- a/test/model/test_tensorflow_auto_input_output.py +++ b/test/model/test_tensorflow_auto_input_output.py @@ -1,29 +1,37 @@ # # -*- coding: utf-8 -*- # -import unittest import os import platform +import unittest + from neural_compressor.adaptor.tensorflow import TensorFlowAdaptor from neural_compressor.model import Model as TensorflowModel from neural_compressor.model.tensorflow_model import validate_graph_node + class TestTFAutoDetectInputOutput(unittest.TestCase): - mb_model_url = 'https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/mobilenet_v1_1.0_224_frozen.pb' - pb_path = '/tmp/.neural_compressor/mobilenet_fp32.pb' + mb_model_url = ( + "https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/mobilenet_v1_1.0_224_frozen.pb" + ) + pb_path = "/tmp/.neural_compressor/mobilenet_fp32.pb" platform = platform.system().lower() if platform == "windows": - pb_path = 'C:\\tmp\\.neural_compressor\\mobilenet_fp32.pb' + pb_path = "C:\\tmp\\.neural_compressor\\mobilenet_fp32.pb" + @classmethod def setUpClass(self): self.saved_flag = True if not os.path.exists(self.pb_path): try: if self.platform == "linux": - os.system("mkdir -p /tmp/.neural_compressor && wget {} -O {} ".format(self.mb_model_url, self.pb_path)) + os.system( + "mkdir -p /tmp/.neural_compressor && wget {} -O {} ".format(self.mb_model_url, self.pb_path) + ) elif 
self.platform == "windows": - os.system('md C:\\tmp\.neural_compressor && cd C:\\tmp\.neural_compressor') + os.system("md C:\\tmp\.neural_compressor && cd C:\\tmp\.neural_compressor") from urllib import request + request.urlretrieve(self.mb_model_url) except Exception as e: self.saved_flag = False @@ -39,5 +47,6 @@ def testAutoDetectInputOutput(self): input_validate = validate_graph_node(model.graph_def, inputs) self.assertTrue(input_validate) + if __name__ == "__main__": unittest.main() diff --git a/test/nas/test_nas.py b/test/nas/test_nas.py index 90b16d6a625..7c5d263d916 100644 --- a/test/nas/test_nas.py +++ b/test/nas/test_nas.py @@ -1,17 +1,17 @@ import os import shutil import unittest + import numpy as np import torch from neural_compressor.conf.config import NASConfig from neural_compressor.data import Datasets from neural_compressor.experimental import NAS, common -from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import \ - PyTorchDataLoader +from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -def build_fake_yaml(approach=None, search_algorithm=None, metrics=['acc']): +def build_fake_yaml(approach=None, search_algorithm=None, metrics=["acc"]): fake_yaml = """ model: name: imagenet_nas @@ -51,13 +51,14 @@ def build_fake_yaml(approach=None, search_algorithm=None, metrics=['acc']): shape: [32, 3, 64, 64] label: True """ % ( - 'approach: \'{}\''.format(approach) if approach else '', - 'search_algorithm: \'{}\''.format(search_algorithm) if search_algorithm else '', - 'metrics: [{}]'.format(','.join(['\'{}\''.format(m) for m in metrics])) if metrics else '' + "approach: '{}'".format(approach) if approach else "", + "search_algorithm: '{}'".format(search_algorithm) if search_algorithm else "", + "metrics: [{}]".format(",".join(["'{}'".format(m) for m in metrics])) if metrics else "", ) - with open('fake.yaml', 'w', encoding="utf-8") as f: + with open("fake.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) + def build_dynas_fake_yaml(): fake_yaml = """ model: @@ -73,9 +74,10 @@ def build_dynas_fake_yaml(): metrics: ['accuracy_top1', 'macs'] results_csv_path: './search_results.csv' """ - with open('dynas_fake.yaml', 'w', encoding="utf-8") as f: + with open("dynas_fake.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) + def build_dynas_results_csv(): results_csv = """ Sub-network,Date,Latency (ms), MACs,Top-1 Acc (%) @@ -91,12 +93,13 @@ def build_dynas_results_csv(): "{'wid': None, 'ks': [3, 7, 5, 5, 7, 3, 5, 3, 5, 5, 5, 3, 5, 5, 3, 5, 7, 3, 7, 5], 'e': [3, 4, 6, 6, 4, 3, 6, 6, 6, 3, 3, 3, 3, 6, 3, 6, 6, 3, 6, 3], 'd': [3, 2, 3, 2, 3], 'r': [224]}",2022-07-07 03:29:00.989578,36,369186480,77.096 "{'wid': None, 'ks': [7, 7, 5, 5, 7, 5, 3, 3, 3, 5, 7, 3, 7, 7, 5, 5, 3, 7, 3, 7], 'e': [6, 3, 6, 3, 4, 3, 3, 3, 4, 3, 6, 4, 3, 3, 6, 4, 4, 3, 4, 3], 'd': [4, 4, 3, 4, 4], 'r': [224]}",2022-07-07 03:31:07.608402,51,518341312,78.104 """ - with open('search_results.csv', 'w', encoding="utf-8") as f: + with open("search_results.csv", "w", encoding="utf-8") as f: f.write(results_csv) + def model_builder(model_arch_params): - channels = model_arch_params['channels'] - dimensions = model_arch_params['dimensions'] + channels = model_arch_params["channels"] + dimensions = model_arch_params["dimensions"] return ConvNet(channels, dimensions) @@ -119,7 +122,6 @@ def forward(self, inputs): class TestNAS(unittest.TestCase): - @classmethod def setUpClass(cls): build_fake_yaml() @@ -128,24 +130,24 @@ def setUpClass(cls): @classmethod def 
tearDownClass(cls): - os.remove('fake.yaml') - os.remove('dynas_fake.yaml') - os.remove('search_results.csv') - shutil.rmtree(os.path.join(os.getcwd(), 'NASResults'), ignore_errors=True) - shutil.rmtree('runs', ignore_errors=True) + os.remove("fake.yaml") + os.remove("dynas_fake.yaml") + os.remove("search_results.csv") + shutil.rmtree(os.path.join(os.getcwd(), "NASResults"), ignore_errors=True) + shutil.rmtree("runs", ignore_errors=True) def test_basic_nas(self): # Built-in train, evaluation - nas_agent = NAS('fake.yaml') - nas_agent.model_builder = \ - lambda model_arch_params:common.Model(model_builder(model_arch_params)) + nas_agent = NAS("fake.yaml") + nas_agent.model_builder = lambda model_arch_params: common.Model(model_builder(model_arch_params)) best_model_archs = nas_agent() self.assertTrue(len(best_model_archs) > 0) # Customized train, evaluation - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(32, 3, 64, 64), low=0., high=1., label=True) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(32, 3, 64, 64), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) + def train_func(model): epochs = 2 iters = 10 @@ -155,7 +157,7 @@ def train_func(model): model.train() cnt = 0 for image, target in dummy_dataloader: - print('.', end='') + print(".", end="") cnt += 1 output = model(image).unsqueeze(dim=0) loss = criterion(output, target) @@ -164,19 +166,20 @@ def train_func(model): optimizer.step() if cnt >= iters: break + def eval_func(model): model.eval() acc = 0 for image, target in dummy_dataloader: output = model(image).cpu().detach().numpy() - acc += np.sum(output==target) - return {'acc': acc / len(dummy_dataset)} + acc += np.sum(output == target) + return {"acc": acc / len(dummy_dataset)} - for approach, search_algorithm in [(None, None), ('basic', 'grid'), ('basic', 'random'), ('basic', 'bo')]: - print('{fix}Search algorithm: {msg}{fix}'.format(msg=search_algorithm, fix='='*30)) - search_space = {'channels': [16, 32], 'dimensions': [32]} + for approach, search_algorithm in [(None, None), ("basic", "grid"), ("basic", "random"), ("basic", "bo")]: + print("{fix}Search algorithm: {msg}{fix}".format(msg=search_algorithm, fix="=" * 30)) + search_space = {"channels": [16, 32], "dimensions": [32]} nas_config = NASConfig(approach=approach, search_space=search_space, search_algorithm=search_algorithm) - nas_config.usr_cfg.model.framework = 'pytorch' + nas_config.usr_cfg.model.framework = "pytorch" nas_agent = NAS(nas_config) nas_agent.model_builder = model_builder nas_agent.train_func = train_func @@ -185,16 +188,19 @@ def eval_func(model): self.assertTrue(len(best_model_archs) > 0) def test_dynas(self): - nas_agent = NAS('dynas_fake.yaml') - for search_algorithm, supernet in [('nsga2','ofa_mbv3_d234_e346_k357_w1.2'), ('age', 'ofa_mbv3_d234_e346_k357_w1.2')]: - config = NASConfig(approach='dynas', search_algorithm=search_algorithm) + nas_agent = NAS("dynas_fake.yaml") + for search_algorithm, supernet in [ + ("nsga2", "ofa_mbv3_d234_e346_k357_w1.2"), + ("age", "ofa_mbv3_d234_e346_k357_w1.2"), + ]: + config = NASConfig(approach="dynas", search_algorithm=search_algorithm) config.dynas.supernet = supernet - config.dynas.metrics = ['params', 'latency'] + config.dynas.metrics = ["params", "latency"] config.dynas.population = 10 config.dynas.num_evals = 10 config.nas.search.seed = 71 config.dynas.batch_size = 64 - config.dynas.results_csv_path = 'search_results.csv' + config.dynas.results_csv_path = "search_results.csv" 
nas_agent = NAS(config) best_model_archs = nas_agent.search() self.assertTrue(len(best_model_archs) == config.dynas.population) diff --git a/test/neural_coder/test_common.py b/test/neural_coder/test_common.py index a110479ae94..72142aa18e5 100644 --- a/test/neural_coder/test_common.py +++ b/test/neural_coder/test_common.py @@ -2,6 +2,7 @@ from neural_coder.utils import common + class TestCommon(unittest.TestCase): def test_move_element_to_front(self): f = common.move_element_to_front @@ -14,5 +15,6 @@ def test_move_element_to_front(self): self.assertEqual(f(["a", "b", "c", "d"], "d"), ["d", "a", "b", "c"]) self.assertEqual(f(["ab", "a", "ac", "ad"], "a"), ["a", "ab", "ac", "ad"]) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/test/neural_coder/test_line_operation.py b/test/neural_coder/test_line_operation.py index 6cd3dd4807a..822621bb39d 100644 --- a/test/neural_coder/test_line_operation.py +++ b/test/neural_coder/test_line_operation.py @@ -2,6 +2,7 @@ from neural_coder.utils import line_operation + class TestLineOperation(unittest.TestCase): def test_get_line_indent_level(self): f = line_operation.get_line_indent_level @@ -40,5 +41,6 @@ def test_of_definition_format(self): self.assertEqual(f("model = Net()"), (True, "model", "Net")) self.assertEqual(f("model = Net"), (False, "", "")) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/test/objective/test_objective.py b/test/objective/test_objective.py index 7da0dbc530d..1af4ea668b4 100644 --- a/test/objective/test_objective.py +++ b/test/objective/test_objective.py @@ -1,14 +1,16 @@ -"""Tests for neural_compressor quantization""" -import unittest -import os +"""Tests for neural_compressor quantization.""" import importlib +import os +import random import shutil -import yaml +import unittest + import numpy as np -import random +import yaml + def build_fake_yaml_footprint(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -28,14 +30,15 @@ def build_fake_yaml_footprint(): relative: 0.01 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml_footprint.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml_footprint.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() + def build_fake_yaml_model_size(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -55,14 +58,15 @@ def build_fake_yaml_model_size(): relative: 0.01 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml_model_size.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml_model_size.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() + def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -81,165 +85,178 @@ def build_fake_yaml(): relative: 0.01 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() + def build_fake_model(): import tensorflow as tf + try: graph = tf.Graph() graph_def = tf.GraphDef() with tf.Session(graph=graph) as sess: - x = tf.placeholder(tf.float64, shape=(1, 256, 256, 1), name='x') - y = tf.constant(np.random.random((2, 2, 1, 1)), name='y') - op = tf.nn.conv2d(input=x, filter=y, strides=[1, 1, 1, 1], \ - padding='VALID', name='op_to_store') + x = 
tf.placeholder(tf.float64, shape=(1, 256, 256, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)), name="y") + op = tf.nn.conv2d(input=x, filter=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.global_variables_initializer()) - constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ["op_to_store"]) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") except: import tensorflow as tf + graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session(graph=graph) as sess: - x = tf.compat.v1.placeholder(tf.float64, shape=(1, 256, 256, 1), name='x') - y = tf.compat.v1.constant(np.random.random((3, 3, 1, 1)), name='y') - op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], \ - padding='VALID', name='op_to_store') + x = tf.compat.v1.placeholder(tf.float64, shape=(1, 256, 256, 1), name="x") + y = tf.compat.v1.constant(np.random.random((3, 3, 1, 1)), name="y") + op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") return graph + def build_fake_model1(): import tensorflow as tf + try: graph = tf.Graph() graph_def = tf.GraphDef() with tf.Session(graph=graph) as sess: - x = tf.placeholder(tf.float64, shape=(1, 256, 256, 1), name='x') - y_1 = tf.constant(np.random.random((3, 3, 1, 1)), name='y_1') - y_2 = tf.constant(np.random.random((3, 3, 1, 1)), name='y_2') - conv1 = tf.nn.conv2d(input=x, filter=y_1, strides=[1, 1, 1, 1], \ - padding='VALID', name='conv1') - op = tf.nn.conv2d(input=conv1, filter=y_2, strides=[1, 1, 1, 1], \ - padding='VALID', name='op_to_store') + x = tf.placeholder(tf.float64, shape=(1, 256, 256, 1), name="x") + y_1 = tf.constant(np.random.random((3, 3, 1, 1)), name="y_1") + y_2 = tf.constant(np.random.random((3, 3, 1, 1)), name="y_2") + conv1 = tf.nn.conv2d(input=x, filter=y_1, strides=[1, 1, 1, 1], padding="VALID", name="conv1") + op = tf.nn.conv2d(input=conv1, filter=y_2, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.global_variables_initializer()) - constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ["op_to_store"]) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") except: import tensorflow as tf + graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session(graph=graph) as sess: - x = tf.compat.v1.placeholder(tf.float64, shape=(1, 256, 256, 1), name='x') - y_1 = tf.constant(np.random.random((3, 3, 1, 1)), name='y_1') - y_2 = tf.constant(np.random.random((3, 3, 1, 1)), name='y_2') - conv1 = tf.nn.conv2d(input=x, filters=y_1, strides=[1, 1, 1, 1], \ - padding='VALID', name='conv1') - op 
= tf.nn.conv2d(input=conv1, filters=y_2, strides=[1, 1, 1, 1], \ - padding='VALID', name='op_to_store') + x = tf.compat.v1.placeholder(tf.float64, shape=(1, 256, 256, 1), name="x") + y_1 = tf.constant(np.random.random((3, 3, 1, 1)), name="y_1") + y_2 = tf.constant(np.random.random((3, 3, 1, 1)), name="y_2") + conv1 = tf.nn.conv2d(input=x, filters=y_1, strides=[1, 1, 1, 1], padding="VALID", name="conv1") + op = tf.nn.conv2d(input=conv1, filters=y_2, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") return graph + def build_fake_strategy(): - with open(os.path.join(os.path.dirname(importlib.util.find_spec('neural_compressor').origin), \ - 'experimental/strategy/fake.py'), 'w', encoding='utf-8') as f: - seq = ["import time \n", - "import copy \n", - "import numpy as np \n", - "from collections import OrderedDict \n", - "from .strategy import strategy_registry, TuneStrategy \n", - "from ...utils import logger \n", - "from .utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler \n", - "from .utils.tuning_structs import OpTuningConfig \n", - "import copy \n", - "@strategy_registry \n", - "class FakeTuneStrategy(TuneStrategy): \n", - " def __init__(self, model, cfg, q_dataloader, q_func=None, eval_dataloader=None, \n", - " eval_func=None, dicts=None, q_hooks=None): \n", - " self.id = 0 \n", - " self.resume = True if dicts else False \n", - " super(FakeTuneStrategy, self).__init__(model, cfg, q_dataloader, \n", - " q_func, eval_dataloader, eval_func, dicts) \n", - " def __getstate__(self): \n", - " for history in self.tuning_history: \n", - " if self._same_yaml(history['cfg'], self.cfg): \n", - " history['id'] = self.id \n", - " save_dict = super(FakeTuneStrategy, self).__getstate__() \n", - " return save_dict \n", - " def next_tune_cfg(self): \n", - " if self.resume: \n", - " #assert self.id == 1 \n", - " assert len(self.tuning_history) == 1 \n", - " history = self.tuning_history[0] \n", - " assert self._same_yaml(history['cfg'], self.cfg) \n", - " assert len(history['history']) \n", - " for h in history['history']: \n", - " assert h \n", - " from copy import deepcopy \n", - " tuning_space = self.tuning_space \n", - " initial_op_tuning_cfg = {} \n", - " for item in tuning_space.root_item.options: \n", - " if item.item_type == 'op': \n", - " op_name, op_type = item.name \n", - " initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space) \n", - " calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options \n", - " for calib_sampling_size in calib_sampling_size_lst: \n", - " # step1. 
collect the ops that support static and dynamic \n", - " quant_mode_wise_items = OrderedDict() \n", - " query_order = ['static', 'dynamic', 'bf16', 'fp16', 'fp32'] \n", - " pre_items = set() \n", - " for quant_mode in query_order: \n", - " items = tuning_space.query_items_by_quant_mode(quant_mode) \n", - " filtered_items = [item for item in items if item not in pre_items] \n", - " pre_items = pre_items.union(set(items)) \n", - " quant_mode_wise_items[quant_mode] = filtered_items \n", - " def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): \n", - " for item in items_lst: \n", - " op_item_dtype_dict[item.name] = target_quant_mode \n", - " op_item_dtype_dict = OrderedDict() \n", - " for quant_mode, quant_mode_items in quant_mode_wise_items.items(): \n", - " initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) \n", - " # step3. optype-wise tuning tuning items: the algorithm/scheme/granularity of activation(weight) \n", - " early_stop_tuning = False \n", - " stage1_cnt = 0 \n", - " int8_ops = quant_mode_wise_items['dynamic'] + quant_mode_wise_items['static'] \n", - " stage1_max = min(5, len(int8_ops)) # TODO set a more appropriate value \n", - " op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], \n", - " op_item_dtype_dict, initial_op_tuning_cfg) \n", - " for op_tuning_cfg in op_wise_tuning_sampler: \n", - " stage1_cnt += 1 \n", - " if early_stop_tuning and stage1_cnt > stage1_max: \n", - " logger.info('Early stopping the stage 1.') \n", - " break \n", - " op_tuning_cfg['calib_sampling_size'] = calib_sampling_size \n", - " self.id += 1 \n", - " yield op_tuning_cfg \n",] + with open( + os.path.join( + os.path.dirname(importlib.util.find_spec("neural_compressor").origin), "experimental/strategy/fake.py" + ), + "w", + encoding="utf-8", + ) as f: + seq = [ + "import time \n", + "import copy \n", + "import numpy as np \n", + "from collections import OrderedDict \n", + "from .strategy import strategy_registry, TuneStrategy \n", + "from ...utils import logger \n", + "from .utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler \n", + "from .utils.tuning_structs import OpTuningConfig \n", + "import copy \n", + "@strategy_registry \n", + "class FakeTuneStrategy(TuneStrategy): \n", + " def __init__(self, model, cfg, q_dataloader, q_func=None, eval_dataloader=None, \n", + " eval_func=None, dicts=None, q_hooks=None): \n", + " self.id = 0 \n", + " self.resume = True if dicts else False \n", + " super(FakeTuneStrategy, self).__init__(model, cfg, q_dataloader, \n", + " q_func, eval_dataloader, eval_func, dicts) \n", + " def __getstate__(self): \n", + " for history in self.tuning_history: \n", + " if self._same_yaml(history['cfg'], self.cfg): \n", + " history['id'] = self.id \n", + " save_dict = super(FakeTuneStrategy, self).__getstate__() \n", + " return save_dict \n", + " def next_tune_cfg(self): \n", + " if self.resume: \n", + " #assert self.id == 1 \n", + " assert len(self.tuning_history) == 1 \n", + " history = self.tuning_history[0] \n", + " assert self._same_yaml(history['cfg'], self.cfg) \n", + " assert len(history['history']) \n", + " for h in history['history']: \n", + " assert h \n", + " from copy import deepcopy \n", + " tuning_space = self.tuning_space \n", + " initial_op_tuning_cfg = {} \n", + " for item in tuning_space.root_item.options: \n", + " if item.item_type == 'op': \n", + " op_name, op_type = item.name \n", + " initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space) \n", 
+ " calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options \n", + " for calib_sampling_size in calib_sampling_size_lst: \n", + " # step1. collect the ops that support static and dynamic \n", + " quant_mode_wise_items = OrderedDict() \n", + " query_order = ['static', 'dynamic', 'bf16', 'fp16', 'fp32'] \n", + " pre_items = set() \n", + " for quant_mode in query_order: \n", + " items = tuning_space.query_items_by_quant_mode(quant_mode) \n", + " filtered_items = [item for item in items if item not in pre_items] \n", + " pre_items = pre_items.union(set(items)) \n", + " quant_mode_wise_items[quant_mode] = filtered_items \n", + " def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): \n", + " for item in items_lst: \n", + " op_item_dtype_dict[item.name] = target_quant_mode \n", + " op_item_dtype_dict = OrderedDict() \n", + " for quant_mode, quant_mode_items in quant_mode_wise_items.items(): \n", + " initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) \n", + " # step3. optype-wise tuning tuning items: the algorithm/scheme/granularity of activation(weight) \n", + " early_stop_tuning = False \n", + " stage1_cnt = 0 \n", + " int8_ops = quant_mode_wise_items['dynamic'] + quant_mode_wise_items['static'] \n", + " stage1_max = min(5, len(int8_ops)) # TODO set a more appropriate value \n", + " op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], \n", + " op_item_dtype_dict, initial_op_tuning_cfg) \n", + " for op_tuning_cfg in op_wise_tuning_sampler: \n", + " stage1_cnt += 1 \n", + " if early_stop_tuning and stage1_cnt > stage1_max: \n", + " logger.info('Early stopping the stage 1.') \n", + " break \n", + " op_tuning_cfg['calib_sampling_size'] = calib_sampling_size \n", + " self.id += 1 \n", + " yield op_tuning_cfg \n", + ] f.writelines(seq) f.close() + class TestObjective(unittest.TestCase): @classmethod def setUpClass(self): @@ -252,77 +269,87 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') - os.remove('fake_yaml_model_size.yaml') - os.remove('fake_yaml_footprint.yaml') - os.remove(os.path.join(os.path.dirname(importlib.util.find_spec('neural_compressor').origin), 'experimental/strategy/fake.py')) - shutil.rmtree('./saved', ignore_errors=True) + os.remove("fake_yaml.yaml") + os.remove("fake_yaml_model_size.yaml") + os.remove("fake_yaml_footprint.yaml") + os.remove( + os.path.join( + os.path.dirname(importlib.util.find_spec("neural_compressor").origin), "experimental/strategy/fake.py" + ) + ) + shutil.rmtree("./saved", ignore_errors=True) def test_performance(self): from neural_compressor.data import Datasets - dataset = Datasets('tensorflow')['dummy']((100, 256, 256, 1), label=True) + + dataset = Datasets("tensorflow")["dummy"]((100, 256, 256, 1), label=True) from neural_compressor.experimental import Quantization, common from neural_compressor.utils.utility import get_size - quantizer = Quantization('fake_yaml.yaml') + quantizer = Quantization("fake_yaml.yaml") quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph q_model = quantizer.fit() from neural_compressor.experimental import Benchmark, common - benchmarker = Benchmark('fake_yaml.yaml') + + benchmarker = Benchmark("fake_yaml.yaml") benchmarker.b_dataloader = common.DataLoader(dataset) benchmarker.model = self.constant_graph_1 - benchmarker.fit(mode='accuracy') + benchmarker.fit(mode="accuracy") def 
test_model_size(self): - from neural_compressor.experimental import Benchmark, common from neural_compressor.data import Datasets - dataset = Datasets('tensorflow')['dummy']((100, 256, 256, 1), label=True) + from neural_compressor.experimental import Benchmark, common - benchmarker = Benchmark('fake_yaml_model_size.yaml') + dataset = Datasets("tensorflow")["dummy"]((100, 256, 256, 1), label=True) + + benchmarker = Benchmark("fake_yaml_model_size.yaml") benchmarker.b_dataloader = common.DataLoader(dataset) benchmarker.model = self.constant_graph_1 - benchmarker(mode='accuracy') + benchmarker(mode="accuracy") def test_footprint(self): - from neural_compressor.experimental import Benchmark, common from neural_compressor.data import Datasets - dataset = Datasets('tensorflow')['dummy']((100, 256, 256, 1), label=True) + from neural_compressor.experimental import Benchmark, common - benchmarker = Benchmark('fake_yaml_footprint.yaml') + dataset = Datasets("tensorflow")["dummy"]((100, 256, 256, 1), label=True) + + benchmarker = Benchmark("fake_yaml_footprint.yaml") benchmarker.b_dataloader = common.DataLoader(dataset) benchmarker.model = self.constant_graph_1 - benchmarker.fit(mode='accuracy') + benchmarker.fit(mode="accuracy") + def build_matmul_model(): - from onnx import helper, TensorProto - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 1, 5, 5]) - B = helper.make_tensor_value_info('B', TensorProto.FLOAT, [1, 1, 5, 1]) - C = helper.make_tensor_value_info('C', TensorProto.FLOAT, [1, 1, 5, 1]) - matmul_node = helper.make_node('MatMul', ['A', 'B'], ['C'], name='Matmul') - graph = helper.make_graph([matmul_node], 'test_graph_1', [A, B], [C]) + from onnx import TensorProto, helper + + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 1, 5, 5]) + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 1, 5, 1]) + C = helper.make_tensor_value_info("C", TensorProto.FLOAT, [1, 1, 5, 1]) + matmul_node = helper.make_node("MatMul", ["A", "B"], ["C"], name="Matmul") + graph = helper.make_graph([matmul_node], "test_graph_1", [A, B], [C]) model = helper.make_model(graph) - model = helper.make_model(graph, **{'opset_imports': [helper.make_opsetid('', 13)]}) + model = helper.make_model(graph, **{"opset_imports": [helper.make_opsetid("", 13)]}) return model -class TestObjs(unittest.TestCase): +class TestObjs(unittest.TestCase): def test_model(self): def eval(model): return random.random() - + model = build_matmul_model() - + from neural_compressor.conf.config import conf from neural_compressor.experimental import Quantization - - conf.model.framework = 'onnxrt_integerops' - conf.quantization.approach = 'post_training_dynamic_quant' + + conf.model.framework = "onnxrt_integerops" + conf.quantization.approach = "post_training_dynamic_quant" conf.tuning.accuracy_criterion.absolute = 0.3 - conf.tuning.multi_objectives.objective = ['accuracy', 'performance'] + conf.tuning.multi_objectives.objective = ["accuracy", "performance"] conf.tuning.multi_objectives.weight = [0.8, 0.2] conf.tuning.exit_policy.timeout = 10000 conf.tuning.exit_policy.max_trials = 2 @@ -333,259 +360,286 @@ def eval(model): def test_tune_data(self): from neural_compressor.objective import MultiObjective + obj = MultiObjective( - objectives=['accuracy', 'modelsize', 'performance'], - accuracy_criterion={'relative': 0.1}, + objectives=["accuracy", "modelsize", "performance"], + accuracy_criterion={"relative": 0.1}, obj_criterion=[True, False, False], - obj_weight=[0.7, 0.2, 0.1]) + obj_weight=[0.7, 0.2, 0.1], + ) 
baseline = [0.8, [0.8, 780, 0.6]] tune_data = [ [0.760, [0.760, 400, 0.23]], [0.778, [0.778, 420, 0.24]], [0.750, [0.750, 430, 0.22]], [0.720, [0.720, 410, 0.18]], - [0.790, [0.790, 360, 0.15]], + [0.790, [0.790, 360, 0.15]], [0.750, [0.750, 430, 0.24]], - [0.785, [0.785, 360, 0.13]]] + [0.785, [0.785, 360, 0.13]], + ] num, _ = obj.best_result(tune_data, baseline) self.assertEqual(num, 4) - obj = MultiObjective(['accuracy', 'modelsize', 'performance'], - {'relative': 0.1}, - obj_criterion=[True, False, False]) + obj = MultiObjective( + ["accuracy", "modelsize", "performance"], {"relative": 0.1}, obj_criterion=[True, False, False] + ) baseline = [0.8, [0.8, 780, 0.6]] tune_data = [ [0.760, [0.760, 400, 0.23]], [0.778, [0.778, 420, 0.24]], [0.750, [0.750, 430, 0.22]], [0.720, [0.720, 410, 0.18]], - [0.790, [0.790, 360, 0.15]], + [0.790, [0.790, 360, 0.15]], [0.750, [0.750, 430, 0.24]], - [0.785, [0.785, 360, 0.13]]] + [0.785, [0.785, 360, 0.13]], + ] num, _ = obj.best_result(tune_data, baseline) self.assertEqual(num, 6) - obj = MultiObjective(['accuracy', 'modelsize', 'performance'], - {'absolute': 0.3}, - obj_criterion=[True, False, False]) + obj = MultiObjective( + ["accuracy", "modelsize", "performance"], {"absolute": 0.3}, obj_criterion=[True, False, False] + ) baseline = [0.8, [0.8, 780, 0.6]] tune_data = [ [0.760, [0.760, 400, 0.23]], [0.778, [0.778, 420, 0.24]], [0.750, [0.750, 430, 0.22]], [0.720, [0.720, 410, 0.18]], - [0.790, [0.790, 360, 0.15]], + [0.790, [0.790, 360, 0.15]], [0.750, [0.750, 430, 0.24]], - [0.785, [0.785, 360, 0.13]]] + [0.785, [0.785, 360, 0.13]], + ] num, _ = obj.best_result(tune_data, baseline) self.assertEqual(num, 6) obj = MultiObjective( - objectives=['accuracy', 'modelsize', 'performance'], - accuracy_criterion={'absolute': 0.3}, + objectives=["accuracy", "modelsize", "performance"], + accuracy_criterion={"absolute": 0.3}, obj_criterion=[True, False, False], - obj_weight=[0.6, 0.1, 0.3]) + obj_weight=[0.6, 0.1, 0.3], + ) baseline = [0.8, [0.8, 780, 0.6]] tune_data = [ [0.760, [0.760, 400, 0.23]], [0.778, [0.778, 400, 0.24]], [0.750, [0.750, 400, 0.22]], [0.720, [0.720, 400, 0.18]], - [0.790, [0.790, 400, 0.15]], + [0.790, [0.790, 400, 0.15]], [0.750, [0.750, 400, 0.24]], - [0.785, [0.785, 400, 0.13]]] + [0.785, [0.785, 400, 0.13]], + ] num, _ = obj.best_result(tune_data, baseline) self.assertEqual(num, 6) - obj = MultiObjective(['accuracy', 'modelsize', 'performance'], - {'absolute': 0.04, 'higher_is_better': False}, - obj_weight=[0.6, 0.1, 0.3]) + obj = MultiObjective( + ["accuracy", "modelsize", "performance"], + {"absolute": 0.04, "higher_is_better": False}, + obj_weight=[0.6, 0.1, 0.3], + ) baseline = [0.75, [0.75, 780, 0.6]] tune_data = [ [0.760, [0.760, 400, 0.23]], [0.778, [0.778, 400, 0.10]], [0.750, [0.750, 400, 0.22]], [0.720, [0.720, 400, 0.18]], - [0.790, [0.790, 400, 0.15]], + [0.790, [0.790, 400, 0.15]], [0.750, [0.750, 400, 0.24]], - [0.785, [0.785, 400, 0.13]]] + [0.785, [0.785, 400, 0.13]], + ] num, _ = obj.best_result(tune_data, baseline) self.assertEqual(num, 3) - obj = MultiObjective(['accuracy', 'modelsize', 'performance'], - {'absolute': 0.4, 'higher_is_better': False}, - obj_weight=[0.6, 0.1, 0.3]) + obj = MultiObjective( + ["accuracy", "modelsize", "performance"], + {"absolute": 0.4, "higher_is_better": False}, + obj_weight=[0.6, 0.1, 0.3], + ) baseline = [0.0, [0.0, 780, 0.6]] tune_data = [ [0.00, [0.00, 400, 0.23]], [0.80, [0.80, 400, 0.10]], [0.02, [0.02, 400, 0.22]], [0.10, [0.10, 400, 0.18]], - [0.20, [0.20, 400, 0.15]], + 
[0.20, [0.20, 400, 0.15]], [0.00, [0.00, 400, 0.24]], - [0.50, [0.50, 400, 0.13]]] + [0.50, [0.50, 400, 0.13]], + ] num, _ = obj.best_result(tune_data, baseline) self.assertEqual(num, 0) - - obj = MultiObjective(['modelsize', 'performance'], - {'relative': 0.08}, - obj_criterion=[False], - obj_weight=[0.2, 0.8]) + + obj = MultiObjective( + ["modelsize", "performance"], {"relative": 0.08}, obj_criterion=[False], obj_weight=[0.2, 0.8] + ) baseline = [0.8, [780, 0.6]] tune_data = [ [0.760, [400, 0.23]], [0.778, [420, 0.24]], [0.750, [430, 0.22]], [0.720, [410, 0.18]], - [0.790, [360, 0.15]], + [0.790, [360, 0.15]], [0.750, [430, 0.24]], - [0.785, [360, 0.13]]] + [0.785, [360, 0.13]], + ] num, _ = obj.best_result(tune_data, baseline) self.assertEqual(num, 6) def test_multi_obj_metric(self): from neural_compressor.objective import MultiObjective - obj = MultiObjective(['accuracy', 'modelsize', 'performance'], - {'relative': 0.04, 'higher_is_better': True}, - metric_criterion=[True, True], - metric_weight=[0., 1.], - obj_criterion=[True, False, False], - obj_weight=[0.6, 0.1, 0.3]) + + obj = MultiObjective( + ["accuracy", "modelsize", "performance"], + {"relative": 0.04, "higher_is_better": True}, + metric_criterion=[True, True], + metric_weight=[0.0, 1.0], + obj_criterion=[True, False, False], + obj_weight=[0.6, 0.1, 0.3], + ) baseline = [[0.75, 0.4], [[0.75, 0.4], 780, 0.6]] tune_data = [ [[0.760, 0.4], [[0.760, 0.4], 400, 0.23]], [[0.778, 0.3], [[0.778, 0.3], 400, 0.10]], [[0.750, 0.3], [[0.750, 0.3], 400, 0.22]], [[0.720, 0.3], [[0.720, 0.3], 400, 0.18]], - [[0.790, 0.3], [[0.790, 0.3], 400, 0.15]], + [[0.790, 0.3], [[0.790, 0.3], 400, 0.15]], [[0.750, 0.3], [[0.750, 0.3], 400, 0.24]], - [[0.785, 0.3], [[0.785, 0.3], 400, 0.13]]] + [[0.785, 0.3], [[0.785, 0.3], 400, 0.13]], + ] num, _ = obj.best_result(tune_data, baseline) self.assertEqual(num, 0) - obj = MultiObjective(['accuracy', 'modelsize', 'performance'], - {'absolute': 0.4, 'higher_is_better': False}, - metric_criterion=[False, True], - obj_weight=[0.6, 0.1, 0.3]) + obj = MultiObjective( + ["accuracy", "modelsize", "performance"], + {"absolute": 0.4, "higher_is_better": False}, + metric_criterion=[False, True], + obj_weight=[0.6, 0.1, 0.3], + ) baseline = [[0.0, 0.9], [[0.0, 0.9], 780, 0.6]] tune_data = [ [[0.00, 0.9], [[0.00, 0.9], 400, 0.23]], [[0.80, 0.8], [[0.80, 0.8], 400, 0.10]], [[0.02, 0.7], [[0.02, 0.7], 400, 0.22]], [[0.10, 0.6], [[0.10, 0.6], 400, 0.18]], - [[0.20, 0.7], [[0.20, 0.7], 400, 0.15]], + [[0.20, 0.7], [[0.20, 0.7], 400, 0.15]], [[0.00, 0.7], [[0.00, 0.7], 400, 0.24]], - [[0.50, 0.7], [[0.50, 0.7], 400, 0.13]]] + [[0.50, 0.7], [[0.50, 0.7], 400, 0.13]], + ] num, _ = obj.best_result(tune_data, baseline) self.assertEqual(num, 0) - - obj = MultiObjective(['modelsize', 'performance'], - {'relative': 0.08}, - metric_criterion=[True, True], - metric_weight=[0.5,0.5], - obj_weight=[0.2, 0.8]) + + obj = MultiObjective( + ["modelsize", "performance"], + {"relative": 0.08}, + metric_criterion=[True, True], + metric_weight=[0.5, 0.5], + obj_weight=[0.2, 0.8], + ) baseline = [[0.8, 0.1], [780, 0.6]] tune_data = [ [[0.760, 0.093], [400, 0.23]], [[0.778, 0.094], [420, 0.24]], [[0.750, 0.092], [430, 0.22]], [[0.720, 0.093], [410, 0.18]], - [[0.790, 0.093], [360, 0.15]], + [[0.790, 0.093], [360, 0.15]], [[0.750, 0.093], [430, 0.24]], - [[0.785, 0.060], [360, 0.13]]] + [[0.785, 0.060], [360, 0.13]], + ] num, _ = obj.best_result(tune_data, baseline) self.assertEqual(num, 6) - obj = MultiObjective(['modelsize', 'performance'], - 
{'absolute': 0.013}, - metric_criterion=[True, True], - metric_weight=[0.5,0.5], - obj_weight=[0.2, 0.8]) + obj = MultiObjective( + ["modelsize", "performance"], + {"absolute": 0.013}, + metric_criterion=[True, True], + metric_weight=[0.5, 0.5], + obj_weight=[0.2, 0.8], + ) baseline = [[0.8, 0.1], [780, 0.6]] tune_data = [ [[0.760, 0.093], [400, 0.23]], [[0.778, 0.094], [420, 0.24]], [[0.750, 0.092], [430, 0.22]], [[0.720, 0.093], [410, 0.18]], - [[0.790, 0.093], [360, 0.15]], + [[0.790, 0.093], [360, 0.15]], [[0.750, 0.093], [430, 0.24]], - [[0.785, 0.060], [360, 0.13]]] + [[0.785, 0.060], [360, 0.13]], + ] num, _ = obj.best_result(tune_data, baseline) self.assertEqual(num, 4) - obj = MultiObjective(['modelsize', 'performance'], - {'relative': 0.08}, - metric_criterion=[True, True], - obj_weight=[0.2, 0.8]) + obj = MultiObjective( + ["modelsize", "performance"], {"relative": 0.08}, metric_criterion=[True, True], obj_weight=[0.2, 0.8] + ) baseline = [[0.8, 0.1], [780, 0.6]] tune_data = [ [[0.760, 0.093], [400, 0.23]], [[0.778, 0.094], [420, 0.24]], [[0.750, 0.092], [430, 0.22]], [[0.720, 0.093], [410, 0.18]], - [[0.790, 0.093], [360, 0.15]], + [[0.790, 0.093], [360, 0.15]], [[0.750, 0.093], [430, 0.24]], - [[0.785, 0.060], [360, 0.13]]] + [[0.785, 0.060], [360, 0.13]], + ] num, _ = obj.best_result(tune_data, baseline) self.assertEqual(num, 4) - obj = MultiObjective(['modelsize', 'performance'], - {'absolute': 0.06}, - metric_criterion=[True, True], - obj_weight=[0.2, 0.8]) + obj = MultiObjective( + ["modelsize", "performance"], {"absolute": 0.06}, metric_criterion=[True, True], obj_weight=[0.2, 0.8] + ) baseline = [[0.8, 0.1], [780, 0.6]] tune_data = [ [[0.760, 0.093], [400, 0.23]], [[0.778, 0.094], [420, 0.24]], [[0.750, 0.092], [430, 0.22]], [[0.720, 0.093], [410, 0.18]], - [[0.790, 0.093], [360, 0.15]], + [[0.790, 0.093], [360, 0.15]], [[0.750, 0.093], [430, 0.24]], - [[0.785, 0.060], [360, 0.13]]] + [[0.785, 0.060], [360, 0.13]], + ] num, _ = obj.best_result(tune_data, baseline) self.assertEqual(num, 6) - obj = MultiObjective(['modelsize', 'performance'], - {'relative': 0.08}, - metric_criterion=[True, False], - obj_weight=[0.2, 0.8]) + obj = MultiObjective( + ["modelsize", "performance"], {"relative": 0.08}, metric_criterion=[True, False], obj_weight=[0.2, 0.8] + ) baseline = [[0.8, 0.1], [780, 0.6]] tune_data = [ [[0.760, 0.093], [400, 0.23]], [[0.778, 0.094], [420, 0.24]], [[0.750, 0.092], [430, 0.22]], [[0.720, 0.093], [410, 0.18]], - [[0.790, 0.093], [360, 0.15]], + [[0.790, 0.093], [360, 0.15]], [[0.750, 0.093], [430, 0.24]], - [[0.785, 0.060], [360, 0.13]]] + [[0.785, 0.060], [360, 0.13]], + ] num, _ = obj.best_result(tune_data, baseline) self.assertEqual(num, 6) - obj = MultiObjective(['modelsize', 'performance'], - {'absolute': 0.07}, - metric_criterion=[True, False], - obj_weight=[0.2, 0.8]) + obj = MultiObjective( + ["modelsize", "performance"], {"absolute": 0.07}, metric_criterion=[True, False], obj_weight=[0.2, 0.8] + ) baseline = [[0.8, 0.1], [780, 0.6]] tune_data = [ [[0.760, 0.093], [400, 0.23]], [[0.778, 0.094], [420, 0.24]], [[0.750, 0.092], [430, 0.22]], [[0.720, 0.093], [410, 0.18]], - [[0.790, 0.093], [360, 0.15]], + [[0.790, 0.093], [360, 0.15]], [[0.750, 0.093], [430, 0.24]], - [[0.785, 0.060], [360, 0.13]]] + [[0.785, 0.060], [360, 0.13]], + ] num, _ = obj.best_result(tune_data, baseline) self.assertEqual(num, 6) + if __name__ == "__main__": unittest.main() diff --git a/test/pruning_with_pt/pruning_1.x_v1/test_gradient_sensitivity.py 
b/test/pruning_with_pt/pruning_1.x_v1/test_gradient_sensitivity.py index 3fab75346be..05ab9b1e918 100644 --- a/test/pruning_with_pt/pruning_1.x_v1/test_gradient_sensitivity.py +++ b/test/pruning_with_pt/pruning_1.x_v1/test_gradient_sensitivity.py @@ -1,12 +1,13 @@ import os import shutil import unittest -from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor.data import Datasets import torch -import torchvision import torch.nn as nn +import torchvision + +from neural_compressor.data import Datasets +from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader def build_fake_yaml(): @@ -101,9 +102,10 @@ def build_fake_yaml(): timeout: 0 # tuning timeout (seconds) random_seed: 9527 # random seed """ - with open('fake.yaml', 'w', encoding="utf-8") as f: + with open("fake.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) + def build_fake_yaml_unstructured(): fake_yaml_unstructured = """ model: @@ -134,33 +136,36 @@ def build_fake_yaml_unstructured(): metric: topk: 1 """ - with open('fake_unstructured.yaml', 'w', encoding="utf-8") as f: + with open("fake_unstructured.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml_unstructured) -class TestGradientSensitivity(unittest.TestCase): +class TestGradientSensitivity(unittest.TestCase): @classmethod def setUpClass(cls): build_fake_yaml() @classmethod def tearDownClass(cls): - os.remove('fake.yaml') - shutil.rmtree('./saved', ignore_errors=True) - shutil.rmtree('runs', ignore_errors=True) - + os.remove("fake.yaml") + shutil.rmtree("./saved", ignore_errors=True) + shutil.rmtree("runs", ignore_errors=True) def test_gradient_sensitivity(self): from neural_compressor.experimental import Pruning, common - prune = Pruning('fake.yaml') + + prune = Pruning("fake.yaml") from transformers import BertForSequenceClassification - model = BertForSequenceClassification.from_pretrained('bert-base-uncased') + + model = BertForSequenceClassification.from_pretrained("bert-base-uncased") def training_func_for_nc(model): - inputs = {'input_ids': torch.rand([1,12]).long(), - 'attention_mask': torch.rand([1,12]).long(), - 'labels': torch.tensor([1]).long()} + inputs = { + "input_ids": torch.rand([1, 12]).long(), + "attention_mask": torch.rand([1, 12]).long(), + "labels": torch.tensor([1]).long(), + } model.eval() # To calculate head prune @@ -189,6 +194,7 @@ def eval_func_for_nc(model): self.assertEqual(bertlayer.intermediate.dense.weight.shape, (600, 768)) self.assertEqual(bertlayer.output.dense.weight.shape, (768, 600)) + class TestGradientSensitivityUnstructured(unittest.TestCase): cv_model = torchvision.models.resnet18() @@ -198,15 +204,16 @@ def setUpClass(cls): @classmethod def tearDownClass(cls): - os.remove('fake_unstructured.yaml') - shutil.rmtree('./saved', ignore_errors=True) - shutil.rmtree('runs', ignore_errors=True) + os.remove("fake_unstructured.yaml") + shutil.rmtree("./saved", ignore_errors=True) + shutil.rmtree("runs", ignore_errors=True) def test_unstructured_pruning(self): from neural_compressor.experimental import Pruning, common - prune_cv = Pruning('fake_unstructured.yaml') - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(100, 3, 224, 224), low=0., high=1., label=True) + + prune_cv = Pruning("fake_unstructured.yaml") + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(100, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) def 
training_func_for_cv(model): @@ -221,7 +228,7 @@ def training_func_for_cv(model): prune_cv.on_epoch_begin(nepoch) for image, target in dummy_dataloader: prune_cv.on_step_begin(cnt) - print('.', end='') + print(".", end="") cnt += 1 output = model(image) loss = criterion(output, target) @@ -233,6 +240,7 @@ def training_func_for_cv(model): break prune_cv.on_epoch_end() prune_cv.on_train_end() + prune_cv.model = self.cv_model prune_cv.pruning_func = training_func_for_cv prune_cv.eval_dataloader = dummy_dataloader @@ -242,12 +250,9 @@ def training_func_for_cv(model): # assert sparsity ratio conv1_weight = self.cv_model.layer1[0].conv1.weight conv2_weight = self.cv_model.layer1[0].conv2.weight - self.assertAlmostEqual((conv1_weight == 0).sum().item() / conv1_weight.numel(), - 0.8, - delta=0.01) - self.assertAlmostEqual((conv2_weight == 0).sum().item() / conv2_weight.numel(), - 0.48, - delta=0.01) + self.assertAlmostEqual((conv1_weight == 0).sum().item() / conv1_weight.numel(), 0.8, delta=0.01) + self.assertAlmostEqual((conv2_weight == 0).sum().item() / conv2_weight.numel(), 0.48, delta=0.01) + if __name__ == "__main__": unittest.main() diff --git a/test/pruning_with_pt/pruning_1.x_v1/test_pattern_lock.py b/test/pruning_with_pt/pruning_1.x_v1/test_pattern_lock.py index a759820ab1e..8b00cbf7bb3 100644 --- a/test/pruning_with_pt/pruning_1.x_v1/test_pattern_lock.py +++ b/test/pruning_with_pt/pruning_1.x_v1/test_pattern_lock.py @@ -3,11 +3,12 @@ import unittest import torch -import torchvision import torch.nn as nn +import torchvision -from neural_compressor.experimental.data.datasets.dummy_dataset import DummyDataset from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader +from neural_compressor.experimental.data.datasets.dummy_dataset import DummyDataset + def build_fake_yaml(): fake_yaml = """ @@ -28,7 +29,7 @@ def build_fake_yaml(): metric: topk: 1 """ - with open('fake.yaml', 'w', encoding="utf-8") as f: + with open("fake.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) @@ -41,17 +42,18 @@ def setUpClass(cls): @classmethod def tearDownClass(cls): - os.remove('fake.yaml') - shutil.rmtree('./saved', ignore_errors=True) - shutil.rmtree('runs', ignore_errors=True) + os.remove("fake.yaml") + shutil.rmtree("./saved", ignore_errors=True) + shutil.rmtree("runs", ignore_errors=True) def test_pattern_lock(self): from neural_compressor.experimental import Pruning, common - prune = Pruning('fake.yaml') + + prune = Pruning("fake.yaml") weight = self.model.layer1[0].conv1.weight mask = torch.ones(weight.numel()) - mask[:round(weight.numel()*0.9)] = .0 + mask[: round(weight.numel() * 0.9)] = 0.0 mask = mask[torch.randperm(mask.numel())].view(weight.shape) weight.data = weight * mask @@ -71,7 +73,7 @@ def training_func_for_nc(model): prune.on_epoch_begin(nepoch) for i, (image, target) in enumerate(dummy_dataloader): prune.on_step_begin(cnt) - print('.', end='') + print(".", end="") cnt += 1 output = model(image) loss = criterion(output, target) @@ -82,6 +84,7 @@ def training_func_for_nc(model): if cnt >= iters: break prune.on_epoch_end() + dummy_dataset = DummyDataset(tuple([100, 3, 256, 256]), label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) prune.model = self.model diff --git a/test/pruning_with_pt/pruning_1.x_v1/test_pruning_experimental.py b/test/pruning_with_pt/pruning_1.x_v1/test_pruning_experimental.py index c7d319eef9b..11bd2031344 100644 --- a/test/pruning_with_pt/pruning_1.x_v1/test_pruning_experimental.py +++ 
b/test/pruning_with_pt/pruning_1.x_v1/test_pruning_experimental.py @@ -3,12 +3,13 @@ import unittest import torch -import torchvision import torch.nn as nn +import torchvision from neural_compressor.data import Datasets from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor.experimental.pruning import Pruning # old API +from neural_compressor.experimental.pruning import Pruning # old API + def build_fake_yaml_basic(): fake_snip_yaml = """ @@ -59,9 +60,10 @@ def build_fake_yaml_basic(): sparsity_decay_type: "cube" """ - with open('fake_snip.yaml', 'w', encoding="utf-8") as f: + with open("fake_snip.yaml", "w", encoding="utf-8") as f: f.write(fake_snip_yaml) + def build_fake_yaml_channel(): fake_channel_pruning_yaml = """ model: @@ -111,12 +113,11 @@ def build_fake_yaml_channel(): """ - with open('fake_channel_pruning.yaml', 'w', encoding="utf-8") as f: + with open("fake_channel_pruning.yaml", "w", encoding="utf-8") as f: f.write(fake_channel_pruning_yaml) class TestPytorchPruning(unittest.TestCase): - model = torchvision.models.resnet18() @classmethod @@ -124,13 +125,12 @@ def setUpClass(cls): build_fake_yaml_basic() build_fake_yaml_channel() - @classmethod def tearDownClass(cls): - os.remove('fake_channel_pruning.yaml') - os.remove('fake_snip.yaml') - shutil.rmtree('./saved', ignore_errors=True) - shutil.rmtree('runs', ignore_errors=True) + os.remove("fake_channel_pruning.yaml") + os.remove("fake_snip.yaml") + shutil.rmtree("./saved", ignore_errors=True) + shutil.rmtree("runs", ignore_errors=True) def test_pytorch_pruning_basic(self): prune = Pruning("fake_snip.yaml") @@ -139,8 +139,8 @@ def test_pytorch_pruning_basic(self): criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(10, 3, 224, 224), low=0., high=1., label=True) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(10, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) prune.prepare() @@ -173,8 +173,8 @@ def test_pytorch_pruner_channel_pruning(self): criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(10, 3, 224, 224), low=0., high=1., label=True) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(10, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) prune.prepare() @@ -197,5 +197,6 @@ def test_pytorch_pruner_channel_pruning(self): prune.on_epoch_end() + if __name__ == "__main__": unittest.main() diff --git a/test/pruning_with_pt/pruning_1.x_v1/test_pruning_group_lasso.py b/test/pruning_with_pt/pruning_1.x_v1/test_pruning_group_lasso.py index 73c9c4d70b6..58e06ed8b1d 100644 --- a/test/pruning_with_pt/pruning_1.x_v1/test_pruning_group_lasso.py +++ b/test/pruning_with_pt/pruning_1.x_v1/test_pruning_group_lasso.py @@ -3,12 +3,13 @@ import unittest import torch -import torchvision import torch.nn as nn +import torchvision from neural_compressor.data import Datasets from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader + def build_fake_yaml(): fake_yaml = """ model: @@ -72,12 +73,11 @@ def build_fake_yaml(): shape: [128, 3, 224, 224] label: True """ - with open('fake.yaml', 'w', encoding="utf-8") as f: + with open("fake.yaml", "w", encoding="utf-8") 
as f: f.write(fake_yaml) class TestPruningGroupLasso(unittest.TestCase): - model = torchvision.models.resnet18() @classmethod @@ -86,16 +86,18 @@ def setUpClass(cls): @classmethod def tearDownClass(cls): - os.remove('fake.yaml') - shutil.rmtree('./saved', ignore_errors=True) - shutil.rmtree('runs', ignore_errors=True) + os.remove("fake.yaml") + shutil.rmtree("./saved", ignore_errors=True) + shutil.rmtree("runs", ignore_errors=True) def test_pruning_internal(self): from neural_compressor.experimental import Pruning, common - prune = Pruning('fake.yaml') + + prune = Pruning("fake.yaml") prune.model = self.model _ = prune() + if __name__ == "__main__": unittest.main() diff --git a/test/pruning_with_pt/pruning_1.x_v1/test_pruning_pattern.py b/test/pruning_with_pt/pruning_1.x_v1/test_pruning_pattern.py index f46c6363886..0b4f0ff73b6 100644 --- a/test/pruning_with_pt/pruning_1.x_v1/test_pruning_pattern.py +++ b/test/pruning_with_pt/pruning_1.x_v1/test_pruning_pattern.py @@ -1,13 +1,13 @@ -import random import copy - +import random import unittest + import numpy as np from neural_compressor.experimental.pruning_recipes.patterns import patterns -class TestPruningPattern(unittest.TestCase): +class TestPruningPattern(unittest.TestCase): tensor_4d = np.random.random([560, 560, 3, 3]) tensor_2d = np.random.random([1280, 640]) @@ -19,9 +19,9 @@ def test_tile_pattern(self): for mask_shape in [(1, 1), (2, 2), (1, 16), (4, 1), (1, 2)]: m0 = mask_shape[0] m1 = mask_shape[1] - pattern = patterns['tile_pattern_{}x{}'.format(m0, m1)]() + pattern = patterns["tile_pattern_{}x{}".format(m0, m1)]() new_shape = [shape[0] / m0] + [size // shape[0] / m1] - sparse_tensor = self.sparsify_tensor(tensor, [m0,m1], 0.2) + sparse_tensor = self.sparsify_tensor(tensor, [m0, m1], 0.2) reduced_tensor = pattern.reduce(sparse_tensor) self.assertEqual(list(reduced_tensor.shape), new_shape) self.assertAlmostEqual(pattern.compute_sparsity(sparse_tensor), 0.2, delta=0.01) @@ -31,10 +31,12 @@ def test_tile_pattern(self): def sparsify_tensor(self, tensor, mask_shape, ratio): tensor = copy.deepcopy(tensor) - for i in range(tensor.shape[0]//mask_shape[0]): - for j in range(tensor.shape[1]//mask_shape[1]): + for i in range(tensor.shape[0] // mask_shape[0]): + for j in range(tensor.shape[1] // mask_shape[1]): if random.random() < ratio: - tensor[i*mask_shape[0]:(i+1)*mask_shape[0], j*mask_shape[1]:(j+1)*mask_shape[1], ...] = 0 + tensor[ + i * mask_shape[0] : (i + 1) * mask_shape[0], j * mask_shape[1] : (j + 1) * mask_shape[1], ... 
+ ] = 0 return tensor diff --git a/test/pruning_with_pt/pruning_1.x_v1/test_pruning_pure_yaml.py b/test/pruning_with_pt/pruning_1.x_v1/test_pruning_pure_yaml.py index b8b19dd36db..acb8c58e1d3 100644 --- a/test/pruning_with_pt/pruning_1.x_v1/test_pruning_pure_yaml.py +++ b/test/pruning_with_pt/pruning_1.x_v1/test_pruning_pure_yaml.py @@ -3,12 +3,13 @@ import unittest import torch -import torchvision import torch.nn as nn +import torchvision from neural_compressor.data import Datasets from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader + def build_fake_yaml(): fake_yaml = """ model: @@ -64,12 +65,11 @@ def build_fake_yaml(): shape: [128, 3, 224, 224] label: True """ - with open('fake.yaml', 'w', encoding="utf-8") as f: + with open("fake.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) class TestPruning(unittest.TestCase): - model = torchvision.models.resnet18() @classmethod @@ -78,16 +78,18 @@ def setUpClass(cls): @classmethod def tearDownClass(cls): - os.remove('fake.yaml') - shutil.rmtree('./saved', ignore_errors=True) - shutil.rmtree('runs', ignore_errors=True) + os.remove("fake.yaml") + shutil.rmtree("./saved", ignore_errors=True) + shutil.rmtree("runs", ignore_errors=True) def test_pruning_internal(self): from neural_compressor.experimental import Pruning, common - prune = Pruning('fake.yaml') + + prune = Pruning("fake.yaml") prune.model = self.model _ = prune() + if __name__ == "__main__": unittest.main() diff --git a/test/pruning_with_pt/pruning_1.x_v2/test_pruning.py b/test/pruning_with_pt/pruning_1.x_v2/test_pruning.py index 947c44c6348..1cae122fe73 100644 --- a/test/pruning_with_pt/pruning_1.x_v2/test_pruning.py +++ b/test/pruning_with_pt/pruning_1.x_v2/test_pruning.py @@ -1,11 +1,12 @@ import unittest import torch -import torchvision import torch.nn as nn +import torchvision + +from neural_compressor.conf.pythonic_config import Config, WeightPruningConfig from neural_compressor.data import Datasets from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor.conf.pythonic_config import Config, WeightPruningConfig from neural_compressor.experimental.pruning_v2 import Pruning @@ -15,28 +16,16 @@ class TestPruning(unittest.TestCase): def test_pruning_basic(self): local_configs = [ { - "op_names": ['layer1.*'], - 'target_sparsity': 0.5, - "pattern": '8x2', + "op_names": ["layer1.*"], + "target_sparsity": 0.5, + "pattern": "8x2", "pruning_type": "magnitude_progressive", - "false_key": "this is to test unsupport keys" - }, - { - "op_names": ['layer2.*'], - 'target_sparsity': 0.5, - 'pattern': '2:4' + "false_key": "this is to test unsupport keys", }, - { - "op_names": ['layer3.*'], - 'target_sparsity': 0.7, - 'pattern': '5x1', - "pruning_type": "snip_progressive" - } + {"op_names": ["layer2.*"], "target_sparsity": 0.5, "pattern": "2:4"}, + {"op_names": ["layer3.*"], "target_sparsity": 0.7, "pattern": "5x1", "pruning_type": "snip_progressive"}, ] - conf = WeightPruningConfig( - local_configs, - target_sparsity=0.8 - ) + conf = WeightPruningConfig(local_configs, target_sparsity=0.8) config = Config(quantization=None, benchmark=None, pruning=conf, distillation=None) prune = Pruning(config) prune.update_config(start_step=1, end_step=10) @@ -44,8 +33,8 @@ def test_pruning_basic(self): criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(10, 3, 224, 224), low=0., 
high=1., label=True) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(10, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) prune.on_train_begin() diff --git a/test/pruning_with_pt/pruning_1.x_v2/test_pruning_config.py b/test/pruning_with_pt/pruning_1.x_v2/test_pruning_config.py index ec43a0e409a..7eb2874956b 100644 --- a/test/pruning_with_pt/pruning_1.x_v2/test_pruning_config.py +++ b/test/pruning_with_pt/pruning_1.x_v2/test_pruning_config.py @@ -1,12 +1,12 @@ import unittest import torch -import torchvision import torch.nn as nn +import torchvision +from neural_compressor.conf.pythonic_config import Config, WeightPruningConfig from neural_compressor.data import Datasets from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor.conf.pythonic_config import Config, WeightPruningConfig from neural_compressor.experimental.pruning_v2 import Pruning @@ -16,19 +16,16 @@ class TestPytorchPruning(unittest.TestCase): def test_pruning_class_config(self): local_configs = [ { - "op_names": ['layer1.*', 'layer2.*'], - "excluded_op_names": ['downsample.*'], - 'target_sparsity': 0.6, - "pattern": 'channelx1', + "op_names": ["layer1.*", "layer2.*"], + "excluded_op_names": ["downsample.*"], + "target_sparsity": 0.6, + "pattern": "channelx1", "pruning_type": "snip_progressive", "pruning_scope": "local", "start_step": 0, - "end_step": 10 + "end_step": 10, }, - { - "op_names": ['layer3.*'], - "pruning_type": "pattern_lock" - } + {"op_names": ["layer3.*"], "pruning_type": "pattern_lock"}, ] conf = WeightPruningConfig( local_configs, @@ -41,17 +38,17 @@ def test_pruning_class_config(self): criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(12, 3, 224, 224), low=0., high=1., label=True) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(12, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) prune.update_config(pruning_frequency=4) prune.on_train_begin() - assert prune.pruners[0].config['pruning_frequency'] == 4 - assert prune.pruners[0].config['target_sparsity'] == 0.6 - assert prune.pruners[1].config['target_sparsity'] == 0.8 - assert prune.pruners[0].config['pattern'] == "channelx1" - assert prune.pruners[1].config['pruning_type'] == 'pattern_lock' + assert prune.pruners[0].config["pruning_frequency"] == 4 + assert prune.pruners[0].config["target_sparsity"] == 0.6 + assert prune.pruners[1].config["target_sparsity"] == 0.8 + assert prune.pruners[0].config["pattern"] == "channelx1" + assert prune.pruners[1].config["pruning_type"] == "pattern_lock" for epoch in range(1): self.model.train() diff --git a/test/pruning_with_pt/pruning_1.x_v2/test_pruning_criteria.py b/test/pruning_with_pt/pruning_1.x_v2/test_pruning_criteria.py index d99d156da37..ed76f5d90bb 100644 --- a/test/pruning_with_pt/pruning_1.x_v2/test_pruning_criteria.py +++ b/test/pruning_with_pt/pruning_1.x_v2/test_pruning_criteria.py @@ -1,12 +1,12 @@ import unittest import torch -import torchvision import torch.nn as nn +import torchvision +from neural_compressor.conf.pythonic_config import Config, WeightPruningConfig from neural_compressor.data import Datasets from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor.conf.pythonic_config import Config, 
WeightPruningConfig from neural_compressor.experimental.pruning_v2 import Pruning @@ -16,37 +16,33 @@ class TestPruningCriteria(unittest.TestCase): def test_pruning_criteria(self): local_configs = [ { - "op_names": ['layer1.*'], - 'target_sparsity': 0.4, - "pattern": '8x2', + "op_names": ["layer1.*"], + "target_sparsity": 0.4, + "pattern": "8x2", "pruning_type": "magnitude_progressive", "pruning_scope": "local", - "sparsity_decay_type": "cube" + "sparsity_decay_type": "cube", }, { - "op_names": ['layer2.*'], - 'target_sparsity': 0.45, - 'pattern': '2:4', + "op_names": ["layer2.*"], + "target_sparsity": 0.45, + "pattern": "2:4", "pruning_type": "snip", - 'start_step': 6, - 'end_step': 6 + "start_step": 6, + "end_step": 6, }, { - "op_names": ['layer3.*'], - 'excluded_op_names': ['downsample.*'], - 'target_sparsity': 0.7, - 'pattern': '4x1', + "op_names": ["layer3.*"], + "excluded_op_names": ["downsample.*"], + "target_sparsity": 0.7, + "pattern": "4x1", "pruning_type": "snip_momentum_progressive", "pruning_frequency": 4, "min_sparsity_ratio_per_op": 0.5, "max_sparsity_ratio_per_op": 0.8, - } + }, ] - conf = WeightPruningConfig( - local_configs, - target_sparsity=0.8, - sparsity_decay_type="cube" - ) + conf = WeightPruningConfig(local_configs, target_sparsity=0.8, sparsity_decay_type="cube") config = Config(quantization=None, benchmark=None, pruning=conf, distillation=None) prune = Pruning(config) prune.update_config(start_step=1, end_step=10) @@ -54,8 +50,8 @@ def test_pruning_criteria(self): criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(10, 3, 224, 224), low=0., high=1., label=True) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(10, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) prune.on_train_begin() diff --git a/test/pruning_with_pt/pruning_1.x_v2/test_pruning_patterns.py b/test/pruning_with_pt/pruning_1.x_v2/test_pruning_patterns.py index 8704ead9bd5..b9db1bbb58f 100644 --- a/test/pruning_with_pt/pruning_1.x_v2/test_pruning_patterns.py +++ b/test/pruning_with_pt/pruning_1.x_v2/test_pruning_patterns.py @@ -1,12 +1,12 @@ import unittest import torch -import torchvision import torch.nn as nn +import torchvision +from neural_compressor.conf.pythonic_config import Config, WeightPruningConfig from neural_compressor.data import Datasets from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor.conf.pythonic_config import Config, WeightPruningConfig from neural_compressor.experimental.pruning_v2 import Pruning @@ -15,26 +15,17 @@ class TestPruningPatterns(unittest.TestCase): def test_pruning_pattern(self): local_configs = [ - { - "op_names": ['layer1.*'], - 'target_sparsity': 0.5, - "pattern": '5:8', - "pruning_type": "magnitude" - }, - { - "op_names": ['layer2.*'], - "pattern": '1xchannel', - "pruning_scope": "global" - }, + {"op_names": ["layer1.*"], "target_sparsity": 0.5, "pattern": "5:8", "pruning_type": "magnitude"}, + {"op_names": ["layer2.*"], "pattern": "1xchannel", "pruning_scope": "global"}, { "start_step": 2, "end_step": 20, - "op_names": ['layer3.*'], - 'target_sparsity': 0.666666, - 'pattern': '4x2', + "op_names": ["layer3.*"], + "target_sparsity": 0.666666, + "pattern": "4x2", "pruning_type": "snip_progressive", - "pruning_frequency": 5 - } + "pruning_frequency": 5, + }, ] conf = WeightPruningConfig( local_configs, 
@@ -42,7 +33,7 @@ def test_pruning_pattern(self): sparsity_decay_type="cos", excluded_op_names=["downsample.*"], pruning_scope="local", - min_sparsity_ratio_per_op=0.1 + min_sparsity_ratio_per_op=0.1, ) config = Config(quantization=None, benchmark=None, pruning=conf, distillation=None) prune = Pruning(config) @@ -51,8 +42,8 @@ def test_pruning_pattern(self): criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(10, 3, 224, 224), low=0., high=1., label=True) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(10, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) prune.on_train_begin() diff --git a/test/pruning_with_pt/pruning_1.x_v2/test_pruning_regs.py b/test/pruning_with_pt/pruning_1.x_v2/test_pruning_regs.py index 2b123a27241..85c09966eea 100644 --- a/test/pruning_with_pt/pruning_1.x_v2/test_pruning_regs.py +++ b/test/pruning_with_pt/pruning_1.x_v2/test_pruning_regs.py @@ -1,12 +1,12 @@ import unittest import torch -import torchvision import torch.nn as nn +import torchvision +from neural_compressor.conf.pythonic_config import Config, WeightPruningConfig from neural_compressor.data import Datasets from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor.conf.pythonic_config import Config, WeightPruningConfig from neural_compressor.experimental.pruning_v2 import Pruning local_regs_config = [ @@ -14,13 +14,13 @@ "start_step": 0, "end_step": 10, "pruning_type": "magnitude", - "op_names": ['layer1.*'], - "excluded_op_names": ['layer2.*'], + "op_names": ["layer1.*"], + "excluded_op_names": ["layer2.*"], "pruning_scope": "global", "target_sparsity": 0.5, "pattern": "4x1", "reg_type": "group_lasso", - "parameters": {'reg_coeff': 0.2} + "parameters": {"reg_coeff": 0.2}, }, { "start_step": 1, @@ -28,30 +28,31 @@ "target_sparsity": 0.5, "pruning_type": "snip_momentum", "pruning_frequency": 2, - "op_names": ['layer2.*'], + "op_names": ["layer2.*"], "pruning_scope": "local", "pattern": "1x1", "sparsity_decay_type": "exp", "reg_type": "group_lasso", - "parameters": {'reg_coeff': 0.1} + "parameters": {"reg_coeff": 0.1}, }, { "start_step": 2, "end_step": 8, "pruning_type": "gradient", "pruning_frequency": 2, - "op_names": ['fc'], + "op_names": ["fc"], "pruning_scope": "local", "target_sparsity": 0.75, "pattern": "1x1", "sparsity_decay_type": "cube", "reg_type": "group_lasso", - "parameters": {'reg_coeff': 0.0} - } + "parameters": {"reg_coeff": 0.0}, + }, ] -fake_snip_config = WeightPruningConfig(local_regs_config, target_sparsity=0.9, start_step=0, \ - end_step=10, pruning_frequency=1, sparsity_decay_type="exp") +fake_snip_config = WeightPruningConfig( + local_regs_config, target_sparsity=0.9, start_step=0, end_step=10, pruning_frequency=1, sparsity_decay_type="exp" +) class TestPruningRegs(unittest.TestCase): diff --git a/test/pruning_with_pt/pruning_1.x_v2/test_pruning_schedulers.py b/test/pruning_with_pt/pruning_1.x_v2/test_pruning_schedulers.py index c555eed2b80..6c89e1511a0 100644 --- a/test/pruning_with_pt/pruning_1.x_v2/test_pruning_schedulers.py +++ b/test/pruning_with_pt/pruning_1.x_v2/test_pruning_schedulers.py @@ -1,12 +1,12 @@ import unittest import torch -import torchvision import torch.nn as nn +import torchvision +from neural_compressor.conf.pythonic_config import Config, WeightPruningConfig from neural_compressor.data import Datasets 
from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor.conf.pythonic_config import Config, WeightPruningConfig from neural_compressor.experimental.pruning_v2 import Pruning local_schedulers_config = [ @@ -14,27 +14,33 @@ "start_step": 0, "end_step": 2, "pruning_type": "magnitude", - "op_names": ['layer1.*'], - "excluded_op_names": ['layer2.*'], + "op_names": ["layer1.*"], + "excluded_op_names": ["layer2.*"], "pruning_scope": "global", "target_sparsity": 0.5, - "pattern": "4x1" + "pattern": "4x1", }, { "start_step": 1, "end_step": 10, "pruning_type": "snip_momentum", "pruning_frequency": 2, - "op_names": ['layer2.*'], + "op_names": ["layer2.*"], "pruning_scope": "local", "target_sparsity": 0.75, "pattern": "32x1", - "sparsity_decay_type": "exp" - } + "sparsity_decay_type": "exp", + }, ] -fake_snip_config = WeightPruningConfig(local_schedulers_config, target_sparsity=0.9, start_step=0, \ - end_step=10, pruning_frequency=1, sparsity_decay_type="exp") +fake_snip_config = WeightPruningConfig( + local_schedulers_config, + target_sparsity=0.9, + start_step=0, + end_step=10, + pruning_frequency=1, + sparsity_decay_type="exp", +) class TestPruningCriteria(unittest.TestCase): @@ -47,8 +53,8 @@ def test_pruning_schedulers(self): prune.model = self.model criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(10, 3, 224, 224), low=0., high=1., label=True) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(10, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) prune.on_train_begin() prune.update_config(pruning_frequency=1) diff --git a/test/pruning_with_pt/pruning_1.x_v2/test_pruning_types.py b/test/pruning_with_pt/pruning_1.x_v2/test_pruning_types.py index 1a5bfeef270..4dd3f2518d3 100644 --- a/test/pruning_with_pt/pruning_1.x_v2/test_pruning_types.py +++ b/test/pruning_with_pt/pruning_1.x_v2/test_pruning_types.py @@ -1,12 +1,12 @@ import unittest import torch -import torchvision import torch.nn as nn +import torchvision +from neural_compressor.conf.pythonic_config import Config, WeightPruningConfig from neural_compressor.data import Datasets from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor.conf.pythonic_config import Config, WeightPruningConfig from neural_compressor.experimental.pruning_v2 import Pruning local_types_config = [ @@ -14,9 +14,9 @@ "start_step": 0, "end_step": 0, "pruning_type": "pattern_lock", - "op_names": ['layer1.*'], - "excluded_op_names": ['layer2.*'], - "pruning_scope": "global" + "op_names": ["layer1.*"], + "excluded_op_names": ["layer2.*"], + "pruning_scope": "global", }, { "start_step": 1, @@ -24,10 +24,10 @@ "target_sparsity": 0.5, "pruning_type": "snip_momentum_progressive", "pruning_frequency": 2, - "op_names": ['layer2.*'], + "op_names": ["layer2.*"], "pruning_scope": "local", "pattern": "4x1", - "sparsity_decay_type": "exp" + "sparsity_decay_type": "exp", }, { "start_step": 2, @@ -35,15 +35,16 @@ "target_sparsity": 0.8, "pruning_type": "snip_progressive", "pruning_frequency": 1, - "op_names": ['layer3.*'], + "op_names": ["layer3.*"], "pruning_scope": "local", "pattern": "16x1", - "sparsity_decay_type": "cube" - } + "sparsity_decay_type": "cube", + }, ] -fake_snip_config = WeightPruningConfig(local_types_config, target_sparsity=0.9, start_step=0, \ - 
end_step=10, pruning_frequency=3, sparsity_decay_type="exp") +fake_snip_config = WeightPruningConfig( + local_types_config, target_sparsity=0.9, start_step=0, end_step=10, pruning_frequency=3, sparsity_decay_type="exp" +) class TestPruningTypes(unittest.TestCase): @@ -55,8 +56,8 @@ def test_pruning_types(self): prune.model = self.model criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(10, 3, 224, 224), low=0., high=1., label=True) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(10, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) prune.on_train_begin() prune.update_config(pruning_frequency=1) diff --git a/test/pruning_with_pt/pruning_1.x_v2/test_pytorch_pruning_experimental.py b/test/pruning_with_pt/pruning_1.x_v2/test_pytorch_pruning_experimental.py index 09567d7719c..1e7a71da386 100644 --- a/test/pruning_with_pt/pruning_1.x_v2/test_pytorch_pruning_experimental.py +++ b/test/pruning_with_pt/pruning_1.x_v2/test_pytorch_pruning_experimental.py @@ -3,13 +3,14 @@ import unittest import torch -import torchvision import torch.nn as nn +import torchvision from neural_compressor.data import Datasets from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader from neural_compressor.experimental.pytorch_pruner.pruning import Pruning + def build_fake_yaml_basic(): fake_snip_yaml = """ model: @@ -59,9 +60,10 @@ def build_fake_yaml_basic(): sparsity_decay_type: "cube" """ - with open('fake_snip.yaml', 'w', encoding="utf-8") as f: + with open("fake_snip.yaml", "w", encoding="utf-8") as f: f.write(fake_snip_yaml) + def build_fake_yaml_channel(): fake_channel_pruning_yaml = """ model: @@ -111,12 +113,11 @@ def build_fake_yaml_channel(): """ - with open('fake_channel_pruning.yaml', 'w', encoding="utf-8") as f: + with open("fake_channel_pruning.yaml", "w", encoding="utf-8") as f: f.write(fake_channel_pruning_yaml) class TestPytorchPruning(unittest.TestCase): - model = torchvision.models.resnet18() @classmethod @@ -124,13 +125,12 @@ def setUpClass(cls): build_fake_yaml_basic() build_fake_yaml_channel() - @classmethod def tearDownClass(cls): - os.remove('fake_channel_pruning.yaml') - os.remove('fake_snip.yaml') - shutil.rmtree('./saved', ignore_errors=True) - shutil.rmtree('runs', ignore_errors=True) + os.remove("fake_channel_pruning.yaml") + os.remove("fake_snip.yaml") + shutil.rmtree("./saved", ignore_errors=True) + shutil.rmtree("runs", ignore_errors=True) def test_pytorch_pruning_basic(self): prune = Pruning("fake_snip.yaml") @@ -139,8 +139,8 @@ def test_pytorch_pruning_basic(self): criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(10, 3, 224, 224), low=0., high=1., label=True) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(10, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) prune.prepare() @@ -173,8 +173,8 @@ def test_pytorch_pruner_channel_pruning(self): criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(10, 3, 224, 224), low=0., high=1., label=True) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(10, 3, 224, 224), low=0.0, 
high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) prune.prepare() @@ -197,5 +197,6 @@ def test_pytorch_pruner_channel_pruning(self): prune.on_epoch_end() + if __name__ == "__main__": unittest.main() diff --git a/test/pruning_with_pt/pruning_2.x/test_auto_excluding_classifier.py b/test/pruning_with_pt/pruning_2.x/test_auto_excluding_classifier.py index 3294ccbafc3..0eb2d04005a 100644 --- a/test/pruning_with_pt/pruning_2.x/test_auto_excluding_classifier.py +++ b/test/pruning_with_pt/pruning_2.x/test_auto_excluding_classifier.py @@ -1,11 +1,8 @@ import unittest import torch.nn as nn -from transformers import ( - AutoConfig, - AutoModelForSequenceClassification, - AutoTokenizer -) +from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer + class NaiveMLP(nn.Module): def __init__(self, hidden_size=16): @@ -15,7 +12,7 @@ def __init__(self, hidden_size=16): self.linear2 = nn.Linear(hidden_size, hidden_size, bias=True) self.ac2 = nn.ReLU() self.linear3 = nn.Linear(hidden_size, 2, bias=True) - + def forward(self, x): x = self.linear1(x) x = self.ac1(x) @@ -24,8 +21,8 @@ def forward(self, x): x = self.linear3(x) return x + class TestPruning(unittest.TestCase): - def test_pruning_basic(self): # import pdb;pdb.set_trace() hidden_size = 32 @@ -33,6 +30,7 @@ def test_pruning_basic(self): # import classifier searching functions # A naive MLP model from neural_compressor.compression.pruner.model_slim.pattern_analyzer import ClassifierHeadSearcher + searcher = ClassifierHeadSearcher(model) layer = searcher.search(return_name=True) assert layer == "linear3" @@ -53,5 +51,6 @@ def test_pruning_basic(self): layer = searcher.search(return_name=True) assert layer == "classifier" + if __name__ == "__main__": unittest.main() diff --git a/test/pruning_with_pt/pruning_2.x/test_auto_slim.py b/test/pruning_with_pt/pruning_2.x/test_auto_slim.py index f08245c2882..7af5cf8de20 100644 --- a/test/pruning_with_pt/pruning_2.x/test_auto_slim.py +++ b/test/pruning_with_pt/pruning_2.x/test_auto_slim.py @@ -1,55 +1,52 @@ +import sys import unittest import torch -import torchvision import torch.nn as nn -import sys +import torchvision + +from neural_compressor import WeightPruningConfig + +# auto slim +from neural_compressor.compression.pruner import model_slim, parse_auto_slim_config from neural_compressor.data import Datasets from neural_compressor.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor import WeightPruningConfig from neural_compressor.training import prepare_compression -# auto slim -from neural_compressor.compression.pruner import parse_auto_slim_config -from neural_compressor.compression.pruner import model_slim class TestPruning(unittest.TestCase): - def test_pruning_basic(self): print("Run a Bert model") # create model, datasets, criterion and optimizer from transformers import BertForSequenceClassification - model = BertForSequenceClassification.from_pretrained('prajjwal1/bert-mini') + + model = BertForSequenceClassification.from_pretrained("prajjwal1/bert-mini") criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(model.parameters(), lr=0.0001) - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(10, 16), low=0., high=1., dtype='int64', label=True) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(10, 16), low=0.0, high=1.0, dtype="int64", label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) # case 1: without external dataloader prune_ffn2_sparsity = 0.5 
prune_mha_sparsity = 0.5 auto_slim_configs = parse_auto_slim_config( - model, - ffn2_sparsity = prune_ffn2_sparsity, - mha_sparsity = prune_mha_sparsity, + model, + ffn2_sparsity=prune_ffn2_sparsity, + mha_sparsity=prune_mha_sparsity, pruning_scope="local", ) # case 2: with external dataloader # get auto config for ffn and mha auto_slim_configs_2 = parse_auto_slim_config( - model, + model, dummy_dataloader, - ffn2_sparsity = prune_ffn2_sparsity, - mha_sparsity = prune_mha_sparsity, + ffn2_sparsity=prune_ffn2_sparsity, + mha_sparsity=prune_mha_sparsity, pruning_scope="local", ) pruning_configs = [] pruning_configs += auto_slim_configs_2 - configs = WeightPruningConfig( - pruning_configs, - start_step=1, - end_step=25 - ) + configs = WeightPruningConfig(pruning_configs, start_step=1, end_step=25) # run mha and ffn pruning compression_manager = prepare_compression(model=model, confs=configs) compression_manager.callbacks.on_train_begin() @@ -78,5 +75,6 @@ def test_pruning_basic(self): # execute real slim process (remove weights) model = model_slim(model) + if __name__ == "__main__": unittest.main() diff --git a/test/pruning_with_pt/pruning_2.x/test_conv_pruning.py b/test/pruning_with_pt/pruning_2.x/test_conv_pruning.py index cb6dd7f506d..99a1dd41c67 100644 --- a/test/pruning_with_pt/pruning_2.x/test_conv_pruning.py +++ b/test/pruning_with_pt/pruning_2.x/test_conv_pruning.py @@ -1,8 +1,9 @@ -import unittest import sys +import unittest + import torch -sys.path.insert(0, './') +sys.path.insert(0, "./") from neural_compressor import WeightPruningConfig from neural_compressor.training import prepare_compression @@ -11,18 +12,13 @@ class TestPruning(unittest.TestCase): def test_conv1_prunig(self): local_config = [ { - "op_names": ['conv1.*'], - 'target_sparsity': 0.6, - "pattern": 'channelx1', + "op_names": ["conv1.*"], + "target_sparsity": 0.6, + "pattern": "channelx1", "pruning_type": "snip", "pruning_scope": "local", }, - { - "op_names": ['conv2.*'], - 'target_sparsity': 0.5, - "pattern": '2:4', - "pruning_scope": "global" - } + {"op_names": ["conv2.*"], "target_sparsity": 0.5, "pattern": "2:4", "pruning_scope": "global"}, ] class Model(torch.nn.Module): @@ -48,10 +44,7 @@ def forward(self, x): criterion = torch.nn.CrossEntropyLoss() optimizer = torch.optim.SGD(model.parameters(), lr=0.01) - config = WeightPruningConfig(local_config, - target_sparsity=0.8, - start_step=1, - end_step=10) + config = WeightPruningConfig(local_config, target_sparsity=0.8, start_step=1, end_step=10) compression_manager = prepare_compression(model=model, confs=config) compression_manager.callbacks.on_train_begin() for epoch in range(2): @@ -59,9 +52,7 @@ def forward(self, x): compression_manager.callbacks.on_epoch_begin(epoch) local_step = 0 for _ in range(20): - data, target = torch.rand((1, 4, 10), - requires_grad=True), torch.empty( - 1, dtype=torch.long).random_(3) + data, target = torch.rand((1, 4, 10), requires_grad=True), torch.empty(1, dtype=torch.long).random_(3) compression_manager.callbacks.on_step_begin(local_step) output = model(data) loss = criterion(output, target) @@ -77,23 +68,19 @@ def forward(self, x): compression_manager.callbacks.on_train_end() compression_manager.callbacks.on_before_eval() compression_manager.callbacks.on_after_eval() - + def test_hf_conv1_prunig(self): import transformers + local_config = [ { - "op_names": ['conv1.*'], - 'target_sparsity': 0.6, - "pattern": 'channelx1', + "op_names": ["conv1.*"], + "target_sparsity": 0.6, + "pattern": "channelx1", "pruning_type": "snip", 
"pruning_scope": "local", }, - { - "op_names": ['conv2.*'], - 'target_sparsity': 0.5, - "pattern": '2:4', - "pruning_scope": "global" - } + {"op_names": ["conv2.*"], "target_sparsity": 0.5, "pattern": "2:4", "pruning_scope": "global"}, ] class Model(torch.nn.Module): @@ -119,10 +106,7 @@ def forward(self, x): criterion = torch.nn.CrossEntropyLoss() optimizer = torch.optim.SGD(model.parameters(), lr=0.01) - config = WeightPruningConfig(local_config, - target_sparsity=0.8, - start_step=1, - end_step=10) + config = WeightPruningConfig(local_config, target_sparsity=0.8, start_step=1, end_step=10) compression_manager = prepare_compression(model=model, confs=config) compression_manager.callbacks.on_train_begin() for epoch in range(2): @@ -130,9 +114,7 @@ def forward(self, x): compression_manager.callbacks.on_epoch_begin(epoch) local_step = 0 for _ in range(20): - data, target = torch.rand((1, 4, 4), - requires_grad=True), torch.empty( - 1, dtype=torch.long).random_(3) + data, target = torch.rand((1, 4, 4), requires_grad=True), torch.empty(1, dtype=torch.long).random_(3) compression_manager.callbacks.on_step_begin(local_step) output = model(data) loss = criterion(output, target) @@ -150,5 +132,5 @@ def forward(self, x): compression_manager.callbacks.on_after_eval() -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/pruning_with_pt/pruning_2.x/test_pruning.py b/test/pruning_with_pt/pruning_2.x/test_pruning.py index fdc40ae6673..ad0d39dec10 100644 --- a/test/pruning_with_pt/pruning_2.x/test_pruning.py +++ b/test/pruning_with_pt/pruning_2.x/test_pruning.py @@ -1,50 +1,38 @@ import unittest import torch -import torchvision import torch.nn as nn +import torchvision + +from neural_compressor import WeightPruningConfig from neural_compressor.data import Datasets from neural_compressor.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor import WeightPruningConfig from neural_compressor.training import prepare_compression + class TestPruning(unittest.TestCase): model = torchvision.models.resnet18() def test_pruning_basic(self): local_configs = [ { - "op_names": ['layer1.*'], - 'target_sparsity': 0.5, - "pattern": '8x2', + "op_names": ["layer1.*"], + "target_sparsity": 0.5, + "pattern": "8x2", "pruning_type": "magnitude_progressive", - "false_key": "this is to test unsupport keys" + "false_key": "this is to test unsupport keys", }, - { - "op_names": ['layer2.*'], - 'target_sparsity': 0.5, - 'pattern': '2:4' - }, - { - "op_names": ['layer3.*'], - 'target_sparsity': 0.7, - 'pattern': '5x1', - "pruning_type": "snip_progressive" - } + {"op_names": ["layer2.*"], "target_sparsity": 0.5, "pattern": "2:4"}, + {"op_names": ["layer3.*"], "target_sparsity": 0.7, "pattern": "5x1", "pruning_type": "snip_progressive"}, ] - config = WeightPruningConfig( - local_configs, - target_sparsity=0.8, - start_step=1, - end_step=10 - ) + config = WeightPruningConfig(local_configs, target_sparsity=0.8, start_step=1, end_step=10) compression_manager = prepare_compression(model=self.model, confs=config) compression_manager.callbacks.on_train_begin() criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(10, 3, 224, 224), low=0., high=1., label=True) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(10, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) 
compression_manager.callbacks.on_train_begin() @@ -70,7 +58,5 @@ def test_pruning_basic(self): compression_manager.callbacks.on_after_eval() - - if __name__ == "__main__": unittest.main() diff --git a/test/pruning_with_pt/pruning_2.x/test_pruning_block.py b/test/pruning_with_pt/pruning_2.x/test_pruning_block.py index 9551bb3a829..ce7e3e4d6d7 100644 --- a/test/pruning_with_pt/pruning_2.x/test_pruning_block.py +++ b/test/pruning_with_pt/pruning_2.x/test_pruning_block.py @@ -1,23 +1,25 @@ import unittest import torch -import torchvision import torch.nn as nn +import torchvision + +from neural_compressor import WeightPruningConfig from neural_compressor.data import Datasets from neural_compressor.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor import WeightPruningConfig from neural_compressor.training import prepare_compression class TestPruning(unittest.TestCase): # model = torchvision.models.resnet18() model = torchvision.models.vit_b_16() + def test_pruning_basic(self): local_configs = [ { - "op_names": ['encoder_layer_1.mlp*'], + "op_names": ["encoder_layer_1.mlp*"], "target_sparsity": 0.6, - "pattern": '2xchannel', + "pattern": "2xchannel", "pruning_type": "block_mask", "pruning_scope": "global", "criterion_type": "snip_momentum_block", @@ -25,9 +27,9 @@ def test_pruning_basic(self): "pruning_op_types": "Linear", }, { - "op_names": ['encoder_layer_2.mlp*'], + "op_names": ["encoder_layer_2.mlp*"], "target_sparsity": 0.9, - "pattern": '32x32', + "pattern": "32x32", "pruning_op_types": "Linear", "pruning_type": "block_mask", "pruning_scope": "local", @@ -35,37 +37,32 @@ def test_pruning_basic(self): "criterion_reduce_type": "sum", }, { - "op_names": ['encoder_layer_3.mlp*'], - 'target_sparsity': 0.4, - 'pattern': 'channelx1', + "op_names": ["encoder_layer_3.mlp*"], + "target_sparsity": 0.4, + "pattern": "channelx1", "pruning_op_types": "Linear", "pruning_type": "retrain_free", "pruning_scope": "local", "pruning_frequency": 2, }, { - "op_names": ['encoder_layer_0.mlp*', "conv_proj"], - 'target_sparsity': 0.4, - 'pattern': 'channelx2', - "pruning_op_types": ["Linear","Conv2d"], + "op_names": ["encoder_layer_0.mlp*", "conv_proj"], + "target_sparsity": 0.4, + "pattern": "channelx2", + "pruning_op_types": ["Linear", "Conv2d"], "pruning_type": "retrain_free", "pruning_scope": "global", "pruning_frequency": 3, - } + }, ] - config = WeightPruningConfig( - local_configs, - target_sparsity=0.8, - start_step=1, - end_step=10 - ) - + config = WeightPruningConfig(local_configs, target_sparsity=0.8, start_step=1, end_step=10) + criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(10, 3, 224, 224), low=0., high=1., label=True) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(10, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) - + compression_manager = prepare_compression(model=self.model, confs=config) compression_manager.callbacks.on_train_begin() for epoch in range(2): @@ -92,5 +89,3 @@ def test_pruning_basic(self): if __name__ == "__main__": unittest.main() - - diff --git a/test/pruning_with_pt/pruning_2.x/test_pruning_config.py b/test/pruning_with_pt/pruning_2.x/test_pruning_config.py index b87653ad33a..321b7c0f597 100644 --- a/test/pruning_with_pt/pruning_2.x/test_pruning_config.py +++ b/test/pruning_with_pt/pruning_2.x/test_pruning_config.py @@ -1,33 +1,32 @@ import 
unittest import torch -import torchvision import torch.nn as nn +import torchvision + +from neural_compressor import WeightPruningConfig from neural_compressor.data import Datasets from neural_compressor.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor import WeightPruningConfig from neural_compressor.training import prepare_compression from neural_compressor.utils import logger + class TestPytorchPruning(unittest.TestCase): model = torchvision.models.resnet18() def test_pruning_class_config(self): local_configs = [ { - "op_names": ['layer1.*', 'layer2.*'], - "excluded_op_names": ['downsample.*'], - 'target_sparsity': 0.6, - "pattern": 'channelx1', + "op_names": ["layer1.*", "layer2.*"], + "excluded_op_names": ["downsample.*"], + "target_sparsity": 0.6, + "pattern": "channelx1", "pruning_type": "snip_progressive", "pruning_scope": "local", "start_step": 0, - "end_step": 10 + "end_step": 10, }, - { - "op_names": ['layer3.*'], - "pruning_type": "pattern_lock" - } + {"op_names": ["layer3.*"], "pruning_type": "pattern_lock"}, ] config = WeightPruningConfig( local_configs, @@ -39,16 +38,16 @@ def test_pruning_class_config(self): criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(12, 3, 224, 224), low=0., high=1., label=True) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(12, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) logger.info(compression_manager.callbacks.callbacks_list[0].pruners) - assert compression_manager.callbacks.callbacks_list[0].pruners[0].config['pruning_frequency'] == 2 - assert compression_manager.callbacks.callbacks_list[0].pruners[0].config['target_sparsity'] == 0.6 - assert compression_manager.callbacks.callbacks_list[0].pruners[1].config['target_sparsity'] == 0.8 - assert compression_manager.callbacks.callbacks_list[0].pruners[0].config['pattern'] == "channelx1" - assert compression_manager.callbacks.callbacks_list[0].pruners[1].config['pruning_type'] == 'pattern_lock' + assert compression_manager.callbacks.callbacks_list[0].pruners[0].config["pruning_frequency"] == 2 + assert compression_manager.callbacks.callbacks_list[0].pruners[0].config["target_sparsity"] == 0.6 + assert compression_manager.callbacks.callbacks_list[0].pruners[1].config["target_sparsity"] == 0.8 + assert compression_manager.callbacks.callbacks_list[0].pruners[0].config["pattern"] == "channelx1" + assert compression_manager.callbacks.callbacks_list[0].pruners[1].config["pruning_type"] == "pattern_lock" for epoch in range(1): self.model.train() diff --git a/test/pruning_with_pt/pruning_2.x/test_pruning_criteria.py b/test/pruning_with_pt/pruning_2.x/test_pruning_criteria.py index 4e33ebe269f..ccca040d32c 100644 --- a/test/pruning_with_pt/pruning_2.x/test_pruning_criteria.py +++ b/test/pruning_with_pt/pruning_2.x/test_pruning_criteria.py @@ -1,11 +1,12 @@ import unittest import torch -import torchvision import torch.nn as nn +import torchvision + +from neural_compressor import WeightPruningConfig from neural_compressor.data import Datasets from neural_compressor.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor import WeightPruningConfig from neural_compressor.training import prepare_compression @@ -15,38 +16,34 @@ class TestPruningCriteria(unittest.TestCase): def test_pruning_criteria(self): local_configs = [ { - "op_names": 
['layer1.*'], - 'target_sparsity': 0.4, - "pattern": '8x2', + "op_names": ["layer1.*"], + "target_sparsity": 0.4, + "pattern": "8x2", "pruning_type": "magnitude_progressive", "pruning_scope": "local", - "sparsity_decay_type": "cube" + "sparsity_decay_type": "cube", }, { - "op_names": ['layer2.*'], - 'target_sparsity': 0.45, - 'pattern': '2:4', + "op_names": ["layer2.*"], + "target_sparsity": 0.45, + "pattern": "2:4", "pruning_type": "snip", - 'start_step': 6, - 'end_step': 6 + "start_step": 6, + "end_step": 6, }, { - "op_names": ['layer3.*'], - 'excluded_op_names': ['downsample.*'], - 'target_sparsity': 0.7, - 'pattern': '4x1', + "op_names": ["layer3.*"], + "excluded_op_names": ["downsample.*"], + "target_sparsity": 0.7, + "pattern": "4x1", "pruning_type": "snip_momentum_progressive", "pruning_frequency": 4, "min_sparsity_ratio_per_op": 0.5, "max_sparsity_ratio_per_op": 0.8, - } + }, ] config = WeightPruningConfig( - local_configs, - target_sparsity=0.8, - sparsity_decay_type="cube", - start_step=1, - end_step=10 + local_configs, target_sparsity=0.8, sparsity_decay_type="cube", start_step=1, end_step=10 ) compression_manager = prepare_compression(model=self.model, confs=config) # compression_manager.callbacks.callbacks_list[0].pruners[0].progressive_configs['progressive_type'] = 'scores' @@ -55,8 +52,8 @@ def test_pruning_criteria(self): criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(10, 3, 224, 224), low=0., high=1., label=True) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(10, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) compression_manager.callbacks.on_train_begin() @@ -84,42 +81,38 @@ def test_pruning_criteria(self): def test_pruning_criteria_for_ut(self): local_configs = [ { - "op_names": ['layer1.*'], - 'target_sparsity': 0.4, - "pattern": '8x2', + "op_names": ["layer1.*"], + "target_sparsity": 0.4, + "pattern": "8x2", "pruning_type": "magnitude_progressive", "pruning_scope": "local", - "sparsity_decay_type": "cube" + "sparsity_decay_type": "cube", }, { - "op_names": ['layer3.*'], - 'excluded_op_names': ['downsample.*'], - 'target_sparsity': 0.7, - 'pattern': '4x1', + "op_names": ["layer3.*"], + "excluded_op_names": ["downsample.*"], + "target_sparsity": 0.7, + "pattern": "4x1", "pruning_type": "snip_momentum_progressive", "pruning_frequency": 4, "min_sparsity_ratio_per_op": 0.5, "max_sparsity_ratio_per_op": 0.8, - } + }, ] config = WeightPruningConfig( - local_configs, - target_sparsity=0.8, - sparsity_decay_type="cube", - start_step=1, - end_step=10 + local_configs, target_sparsity=0.8, sparsity_decay_type="cube", start_step=1, end_step=10 ) compression_manager = prepare_compression(model=self.model, confs=config) - compression_manager.callbacks.callbacks_list[0].pruners[0].progressive_configs['progressive_type'] = 'scores' - compression_manager.callbacks.callbacks_list[0].pruners[0].progressive_configs['use_global'] = False - compression_manager.callbacks.callbacks_list[0].pruners[0].progressive_configs['progressive_type'] = 'linear' - compression_manager.callbacks.callbacks_list[0].pruners[0].progressive_configs['use_global'] = False + compression_manager.callbacks.callbacks_list[0].pruners[0].progressive_configs["progressive_type"] = "scores" + compression_manager.callbacks.callbacks_list[0].pruners[0].progressive_configs["use_global"] = False + 
compression_manager.callbacks.callbacks_list[0].pruners[0].progressive_configs["progressive_type"] = "linear" + compression_manager.callbacks.callbacks_list[0].pruners[0].progressive_configs["use_global"] = False compression_manager.callbacks.on_train_begin() criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(10, 3, 224, 224), low=0., high=1., label=True) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(10, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) compression_manager.callbacks.on_train_begin() @@ -143,5 +136,7 @@ def test_pruning_criteria_for_ut(self): compression_manager.callbacks.on_train_end() compression_manager.callbacks.on_before_eval() compression_manager.callbacks.on_after_eval() + + if __name__ == "__main__": unittest.main() diff --git a/test/pruning_with_pt/pruning_2.x/test_pruning_patterns.py b/test/pruning_with_pt/pruning_2.x/test_pruning_patterns.py index c307e808ea7..cca357f5677 100644 --- a/test/pruning_with_pt/pruning_2.x/test_pruning_patterns.py +++ b/test/pruning_with_pt/pruning_2.x/test_pruning_patterns.py @@ -1,11 +1,12 @@ import unittest import torch -import torchvision import torch.nn as nn +import torchvision + +from neural_compressor import WeightPruningConfig from neural_compressor.data import Datasets from neural_compressor.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor import WeightPruningConfig from neural_compressor.training import prepare_compression @@ -14,26 +15,17 @@ class TestPruningPatterns(unittest.TestCase): def test_pruning_pattern(self): local_configs = [ - { - "op_names": ['layer1.*'], - 'target_sparsity': 0.5, - "pattern": '5:8', - "pruning_type": "magnitude" - }, - { - "op_names": ['layer2.*'], - "pattern": '1xchannel', - "pruning_scope": "global" - }, + {"op_names": ["layer1.*"], "target_sparsity": 0.5, "pattern": "5:8", "pruning_type": "magnitude"}, + {"op_names": ["layer2.*"], "pattern": "1xchannel", "pruning_scope": "global"}, { "start_step": 2, "end_step": 20, - "op_names": ['layer3.*'], - 'target_sparsity': 0.666666, - 'pattern': '4x2', + "op_names": ["layer3.*"], + "target_sparsity": 0.666666, + "pattern": "4x2", "pruning_type": "snip_progressive", - "pruning_frequency": 5 - } + "pruning_frequency": 5, + }, ] config = WeightPruningConfig( local_configs, @@ -43,15 +35,15 @@ def test_pruning_pattern(self): pruning_scope="local", min_sparsity_ratio_per_op=0.1, start_step=1, - end_step=10 + end_step=10, ) compression_manager = prepare_compression(model=self.model, confs=config) compression_manager.callbacks.on_train_begin() criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(10, 3, 224, 224), low=0., high=1., label=True) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(10, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) compression_manager.callbacks.on_train_begin() diff --git a/test/pruning_with_pt/pruning_2.x/test_pruning_progressive.py b/test/pruning_with_pt/pruning_2.x/test_pruning_progressive.py index c44527b3ce6..df15a0807fa 100644 --- a/test/pruning_with_pt/pruning_2.x/test_pruning_progressive.py +++ b/test/pruning_with_pt/pruning_2.x/test_pruning_progressive.py @@ -1,11 +1,12 @@ import unittest import 
torch -import torchvision import torch.nn as nn +import torchvision + +from neural_compressor import WeightPruningConfig from neural_compressor.data import Datasets from neural_compressor.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor import WeightPruningConfig from neural_compressor.training import prepare_compression @@ -15,25 +16,21 @@ class TestPruningPatterns(unittest.TestCase): def test_pruning_pattern(self): local_configs = [ { - "op_names": ['layer1.*'], - 'target_sparsity': 0.75, - "pattern": '6:8', - "pruning_type": "magnitude_progressive" - }, - { - "op_names": ['layer2.*'], - "pattern": '1xchannel', - "pruning_scope": "global" + "op_names": ["layer1.*"], + "target_sparsity": 0.75, + "pattern": "6:8", + "pruning_type": "magnitude_progressive", }, + {"op_names": ["layer2.*"], "pattern": "1xchannel", "pruning_scope": "global"}, { "start_step": 2, "end_step": 20, - "op_names": ['layer3.*'], - 'target_sparsity': 0.666666, - 'pattern': '4x2', + "op_names": ["layer3.*"], + "target_sparsity": 0.666666, + "pattern": "4x2", "pruning_type": "snip_progressive", - "pruning_frequency": 5 - } + "pruning_frequency": 5, + }, ] config = WeightPruningConfig( local_configs, @@ -43,19 +40,19 @@ def test_pruning_pattern(self): pruning_scope="local", min_sparsity_ratio_per_op=0.1, start_step=1, - end_step=10 + end_step=10, ) compression_manager = prepare_compression(model=self.model, confs=config) compression_manager.callbacks.on_train_begin() # fix code coverage - compression_manager.callbacks.callbacks_list[0].pruners[-1].progressive_configs['progressive_type'] = "linear" - compression_manager.callbacks.callbacks_list[0].pruners[-1].progressive_configs['use_global'] = False + compression_manager.callbacks.callbacks_list[0].pruners[-1].progressive_configs["progressive_type"] = "linear" + compression_manager.callbacks.callbacks_list[0].pruners[-1].progressive_configs["use_global"] = False compression_manager.callbacks.callbacks_list[0].pruners[-1].progressive_logger = True criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(10, 3, 224, 224), low=0., high=1., label=True) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(10, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) compression_manager.callbacks.on_train_begin() diff --git a/test/pruning_with_pt/pruning_2.x/test_pruning_regs.py b/test/pruning_with_pt/pruning_2.x/test_pruning_regs.py index ecc6ce4ee57..985054e79a2 100644 --- a/test/pruning_with_pt/pruning_2.x/test_pruning_regs.py +++ b/test/pruning_with_pt/pruning_2.x/test_pruning_regs.py @@ -1,11 +1,12 @@ import unittest import torch -import torchvision import torch.nn as nn +import torchvision + +from neural_compressor import WeightPruningConfig from neural_compressor.data import Datasets from neural_compressor.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor import WeightPruningConfig from neural_compressor.training import prepare_compression @@ -15,40 +16,34 @@ class TestPruning(unittest.TestCase): def test_pruning_basic(self): local_configs = [ { - "op_names": ['layer1.*'], - 'target_sparsity': 0.5, - "pattern": '8x2', + "op_names": ["layer1.*"], + "target_sparsity": 0.5, + "pattern": "8x2", "pruning_type": "magnitude_progressive", - "false_key": "this is to test unsupport keys" + "false_key": "this is to test unsupport keys", 
}, { - "op_names": ['layer2.*'], - 'target_sparsity': 0.5, - 'pattern': '2:4', - + "op_names": ["layer2.*"], + "target_sparsity": 0.5, + "pattern": "2:4", }, { - "op_names": ['layer3.*'], - 'target_sparsity': 0.7, - 'pattern': '5x1', + "op_names": ["layer3.*"], + "target_sparsity": 0.7, + "pattern": "5x1", "pruning_type": "snip_progressive", - 'reg_type':"group_lasso", - 'reg_coeff':0.1 - } + "reg_type": "group_lasso", + "reg_coeff": 0.1, + }, ] - config = WeightPruningConfig( - local_configs, - target_sparsity=0.8, - start_step=1, - end_step=10 - ) + config = WeightPruningConfig(local_configs, target_sparsity=0.8, start_step=1, end_step=10) compression_manager = prepare_compression(model=self.model, confs=config) compression_manager.callbacks.on_train_begin() criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(10, 3, 224, 224), low=0., high=1., label=True) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(10, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) compression_manager.callbacks.on_train_begin() diff --git a/test/pruning_with_pt/pruning_2.x/test_pruning_schedulers.py b/test/pruning_with_pt/pruning_2.x/test_pruning_schedulers.py index 9cbf7fa2251..988487967ca 100644 --- a/test/pruning_with_pt/pruning_2.x/test_pruning_schedulers.py +++ b/test/pruning_with_pt/pruning_2.x/test_pruning_schedulers.py @@ -1,11 +1,12 @@ import unittest import torch -import torchvision import torch.nn as nn +import torchvision + +from neural_compressor import WeightPruningConfig from neural_compressor.data import Datasets from neural_compressor.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor import WeightPruningConfig from neural_compressor.training import prepare_compression local_schedulers_config = [ @@ -13,40 +14,45 @@ "start_step": 0, "end_step": 2, "pruning_type": "magnitude", - "op_names": ['layer1.*'], - "excluded_op_names": ['layer2.*'], + "op_names": ["layer1.*"], + "excluded_op_names": ["layer2.*"], "pruning_scope": "global", "target_sparsity": 0.5, - "pattern": "4x1" + "pattern": "4x1", }, { "start_step": 1, "end_step": 10, "pruning_type": "snip_momentum", "pruning_frequency": 2, - "op_names": ['layer2.*'], + "op_names": ["layer2.*"], "pruning_scope": "local", "target_sparsity": 0.75, "pattern": "32x1", - "sparsity_decay_type": "exp" - } + "sparsity_decay_type": "exp", + }, ] -fake_snip_config = WeightPruningConfig(local_schedulers_config, target_sparsity=0.9, start_step=0, \ - end_step=10, pruning_frequency=1, sparsity_decay_type="exp") +fake_snip_config = WeightPruningConfig( + local_schedulers_config, + target_sparsity=0.9, + start_step=0, + end_step=10, + pruning_frequency=1, + sparsity_decay_type="exp", +) class TestPruningCriteria(unittest.TestCase): model = torchvision.models.resnet18() def test_pruning_schedulers(self): - compression_manager = prepare_compression(model=self.model, confs=fake_snip_config) compression_manager.callbacks.on_train_begin() criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(10, 3, 224, 224), low=0., high=1., label=True) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(10, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) 
compression_manager.callbacks.on_train_begin() for epoch in range(2): diff --git a/test/pruning_with_pt/pruning_2.x/test_pruning_types.py b/test/pruning_with_pt/pruning_2.x/test_pruning_types.py index b04331d26d9..d7bef597dce 100644 --- a/test/pruning_with_pt/pruning_2.x/test_pruning_types.py +++ b/test/pruning_with_pt/pruning_2.x/test_pruning_types.py @@ -1,11 +1,12 @@ import unittest import torch -import torchvision import torch.nn as nn +import torchvision + +from neural_compressor import WeightPruningConfig from neural_compressor.data import Datasets from neural_compressor.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor import WeightPruningConfig from neural_compressor.training import prepare_compression local_types_config = [ @@ -13,9 +14,9 @@ "start_step": 0, "end_step": 0, "pruning_type": "pattern_lock", - "op_names": ['layer1.*'], - "excluded_op_names": ['layer2.*'], - "pruning_scope": "global" + "op_names": ["layer1.*"], + "excluded_op_names": ["layer2.*"], + "pruning_scope": "global", }, { "start_step": 1, @@ -23,10 +24,10 @@ "target_sparsity": 0.5, "pruning_type": "snip_momentum_progressive", "pruning_frequency": 2, - "op_names": ['layer2.*'], + "op_names": ["layer2.*"], "pruning_scope": "local", "pattern": "4x1", - "sparsity_decay_type": "exp" + "sparsity_decay_type": "exp", }, { "start_step": 2, @@ -34,23 +35,24 @@ "target_sparsity": 0.8, "pruning_type": "snip_progressive", "pruning_frequency": 1, - "op_names": ['layer3.*'], + "op_names": ["layer3.*"], "pruning_scope": "local", "pattern": "16x1", - "sparsity_decay_type": "cube" + "sparsity_decay_type": "cube", }, { "start_step": 0, "end_step": 0, "pruning_type": "pattern_lock", - "op_names": ['layer4.*'], + "op_names": ["layer4.*"], "pattern": "2:4", - "pruning_scope": "global" + "pruning_scope": "global", }, ] -fake_snip_config = WeightPruningConfig(local_types_config, target_sparsity=0.9, start_step=0, \ - end_step=10, pruning_frequency=3, sparsity_decay_type="exp") +fake_snip_config = WeightPruningConfig( + local_types_config, target_sparsity=0.9, start_step=0, end_step=10, pruning_frequency=3, sparsity_decay_type="exp" +) class TestPruningTypes(unittest.TestCase): @@ -61,8 +63,8 @@ def test_pruning_types(self): compression_manager.callbacks.on_train_begin() criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(10, 3, 224, 224), low=0., high=1., label=True) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(10, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) compression_manager.callbacks.on_train_begin() for epoch in range(2): diff --git a/test/pruning_with_pt/pruning_2_plus.x/test_pruning.py b/test/pruning_with_pt/pruning_2_plus.x/test_pruning.py index 29f1e3b072d..23a5c480752 100644 --- a/test/pruning_with_pt/pruning_2_plus.x/test_pruning.py +++ b/test/pruning_with_pt/pruning_2_plus.x/test_pruning.py @@ -1,12 +1,12 @@ import unittest import torch -import torchvision import torch.nn as nn +import torchvision +from neural_compressor import WeightPruningConfig from neural_compressor.data import Datasets from neural_compressor.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor import WeightPruningConfig class TestPruning(unittest.TestCase): @@ -15,33 +15,29 @@ class TestPruning(unittest.TestCase): def test_pruning_basic(self): local_configs = [ { - "op_names": 
['layer1.*'], - 'target_sparsity': 0.6, - "pattern": '8x2', + "op_names": ["layer1.*"], + "target_sparsity": 0.6, + "pattern": "8x2", "pruning_type": "magnitude_progressive", - "false_key": "this is to test unsupport keys" + "false_key": "this is to test unsupport keys", }, { - "op_names": ['layer2.*'], + "op_names": ["layer2.*"], "pruning_type": "snip_momentum", - 'target_sparsity': 0.5, + "target_sparsity": 0.5, "pruning_scope": "local", - 'pattern': '2:4' + "pattern": "2:4", }, ] - config = WeightPruningConfig( - local_configs, - target_sparsity=0.8, - start_step=1, - end_step=4 - ) + config = WeightPruningConfig(local_configs, target_sparsity=0.8, start_step=1, end_step=4) criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(10, 3, 224, 224), low=0., high=1., label=True) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(10, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) from neural_compressor.compression.pruner import prepare_pruning + pruning = prepare_pruning(config, self.model, optimizer) for epoch in range(4): @@ -55,9 +51,8 @@ def test_pruning_basic(self): optimizer.step() local_step += 1 - assert (self.model != None) + assert self.model is not None if __name__ == "__main__": unittest.main() - diff --git a/test/pruning_with_pt/pruning_2_plus.x/test_pruning_block.py b/test/pruning_with_pt/pruning_2_plus.x/test_pruning_block.py index 8bad882f2f5..0506f1e0406 100644 --- a/test/pruning_with_pt/pruning_2_plus.x/test_pruning_block.py +++ b/test/pruning_with_pt/pruning_2_plus.x/test_pruning_block.py @@ -1,47 +1,45 @@ import unittest import torch -import torchvision import torch.nn as nn +import torchvision + +from neural_compressor import WeightPruningConfig from neural_compressor.data import Datasets from neural_compressor.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor import WeightPruningConfig class TestPruning(unittest.TestCase): model = torchvision.models.vit_b_16() + def test_pruning_basic(self): local_configs = [ { - "op_names": ['encoder_layer_1.mlp*'], + "op_names": ["encoder_layer_1.mlp*"], "target_sparsity": 0.95, - "pattern": 'channelx2', + "pattern": "channelx2", "pruning_type": "block_mask", "pruning_scope": "global", "criterion_type": "block_mask", "pruning_op_types": "Linear", }, { - "op_names": ['encoder_layer_2.mlp*'], + "op_names": ["encoder_layer_2.mlp*"], "target_sparsity": 0.5, - "pattern": '32x32', + "pattern": "32x32", "pruning_op_types": "Linear", "pruning_type": "block_mask", "pruning_scope": "local", }, ] - config = WeightPruningConfig( - local_configs, - target_sparsity=0.8, - start_step=1, - end_step=10 - ) + config = WeightPruningConfig(local_configs, target_sparsity=0.8, start_step=1, end_step=10) criterion = nn.CrossEntropyLoss() from neural_compressor.compression.pruner import prepare_pruning + optimizer = torch.optim.SGD(self.model.parameters(), lr=0.0001) - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(20, 3, 224, 224), low=0., high=1., label=True) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(20, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) pruning = prepare_pruning(config, self.model, optimizer) @@ -59,6 +57,3 @@ def test_pruning_basic(self): if __name__ == "__main__": unittest.main() - - - diff --git 
a/test/pruning_with_pt/pruning_2_plus.x/test_pruning_retrain_free.py b/test/pruning_with_pt/pruning_2_plus.x/test_pruning_retrain_free.py index ea2cfe60a0c..8a41cd298ab 100644 --- a/test/pruning_with_pt/pruning_2_plus.x/test_pruning_retrain_free.py +++ b/test/pruning_with_pt/pruning_2_plus.x/test_pruning_retrain_free.py @@ -1,10 +1,11 @@ import unittest -import torchvision import torch.nn as nn +import torchvision + +from neural_compressor import WeightPruningConfig from neural_compressor.data import Datasets from neural_compressor.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor import WeightPruningConfig class TestPruning(unittest.TestCase): @@ -13,48 +14,44 @@ class TestPruning(unittest.TestCase): def test_pruning_basic(self): local_configs = [ { - "op_names": ['encoder_layer_3.mlp*'], - 'target_sparsity': 0.9, - 'pattern': 'channelx1', + "op_names": ["encoder_layer_3.mlp*"], + "target_sparsity": 0.9, + "pattern": "channelx1", "pruning_op_types": ["Linear"], "pruning_type": "retrain_free", "pruning_scope": "local", "pruning_frequency": 2, }, { - "op_names": ['encoder_layer_2.mlp*'], - 'target_sparsity': 0.4, - 'pattern': 'channelx2', + "op_names": ["encoder_layer_2.mlp*"], + "target_sparsity": 0.4, + "pattern": "channelx2", "pruning_op_types": ["Linear"], "pruning_type": "retrain_free", "pruning_scope": "global", "pruning_frequency": 3, }, { - "op_names": ['encoder_layer_0.mlp*', "conv_proj"], - 'target_sparsity': 0.4, - 'pattern': 'channelx1', - "pruning_op_types": ["Linear","Conv2d"], + "op_names": ["encoder_layer_0.mlp*", "conv_proj"], + "target_sparsity": 0.4, + "pattern": "channelx1", + "pruning_op_types": ["Linear", "Conv2d"], "pruning_type": "retrain_free", "pruning_scope": "global", "pruning_frequency": 3, - } + }, ] - config = WeightPruningConfig( - local_configs, - target_sparsity=0.8, - start_step=1, - end_step=10 - ) + config = WeightPruningConfig(local_configs, target_sparsity=0.8, start_step=1, end_step=10) criterion = nn.CrossEntropyLoss() from neural_compressor.compression.pruner import prepare_pruning - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(10, 3, 224, 224), low=0., high=1., label=True) + + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(10, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) pruning = prepare_pruning(config, self.model, dataloader=dummy_dataloader, loss_func=criterion) - + # pruning = prepare_pruning(config, self.model) for epoch in range(2): self.model.train() @@ -71,7 +68,3 @@ def test_pruning_basic(self): if __name__ == "__main__": unittest.main() - - - - diff --git a/test/pruning_with_pt/pruning_2_plus.x/test_pruning_sparsegpt.py b/test/pruning_with_pt/pruning_2_plus.x/test_pruning_sparsegpt.py index 114c03c4864..b3c5e73b43d 100644 --- a/test/pruning_with_pt/pruning_2_plus.x/test_pruning_sparsegpt.py +++ b/test/pruning_with_pt/pruning_2_plus.x/test_pruning_sparsegpt.py @@ -1,49 +1,44 @@ import unittest +from transformers import AutoModelForCausalLM + +from neural_compressor import WeightPruningConfig from neural_compressor.data import Datasets from neural_compressor.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor import WeightPruningConfig -from transformers import (AutoModelForCausalLM) class TestPruning(unittest.TestCase): - model = AutoModelForCausalLM.from_pretrained( - "facebook/opt-125m" - ) + model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m") + def 
test_pruning_basic(self): local_configs = [ { - "op_names": ['5.fc', '5.attn'], + "op_names": ["5.fc", "5.attn"], "target_sparsity": 0.65, - "pattern": '1x1', + "pattern": "1x1", "pruning_type": "sparse_gpt", "pruning_op_types": ["Linear"], }, { - "op_names": ['7.fc', '7.attn'], + "op_names": ["7.fc", "7.attn"], "target_sparsity": 0.5, - "pattern": '2:4', + "pattern": "2:4", "pruning_op_types": ["Linear"], "pruning_type": "sparse_gpt", }, ] - config = WeightPruningConfig( - local_configs, - target_sparsity=0.5, - start_step=1, - end_step=10 - ) + config = WeightPruningConfig(local_configs, target_sparsity=0.5, start_step=1, end_step=10) from neural_compressor.compression.pruner import prepare_pruning - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(10, 512), low=0., high=1., label=True, dtype='int64') + + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(10, 512), low=0.0, high=1.0, label=True, dtype="int64") dummy_dataloader = PyTorchDataLoader(dummy_dataset) - - pruning = prepare_pruning(config, self.model, dataloader=dummy_dataloader, device='cpu') + + pruning = prepare_pruning(config, self.model, dataloader=dummy_dataloader, device="cpu") pruning.on_train_begin(dummy_dataloader) pruning.on_train_end() if __name__ == "__main__": unittest.main() - diff --git a/test/pruning_with_tf/pruning_1.x_v1/test_tensorflow_distributed_pruning.py b/test/pruning_with_tf/pruning_1.x_v1/test_tensorflow_distributed_pruning.py index 9a54834595d..dce2c1d05ca 100644 --- a/test/pruning_with_tf/pruning_1.x_v1/test_tensorflow_distributed_pruning.py +++ b/test/pruning_with_tf/pruning_1.x_v1/test_tensorflow_distributed_pruning.py @@ -1,18 +1,21 @@ """Tests for the TensorFlow pruning with distributed training and inference.""" +import hashlib import os -import sys -import cpuinfo -from platform import platform, system -import signal +import re import shutil +import signal import subprocess -import unittest -import re -import hashlib +import sys import time +import unittest +from platform import platform, system + +import cpuinfo import tensorflow as tf -from neural_compressor.utils import logger + from neural_compressor.adaptor.tf_utils.util import version1_lt_version2 +from neural_compressor.utils import logger + def build_fake_ut(): fake_ut = ''' @@ -333,7 +336,7 @@ def test_tensorflow_pruning(self): if __name__ == '__main__': unittest.main() ''' - with open('fake_ut.py', 'w', encoding="utf-8") as f: + with open("fake_ut.py", "w", encoding="utf-8") as f: f.write(fake_ut) build_fake_yaml() @@ -371,12 +374,14 @@ def build_fake_yaml(): metric: topk: 1 """ - with open('fake_yaml.yaml', 'w', encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) + def dir_md5_check(dir): files_list = [] md5_list = [] + def get_files_list(path, list_name): for file in sorted(os.listdir(path)): file_path = os.path.join(path, file) @@ -384,16 +389,19 @@ def get_files_list(path, list_name): get_files_list(file_path, list_name) else: list_name.append(file_path) + get_files_list(dir, files_list) for file_path in files_list: - with open(file_path, 'rb') as fp: + with open(file_path, "rb") as fp: data = fp.read() file_md5 = hashlib.md5(data).hexdigest() md5_list.append(file_md5) return md5_list + class TestDistributed(unittest.TestCase): - dst_path = './baseline_model' + dst_path = "./baseline_model" + @classmethod def setUpClass(cls): build_fake_ut() @@ -404,9 +412,12 @@ def setUpClass(cls): 
shutil.copytree("/tmp/.neural_compressor/inc_ut/resnet_v2/", os.getcwd(), dirs_exist_ok=True) if not os.path.exists(cls.dst_path): raise FileNotFoundError(f"'{cls.dst_path}' doesn't exist.") - elif dir_md5_check(cls.dst_path) != \ - ['65625fef42f44e6853d4d6d5e4188a49', 'a783396652bf62db3db4c9f647953175', - 'c7259753419d9fc053df5b2059aef8c0', '77f2a1045cffee9f6a43f2594a5627ba']: + elif dir_md5_check(cls.dst_path) != [ + "65625fef42f44e6853d4d6d5e4188a49", + "a783396652bf62db3db4c9f647953175", + "c7259753419d9fc053df5b2059aef8c0", + "77f2a1045cffee9f6a43f2594a5627ba", + ]: logger.warning("resnet_v2 baseline_model md5 verification failed.") raise ValueError(f"'{cls.dst_path}' md5 verification failed.") else: @@ -414,10 +425,10 @@ def setUpClass(cls): @classmethod def tearDownClass(cls): - os.remove('fake_ut.py') - os.remove('fake_yaml.yaml') - shutil.rmtree('nc_workspace', ignore_errors=True) - shutil.rmtree('baseline_model', ignore_errors=True) + os.remove("fake_ut.py") + os.remove("fake_yaml.yaml") + shutil.rmtree("nc_workspace", ignore_errors=True) + shutil.rmtree("baseline_model", ignore_errors=True) def setUp(self): logger.info(f"CPU: {cpuinfo.get_cpu_info()['brand_raw']}") @@ -426,19 +437,20 @@ def setUp(self): def tearDown(self): logger.info(f"{self._testMethodName} done.\n") - @unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.10.0'), "Only test TF 2.10.0 or above") + @unittest.skipIf(version1_lt_version2(tf.version.VERSION, "2.10.0"), "Only test TF 2.10.0 or above") def test_tf_distributed_pruning(self): - distributed_cmd = 'horovodrun -np 2 python fake_ut.py' - p = subprocess.Popen(distributed_cmd, preexec_fn=os.setsid, stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, shell=True) + distributed_cmd = "horovodrun -np 2 python fake_ut.py" + p = subprocess.Popen( + distributed_cmd, preexec_fn=os.setsid, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True + ) try: out, _ = p.communicate() for line in out.splitlines(): print(line.decode().strip()) - matches = re.findall(r'FAILED', out.decode('utf-8')) + matches = re.findall(r"FAILED", out.decode("utf-8")) self.assertEqual(matches, []) - matches = re.findall(r'OK', out.decode('utf-8')) + matches = re.findall(r"OK", out.decode("utf-8")) self.assertTrue(len(matches) > 0) except KeyboardInterrupt: os.killpg(os.getpgid(p.pid), signal.SIGKILL) diff --git a/test/pruning_with_tf/pruning_1.x_v1/test_tensorflow_pruning.py b/test/pruning_with_tf/pruning_1.x_v1/test_tensorflow_pruning.py index 0afdecd66c0..68169e1d4a9 100644 --- a/test/pruning_with_tf/pruning_1.x_v1/test_tensorflow_pruning.py +++ b/test/pruning_with_tf/pruning_1.x_v1/test_tensorflow_pruning.py @@ -1,22 +1,26 @@ """Tests for the TensorFlow pruning.""" from __future__ import print_function -import numpy as np + +import hashlib import os -import sys -import cpuinfo import shutil -import unittest -import hashlib +import sys import types +import unittest +from platform import platform, system + +import cpuinfo +import numpy as np import tensorflow as tf + +from neural_compressor.adaptor import FRAMEWORKS +from neural_compressor.adaptor.tf_utils.util import version1_lt_version2 +from neural_compressor.conf.dotdict import DotDict from neural_compressor.experimental import Pruning, common +from neural_compressor.experimental.pruning import TfPruningCallback from neural_compressor.utils import logger -from neural_compressor.adaptor import FRAMEWORKS from neural_compressor.utils.create_obj_from_config import create_train_func -from neural_compressor.experimental.pruning 
import TfPruningCallback -from neural_compressor.conf.dotdict import DotDict -from neural_compressor.adaptor.tf_utils.util import version1_lt_version2 -from platform import platform, system + def build_fake_yaml(): fake_yaml = """ @@ -50,12 +54,14 @@ def build_fake_yaml(): metric: topk: 1 """ - with open('fake_yaml.yaml', 'w', encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) + def lr_schedule(epoch): """Learning Rate Schedule Learning rate is scheduled to be reduced after 80, 120, 160, 180 epochs. + Called automatically every epoch as part of callbacks during training. # Arguments epoch (int): The number of epochs @@ -71,16 +77,13 @@ def lr_schedule(epoch): lr *= 1e-2 elif epoch > 80: lr *= 1e-1 - print('Learning rate: ', lr) + print("Learning rate: ", lr) return lr -def resnet_layer(inputs, - num_filters=8, - kernel_size=3, - strides=1, - activation='relu', - batch_normalization=True, - conv_first=True): + +def resnet_layer( + inputs, num_filters=8, kernel_size=3, strides=1, activation="relu", batch_normalization=True, conv_first=True +): """2D Convolution-Batch Normalization-Activation stack builder # Arguments inputs (tensor): input tensor from input image or previous layer @@ -92,15 +95,16 @@ def resnet_layer(inputs, conv_first (bool): conv-bn-activation (True) or bn-activation-conv (False) # Returns - x (tensor): tensor as input to the next layer - """ - conv = tf.keras.layers.Conv2D(num_filters, - kernel_size=kernel_size, - strides=strides, - padding='same', - use_bias=True, - kernel_initializer='he_normal', - kernel_regularizer=tf.keras.regularizers.l2(1e-4)) + x (tensor): tensor as input to the next layer.""" + conv = tf.keras.layers.Conv2D( + num_filters, + kernel_size=kernel_size, + strides=strides, + padding="same", + use_bias=True, + kernel_initializer="he_normal", + kernel_regularizer=tf.keras.regularizers.l2(1e-4), + ) x = inputs if conv_first: @@ -117,11 +121,13 @@ def resnet_layer(inputs, x = conv(x) return x + def resnet_v2(input_shape, depth, num_classes=10): """ResNet Version 2 Model builder [b] Stacks of (1 x 1)-(3 x 3)-(1 x 1) BN-ReLU-Conv2D or also known as bottleneck layer First shortcut connection per layer is 1 x 1 Conv2D. + Second and onwards shortcut connection is identity. At the beginning of each stage, the feature map size is halved (downsampled) by a convolutional layer with strides=2, while the number of filter maps is @@ -140,21 +146,19 @@ def resnet_v2(input_shape, depth, num_classes=10): model (Model): Keras model instance """ if (depth - 2) % 9 != 0: - raise ValueError('depth should be 9n+2 (eg 56 or 110 in [b])') + raise ValueError("depth should be 9n+2 (eg 56 or 110 in [b])") # Start model definition. 
num_filters_in = 4 num_res_blocks = int((depth - 2) / 9) inputs = tf.keras.layers.Input(shape=input_shape) # v2 performs Conv2D with BN-ReLU on input before splitting into 2 paths - x = resnet_layer(inputs=inputs, - num_filters=num_filters_in, - conv_first=True) + x = resnet_layer(inputs=inputs, num_filters=num_filters_in, conv_first=True) # Instantiate the stack of residual units for stage in range(1): for res_block in range(num_res_blocks): - activation = 'relu' + activation = "relu" batch_normalization = True strides = 1 if stage == 0: @@ -165,33 +169,32 @@ def resnet_v2(input_shape, depth, num_classes=10): else: num_filters_out = num_filters_in * 2 if res_block == 0: # first layer but not first stage - strides = 2 # downsample + strides = 2 # downsample # bottleneck residual unit - y = resnet_layer(inputs=x, - num_filters=num_filters_in, - kernel_size=1, - strides=strides, - activation=activation, - batch_normalization=batch_normalization, - conv_first=False) - y = resnet_layer(inputs=y, - num_filters=num_filters_in, - conv_first=False) - - y = resnet_layer(inputs=y, - num_filters=num_filters_out, - kernel_size=1, - conv_first=False) + y = resnet_layer( + inputs=x, + num_filters=num_filters_in, + kernel_size=1, + strides=strides, + activation=activation, + batch_normalization=batch_normalization, + conv_first=False, + ) + y = resnet_layer(inputs=y, num_filters=num_filters_in, conv_first=False) + + y = resnet_layer(inputs=y, num_filters=num_filters_out, kernel_size=1, conv_first=False) if res_block == 0: # linear projection residual shortcut connection to match # changed dims - x = resnet_layer(inputs=x, - num_filters=num_filters_out, - kernel_size=1, - strides=strides, - activation=None, - batch_normalization=False) + x = resnet_layer( + inputs=x, + num_filters=num_filters_out, + kernel_size=1, + strides=strides, + activation=None, + batch_normalization=False, + ) x = tf.keras.layers.add([x, y]) num_filters_in = num_filters_out @@ -199,17 +202,16 @@ def resnet_v2(input_shape, depth, num_classes=10): # Add classifier on top. # v2 has BN-ReLU before Pooling # x = BatchNormalization()(x) - x = tf.keras.layers.Activation('relu')(x) + x = tf.keras.layers.Activation("relu")(x) x = tf.keras.layers.AveragePooling2D(pool_size=8)(x) y = tf.keras.layers.Flatten()(x) - outputs = tf.keras.layers.Dense(num_classes, - activation='softmax', - kernel_initializer='he_normal')(y) + outputs = tf.keras.layers.Dense(num_classes, activation="softmax", kernel_initializer="he_normal")(y) # Instantiate model. model = tf.keras.models.Model(inputs=inputs, outputs=outputs) return model + # Training parameters batch_size = 128 # orig paper trained all networks with batch_size=128 epochs = 1 @@ -221,6 +223,7 @@ def resnet_v2(input_shape, depth, num_classes=10): n = 1 depth = n * 9 + 2 + def train(dst_path): # Load the CIFAR10 data. (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data() @@ -228,8 +231,8 @@ def train(dst_path): # Input image dimensions. input_shape = x_train.shape[1:] # Normalize data. 
- x_train = x_train.astype('float32') / 255 - x_test = x_test.astype('float32') / 255 + x_train = x_train.astype("float32") / 255 + x_test = x_test.astype("float32") / 255 x_train_mean = np.mean(x_train, axis=0) x_train -= x_train_mean @@ -241,37 +244,39 @@ def train(dst_path): model = resnet_v2(input_shape=input_shape, depth=depth) - model.compile(loss='categorical_crossentropy', - optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), - metrics=['accuracy']) + model.compile( + loss="categorical_crossentropy", optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), metrics=["accuracy"] + ) model.summary() lr_scheduler = tf.keras.callbacks.LearningRateScheduler(lr_schedule) - lr_reducer = tf.keras.callbacks.ReduceLROnPlateau(factor=np.sqrt(0.1), - cooldown=0, - patience=5, - min_lr=0.5e-6) + lr_reducer = tf.keras.callbacks.ReduceLROnPlateau(factor=np.sqrt(0.1), cooldown=0, patience=5, min_lr=0.5e-6) callbacks = [lr_reducer, lr_scheduler] # Run training, with or without data augmentation. - model.fit(x_train, y_train, - batch_size=batch_size, - epochs=epochs, - validation_data=(x_test, y_test), - shuffle=True, - callbacks=callbacks) + model.fit( + x_train, + y_train, + batch_size=batch_size, + epochs=epochs, + validation_data=(x_test, y_test), + shuffle=True, + callbacks=callbacks, + ) # Score trained model. scores = model.evaluate(x_test, y_test, verbose=1) - print('Test loss:', scores[0]) - print('Test accuracy:', scores[1]) + print("Test loss:", scores[0]) + print("Test accuracy:", scores[1]) model.save(dst_path) + def dir_md5_check(dir): files_list = [] md5_list = [] + def get_files_list(path, list_name): for file in sorted(os.listdir(path)): file_path = os.path.join(path, file) @@ -279,20 +284,22 @@ def get_files_list(path, list_name): get_files_list(file_path, list_name) else: list_name.append(file_path) + get_files_list(dir, files_list) for file_path in files_list: - with open(file_path, 'rb') as fp: + with open(file_path, "rb") as fp: data = fp.read() file_md5 = hashlib.md5(data).hexdigest() md5_list.append(file_md5) return md5_list + class TrainDataset(object): def __init__(self): (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data() x_train, y_train = x_train[:64], y_train[:64] - x_train = x_train.astype('float32') / 255 - x_test = x_test.astype('float32') / 255 + x_train = x_train.astype("float32") / 255 + x_test = x_test.astype("float32") / 255 # If subtract pixel mean is enabled x_train_mean = np.mean(x_train, axis=0) @@ -313,12 +320,13 @@ def __len__(self): def __getitem__(self, idx): return self.train_images[idx], self.train_labels[idx] + class EvalDataset(object): def __init__(self): (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data() - x_train = x_train.astype('float32') / 255 - x_test = x_test.astype('float32') / 255 + x_train = x_train.astype("float32") / 255 + x_test = x_test.astype("float32") / 255 # If subtract pixel mean is enabled x_train_mean = np.mean(x_train, axis=0) @@ -337,8 +345,10 @@ def __len__(self): def __getitem__(self, idx): return self.test_images[idx], self.test_labels[idx] + class TestTensorflowPruning(unittest.TestCase): - dst_path = './baseline_model' + dst_path = "./baseline_model" + @classmethod def setUpClass(cls): build_fake_yaml() @@ -349,9 +359,12 @@ def setUpClass(cls): if not os.path.exists(cls.dst_path): logger.warning("resnet_v2 baseline_model doesn't exist.") return unittest.skip("resnet_v2 baseline_model doesn't exist")(TestTensorflowPruning) - elif dir_md5_check(cls.dst_path) != \ - 
['65625fef42f44e6853d4d6d5e4188a49', 'a783396652bf62db3db4c9f647953175', - 'c7259753419d9fc053df5b2059aef8c0', '77f2a1045cffee9f6a43f2594a5627ba']: + elif dir_md5_check(cls.dst_path) != [ + "65625fef42f44e6853d4d6d5e4188a49", + "a783396652bf62db3db4c9f647953175", + "c7259753419d9fc053df5b2059aef8c0", + "77f2a1045cffee9f6a43f2594a5627ba", + ]: logger.warning("resnet_v2 baseline_model md5 verification failed.") return unittest.skip("resnet_v2 baseline_model md5 verification failed.")(TestTensorflowPruning) else: @@ -359,9 +372,9 @@ def setUpClass(cls): @classmethod def tearDownClass(cls): - os.remove('fake_yaml.yaml') - shutil.rmtree('nc_workspace',ignore_errors=True) - shutil.rmtree('baseline_model', ignore_errors=True) + os.remove("fake_yaml.yaml") + shutil.rmtree("nc_workspace", ignore_errors=True) + shutil.rmtree("baseline_model", ignore_errors=True) def setUp(self): logger.info(f"CPU: {cpuinfo.get_cpu_info()['brand_raw']}") @@ -370,59 +383,89 @@ def setUp(self): def tearDown(self): logger.info(f"{self._testMethodName} done.\n") - @unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.3.0'), "Keras model need tensorflow version >= 2.3.0, so the case is skipped") + @unittest.skipIf( + version1_lt_version2(tf.version.VERSION, "2.3.0"), + "Keras model need tensorflow version >= 2.3.0, so the case is skipped", + ) def test_create_train_func1(self): - framework = 'tensorflow' - framework_specific_info = DotDict({'device': 'cpu', - 'random_seed': 1978, - 'workspace_path': './nc_workspace/', - 'q_dataloader': None, - 'inputs': [], - 'outputs': [], - 'format': 'default', - 'backend': 'default'}) + framework = "tensorflow" + framework_specific_info = DotDict( + { + "device": "cpu", + "random_seed": 1978, + "workspace_path": "./nc_workspace/", + "q_dataloader": None, + "inputs": [], + "outputs": [], + "format": "default", + "backend": "default", + } + ) adaptor = FRAMEWORKS[framework](framework_specific_info) dataloader = common.DataLoader(TrainDataset(), batch_size=32) - train_cfg = DotDict({'epoch': 1, - 'optimizer': {'AdamW': {'learning_rate': 0.001, 'weight_decay': 0.0001}}, - 'criterion': {'CrossEntropyLoss': {'reduction': 'sum_over_batch_size', 'from_logits': True}}, - 'execution_mode': 'eager', - 'start_epoch': 0}) + train_cfg = DotDict( + { + "epoch": 1, + "optimizer": {"AdamW": {"learning_rate": 0.001, "weight_decay": 0.0001}}, + "criterion": {"CrossEntropyLoss": {"reduction": "sum_over_batch_size", "from_logits": True}}, + "execution_mode": "eager", + "start_epoch": 0, + } + ) callbacks = TfPruningCallback hooks = {} pruning_func1 = create_train_func(framework, dataloader, adaptor, train_cfg, hooks, callbacks) self.assertTrue(isinstance(pruning_func1, types.FunctionType)) - @unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.3.0'), "Keras model need tensorflow version >= 2.3.0, so the case is skipped") + @unittest.skipIf( + version1_lt_version2(tf.version.VERSION, "2.3.0"), + "Keras model need tensorflow version >= 2.3.0, so the case is skipped", + ) def test_create_train_func2(self): - framework = 'tensorflow' - framework_specific_info = DotDict({'device': 'cpu', - 'random_seed': 1978, - 'workspace_path': './nc_workspace/', - 'q_dataloader': None, - 'inputs': [], - 'outputs': [], - 'format': 'default', - 'backend': 'default'}) + framework = "tensorflow" + framework_specific_info = DotDict( + { + "device": "cpu", + "random_seed": 1978, + "workspace_path": "./nc_workspace/", + "q_dataloader": None, + "inputs": [], + "outputs": [], + "format": "default", + "backend": 
"default", + } + ) adaptor = FRAMEWORKS[framework](framework_specific_info) dataloader = common.DataLoader(TrainDataset(), batch_size=32) - train_cfg = DotDict({'epoch': 1, - 'dataloader': {'distributed': False, 'batch_size': 32, - 'dataset': {'ImageRecord': {'root': './ImageNet'}}, - 'transform': {'ResizeCropImagenet': {'height': 224, - 'width': 224, 'mean_value': [123.68, 116.78, 103.94]}}, - 'last_batch': 'rollover', 'shuffle': False}, - 'postprocess': {'transform': {'LabelShift': 1}}, - 'optimizer': {'SGD': {'learning_rate': 0.0001, - 'momentum': 0.9, 'nesterov': True}}, - 'criterion': {'SparseCategoricalCrossentropy': {'reduction': 'sum_over_batch_size'}}, - 'execution_mode': 'eager', 'start_epoch': 0}) + train_cfg = DotDict( + { + "epoch": 1, + "dataloader": { + "distributed": False, + "batch_size": 32, + "dataset": {"ImageRecord": {"root": "./ImageNet"}}, + "transform": { + "ResizeCropImagenet": {"height": 224, "width": 224, "mean_value": [123.68, 116.78, 103.94]} + }, + "last_batch": "rollover", + "shuffle": False, + }, + "postprocess": {"transform": {"LabelShift": 1}}, + "optimizer": {"SGD": {"learning_rate": 0.0001, "momentum": 0.9, "nesterov": True}}, + "criterion": {"SparseCategoricalCrossentropy": {"reduction": "sum_over_batch_size"}}, + "execution_mode": "eager", + "start_epoch": 0, + } + ) pruning_func2 = create_train_func(framework, dataloader, adaptor, train_cfg) self.assertTrue(isinstance(pruning_func2, types.FunctionType)) - @unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.3.0'), "Keras model need tensorflow version >= 2.3.0, so the case is skipped") + @unittest.skipIf( + version1_lt_version2(tf.version.VERSION, "2.3.0"), + "Keras model need tensorflow version >= 2.3.0, so the case is skipped", + ) def test_tensorflow_pruning(self): prune = Pruning("./fake_yaml.yaml") prune.train_dataloader = common.DataLoader(TrainDataset(), batch_size=32) @@ -437,5 +480,5 @@ def test_tensorflow_pruning(self): self.assertGreater(prune.last_score, 0.73) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/pruning_with_tf/pruning_1.x_v1/test_tensorflow_pruning_utility.py b/test/pruning_with_tf/pruning_1.x_v1/test_tensorflow_pruning_utility.py index 21fff9e7bf2..00d9bb6acd7 100644 --- a/test/pruning_with_tf/pruning_1.x_v1/test_tensorflow_pruning_utility.py +++ b/test/pruning_with_tf/pruning_1.x_v1/test_tensorflow_pruning_utility.py @@ -1,9 +1,11 @@ -import unittest import shutil +import unittest + def train_func(): import tensorflow as tf from tensorflow import keras + # Load MNIST dataset mnist = keras.datasets.mnist (train_images, train_labels), (test_images, test_labels) = mnist.load_data() @@ -13,19 +15,20 @@ def train_func(): test_images = test_images / 255.0 # Define the model architecture. 
- model = keras.Sequential([ - keras.layers.InputLayer(input_shape=(28, 28)), - keras.layers.Reshape(target_shape=(28, 28, 1)), - keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'), - keras.layers.MaxPooling2D(pool_size=(2, 2)), - keras.layers.Flatten(), - keras.layers.Dense(10) - ]) + model = keras.Sequential( + [ + keras.layers.InputLayer(input_shape=(28, 28)), + keras.layers.Reshape(target_shape=(28, 28, 1)), + keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation="relu"), + keras.layers.MaxPooling2D(pool_size=(2, 2)), + keras.layers.Flatten(), + keras.layers.Dense(10), + ] + ) # Train the digit classification model - model.compile(optimizer='adam', - loss=tf.keras.losses.SparseCategoricalCrossentropy( - from_logits=True), - metrics=['accuracy']) + model.compile( + optimizer="adam", loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=["accuracy"] + ) model.fit( train_images, @@ -36,6 +39,7 @@ def train_func(): model.save("baseline_model") + class TestTensorflowPruning(unittest.TestCase): @classmethod def setUpClass(self): @@ -43,10 +47,11 @@ def setUpClass(self): @classmethod def tearDownClass(self): - shutil.rmtree('baseline_model',ignore_errors=True) - + shutil.rmtree("baseline_model", ignore_errors=True) + def test_pruning_utility(self): from neural_compressor.model import Model + pruning_model = Model("baseline_model") all_weights_name = pruning_model.get_all_weight_names() df, sparsity = pruning_model.report_sparsity() @@ -54,5 +59,6 @@ def test_pruning_utility(self): self.assertEqual(df.empty, False) self.assertNotEqual(sparsity, None) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/test/pruning_with_tf/pruning_1.x_v2/test_tensorflow_pruning.py b/test/pruning_with_tf/pruning_1.x_v2/test_tensorflow_pruning.py index 12abc63e475..27afcdead65 100644 --- a/test/pruning_with_tf/pruning_1.x_v2/test_tensorflow_pruning.py +++ b/test/pruning_with_tf/pruning_1.x_v2/test_tensorflow_pruning.py @@ -1,22 +1,26 @@ """Tests for the TensorFlow pruning.""" from __future__ import print_function -import numpy as np + +import hashlib import os -import sys -import cpuinfo import shutil -import unittest -import hashlib +import sys import types +import unittest +from platform import platform, system + +import cpuinfo +import numpy as np import tensorflow as tf + +from neural_compressor.adaptor import FRAMEWORKS +from neural_compressor.adaptor.tf_utils.util import version1_lt_version2 +from neural_compressor.conf.dotdict import DotDict from neural_compressor.experimental import Pruning, common +from neural_compressor.experimental.pruning_v2 import TfPruningCallback from neural_compressor.utils import logger -from neural_compressor.adaptor import FRAMEWORKS from neural_compressor.utils.create_obj_from_config import create_train_func -from neural_compressor.experimental.pruning_v2 import TfPruningCallback -from neural_compressor.conf.dotdict import DotDict -from neural_compressor.adaptor.tf_utils.util import version1_lt_version2 -from platform import platform, system + def build_fake_yaml(): fake_yaml = """ @@ -50,12 +54,14 @@ def build_fake_yaml(): metric: topk: 1 """ - with open('fake_yaml.yaml', 'w', encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) + def lr_schedule(epoch): """Learning Rate Schedule Learning rate is scheduled to be reduced after 80, 120, 160, 180 epochs. + Called automatically every epoch as part of callbacks during training. 
# Arguments epoch (int): The number of epochs @@ -71,16 +77,13 @@ def lr_schedule(epoch): lr *= 1e-2 elif epoch > 80: lr *= 1e-1 - print('Learning rate: ', lr) + print("Learning rate: ", lr) return lr -def resnet_layer(inputs, - num_filters=8, - kernel_size=3, - strides=1, - activation='relu', - batch_normalization=True, - conv_first=True): + +def resnet_layer( + inputs, num_filters=8, kernel_size=3, strides=1, activation="relu", batch_normalization=True, conv_first=True +): """2D Convolution-Batch Normalization-Activation stack builder # Arguments inputs (tensor): input tensor from input image or previous layer @@ -92,15 +95,16 @@ def resnet_layer(inputs, conv_first (bool): conv-bn-activation (True) or bn-activation-conv (False) # Returns - x (tensor): tensor as input to the next layer - """ - conv = tf.keras.layers.Conv2D(num_filters, - kernel_size=kernel_size, - strides=strides, - padding='same', - use_bias=True, - kernel_initializer='he_normal', - kernel_regularizer=tf.keras.regularizers.l2(1e-4)) + x (tensor): tensor as input to the next layer.""" + conv = tf.keras.layers.Conv2D( + num_filters, + kernel_size=kernel_size, + strides=strides, + padding="same", + use_bias=True, + kernel_initializer="he_normal", + kernel_regularizer=tf.keras.regularizers.l2(1e-4), + ) x = inputs if conv_first: @@ -117,11 +121,13 @@ def resnet_layer(inputs, x = conv(x) return x + def resnet_v2(input_shape, depth, num_classes=10): """ResNet Version 2 Model builder [b] Stacks of (1 x 1)-(3 x 3)-(1 x 1) BN-ReLU-Conv2D or also known as bottleneck layer First shortcut connection per layer is 1 x 1 Conv2D. + Second and onwards shortcut connection is identity. At the beginning of each stage, the feature map size is halved (downsampled) by a convolutional layer with strides=2, while the number of filter maps is @@ -140,21 +146,19 @@ def resnet_v2(input_shape, depth, num_classes=10): model (Model): Keras model instance """ if (depth - 2) % 9 != 0: - raise ValueError('depth should be 9n+2 (eg 56 or 110 in [b])') + raise ValueError("depth should be 9n+2 (eg 56 or 110 in [b])") # Start model definition. 
num_filters_in = 4 num_res_blocks = int((depth - 2) / 9) inputs = tf.keras.layers.Input(shape=input_shape) # v2 performs Conv2D with BN-ReLU on input before splitting into 2 paths - x = resnet_layer(inputs=inputs, - num_filters=num_filters_in, - conv_first=True) + x = resnet_layer(inputs=inputs, num_filters=num_filters_in, conv_first=True) # Instantiate the stack of residual units for stage in range(1): for res_block in range(num_res_blocks): - activation = 'relu' + activation = "relu" batch_normalization = True strides = 1 if stage == 0: @@ -165,33 +169,32 @@ def resnet_v2(input_shape, depth, num_classes=10): else: num_filters_out = num_filters_in * 2 if res_block == 0: # first layer but not first stage - strides = 2 # downsample + strides = 2 # downsample # bottleneck residual unit - y = resnet_layer(inputs=x, - num_filters=num_filters_in, - kernel_size=1, - strides=strides, - activation=activation, - batch_normalization=batch_normalization, - conv_first=False) - y = resnet_layer(inputs=y, - num_filters=num_filters_in, - conv_first=False) - - y = resnet_layer(inputs=y, - num_filters=num_filters_out, - kernel_size=1, - conv_first=False) + y = resnet_layer( + inputs=x, + num_filters=num_filters_in, + kernel_size=1, + strides=strides, + activation=activation, + batch_normalization=batch_normalization, + conv_first=False, + ) + y = resnet_layer(inputs=y, num_filters=num_filters_in, conv_first=False) + + y = resnet_layer(inputs=y, num_filters=num_filters_out, kernel_size=1, conv_first=False) if res_block == 0: # linear projection residual shortcut connection to match # changed dims - x = resnet_layer(inputs=x, - num_filters=num_filters_out, - kernel_size=1, - strides=strides, - activation=None, - batch_normalization=False) + x = resnet_layer( + inputs=x, + num_filters=num_filters_out, + kernel_size=1, + strides=strides, + activation=None, + batch_normalization=False, + ) x = tf.keras.layers.add([x, y]) num_filters_in = num_filters_out @@ -199,17 +202,16 @@ def resnet_v2(input_shape, depth, num_classes=10): # Add classifier on top. # v2 has BN-ReLU before Pooling # x = BatchNormalization()(x) - x = tf.keras.layers.Activation('relu')(x) + x = tf.keras.layers.Activation("relu")(x) x = tf.keras.layers.AveragePooling2D(pool_size=8)(x) y = tf.keras.layers.Flatten()(x) - outputs = tf.keras.layers.Dense(num_classes, - activation='softmax', - kernel_initializer='he_normal')(y) + outputs = tf.keras.layers.Dense(num_classes, activation="softmax", kernel_initializer="he_normal")(y) # Instantiate model. model = tf.keras.models.Model(inputs=inputs, outputs=outputs) return model + # Training parameters batch_size = 128 # orig paper trained all networks with batch_size=128 epochs = 1 @@ -221,6 +223,7 @@ def resnet_v2(input_shape, depth, num_classes=10): n = 1 depth = n * 9 + 2 + def train(dst_path): # Load the CIFAR10 data. (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data() @@ -228,8 +231,8 @@ def train(dst_path): # Input image dimensions. input_shape = x_train.shape[1:] # Normalize data. 
- x_train = x_train.astype('float32') / 255 - x_test = x_test.astype('float32') / 255 + x_train = x_train.astype("float32") / 255 + x_test = x_test.astype("float32") / 255 x_train_mean = np.mean(x_train, axis=0) x_train -= x_train_mean @@ -241,37 +244,39 @@ def train(dst_path): model = resnet_v2(input_shape=input_shape, depth=depth) - model.compile(loss='categorical_crossentropy', - optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), - metrics=['accuracy']) + model.compile( + loss="categorical_crossentropy", optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), metrics=["accuracy"] + ) model.summary() lr_scheduler = tf.keras.callbacks.LearningRateScheduler(lr_schedule) - lr_reducer = tf.keras.callbacks.ReduceLROnPlateau(factor=np.sqrt(0.1), - cooldown=0, - patience=5, - min_lr=0.5e-6) + lr_reducer = tf.keras.callbacks.ReduceLROnPlateau(factor=np.sqrt(0.1), cooldown=0, patience=5, min_lr=0.5e-6) callbacks = [lr_reducer, lr_scheduler] # Run training, with or without data augmentation. - model.fit(x_train, y_train, - batch_size=batch_size, - epochs=epochs, - validation_data=(x_test, y_test), - shuffle=True, - callbacks=callbacks) + model.fit( + x_train, + y_train, + batch_size=batch_size, + epochs=epochs, + validation_data=(x_test, y_test), + shuffle=True, + callbacks=callbacks, + ) # Score trained model. scores = model.evaluate(x_test, y_test, verbose=1) - print('Test loss:', scores[0]) - print('Test accuracy:', scores[1]) + print("Test loss:", scores[0]) + print("Test accuracy:", scores[1]) model.save(dst_path) + def dir_md5_check(dir): files_list = [] md5_list = [] + def get_files_list(path, list_name): for file in sorted(os.listdir(path)): file_path = os.path.join(path, file) @@ -279,20 +284,22 @@ def get_files_list(path, list_name): get_files_list(file_path, list_name) else: list_name.append(file_path) + get_files_list(dir, files_list) for file_path in files_list: - with open(file_path, 'rb') as fp: + with open(file_path, "rb") as fp: data = fp.read() file_md5 = hashlib.md5(data).hexdigest() md5_list.append(file_md5) return md5_list + class TrainDataset(object): def __init__(self): (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data() x_train, y_train = x_train[:64], y_train[:64] - x_train = x_train.astype('float32') / 255 - x_test = x_test.astype('float32') / 255 + x_train = x_train.astype("float32") / 255 + x_test = x_test.astype("float32") / 255 # If subtract pixel mean is enabled x_train_mean = np.mean(x_train, axis=0) @@ -313,12 +320,13 @@ def __len__(self): def __getitem__(self, idx): return self.train_images[idx], self.train_labels[idx] + class EvalDataset(object): def __init__(self): (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data() - x_train = x_train.astype('float32') / 255 - x_test = x_test.astype('float32') / 255 + x_train = x_train.astype("float32") / 255 + x_test = x_test.astype("float32") / 255 # If subtract pixel mean is enabled x_train_mean = np.mean(x_train, axis=0) @@ -337,8 +345,10 @@ def __len__(self): def __getitem__(self, idx): return self.test_images[idx], self.test_labels[idx] + class TestTensorflowPruning(unittest.TestCase): - dst_path = './baseline_model' + dst_path = "./baseline_model" + @classmethod def setUpClass(cls): build_fake_yaml() @@ -349,9 +359,12 @@ def setUpClass(cls): if not os.path.exists(cls.dst_path): logger.warning("resnet_v2 baseline_model doesn't exist.") return unittest.skip("resnet_v2 baseline_model doesn't exist")(TestTensorflowPruning) - elif dir_md5_check(cls.dst_path) != \ - 
['65625fef42f44e6853d4d6d5e4188a49', 'a783396652bf62db3db4c9f647953175', - 'c7259753419d9fc053df5b2059aef8c0', '77f2a1045cffee9f6a43f2594a5627ba']: + elif dir_md5_check(cls.dst_path) != [ + "65625fef42f44e6853d4d6d5e4188a49", + "a783396652bf62db3db4c9f647953175", + "c7259753419d9fc053df5b2059aef8c0", + "77f2a1045cffee9f6a43f2594a5627ba", + ]: logger.warning("resnet_v2 baseline_model md5 verification failed.") return unittest.skip("resnet_v2 baseline_model md5 verification failed.")(TestTensorflowPruning) else: @@ -359,9 +372,9 @@ def setUpClass(cls): @classmethod def tearDownClass(cls): - os.remove('fake_yaml.yaml') - shutil.rmtree('nc_workspace',ignore_errors=True) - shutil.rmtree('baseline_model', ignore_errors=True) + os.remove("fake_yaml.yaml") + shutil.rmtree("nc_workspace", ignore_errors=True) + shutil.rmtree("baseline_model", ignore_errors=True) def setUp(self): logger.info(f"CPU: {cpuinfo.get_cpu_info()['brand_raw']}") @@ -370,59 +383,89 @@ def setUp(self): def tearDown(self): logger.info(f"{self._testMethodName} done.\n") - @unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.3.0'), "Keras model need tensorflow version >= 2.3.0, so the case is skipped") + @unittest.skipIf( + version1_lt_version2(tf.version.VERSION, "2.3.0"), + "Keras model need tensorflow version >= 2.3.0, so the case is skipped", + ) def test_create_train_func1(self): - framework = 'tensorflow' - framework_specific_info = DotDict({'device': 'cpu', - 'random_seed': 1978, - 'workspace_path': './nc_workspace/', - 'q_dataloader': None, - 'inputs': [], - 'outputs': [], - 'format': 'default', - 'backend': 'default'}) + framework = "tensorflow" + framework_specific_info = DotDict( + { + "device": "cpu", + "random_seed": 1978, + "workspace_path": "./nc_workspace/", + "q_dataloader": None, + "inputs": [], + "outputs": [], + "format": "default", + "backend": "default", + } + ) adaptor = FRAMEWORKS[framework](framework_specific_info) dataloader = common.DataLoader(TrainDataset(), batch_size=32) - train_cfg = DotDict({'epoch': 1, - 'optimizer': {'AdamW': {'learning_rate': 0.001, 'weight_decay': 0.0001}}, - 'criterion': {'CrossEntropyLoss': {'reduction': 'sum_over_batch_size', 'from_logits': True}}, - 'execution_mode': 'eager', - 'start_epoch': 0}) + train_cfg = DotDict( + { + "epoch": 1, + "optimizer": {"AdamW": {"learning_rate": 0.001, "weight_decay": 0.0001}}, + "criterion": {"CrossEntropyLoss": {"reduction": "sum_over_batch_size", "from_logits": True}}, + "execution_mode": "eager", + "start_epoch": 0, + } + ) callbacks = TfPruningCallback hooks = {} pruning_func1 = create_train_func(framework, dataloader, adaptor, train_cfg, hooks, callbacks) self.assertTrue(isinstance(pruning_func1, types.FunctionType)) - @unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.3.0'), "Keras model need tensorflow version >= 2.3.0, so the case is skipped") + @unittest.skipIf( + version1_lt_version2(tf.version.VERSION, "2.3.0"), + "Keras model need tensorflow version >= 2.3.0, so the case is skipped", + ) def test_create_train_func2(self): - framework = 'tensorflow' - framework_specific_info = DotDict({'device': 'cpu', - 'random_seed': 1978, - 'workspace_path': './nc_workspace/', - 'q_dataloader': None, - 'inputs': [], - 'outputs': [], - 'format': 'default', - 'backend': 'default'}) + framework = "tensorflow" + framework_specific_info = DotDict( + { + "device": "cpu", + "random_seed": 1978, + "workspace_path": "./nc_workspace/", + "q_dataloader": None, + "inputs": [], + "outputs": [], + "format": "default", + "backend": 
"default", + } + ) adaptor = FRAMEWORKS[framework](framework_specific_info) dataloader = common.DataLoader(TrainDataset(), batch_size=32) - train_cfg = DotDict({'epoch': 1, - 'dataloader': {'distributed': False, 'batch_size': 32, - 'dataset': {'ImageRecord': {'root': './ImageNet'}}, - 'transform': {'ResizeCropImagenet': {'height': 224, - 'width': 224, 'mean_value': [123.68, 116.78, 103.94]}}, - 'last_batch': 'rollover', 'shuffle': False}, - 'postprocess': {'transform': {'LabelShift': 1}}, - 'optimizer': {'SGD': {'learning_rate': 0.0001, - 'momentum': 0.9, 'nesterov': True}}, - 'criterion': {'SparseCategoricalCrossentropy': {'reduction': 'sum_over_batch_size'}}, - 'execution_mode': 'eager', 'start_epoch': 0}) + train_cfg = DotDict( + { + "epoch": 1, + "dataloader": { + "distributed": False, + "batch_size": 32, + "dataset": {"ImageRecord": {"root": "./ImageNet"}}, + "transform": { + "ResizeCropImagenet": {"height": 224, "width": 224, "mean_value": [123.68, 116.78, 103.94]} + }, + "last_batch": "rollover", + "shuffle": False, + }, + "postprocess": {"transform": {"LabelShift": 1}}, + "optimizer": {"SGD": {"learning_rate": 0.0001, "momentum": 0.9, "nesterov": True}}, + "criterion": {"SparseCategoricalCrossentropy": {"reduction": "sum_over_batch_size"}}, + "execution_mode": "eager", + "start_epoch": 0, + } + ) pruning_func2 = create_train_func(framework, dataloader, adaptor, train_cfg) self.assertTrue(isinstance(pruning_func2, types.FunctionType)) - @unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.3.0'), "Keras model need tensorflow version >= 2.3.0, so the case is skipped") + @unittest.skipIf( + version1_lt_version2(tf.version.VERSION, "2.3.0"), + "Keras model need tensorflow version >= 2.3.0, so the case is skipped", + ) def test_tensorflow_pruning(self): prune = Pruning("./fake_yaml.yaml") prune.train_dataloader = common.DataLoader(TrainDataset(), batch_size=32) @@ -437,5 +480,5 @@ def test_tensorflow_pruning(self): self.assertGreater(prune.last_score, 0.73) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/pruning_with_tf/pruning_2.x/test_pruning_keras.py b/test/pruning_with_tf/pruning_2.x/test_pruning_keras.py index 7aafaacffae..4f78ff380bc 100644 --- a/test/pruning_with_tf/pruning_2.x/test_pruning_keras.py +++ b/test/pruning_with_tf/pruning_2.x/test_pruning_keras.py @@ -1,59 +1,63 @@ import unittest -class TestPruning(unittest.TestCase): +class TestPruning(unittest.TestCase): def test_pruning_keras(self): import tensorflow as tf - from neural_compressor.data import Datasets + from neural_compressor import WeightPruningConfig - from neural_compressor.training import prepare_compression - from neural_compressor.data import DataLoader from neural_compressor.adaptor import FRAMEWORKS - from neural_compressor.conf.dotdict import DotDict - from neural_compressor.utils import create_obj_from_config - from neural_compressor.utils import logger from neural_compressor.conf.config import default_workspace + from neural_compressor.conf.dotdict import DotDict + from neural_compressor.data import DataLoader, Datasets + from neural_compressor.training import prepare_compression + from neural_compressor.utils import create_obj_from_config, logger + + model = tf.keras.applications.ResNet50V2(weights="imagenet") - model = tf.keras.applications.ResNet50V2(weights='imagenet') def train(model, adaptor, compression_manager, train_dataloader): train_cfg = { - 'epoch': 1, - 'start_epoch': 0, - 'execution_mode': 'eager', - 'criterion': 
{'SparseCategoricalCrossentropy': {'reduction': 'sum_over_batch_size'}}, - 'optimizer': {'SGD': {'learning_rate': 1e-03, 'momentum': 0.9, 'nesterov': True}}, + "epoch": 1, + "start_epoch": 0, + "execution_mode": "eager", + "criterion": {"SparseCategoricalCrossentropy": {"reduction": "sum_over_batch_size"}}, + "optimizer": {"SGD": {"learning_rate": 1e-03, "momentum": 0.9, "nesterov": True}}, } train_cfg = DotDict(train_cfg) train_func = create_obj_from_config.create_train_func( - 'tensorflow', \ - train_dataloader, \ - adaptor, \ - train_cfg, \ - hooks=compression_manager.callbacks.callbacks_list[0].hooks, \ - callbacks=compression_manager.callbacks.callbacks_list[0]) + "tensorflow", + train_dataloader, + adaptor, + train_cfg, + hooks=compression_manager.callbacks.callbacks_list[0].hooks, + callbacks=compression_manager.callbacks.callbacks_list[0], + ) train_func(model) - tf_datasets = Datasets('tensorflow') - dummy_dataset = tf_datasets['dummy'](shape=(100, 224, 224, 3), low=0., high=1., label=True) - train_dataloader = DataLoader(dataset=dummy_dataset, batch_size=32, - framework='tensorflow', distributed=False) + tf_datasets = Datasets("tensorflow") + dummy_dataset = tf_datasets["dummy"](shape=(100, 224, 224, 3), low=0.0, high=1.0, label=True) + train_dataloader = DataLoader(dataset=dummy_dataset, batch_size=32, framework="tensorflow", distributed=False) framework_specific_info = { - 'device': 'cpu', 'random_seed': 9527, - 'workspace_path': default_workspace, - 'q_dataloader': None, 'format': 'default', - 'backend': 'default', 'inputs': [], 'outputs': [] + "device": "cpu", + "random_seed": 9527, + "workspace_path": default_workspace, + "q_dataloader": None, + "format": "default", + "backend": "default", + "inputs": [], + "outputs": [], } - adaptor = FRAMEWORKS['keras'](framework_specific_info) + adaptor = FRAMEWORKS["keras"](framework_specific_info) configs = WeightPruningConfig( - backend='itex', - pruning_type='magnitude', - pattern='3x1', + backend="itex", + pruning_type="magnitude", + pattern="3x1", target_sparsity=0.5, start_step=1, end_step=10, - pruning_op_types=['Conv', 'Dense'] + pruning_op_types=["Conv", "Dense"], ) compression_manager = prepare_compression(model, confs=configs) compression_manager.callbacks.on_train_begin() diff --git a/test/quantization/test_quantization.py b/test/quantization/test_quantization.py index cc70c38678d..519b2030aa9 100644 --- a/test/quantization/test_quantization.py +++ b/test/quantization/test_quantization.py @@ -1,13 +1,15 @@ -"""Tests for neural_compressor quantization""" -import numpy as np -import unittest -import os -import yaml +"""Tests for neural_compressor quantization.""" import importlib +import os import shutil +import unittest + +import numpy as np +import yaml + def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -25,14 +27,15 @@ def build_fake_yaml(): relative: 0.01 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: + yaml.dump(y, f) f.close() + def build_fake_yaml2(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -51,14 +54,15 @@ def build_fake_yaml2(): workspace: path: saved resume: ./saved/history.snapshot - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml2.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) + with open("fake_yaml2.yaml", "w", 
encoding="utf-8") as f: + yaml.dump(y, f) f.close() + def build_fake_yaml3(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -77,14 +81,15 @@ def build_fake_yaml3(): relative: 0.01 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml3.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) + with open("fake_yaml3.yaml", "w", encoding="utf-8") as f: + yaml.dump(y, f) f.close() + def build_fake_yaml4(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -101,14 +106,15 @@ def build_fake_yaml4(): relative: 0.01 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml4.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) + with open("fake_yaml4.yaml", "w", encoding="utf-8") as f: + yaml.dump(y, f) f.close() + def build_fake_yaml5(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -125,17 +131,18 @@ def build_fake_yaml5(): accuracy_criterion: relative: 0.01 exit_policy: - max_trials: 10 + max_trials: 10 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml5.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) + with open("fake_yaml5.yaml", "w", encoding="utf-8") as f: + yaml.dump(y, f) f.close() + def build_fake_yaml6(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -149,123 +156,135 @@ def build_fake_yaml6(): relative: 0.01 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml6.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) + with open("fake_yaml6.yaml", "w", encoding="utf-8") as f: + yaml.dump(y, f) f.close() def build_fake_model(): import tensorflow as tf from tensorflow.compat.v1 import graph_util + try: graph = tf.Graph() graph_def = tf.GraphDef() with tf.Session() as sess: - x = tf.placeholder(tf.float64, shape=(1,3,3,1), name='x') - y = tf.constant(np.random.random((2,2,1,1)), name='y') - op = tf.nn.conv2d(input=x, filter=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + x = tf.placeholder(tf.float64, shape=(1, 3, 3, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)), name="y") + op = tf.nn.conv2d(input=x, filter=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.global_variables_initializer()) - constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ["op_to_store"]) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") except: graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float64, shape=(1,3,3,1), name='x') - y = tf.compat.v1.constant(np.random.random((2,2,1,1)), name='y') - op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + x = tf.compat.v1.placeholder(tf.float64, shape=(1, 3, 3, 1), name="x") + y = tf.compat.v1.constant(np.random.random((2, 2, 1, 1)), name="y") + op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + 
constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") return graph + def build_fake_strategy(): - with open(os.path.join(os.path.dirname(importlib.util.find_spec('neural_compressor').origin), 'experimental/strategy/fake.py'), 'w', encoding='utf-8') as f: - seq = ["import time \n", - "import copy \n", - "import numpy as np \n", - "from collections import OrderedDict \n", - "from .strategy import strategy_registry, TuneStrategy \n", - "from ...utils import logger \n", - "from .utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler \n", - "from .utils.tuning_structs import OpTuningConfig \n", - "import copy \n", - "@strategy_registry \n", - "class FakeTuneStrategy(TuneStrategy): \n", - " def __init__(self, model, cfg, q_dataloader, q_func=None, eval_dataloader=None, \n", - " eval_func=None, dicts=None, q_hooks=None): \n", - " self.id = 0 \n", - " self.resume = True if dicts else False \n", - " super(FakeTuneStrategy, self).__init__(model, cfg, q_dataloader, \n", - " q_func, eval_dataloader, eval_func, dicts) \n", - " def __getstate__(self): \n", - " for history in self.tuning_history: \n", - " if self._same_yaml(history['cfg'], self.cfg): \n", - " history['id'] = self.id \n", - " save_dict = super(FakeTuneStrategy, self).__getstate__() \n", - " return save_dict \n", - " def next_tune_cfg(self): \n", - " if self.resume: \n", - " #assert self.id == 1 \n", - " assert len(self.tuning_history) == 1 \n", - " history = self.tuning_history[0] \n", - " assert self._same_yaml(history['cfg'], self.cfg) \n", - " assert len(history['history']) \n", - " for h in history['history']: \n", - " assert h \n", - " from copy import deepcopy \n", - " tuning_space = self.tuning_space \n", - " initial_op_tuning_cfg = {} \n", - " for item in tuning_space.root_item.options: \n", - " if item.item_type == 'op': \n", - " op_name, op_type = item.name \n", - " initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space) \n", - " calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options \n", - " for calib_sampling_size in calib_sampling_size_lst: \n", - " # step1. collect the ops that support static and dynamic \n", - " quant_mode_wise_items = OrderedDict() \n", - " query_order = ['static', 'dynamic', 'bf16', 'fp16', 'fp32'] \n", - " pre_items = set() \n", - " for quant_mode in query_order: \n", - " items = tuning_space.query_items_by_quant_mode(quant_mode) \n", - " filtered_items = [item for item in items if item not in pre_items] \n", - " pre_items = pre_items.union(set(items)) \n", - " quant_mode_wise_items[quant_mode] = filtered_items \n", - " def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): \n", - " for item in items_lst: \n", - " op_item_dtype_dict[item.name] = target_quant_mode \n", - " op_item_dtype_dict = OrderedDict() \n", - " for quant_mode, quant_mode_items in quant_mode_wise_items.items(): \n", - " initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) \n", - " # step3. 
optype-wise tuning tuning items: the algorithm/scheme/granularity of activation(weight) \n", - " early_stop_tuning = False \n", - " stage1_cnt = 0 \n", - " int8_ops = quant_mode_wise_items['dynamic'] + quant_mode_wise_items['static'] \n", - " stage1_max = min(5, len(int8_ops)) # TODO set a more appropriate value \n", - " op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], \n", - " op_item_dtype_dict, initial_op_tuning_cfg) \n", - " for op_tuning_cfg in op_wise_tuning_sampler: \n", - " stage1_cnt += 1 \n", - " if early_stop_tuning and stage1_cnt > stage1_max: \n", - " logger.info('Early stopping the stage 1.') \n", - " break \n", - " op_tuning_cfg['calib_sampling_size'] = calib_sampling_size \n", - " self.id += 1 \n", - " yield op_tuning_cfg \n", + with open( + os.path.join( + os.path.dirname(importlib.util.find_spec("neural_compressor").origin), "experimental/strategy/fake.py" + ), + "w", + encoding="utf-8", + ) as f: + seq = [ + "import time \n", + "import copy \n", + "import numpy as np \n", + "from collections import OrderedDict \n", + "from .strategy import strategy_registry, TuneStrategy \n", + "from ...utils import logger \n", + "from .utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler \n", + "from .utils.tuning_structs import OpTuningConfig \n", + "import copy \n", + "@strategy_registry \n", + "class FakeTuneStrategy(TuneStrategy): \n", + " def __init__(self, model, cfg, q_dataloader, q_func=None, eval_dataloader=None, \n", + " eval_func=None, dicts=None, q_hooks=None): \n", + " self.id = 0 \n", + " self.resume = True if dicts else False \n", + " super(FakeTuneStrategy, self).__init__(model, cfg, q_dataloader, \n", + " q_func, eval_dataloader, eval_func, dicts) \n", + " def __getstate__(self): \n", + " for history in self.tuning_history: \n", + " if self._same_yaml(history['cfg'], self.cfg): \n", + " history['id'] = self.id \n", + " save_dict = super(FakeTuneStrategy, self).__getstate__() \n", + " return save_dict \n", + " def next_tune_cfg(self): \n", + " if self.resume: \n", + " #assert self.id == 1 \n", + " assert len(self.tuning_history) == 1 \n", + " history = self.tuning_history[0] \n", + " assert self._same_yaml(history['cfg'], self.cfg) \n", + " assert len(history['history']) \n", + " for h in history['history']: \n", + " assert h \n", + " from copy import deepcopy \n", + " tuning_space = self.tuning_space \n", + " initial_op_tuning_cfg = {} \n", + " for item in tuning_space.root_item.options: \n", + " if item.item_type == 'op': \n", + " op_name, op_type = item.name \n", + " initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space) \n", + " calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options \n", + " for calib_sampling_size in calib_sampling_size_lst: \n", + " # step1. 
collect the ops that support static and dynamic \n", + " quant_mode_wise_items = OrderedDict() \n", + " query_order = ['static', 'dynamic', 'bf16', 'fp16', 'fp32'] \n", + " pre_items = set() \n", + " for quant_mode in query_order: \n", + " items = tuning_space.query_items_by_quant_mode(quant_mode) \n", + " filtered_items = [item for item in items if item not in pre_items] \n", + " pre_items = pre_items.union(set(items)) \n", + " quant_mode_wise_items[quant_mode] = filtered_items \n", + " def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): \n", + " for item in items_lst: \n", + " op_item_dtype_dict[item.name] = target_quant_mode \n", + " op_item_dtype_dict = OrderedDict() \n", + " for quant_mode, quant_mode_items in quant_mode_wise_items.items(): \n", + " initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) \n", + " # step3. optype-wise tuning tuning items: the algorithm/scheme/granularity of activation(weight) \n", + " early_stop_tuning = False \n", + " stage1_cnt = 0 \n", + " int8_ops = quant_mode_wise_items['dynamic'] + quant_mode_wise_items['static'] \n", + " stage1_max = min(5, len(int8_ops)) # TODO set a more appropriate value \n", + " op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], \n", + " op_item_dtype_dict, initial_op_tuning_cfg) \n", + " for op_tuning_cfg in op_wise_tuning_sampler: \n", + " stage1_cnt += 1 \n", + " if early_stop_tuning and stage1_cnt > stage1_max: \n", + " logger.info('Early stopping the stage 1.') \n", + " break \n", + " op_tuning_cfg['calib_sampling_size'] = calib_sampling_size \n", + " self.id += 1 \n", + " yield op_tuning_cfg \n", ] f.writelines(seq) f.close() + class Metric: def update(self, predict, label): pass @@ -276,6 +295,7 @@ def reset(self): def result(self): return 0.5 + class TestQuantization(unittest.TestCase): @classmethod def setUpClass(self): @@ -290,18 +310,23 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') - os.remove('fake_yaml2.yaml') - os.remove('fake_yaml3.yaml') - os.remove('fake_yaml4.yaml') - os.remove('fake_yaml5.yaml') - os.remove('fake_yaml6.yaml') - os.remove(os.path.join(os.path.dirname(importlib.util.find_spec('neural_compressor').origin), 'experimental/strategy/fake.py')) - shutil.rmtree('./saved', ignore_errors=True) + os.remove("fake_yaml.yaml") + os.remove("fake_yaml2.yaml") + os.remove("fake_yaml3.yaml") + os.remove("fake_yaml4.yaml") + os.remove("fake_yaml5.yaml") + os.remove("fake_yaml6.yaml") + os.remove( + os.path.join( + os.path.dirname(importlib.util.find_spec("neural_compressor").origin), "experimental/strategy/fake.py" + ) + ) + shutil.rmtree("./saved", ignore_errors=True) def test_resume(self): import tensorflow as tf from tensorflow.compat.v1 import graph_util + tf.compat.v1.disable_eager_execution() tf.compat.v1.reset_default_graph() tf.compat.v1.set_random_seed(1) @@ -309,42 +334,44 @@ def test_resume(self): top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 3], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 3], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") relu = tf.nn.relu(conv) - relu6 = tf.nn.relu6(relu, name='op_to_store') + relu6 = tf.nn.relu6(relu, name="op_to_store") - out_name = 
relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml5.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 32, 32, 3), label=True) + + quantizer = Quantization("fake_yaml5.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 32, 32, 3), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def output_graph = quantizer.fit() self.assertNotEqual(output_graph, None) self.assertTrue(os.path.exists("./saved")) - quantizer = Quantization('fake_yaml2.yaml') + quantizer = Quantization("fake_yaml2.yaml") quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def output_graph = quantizer.fit() - #self.assertNotEqual(output_graph, None) # disable this check, the code has bug of recover from resume + # self.assertNotEqual(output_graph, None) # disable this check, the code has bug of recover from resume def test_autodump(self): # test auto_dump using old api from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml3.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 3, 3, 1), label=True) + + quantizer = Quantization("fake_yaml3.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 3, 3, 1), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph @@ -353,8 +380,9 @@ def test_autodump(self): def test_performance_only(self): from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml4.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 3, 3, 1), label=True) + + quantizer = Quantization("fake_yaml4.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 3, 3, 1), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph @@ -363,8 +391,9 @@ def test_performance_only(self): def test_fit_method(self): from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml4.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 3, 3, 1), label=True) + + quantizer = Quantization("fake_yaml4.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 3, 3, 1), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph @@ -373,9 +402,10 @@ def test_fit_method(self): def test_quantization_without_yaml(self): from neural_compressor.experimental import Quantization, common + quantizer = Quantization() quantizer.model = self.constant_graph - dataset = quantizer.dataset('dummy', shape=(100, 3, 3, 1), label=True) + dataset = quantizer.dataset("dummy", shape=(100, 3, 3, 1), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) output_graph = quantizer.fit() @@ -383,28 +413,32 @@ def 
test_quantization_without_yaml(self): def test_invalid_eval_func(self): from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 3, 3, 1), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 3, 3, 1), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph + def invalid_eval_func(model): - return [[1.]] + return [[1.0]] + quantizer.eval_func = invalid_eval_func output_graph = quantizer.fit() self.assertEqual(output_graph, None) def invalid_eval_func(model): - return '0.1' + return "0.1" + quantizer.eval_func = invalid_eval_func output_graph = quantizer.fit() self.assertEqual(output_graph, None) - def test_custom_metric(self): from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml6.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 3, 3, 1), label=True) + + quantizer = Quantization("fake_yaml6.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 3, 3, 1), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph @@ -413,21 +447,27 @@ def test_custom_metric(self): self.assertEqual(quantizer.strategy.evaluation_result[0], 0.5) def test_custom_objective(self): + import tracemalloc + from neural_compressor.experimental import Quantization, common from neural_compressor.objective import Objective, objective_registry - import tracemalloc + class MyObjective(Objective): - representation = 'MyObj' - def __init__(self): - super().__init__() - def start(self): - tracemalloc.start() - def end(self): - _, peak = tracemalloc.get_traced_memory() - tracemalloc.stop() - self._result_list.append(peak // 1048576) - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 3, 3, 1), label=True) + representation = "MyObj" + + def __init__(self): + super().__init__() + + def start(self): + tracemalloc.start() + + def end(self): + _, peak = tracemalloc.get_traced_memory() + tracemalloc.stop() + self._result_list.append(peak // 1048576) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 3, 3, 1), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph @@ -436,31 +476,40 @@ def end(self): self.assertNotEqual(output_graph, None) class MyObjective(Objective): - representation = 'Accuracy' - def __init__(self): - super().__init__() - def start(self): - tracemalloc.start() - def end(self): - _, peak = tracemalloc.get_traced_memory() - tracemalloc.stop() - self._result_list.append(peak // 1048576) + representation = "Accuracy" + + def __init__(self): + super().__init__() + + def start(self): + tracemalloc.start() + + def end(self): + _, peak = tracemalloc.get_traced_memory() + tracemalloc.stop() + self._result_list.append(peak // 1048576) + quantizer = Quantization() with self.assertRaises(ValueError): quantizer.objective = MyObjective() with self.assertRaises(ValueError): + @objective_registry class MyObjective(Objective): - representation = 'Accuracy' - def __init__(self): - super().__init__() - def start(self): - tracemalloc.start() - def end(self): - _, peak = tracemalloc.get_traced_memory() - 
tracemalloc.stop() - self._result_list.append(peak // 1048576) - + representation = "Accuracy" + + def __init__(self): + super().__init__() + + def start(self): + tracemalloc.start() + + def end(self): + _, peak = tracemalloc.get_traced_memory() + tracemalloc.stop() + self._result_list.append(peak // 1048576) + + if __name__ == "__main__": unittest.main() diff --git a/test/quantization/test_tensorflow_qat.py b/test/quantization/test_tensorflow_qat.py index ab12c9546d2..be8a4f4c08b 100644 --- a/test/quantization/test_tensorflow_qat.py +++ b/test/quantization/test_tensorflow_qat.py @@ -1,9 +1,11 @@ import shutil import unittest + import numpy as np import tensorflow as tf from pkg_resources import parse_version + def train_func(): # Load MNIST dataset mnist = tf.keras.datasets.mnist @@ -14,19 +16,20 @@ def train_func(): test_images = test_images / 255.0 # Define the model architecture. - model = tf.keras.Sequential([ - tf.keras.layers.InputLayer(input_shape=(28, 28)), - tf.keras.layers.Reshape(target_shape=(28, 28, 1)), - tf.keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'), - tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), - tf.keras.layers.Flatten(), - tf.keras.layers.Dense(10) - ]) + model = tf.keras.Sequential( + [ + tf.keras.layers.InputLayer(input_shape=(28, 28)), + tf.keras.layers.Reshape(target_shape=(28, 28, 1)), + tf.keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation="relu"), + tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), + tf.keras.layers.Flatten(), + tf.keras.layers.Dense(10), + ] + ) # Train the digit classification model - model.compile(optimizer='adam', - loss=tf.keras.losses.SparseCategoricalCrossentropy( - from_logits=True), - metrics=['accuracy']) + model.compile( + optimizer="adam", loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=["accuracy"] + ) model.fit( train_images, @@ -35,10 +38,9 @@ def train_func(): validation_split=0.1, ) - _, baseline_model_accuracy = model.evaluate( - test_images, test_labels, verbose=0) + _, baseline_model_accuracy = model.evaluate(test_images, test_labels, verbose=0) - print('Baseline test accuracy:', baseline_model_accuracy) + print("Baseline test accuracy:", baseline_model_accuracy) model.save("baseline_model") @@ -67,10 +69,10 @@ def setUpClass(self): @classmethod def tearDownClass(self): - shutil.rmtree('baseline_model',ignore_errors=True) - shutil.rmtree('trained_qat_model',ignore_errors=True) + shutil.rmtree("baseline_model", ignore_errors=True) + shutil.rmtree("trained_qat_model", ignore_errors=True) - @unittest.skipIf(parse_version(tf.version.VERSION) < parse_version('2.3.0'), "version check") + @unittest.skipIf(parse_version(tf.version.VERSION) < parse_version("2.3.0"), "version check") def test_qat(self): mnist = tf.keras.datasets.mnist (train_images, train_labels), (test_images, test_labels) = mnist.load_data() @@ -79,50 +81,56 @@ def test_qat(self): train_images = train_images / 255.0 test_images = test_images / 255.0 - from neural_compressor import training, QuantizationAwareTrainingConfig + from neural_compressor import QuantizationAwareTrainingConfig, training + config = QuantizationAwareTrainingConfig() - compression_manager = training.prepare_compression('./baseline_model', config) + compression_manager = training.prepare_compression("./baseline_model", config) compression_manager.callbacks.on_train_begin() q_aware_model = compression_manager.model.model # `quantize_model` requires a recompile. 
- q_aware_model.compile(optimizer='adam', - loss=tf.keras.losses.SparseCategoricalCrossentropy( - from_logits=True), - metrics=['accuracy']) + q_aware_model.compile( + optimizer="adam", loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=["accuracy"] + ) train_images_subset = train_images[0:1000] # out of 60000 train_labels_subset = train_labels[0:1000] - q_aware_model.fit(train_images_subset, train_labels_subset, - batch_size=500, epochs=1, validation_split=0.1) + q_aware_model.fit(train_images_subset, train_labels_subset, batch_size=500, epochs=1, validation_split=0.1) - _, q_aware_model_accuracy = q_aware_model.evaluate( - test_images, test_labels, verbose=0) + _, q_aware_model_accuracy = q_aware_model.evaluate(test_images, test_labels, verbose=0) - print('Quant test accuracy:', q_aware_model_accuracy) + print("Quant test accuracy:", q_aware_model_accuracy) compression_manager.callbacks.on_train_end() compression_manager.save("trained_qat_model") def test_quantize_recipe(self): from neural_compressor.adaptor.tf_utils.quantize_graph.qat.quantize_config import global_config - from neural_compressor.adaptor.tf_utils.quantize_graph.qat.quantize_helper import init_quantize_config, qat_clone_function - model = tf.keras.Sequential([ - tf.keras.layers.InputLayer(input_shape=(28, 28)), - tf.keras.layers.Reshape(target_shape=(28, 28, 1)), - tf.keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'), - tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), - tf.keras.layers.Flatten(), - tf.keras.layers.Dense(10) - ]) + from neural_compressor.adaptor.tf_utils.quantize_graph.qat.quantize_helper import ( + init_quantize_config, + qat_clone_function, + ) + + model = tf.keras.Sequential( + [ + tf.keras.layers.InputLayer(input_shape=(28, 28)), + tf.keras.layers.Reshape(target_shape=(28, 28, 1)), + tf.keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation="relu"), + tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), + tf.keras.layers.Flatten(), + tf.keras.layers.Dense(10), + ] + ) print("\n") print("The layer information of this fp32 mnist model:") model.summary() # custom setting to decide which layer to be quantized - quantize_recipe = {'conv2d_1': {'quantize': False}, - 'max_pooling2d_1': {'quantize': True},} + quantize_recipe = { + "conv2d_1": {"quantize": False}, + "max_pooling2d_1": {"quantize": True}, + } init_quantize_config(model, quantize_recipe) q_model = tf.keras.models.clone_model(model, input_tensors=None, clone_function=qat_clone_function) global_config.clear() @@ -130,15 +138,22 @@ def test_quantize_recipe(self): print("\n") print("The mnist model after applying QAT:") q_model.summary() - assert q_model.layers[1].name == 'conv2d_1', "The Conv2D layer is incorrectly quantized, the quantize_recipe is ignored !" + assert ( + q_model.layers[1].name == "conv2d_1" + ), "The Conv2D layer is incorrectly quantized, the quantize_recipe is ignored !" 
def test_quantize_wrapper(self): from neural_compressor.adaptor.tf_utils.quantize_graph.qat.quantize_config import global_config - from neural_compressor.adaptor.tf_utils.quantize_graph.qat.quantize_helper import init_quantize_config, qat_clone_function + from neural_compressor.adaptor.tf_utils.quantize_graph.qat.quantize_helper import ( + init_quantize_config, + qat_clone_function, + ) input_shape = (28, 28, 3) inputs = tf.keras.layers.Input(shape=input_shape) - x = tf.keras.layers.DepthwiseConv2D(kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu')(inputs) + x = tf.keras.layers.DepthwiseConv2D(kernel_size=(3, 3), strides=(1, 1), padding="same", activation="relu")( + inputs + ) x = tf.keras.layers.Add()([inputs, x]) model = tf.keras.models.Model(inputs=inputs, outputs=x) @@ -147,14 +162,19 @@ def test_quantize_wrapper(self): global_config.clear() depthwise_conv2D_layer = q_model.layers[1] - assert depthwise_conv2D_layer.name == "quant_depthwise_conv2d", "The DepthwiseConv2D layer is not quantized as expected." + assert ( + depthwise_conv2D_layer.name == "quant_depthwise_conv2d" + ), "The DepthwiseConv2D layer is not quantized as expected." depthwise_conv2D_layer.trainable = False - assert depthwise_conv2D_layer.trainable == False, "The trainable attribute of this layer can not be correctly set." + assert ( + depthwise_conv2D_layer.trainable is False + ), "The trainable attribute of this layer can not be correctly set." input_data = np.random.rand(1, 28, 28, 3) training = tf.keras.backend.learning_phase() output = depthwise_conv2D_layer(input_data, training=training) assert output is not None, "The layer can not be correctly inferenced." -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/test/quantization/test_tensorflow_recipe.py b/test/quantization/test_tensorflow_recipe.py index 3d1c0e22d54..5498126672d 100644 --- a/test/quantization/test_tensorflow_recipe.py +++ b/test/quantization/test_tensorflow_recipe.py @@ -1,16 +1,18 @@ # # -*- coding: utf-8 -*- # -import unittest import os -import yaml -from neural_compressor.adaptor.tf_utils.util import disable_random +import unittest import tensorflow as tf +import yaml from tensorflow.compat.v1 import graph_util +from neural_compressor.adaptor.tf_utils.util import disable_random + + def build_fake_yaml_disable_first_quantization(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -39,15 +41,15 @@ def build_fake_yaml_disable_first_quantization(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml_disable_first_quantization.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml_disable_first_quantization.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() def build_fake_yaml_enable_first_quantization(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -76,15 +78,15 @@ def build_fake_yaml_enable_first_quantization(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml_enable_first_quantization.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml_enable_first_quantization.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() def build_fake_yaml_disable_scale_propagation(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -113,15 +115,15 @@ def build_fake_yaml_disable_scale_propagation(): performance_only: True 
workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml_disable_scale_propagation.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml_disable_scale_propagation.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() def build_fake_yaml_enable_scale_propagation(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -150,15 +152,15 @@ def build_fake_yaml_enable_scale_propagation(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml_enable_scale_propagation.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml_enable_scale_propagation.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() def build_fake_yaml_enable_scale_unification(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -187,15 +189,15 @@ def build_fake_yaml_enable_scale_unification(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml_enable_scale_unification.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml_enable_scale_unification.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() def build_fake_yaml_disable_scale_unification(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -224,12 +226,13 @@ def build_fake_yaml_disable_scale_unification(): performance_only: False workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml_disable_scale_unification.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml_disable_scale_unification.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() + class TestTensorflowInt8Recipe(unittest.TestCase): @classmethod def setUpClass(self): @@ -242,12 +245,12 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml_disable_first_quantization.yaml') - os.remove('fake_yaml_enable_first_quantization.yaml') - os.remove('fake_yaml_disable_scale_propagation.yaml') - os.remove('fake_yaml_enable_scale_propagation.yaml') - os.remove('fake_yaml_disable_scale_unification.yaml') - os.remove('fake_yaml_enable_scale_unification.yaml') + os.remove("fake_yaml_disable_first_quantization.yaml") + os.remove("fake_yaml_enable_first_quantization.yaml") + os.remove("fake_yaml_disable_scale_propagation.yaml") + os.remove("fake_yaml_enable_scale_propagation.yaml") + os.remove("fake_yaml_disable_scale_unification.yaml") + os.remove("fake_yaml_enable_scale_unification.yaml") @disable_random() def test_disable_first_quantization(self): @@ -255,26 +258,26 @@ def test_disable_first_quantization(self): top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) relu = tf.nn.relu(normed) - relu6 = tf.nn.relu6(relu, name='op_to_store') + relu6 = tf.nn.relu6(relu, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: 
sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml_disable_first_quantization.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + quantizer = Quantization("fake_yaml_disable_first_quantization.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -283,9 +286,9 @@ def test_disable_first_quantization(self): found_fp32_conv = False for i in output_graph.graph_def.node: - if i.op == 'Conv2D': - found_fp32_conv = True - break + if i.op == "Conv2D": + found_fp32_conv = True + break self.assertEqual(found_fp32_conv, True) @@ -295,26 +298,26 @@ def test_enable_first_quantization(self): top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) relu = tf.nn.relu(normed) - relu6 = tf.nn.relu6(relu, name='op_to_store') + relu6 = tf.nn.relu6(relu, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml_enable_first_quantization.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + quantizer = Quantization("fake_yaml_enable_first_quantization.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -323,42 +326,41 @@ def test_enable_first_quantization(self): found_fp32_conv = False for i in output_graph.graph_def.node: - if i.op == 'Conv2D': - found_fp32_conv = True - break + if i.op == "Conv2D": + found_fp32_conv = True + break self.assertEqual(found_fp32_conv, False) @disable_random() def test_enable_scale_propagation(self): x = tf.compat.v1.placeholder(tf.float32, [1, 30, 30, 1], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [2, 2, 1, 1], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [1], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [2, 2, 1, 1], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [1], initializer=tf.compat.v1.random_normal_initializer()) x = tf.nn.relu(x) - conv = 
tf.nn.conv2d(x, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name='last') + conv = tf.nn.conv2d(x, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name="last") normed = tf.compat.v1.layers.batch_normalization(conv) relu = tf.nn.relu(normed) pool = tf.nn.avg_pool(relu, ksize=1, strides=[1, 2, 2, 1], padding="SAME") - conv1 = tf.nn.conv2d(pool, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name='last') + conv1 = tf.nn.conv2d(pool, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name="last") conv_bias = tf.nn.bias_add(conv1, conv_bias) x = tf.nn.relu(conv_bias) - final_node = tf.nn.relu(x, name='op_to_store') + final_node = tf.nn.relu(x, name="op_to_store") - out_name = final_node.name.split(':')[0] + out_name = final_node.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml_enable_scale_propagation.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 30, 30, 1), label=True) + quantizer = Quantization("fake_yaml_enable_scale_propagation.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 30, 30, 1), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -366,41 +368,40 @@ def test_enable_scale_propagation(self): max_freezed_out = [] for i in output_graph.graph_def.node: - if i.op == 'QuantizedConv2DWithBiasAndReluAndRequantize': - max_freezed_out.append(i.input[-1]) + if i.op == "QuantizedConv2DWithBiasAndReluAndRequantize": + max_freezed_out.append(i.input[-1]) self.assertEqual(1, len(set(max_freezed_out))) @disable_random() def test_disable_scale_propagation(self): x = tf.compat.v1.placeholder(tf.float32, [1, 30, 30, 1], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [2, 2, 1, 1], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [1], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [2, 2, 1, 1], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [1], initializer=tf.compat.v1.random_normal_initializer()) x = tf.nn.relu(x) - conv = tf.nn.conv2d(x, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name='last') + conv = tf.nn.conv2d(x, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name="last") normed = tf.compat.v1.layers.batch_normalization(conv) relu = tf.nn.relu(normed) pool = tf.nn.avg_pool(relu, ksize=1, strides=[1, 2, 2, 1], padding="SAME") - conv1 = tf.nn.conv2d(pool, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name='last') + conv1 = tf.nn.conv2d(pool, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name="last") conv_bias = tf.nn.bias_add(conv1, conv_bias) x = tf.nn.relu(conv_bias) - final_node = tf.nn.relu(x, name='op_to_store') + final_node = tf.nn.relu(x, name="op_to_store") - out_name = final_node.name.split(':')[0] + out_name = final_node.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - 
output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml_disable_scale_propagation.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 30, 30, 1), label=True) + quantizer = Quantization("fake_yaml_disable_scale_propagation.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 30, 30, 1), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -408,42 +409,40 @@ def test_disable_scale_propagation(self): max_freezed_out = [] for i in output_graph.graph_def.node: - if i.op == 'QuantizedConv2DWithBiasAndReluAndRequantize': - max_freezed_out.append(i.input[-1]) + if i.op == "QuantizedConv2DWithBiasAndReluAndRequantize": + max_freezed_out.append(i.input[-1]) self.assertEqual(2, len(set(max_freezed_out))) @disable_random() def test_enable_scale_unification(self): x = tf.compat.v1.placeholder(tf.float32, [1, 128, 128, 16], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [2, 2, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [2, 2, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [16], initializer=tf.compat.v1.random_normal_initializer()) x = tf.nn.relu(x) sqrt = tf.math.sqrt(x) relu_sqrt = tf.nn.relu(sqrt) - conv = tf.nn.conv2d(relu_sqrt, conv_weights, strides=[ - 1, 2, 2, 1], padding="SAME", name='last') + conv = tf.nn.conv2d(relu_sqrt, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name="last") normed = tf.compat.v1.layers.batch_normalization(conv) relu = tf.nn.relu(normed) - conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name='last') + conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name="last") conv_bias = tf.nn.bias_add(conv1, conv_bias) relu1 = tf.nn.relu(conv_bias) concat = tf.concat([relu, relu1], 1) - final_node = tf.nn.relu(concat, name='op_to_store') - out_name = final_node.name.split(':')[0] + final_node = tf.nn.relu(concat, name="op_to_store") + out_name = final_node.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml_enable_scale_unification.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 128, 128, 16), label=True) + quantizer = Quantization("fake_yaml_enable_scale_unification.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 128, 128, 16), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -451,42 +450,40 @@ def test_enable_scale_unification(self): max_freezed_out = [] for i in output_graph.graph_def.node: - if i.op == 'QuantizedConv2DWithBiasAndReluAndRequantize': - max_freezed_out.append(i.input[-1]) + if i.op == "QuantizedConv2DWithBiasAndReluAndRequantize": + max_freezed_out.append(i.input[-1]) 
self.assertEqual(1, len(set(max_freezed_out))) @disable_random() def test_disable_scale_unification(self): x = tf.compat.v1.placeholder(tf.float32, [1, 30, 30, 1], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [2, 2, 1, 1], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [1], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [2, 2, 1, 1], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [1], initializer=tf.compat.v1.random_normal_initializer()) x = tf.nn.relu(x) sqrt = tf.math.sqrt(x) relu_sqrt = tf.nn.relu(sqrt) - conv = tf.nn.conv2d(relu_sqrt, conv_weights, strides=[ - 1, 2, 2, 1], padding="SAME", name='last') + conv = tf.nn.conv2d(relu_sqrt, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name="last") normed = tf.compat.v1.layers.batch_normalization(conv) relu = tf.nn.relu(normed) - conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name='last') + conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name="last") conv_bias = tf.nn.bias_add(conv1, conv_bias) relu1 = tf.nn.relu(conv_bias) concat = tf.concat([relu, relu1], 1) - final_node = tf.nn.relu(concat, name='op_to_store') - out_name = final_node.name.split(':')[0] + final_node = tf.nn.relu(concat, name="op_to_store") + out_name = final_node.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml_disable_scale_unification.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 30, 30, 1), label=True) + quantizer = Quantization("fake_yaml_disable_scale_unification.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 30, 30, 1), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -494,9 +491,10 @@ def test_disable_scale_unification(self): max_freezed_out = [] for i in output_graph.graph_def.node: - if i.op == 'QuantizedConv2DWithBiasAndReluAndRequantize': - max_freezed_out.append(i.input[-1]) + if i.op == "QuantizedConv2DWithBiasAndReluAndRequantize": + max_freezed_out.append(i.input[-1]) self.assertEqual(2, len(set(max_freezed_out))) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/test/quantization/test_tensorflow_recover.py b/test/quantization/test_tensorflow_recover.py index effb09f6083..ad1d84f5962 100644 --- a/test/quantization/test_tensorflow_recover.py +++ b/test/quantization/test_tensorflow_recover.py @@ -1,23 +1,25 @@ # # -*- coding: utf-8 -*- # +import logging +import os import shutil import unittest -import os -import yaml -from neural_compressor.adaptor.tf_utils.util import disable_random import tensorflow as tf -from tensorflow.python.platform import gfile +import yaml from tensorflow.compat.v1 import graph_util from tensorflow.python.framework import tensor_util +from tensorflow.python.platform import gfile + +from neural_compressor.adaptor.tf_utils.util import disable_random -import logging logger = logging.getLogger("neural_compressor") 
logger.setLevel(logging.DEBUG) + def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -33,14 +35,15 @@ def build_fake_yaml(): relative: 0.0001 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() + def build_fake_yaml_2(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -58,12 +61,13 @@ def build_fake_yaml_2(): relative: 0.0001 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml_2.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml_2.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() + class TestTensorflowRecover(unittest.TestCase): @classmethod def setUpClass(self): @@ -71,51 +75,51 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') - os.remove('test.pb') - shutil.rmtree('./saved', ignore_errors=True) + os.remove("fake_yaml.yaml") + os.remove("test.pb") + shutil.rmtree("./saved", ignore_errors=True) @disable_random() def test_tensorflow_recover(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_weights_2 = tf.compat.v1.get_variable("weight_2", [3, 8, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_weights_2 = tf.compat.v1.get_variable( + "weight_2", [3, 8, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") relu = tf.nn.relu(conv) max_pool = tf.nn.max_pool(relu, ksize=1, strides=[1, 2, 2, 1], padding="SAME") - conv_bias = tf.compat.v1.get_variable("bias", [16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[ - 1, 2, 2, 1], padding="VALID", name='conv1_3') + conv_bias = tf.compat.v1.get_variable("bias", [16], initializer=tf.compat.v1.random_normal_initializer()) + conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[1, 2, 2, 1], padding="VALID", name="conv1_3") conv_bias = tf.math.add(conv_1, conv_bias) - relu6 = tf.nn.relu6(conv_bias, name='op_to_store') + relu6 = tf.nn.relu6(conv_bias, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) constant_graph = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) - with gfile.GFile('./test.pb', "wb") as f: + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) + with gfile.GFile("./test.pb", "wb") as f: f.write(constant_graph.SerializeToString()) from neural_compressor.experimental import Quantization, common + quantizer = Quantization("./fake_yaml.yaml") - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.calib_dataloader = 
common.DataLoader(dataset) quantizer.model = constant_graph q_model = quantizer.fit() from neural_compressor.utils.utility import recover - recover_model = recover('./test.pb', './saved/history.snapshot', 0) + + recover_model = recover("./test.pb", "./saved/history.snapshot", 0) q_model_const_value = {} for node in q_model.graph_def.node: @@ -129,19 +133,20 @@ def test_tensorflow_recover(self): if node.name in q_model_const_value: self.assertEqual(tensor_value, q_model_const_value[node.name]) + class TestTensorflowRecoverForceBF16(unittest.TestCase): @classmethod def setUpClass(self): - os.environ['FORCE_BF16'] = '1' + os.environ["FORCE_BF16"] = "1" build_fake_yaml_2() @classmethod def tearDownClass(self): - del os.environ['FORCE_BF16'] - os.remove('fake_yaml_2.yaml') - if os.path.exists('./test.pb'): - os.remove('test.pb') - shutil.rmtree('./saved', ignore_errors=True) + del os.environ["FORCE_BF16"] + os.remove("fake_yaml_2.yaml") + if os.path.exists("./test.pb"): + os.remove("test.pb") + shutil.rmtree("./saved", ignore_errors=True) @disable_random() @unittest.skipIf(tf.__version__ < "2.0", "currently bf16 converter only support tf > 2.0") @@ -150,44 +155,45 @@ def test_tensorflow_recover_bf16(self): top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_weights_2 = tf.compat.v1.get_variable("weight_2", [3, 8, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_weights_2 = tf.compat.v1.get_variable( + "weight_2", [3, 8, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") relu = tf.nn.relu(conv) max_pool = tf.nn.max_pool(relu, ksize=1, strides=[1, 2, 2, 1], padding="SAME") - conv_bias = tf.compat.v1.get_variable("bias", [16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[ - 1, 2, 2, 1], padding="VALID", name='conv1_3') + conv_bias = tf.compat.v1.get_variable("bias", [16], initializer=tf.compat.v1.random_normal_initializer()) + conv_1 = tf.nn.conv2d(max_pool, conv_weights_2, strides=[1, 2, 2, 1], padding="VALID", name="conv1_3") conv_bias = tf.math.add(conv_1, conv_bias) - relu6 = tf.nn.relu6(conv_bias, name='op_to_store') + relu6 = tf.nn.relu6(conv_bias, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] def eval(model): return 0.5 - + with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) constant_graph = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) - with gfile.GFile('./test.pb', "wb") as f: + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) + with gfile.GFile("./test.pb", "wb") as f: f.write(constant_graph.SerializeToString()) from neural_compressor.experimental import MixedPrecision - convert = MixedPrecision('./fake_yaml_2.yaml') + + convert = MixedPrecision("./fake_yaml_2.yaml") convert.model = constant_graph convert.eval_func = eval output_model = convert.fit() found_cast_op = False from neural_compressor.utils.utility import recover - recover_model = recover('./test.pb', 
'./saved/history.snapshot', 0) + + recover_model = recover("./test.pb", "./saved/history.snapshot", 0) q_model_const_value = {} for node in output_model.graph_def.node: @@ -196,7 +202,7 @@ def eval(model): if not tensor_value.shape: q_model_const_value[node.name] = tensor_value for node in recover_model.graph_def.node: - if node.op == 'Cast': + if node.op == "Cast": found_cast_op = True continue if node.op == "Const": diff --git a/test/quantization/test_weight_only_quantization.py b/test/quantization/test_weight_only_quantization.py index 8d5a2f32271..9ceb410ea44 100644 --- a/test/quantization/test_weight_only_quantization.py +++ b/test/quantization/test_weight_only_quantization.py @@ -1,15 +1,15 @@ import sys + sys.path.append("./") -import unittest import copy +import unittest + import torch +import transformers -from neural_compressor.adaptor.torch_utils.weight_only import ( - rtn_quantize, awq_quantize, gptq_quantize, teq_quantize -) -from neural_compressor.adaptor.torch_utils.smooth_quant import GraphTrace from neural_compressor.adaptor.torch_utils.model_wrapper import WeightOnlyLinear -import transformers +from neural_compressor.adaptor.torch_utils.smooth_quant import GraphTrace +from neural_compressor.adaptor.torch_utils.weight_only import awq_quantize, gptq_quantize, rtn_quantize, teq_quantize class Model(torch.nn.Module): @@ -24,7 +24,7 @@ def forward(self, x): return out -class SimpleDataLoader(): +class SimpleDataLoader: def __init__(self): self.batch_size = 1 self.input = torch.randn([1, 32]) @@ -32,6 +32,7 @@ def __init__(self): def __iter__(self): yield self.input + class TestAWQWeightOnlyQuant(unittest.TestCase): @classmethod def setUpClass(self): @@ -39,13 +40,13 @@ def setUpClass(self): self.dataloader = SimpleDataLoader() self.example_inputs = torch.randn([1, 32]) self.gptj = transformers.AutoModelForCausalLM.from_pretrained( - 'hf-internal-testing/tiny-random-GPTJForCausalLM', + "hf-internal-testing/tiny-random-GPTJForCausalLM", torchscript=True, ) self.lm_input = torch.ones([1, 10], dtype=torch.long) def test_trace(self): - op_types = ['Linear'] + op_types = ["Linear"] tg = GraphTrace() # absorb_to_layer={'absorb_layer': absorbed_layer} absorb_to_layer, no_absorb_layers = tg.get_absorb_to_layer(self.model, self.example_inputs, op_types) @@ -60,16 +61,12 @@ def test_rtn(self): self.assertTrue(isinstance(model1.fc1, torch.nn.Linear)) weight_config = { # 'op_name': (bit, group_size, sheme) - 'fc1': { - 'bits': 8, - 'group_size': -1, - 'scheme': 'sym' - }, - 'fc2': { - 'bits': 4, - 'group_size': 32, - 'scheme': 'asym', - 'quantile': 0.95, # not required. + "fc1": {"bits": 8, "group_size": -1, "scheme": "sym"}, + "fc2": { + "bits": 4, + "group_size": 32, + "scheme": "asym", + "quantile": 0.95, # not required. 
}, } model2 = rtn_quantize(fp32_model, weight_config=weight_config) @@ -79,11 +76,16 @@ def test_rtn(self): def test_awq(self): example_inputs = torch.ones([1, 10], dtype=torch.long) from neural_compressor.adaptor.torch_utils.awq import ActAwareWeightQuant + model = transformers.AutoModelForCausalLM.from_pretrained( - 'facebook/opt-125m', torchscript=True,) + "facebook/opt-125m", + torchscript=True, + ) + class LLMCalibDataloader: def __init__(self): self.batch_size = 1 + def __iter__(self): for i in range(2): yield example_inputs @@ -98,10 +100,9 @@ def __iter__(self): def calib_func(model): for i in range(2): model(self.lm_input) + out1 = self.gptj(example_inputs) - awq = ActAwareWeightQuant( - self.gptj, calib_func=calib_func, example_inputs=self.lm_input, - bits=8, group_size=-1) + awq = ActAwareWeightQuant(self.gptj, calib_func=calib_func, example_inputs=self.lm_input, bits=8, group_size=-1) qdq_model = awq.quantize() out2 = qdq_model(example_inputs) self.assertTrue(torch.allclose(out1[0], out2[0], atol=1e-2)) @@ -117,13 +118,13 @@ class TestGPTQWeightOnlyQuant(unittest.TestCase): @classmethod def setUpClass(self): self.gptj = transformers.AutoModelForCausalLM.from_pretrained( - 'hf-internal-testing/tiny-random-GPTJForCausalLM', + "hf-internal-testing/tiny-random-GPTJForCausalLM", torchscript=True, ) self.gptj.seqlen = 512 def test_gptq(self): - class GPTQLLMDataLoader(): + class GPTQLLMDataLoader: def __init__(self): self.batch_size = 1 @@ -134,59 +135,62 @@ def __iter__(self): dataloader = GPTQLLMDataLoader() model = copy.deepcopy(self.gptj) weight_config = { - 'transformer.h.0.attn.k_proj':{ - 'wbits': 4, - 'group_size': 128, - 'sym': True, - 'percdamp': 0.01, - 'perchannel': False + "transformer.h.0.attn.k_proj": { + "wbits": 4, + "group_size": 128, + "sym": True, + "percdamp": 0.01, + "perchannel": False, }, - 'transformer.h.1.attn.k_proj':{ - 'wbits': 3, - 'group_size': -1, - 'sym': False, - 'percdamp': 0.01, - 'act_order': True, + "transformer.h.1.attn.k_proj": { + "wbits": 3, + "group_size": -1, + "sym": False, + "percdamp": 0.01, + "act_order": True, }, - 'transformer.h.2.attn.k_proj':{ - 'wbits': 3, - 'group_size': 32, - 'sym': False, - 'percdamp': 0.01, - 'mse': True, - 'act_order': False + "transformer.h.2.attn.k_proj": { + "wbits": 3, + "group_size": 32, + "sym": False, + "percdamp": 0.01, + "mse": True, + "act_order": False, }, - 'transformer.h.3.attn.k_proj':{ - 'wbits': 3, - 'group_size': 256, - 'sym': False, - 'percdamp': 0.01, - 'mse': True, - 'act_order': False + "transformer.h.3.attn.k_proj": { + "wbits": 3, + "group_size": 256, + "sym": False, + "percdamp": 0.01, + "mse": True, + "act_order": False, }, } - quantizer = gptq_quantize(model, weight_config=weight_config, dataloader=dataloader, ) + quantizer = gptq_quantize( + model, + weight_config=weight_config, + dataloader=dataloader, + ) self.assertTrue(isinstance(model, torch.nn.Module)) del model model = copy.deepcopy(self.gptj) - weight_config = { - "wbits": 4 - } + weight_config = {"wbits": 4} quantizer = gptq_quantize(model, weight_config=weight_config, dataloader=dataloader, use_max_length=False) self.assertTrue(isinstance(model, torch.nn.Module)) del model + class TestTEQWeightOnlyQuant(unittest.TestCase): @classmethod def setUpClass(self): self.gptj = transformers.AutoModelForCausalLM.from_pretrained( - 'hf-internal-testing/tiny-random-GPTJForCausalLM', + "hf-internal-testing/tiny-random-GPTJForCausalLM", torchscript=True, ) self.gptj.seqlen = 512 - def generate_random_corpus(self, nsamples = 32): + def 
generate_random_corpus(self, nsamples=32): meta_data = [] for _ in range(nsamples): inp = torch.ones([1, 512], dtype=torch.long) @@ -203,26 +207,21 @@ def test_teq(self): weight_config = { # 'op_name': (bit, group_size, sheme) - 'transformer.h.0.mlp.fc_in': { - 'bits': 8, - 'group_size': -1, - 'scheme': 'sym' - }, - 'transformer.h.0.mlp.fc_out': { - 'bits': 4, - 'group_size': 32, - 'scheme': 'asym' - }, - } - absorb_dict = { - 'transformer.h.0.mlp.fc_in': ['transformer.h.0.mlp.fc_out'] + "transformer.h.0.mlp.fc_in": {"bits": 8, "group_size": -1, "scheme": "sym"}, + "transformer.h.0.mlp.fc_out": {"bits": 4, "group_size": 32, "scheme": "asym"}, } - extra_config = {'folding': True} - - - model = teq_quantize(model, weight_config=weight_config, absorb_to_layer=absorb_dict, - extra_config=extra_config, dataloader=dataloader) + absorb_dict = {"transformer.h.0.mlp.fc_in": ["transformer.h.0.mlp.fc_out"]} + extra_config = {"folding": True} + + model = teq_quantize( + model, + weight_config=weight_config, + absorb_to_layer=absorb_dict, + extra_config=extra_config, + dataloader=dataloader, + ) self.assertTrue(isinstance(model, torch.nn.Module)) + if __name__ == "__main__": unittest.main() diff --git a/test/requirements.txt b/test/requirements.txt index 1d82ef96a71..7a045d19d88 100644 --- a/test/requirements.txt +++ b/test/requirements.txt @@ -1,19 +1,19 @@ +--find-links https://download.pytorch.org/whl/torch_stable.html +accelerate==0.21.0 +dynast==1.3.0 +horovod +intel-extension-for-pytorch intel-tensorflow>=2.12.0 +mxnet-mkl +neural-compressor onnx onnxruntime ---find-links https://download.pytorch.org/whl/torch_stable.html +onnxruntime-extensions; python_version < '3.11' +optimum +tensorflow-addons +tf2onnx +tf_slim torch torchvision -mxnet-mkl -neural-compressor -tf_slim transformers>=4.30.2 -accelerate==0.21.0 -horovod -tensorflow-addons -onnxruntime-extensions; python_version < '3.11' -dynast==1.3.0 -intel-extension-for-pytorch -tf2onnx xgboost -optimum diff --git a/test/scheduler/test_oneshot.py b/test/scheduler/test_oneshot.py index 50696fbb234..ac1e914bc20 100644 --- a/test/scheduler/test_oneshot.py +++ b/test/scheduler/test_oneshot.py @@ -1,21 +1,21 @@ -import os import copy +import os import shutil import unittest import torch -import torchvision import torch.nn as nn -import neural_compressor.adaptor.pytorch as nc_torch +import torchvision +from packaging.version import Version +import neural_compressor.adaptor.pytorch as nc_torch from neural_compressor.conf.config import DistillationConf, PruningConf from neural_compressor.data import Datasets from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader from neural_compressor.experimental.scheduler import Scheduler from neural_compressor.training import prepare_compression -from neural_compressor.utils.pytorch import load from neural_compressor.utils import logger -from packaging.version import Version +from neural_compressor.utils.pytorch import load PT_VERSION = nc_torch.get_torch_version() if PT_VERSION >= Version("1.8.0-rc1"): @@ -118,35 +118,35 @@ def build_fake_yaml(): - with open('fake.yaml', 'w', encoding="utf-8") as f: + with open("fake.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) def build_fake_yaml2(): - with open('fake2.yaml', 'w', encoding="utf-8") as f: + with open("fake2.yaml", "w", encoding="utf-8") as f: f.write(fake2_yaml) def build_fake_yaml3(): - with open('fake3.yaml', 'w', encoding="utf-8") as f: + with open("fake3.yaml", "w", encoding="utf-8") as f: f.write(fake3_yaml) 
def build_fx_fake_yaml(): - fx_fake_yaml = fake_yaml.replace('pytorch', 'pytorch_fx') - with open('fx_fake.yaml', 'w', encoding="utf-8") as f: + fx_fake_yaml = fake_yaml.replace("pytorch", "pytorch_fx") + with open("fx_fake.yaml", "w", encoding="utf-8") as f: f.write(fx_fake_yaml) def build_fx_fake_yaml2(): - fx_fake2_yaml = fake2_yaml.replace('pytorch', 'pytorch_fx') - with open('fx_fake2.yaml', 'w', encoding="utf-8") as f: + fx_fake2_yaml = fake2_yaml.replace("pytorch", "pytorch_fx") + with open("fx_fake2.yaml", "w", encoding="utf-8") as f: f.write(fx_fake2_yaml) def build_fx_fake_yaml3(): - fx_fake3_yaml = fake3_yaml.replace('pytorch', 'pytorch_fx') - with open('fx_fake3.yaml', 'w', encoding="utf-8") as f: + fx_fake3_yaml = fake3_yaml.replace("pytorch", "pytorch_fx") + with open("fx_fake3.yaml", "w", encoding="utf-8") as f: f.write(fx_fake3_yaml) @@ -182,24 +182,25 @@ def setUpClass(cls): @classmethod def tearDownClass(cls): - os.remove('fake.yaml') - os.remove('fake2.yaml') - os.remove('fake3.yaml') - os.remove('fx_fake.yaml') - os.remove('fx_fake2.yaml') - os.remove('fx_fake3.yaml') - shutil.rmtree('./saved', ignore_errors=True) - shutil.rmtree('runs', ignore_errors=True) - shutil.rmtree('nc_workspace', ignore_errors=True) + os.remove("fake.yaml") + os.remove("fake2.yaml") + os.remove("fake3.yaml") + os.remove("fx_fake.yaml") + os.remove("fx_fake2.yaml") + os.remove("fx_fake3.yaml") + shutil.rmtree("./saved", ignore_errors=True) + shutil.rmtree("runs", ignore_errors=True) + shutil.rmtree("nc_workspace", ignore_errors=True) def test_prune_qat_oneshot(self): from neural_compressor.experimental import Pruning, Quantization - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(16, 3, 224, 224), low=0., high=1., label=True) + + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(16, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) q_model = copy.deepcopy(self.q_model) - prune = Pruning('./fake.yaml') - quantizer = Quantization('./fake2.yaml') + prune = Pruning("./fake.yaml") + quantizer = Quantization("./fake2.yaml") scheduler = Scheduler() scheduler.model = q_model combination = scheduler.combine(prune, quantizer) @@ -216,7 +217,7 @@ def train_func_for_nc(model): combination.on_epoch_begin(nepoch) for image, target in dummy_dataloader: combination.on_step_begin(cnt) - print('.', end='') + print(".", end="") cnt += 1 output = model(image) loss = criterion(output, target) @@ -234,19 +235,17 @@ def train_func_for_nc(model): combination.train_dataloader = dummy_dataloader scheduler.append(combination) opt_model = scheduler() - opt_model.save('./saved') - logger.info(20 * '=' + 'test_prune_qat_oneshot' + 20 * '=') + opt_model.save("./saved") + logger.info(20 * "=" + "test_prune_qat_oneshot" + 20 * "=") try: conv_weight = opt_model.model.layer1[0].conv1.weight().dequantize() except: conv_weight = opt_model.model.layer1[0].conv1.weight - self.assertAlmostEqual((conv_weight == 0).sum().item() / conv_weight.numel(), - 0.64, - delta=0.05) - self.assertEqual(combination.__repr__().lower(), 'combination of pruning,quantization') + self.assertAlmostEqual((conv_weight == 0).sum().item() / conv_weight.numel(), 0.64, delta=0.05) + self.assertEqual(combination.__repr__().lower(), "combination of pruning,quantization") # reloading int8 model - reloaded_model = load('./saved', copy.deepcopy(self.q_model)) + reloaded_model = load("./saved", copy.deepcopy(self.q_model)) try: reloaded_conv_weight = 
reloaded_model.layer1[0].conv1.weight().dequantize() except: @@ -255,13 +254,14 @@ def train_func_for_nc(model): def test_distillation_qat_oneshot(self): from neural_compressor.experimental import Distillation, Quantization - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(16, 3, 224, 224), low=0., high=1., label=True) + + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(16, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) model = copy.deepcopy(self.model) q_model = copy.deepcopy(self.q_model) - distiller = Distillation('./fake3.yaml') - quantizer = Quantization('./fake2.yaml') + distiller = Distillation("./fake3.yaml") + quantizer = Quantization("./fake2.yaml") scheduler = Scheduler() distiller.teacher_model = model scheduler.model = q_model @@ -279,7 +279,7 @@ def train_func_for_nc(model): combination.on_epoch_begin(nepoch) for image, target in dummy_dataloader: combination.on_step_begin(cnt) - print('.', end='') + print(".", end="") cnt += 1 output = model(image) loss = criterion(output, target) @@ -298,23 +298,24 @@ def train_func_for_nc(model): combination.train_dataloader = dummy_dataloader scheduler.append(combination) opt_model = scheduler() - opt_model.save('./saved') - logger.info(20 * '=' + 'test_distillation_qat_oneshot' + 20 * '=') + opt_model.save("./saved") + logger.info(20 * "=" + "test_distillation_qat_oneshot" + 20 * "=") - self.assertEqual(combination.__repr__().lower(), 'combination of distillation,quantization') + self.assertEqual(combination.__repr__().lower(), "combination of distillation,quantization") # reloading int8 model - reloaded_model = load('./saved', copy.deepcopy(self.q_model)) + reloaded_model = load("./saved", copy.deepcopy(self.q_model)) def test_prune_qat_distillation_oneshot(self): - from neural_compressor.experimental import Pruning, Quantization, Distillation - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(16, 3, 224, 224), low=0., high=1., label=True) + from neural_compressor.experimental import Distillation, Pruning, Quantization + + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(16, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) model = copy.deepcopy(self.model) q_model = copy.deepcopy(self.q_model) - prune = Pruning('./fake.yaml') - quantizer = Quantization('./fake2.yaml') - distiller = Distillation('./fake3.yaml') + prune = Pruning("./fake.yaml") + quantizer = Quantization("./fake2.yaml") + distiller = Distillation("./fake3.yaml") scheduler = Scheduler() distiller.teacher_model = model scheduler.model = q_model @@ -332,7 +333,7 @@ def train_func_for_nc(model): combination.on_epoch_begin(nepoch) for image, target in dummy_dataloader: combination.on_step_begin(cnt) - print('.', end='') + print(".", end="") cnt += 1 output = model(image) loss = criterion(output, target) @@ -352,24 +353,23 @@ def train_func_for_nc(model): combination.train_dataloader = dummy_dataloader scheduler.append(combination) opt_model = scheduler() - logger.info(20 * '=' + 'test_prune_qat_distillation_oneshot' + 20 * '=') + logger.info(20 * "=" + "test_prune_qat_distillation_oneshot" + 20 * "=") try: conv_weight = opt_model.model.layer1[0].conv1.weight().dequantize() except: conv_weight = opt_model.model.layer1[0].conv1.weight - self.assertAlmostEqual((conv_weight == 0).sum().item() / conv_weight.numel(), - 0.64, - delta=0.05) - 
self.assertEqual(combination.__repr__().lower(), 'combination of pruning,quantization,distillation') + self.assertAlmostEqual((conv_weight == 0).sum().item() / conv_weight.numel(), 0.64, delta=0.05) + self.assertEqual(combination.__repr__().lower(), "combination of pruning,quantization,distillation") def test_prune_qat_oneshot_fx(self): from neural_compressor.experimental import Pruning, Quantization - datasets = Datasets('pytorch_fx') - dummy_dataset = datasets['dummy'](shape=(16, 3, 224, 224), low=0., high=1., label=True) + + datasets = Datasets("pytorch_fx") + dummy_dataset = datasets["dummy"](shape=(16, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) - prune = Pruning('./fx_fake.yaml') - quantizer = Quantization('./fx_fake2.yaml') + prune = Pruning("./fx_fake.yaml") + quantizer = Quantization("./fx_fake2.yaml") scheduler = Scheduler() model = copy.deepcopy(self.model) scheduler.model = model @@ -388,7 +388,7 @@ def train_func_for_nc(model): combination.on_epoch_begin(nepoch) for image, target in dummy_dataloader: combination.on_step_begin(cnt) - print('.', end='') + print(".", end="") cnt += 1 output = model(image) loss = criterion(output, target) @@ -407,28 +407,26 @@ def train_func_for_nc(model): combination.train_dataloader = dummy_dataloader scheduler.append(combination) opt_model = scheduler() - opt_model.save('./saved') - logger.info(20 * '=' + 'test_prune_qat_oneshot_fx' + 20 * '=') - conv_weight = opt_model.model.state_dict()['layer1.0.conv1.weight'] - self.assertAlmostEqual((conv_weight == 0).sum().item() / conv_weight.numel(), - 0.64, - delta=0.05) - self.assertEqual(combination.__repr__().lower(), 'combination of pruning,quantization') + opt_model.save("./saved") + logger.info(20 * "=" + "test_prune_qat_oneshot_fx" + 20 * "=") + conv_weight = opt_model.model.state_dict()["layer1.0.conv1.weight"] + self.assertAlmostEqual((conv_weight == 0).sum().item() / conv_weight.numel(), 0.64, delta=0.05) + self.assertEqual(combination.__repr__().lower(), "combination of pruning,quantization") # reloading int8 model - reloaded_model = load('./saved', copy.deepcopy(self.model), dataloader=dummy_dataloader) - reloaded_conv_weight = reloaded_model.state_dict()['layer1.0.conv1.weight'] + reloaded_model = load("./saved", copy.deepcopy(self.model), dataloader=dummy_dataloader) + reloaded_conv_weight = reloaded_model.state_dict()["layer1.0.conv1.weight"] self.assertTrue(torch.equal(reloaded_conv_weight, conv_weight)) - @unittest.skipIf(PT_VERSION < Version("1.9.0-rc1"), - "requires higher version of torch than 1.9.0") + @unittest.skipIf(PT_VERSION < Version("1.9.0-rc1"), "requires higher version of torch than 1.9.0") def test_distillation_qat_oneshot_fx(self): from neural_compressor.experimental import Distillation, Quantization - datasets = Datasets('pytorch_fx') - dummy_dataset = datasets['dummy'](shape=(16, 3, 224, 224), low=0., high=1., label=True) + + datasets = Datasets("pytorch_fx") + dummy_dataset = datasets["dummy"](shape=(16, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) model = DynamicControlModel() - distiller = Distillation('./fx_fake3.yaml') - quantizer = Quantization('./fx_fake2.yaml') + distiller = Distillation("./fx_fake3.yaml") + quantizer = Quantization("./fx_fake2.yaml") scheduler = Scheduler() distiller.teacher_model = copy.deepcopy(model) scheduler.model = model @@ -446,7 +444,7 @@ def train_func_for_nc(model): combination.on_epoch_begin(nepoch) for image, target in 
dummy_dataloader: combination.on_step_begin(cnt) - print('.', end='') + print(".", end="") cnt += 1 output = model(image) loss = criterion(output, target) @@ -466,21 +464,22 @@ def train_func_for_nc(model): combination.train_dataloader = dummy_dataloader scheduler.append(combination) opt_model = scheduler() - opt_model.save('./saved') - logger.info(20 * '=' + 'test_distillation_qat_oneshot_fx' + 20 * '=') + opt_model.save("./saved") + logger.info(20 * "=" + "test_distillation_qat_oneshot_fx" + 20 * "=") - self.assertEqual(combination.__repr__().lower(), 'combination of distillation,quantization') + self.assertEqual(combination.__repr__().lower(), "combination of distillation,quantization") # reloading int8 model model = DynamicControlModel() - reloaded_model = load('./saved', model, dataloader=dummy_dataloader) + reloaded_model = load("./saved", model, dataloader=dummy_dataloader) def test_distillation_prune_oneshot_fx(self): from neural_compressor.experimental import Distillation, Pruning - datasets = Datasets('pytorch_fx') - dummy_dataset = datasets['dummy'](shape=(16, 3, 224, 224), low=0., high=1., label=True) + + datasets = Datasets("pytorch_fx") + dummy_dataset = datasets["dummy"](shape=(16, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) - distiller = Distillation('./fx_fake3.yaml') - pruner = Pruning('./fx_fake.yaml') + distiller = Distillation("./fx_fake3.yaml") + pruner = Pruning("./fx_fake.yaml") scheduler = Scheduler() model = copy.deepcopy(self.model) distiller.teacher_model = copy.deepcopy(model) @@ -499,7 +498,7 @@ def train_func_for_nc(model): combination.on_epoch_begin(nepoch) for image, target in dummy_dataloader: combination.on_step_begin(cnt) - print('.', end='') + print(".", end="") cnt += 1 output = model(image) loss = criterion(output, target) @@ -519,28 +518,26 @@ def train_func_for_nc(model): combination.train_dataloader = dummy_dataloader scheduler.append(combination) opt_model = scheduler() - logger.info(20 * '=' + 'test_distillation_prune_oneshot_fx' + 20 * '=') + logger.info(20 * "=" + "test_distillation_prune_oneshot_fx" + 20 * "=") try: - conv_weight = dict(opt_model.model.layer1.named_modules())['0'].conv1.weight().dequantize() + conv_weight = dict(opt_model.model.layer1.named_modules())["0"].conv1.weight().dequantize() except: - conv_weight = dict(opt_model.model.layer1.named_modules())['0'].conv1.weight - self.assertAlmostEqual((conv_weight == 0).sum().item() / conv_weight.numel(), - 0.64, - delta=0.05) - self.assertEqual(combination.__repr__().lower(), 'combination of distillation,pruning') - - @unittest.skipIf(PT_VERSION < Version("1.9.0-rc1"), - "requires higher version of torch than 1.9.0") + conv_weight = dict(opt_model.model.layer1.named_modules())["0"].conv1.weight + self.assertAlmostEqual((conv_weight == 0).sum().item() / conv_weight.numel(), 0.64, delta=0.05) + self.assertEqual(combination.__repr__().lower(), "combination of distillation,pruning") + + @unittest.skipIf(PT_VERSION < Version("1.9.0-rc1"), "requires higher version of torch than 1.9.0") def test_prune_qat_distillation_oneshot_fx(self): - from neural_compressor.experimental import Pruning, Quantization, Distillation - datasets = Datasets('pytorch_fx') - dummy_dataset = datasets['dummy'](shape=(16, 3, 224, 224), low=0., high=1., label=True) + from neural_compressor.experimental import Distillation, Pruning, Quantization + + datasets = Datasets("pytorch_fx") + dummy_dataset = datasets["dummy"](shape=(16, 3, 224, 224), low=0.0, high=1.0, 
label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) model = copy.deepcopy(self.model) - prune = Pruning('./fx_fake.yaml') - quantizer = Quantization('./fx_fake2.yaml') - distiller = Distillation('./fx_fake3.yaml') + prune = Pruning("./fx_fake.yaml") + quantizer = Quantization("./fx_fake2.yaml") + distiller = Distillation("./fx_fake3.yaml") scheduler = Scheduler() distiller.teacher_model = copy.deepcopy(model) scheduler.model = model @@ -558,7 +555,7 @@ def train_func_for_nc(model): combination.on_epoch_begin(nepoch) for image, target in dummy_dataloader: combination.on_step_begin(cnt) - print('.', end='') + print(".", end="") cnt += 1 output = model(image) loss = criterion(output, target) @@ -578,19 +575,14 @@ def train_func_for_nc(model): combination.train_dataloader = dummy_dataloader scheduler.append(combination) opt_model = scheduler() - logger.info(20 * '=' + 'test_prune_qat_distillation_oneshot_fx' + 20 * '=') + logger.info(20 * "=" + "test_prune_qat_distillation_oneshot_fx" + 20 * "=") try: - conv_weight = \ - dict(opt_model.model.layer1.named_modules())['0'].conv1.weight().dequantize() + conv_weight = dict(opt_model.model.layer1.named_modules())["0"].conv1.weight().dequantize() except: - conv_weight = dict(opt_model.model.layer1.named_modules())['0'].conv1.weight - self.assertAlmostEqual((conv_weight == 0).sum().item() / conv_weight.numel(), - 0.64, - delta=0.05) - self.assertEqual( - combination.__repr__().lower(), 'combination of pruning,quantization,distillation' - ) + conv_weight = dict(opt_model.model.layer1.named_modules())["0"].conv1.weight + self.assertAlmostEqual((conv_weight == 0).sum().item() / conv_weight.numel(), 0.64, delta=0.05) + self.assertEqual(combination.__repr__().lower(), "combination of pruning,quantization,distillation") if __name__ == "__main__": diff --git a/test/scheduler/test_orchestration.py b/test/scheduler/test_orchestration.py index a636d0d81b2..60ecef37c5d 100644 --- a/test/scheduler/test_orchestration.py +++ b/test/scheduler/test_orchestration.py @@ -1,13 +1,18 @@ -import os import copy +import os import shutil import unittest import torch -import torchvision import torch.nn as nn -from neural_compressor.config import DistillationConfig, KnowledgeDistillationLossConfig, \ - QuantizationAwareTrainingConfig, WeightPruningConfig +import torchvision + +from neural_compressor.config import ( + DistillationConfig, + KnowledgeDistillationLossConfig, + QuantizationAwareTrainingConfig, + WeightPruningConfig, +) from neural_compressor.data import Datasets from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader from neural_compressor.training import prepare_compression @@ -17,36 +22,32 @@ class TestPruning(unittest.TestCase): model = torchvision.models.resnet18() def test_distillation_prune_qat_oneshot_with_new_API(self): - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(16, 3, 224, 224), low=0., high=1., label=True) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(16, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.model.to(device) model = copy.deepcopy(self.model) - distillation_criterion = KnowledgeDistillationLossConfig(loss_types=['CE', 'KL']) + distillation_criterion = KnowledgeDistillationLossConfig(loss_types=["CE", "KL"]) d_conf = DistillationConfig(copy.deepcopy(self.model), distillation_criterion) - p_conf = 
WeightPruningConfig( - [{'start_step': 0, 'end_step': 2}], target_sparsity=0.64, pruning_scope="local") + p_conf = WeightPruningConfig([{"start_step": 0, "end_step": 2}], target_sparsity=0.64, pruning_scope="local") q_conf = QuantizationAwareTrainingConfig() compression_manager = prepare_compression(model=model, confs=[d_conf, p_conf, q_conf]) compression_manager.callbacks.on_train_begin() model = compression_manager.model + def train_func_for_nc(model): epochs = 3 iters = 3 criterion = nn.CrossEntropyLoss() - optimizer = torch.optim.SGD(model.parameters(), - lr=0.001, - momentum=0.1, - nesterov=True, - weight_decay=0.001) + optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.1, nesterov=True, weight_decay=0.001) for nepoch in range(epochs): model.train() cnt = 0 compression_manager.callbacks.on_epoch_begin(nepoch) for image, target in dummy_dataloader: compression_manager.callbacks.on_step_begin(cnt) - print('.', end='') + print(".", end="") cnt += 1 image = image.to(device) target = target.to(device) @@ -66,20 +67,18 @@ def train_func_for_nc(model): return model train_func_for_nc(model) - print(20 * '=' + 'test_distillation_prune_qat_oneshot' + 20 * '=') + print(20 * "=" + "test_distillation_prune_qat_oneshot" + 20 * "=") try: conv_weight = dict(model.model.layer1.named_modules())["0.conv1"].weight().dequantize() except: conv_weight = dict(model.model.layer1.named_modules())["0.conv1"].weight() - self.assertAlmostEqual((conv_weight == 0).sum().item() / conv_weight.numel(), - 0.64, - delta=0.05) + self.assertAlmostEqual((conv_weight == 0).sum().item() / conv_weight.numel(), 0.64, delta=0.05) self.assertTrue("quantized" in str(type(dict(model.model.layer1.named_modules())["0.conv1"]))) self.assertEqual( str(compression_manager.callbacks.callbacks_list), - "[Distillation Callbacks, Pruning Callbacks, Quantization Aware Training Callbacks]" + "[Distillation Callbacks, Pruning Callbacks, Quantization Aware Training Callbacks]", ) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/test/scheduler/test_scheduler.py b/test/scheduler/test_scheduler.py index 8d0c238deeb..4dc2b04ed5b 100644 --- a/test/scheduler/test_scheduler.py +++ b/test/scheduler/test_scheduler.py @@ -3,17 +3,18 @@ import unittest import torch -import torchvision import torch.nn as nn -import neural_compressor.adaptor.pytorch as nc_torch +import torchvision +from packaging.version import Version +import neural_compressor.adaptor.pytorch as nc_torch from neural_compressor.data import Datasets from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader from neural_compressor.experimental.scheduler import Scheduler -from packaging.version import Version PT_VERSION = nc_torch.get_torch_version() + def build_fake_yaml(): fake_yaml = """ model: @@ -44,9 +45,10 @@ def build_fake_yaml(): metric: topk: 1 """ - with open('fake.yaml', 'w', encoding="utf-8") as f: + with open("fake.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) + def build_fake_yaml2(): fake_yaml = """ model: @@ -105,9 +107,10 @@ def build_fake_yaml2(): shape: [16, 3, 224, 224] label: True """ - with open('fake2.yaml', 'w', encoding="utf-8") as f: + with open("fake2.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) + def build_fake_yaml3(): fake_yaml = """ model: @@ -146,9 +149,10 @@ def build_fake_yaml3(): timeout: 0 random_seed: 9527 """ - with open('fake3.yaml', 'w', encoding="utf-8") as f: + with open("fake3.yaml", "w", encoding="utf-8") as f: 
f.write(fake_yaml) + def build_fake_yaml4(): fake_yaml = """ model: @@ -207,9 +211,10 @@ def build_fake_yaml4(): shape: [16, 3, 224, 224] label: True """ - with open('fake4.yaml', 'w', encoding="utf-8") as f: + with open("fake4.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) + def build_fake_yaml5(): fake_yaml = """ model: @@ -248,9 +253,10 @@ def build_fake_yaml5(): timeout: 0 random_seed: 9527 """ - with open('fake5.yaml', 'w', encoding="utf-8") as f: + with open("fake5.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) + def build_fake_yaml6(): fake_yaml = """ model: @@ -291,11 +297,11 @@ def build_fake_yaml6(): shape: [16, 3, 224, 224] label: True """ - with open('fake6.yaml', 'w', encoding="utf-8") as f: + with open("fake6.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) -class TestPruning(unittest.TestCase): +class TestPruning(unittest.TestCase): model = torchvision.models.resnet18() q_model = torchvision.models.quantization.resnet18() q_model_teacher = torchvision.models.quantization.resnet50() @@ -311,23 +317,24 @@ def setUpClass(cls): @classmethod def tearDownClass(cls): - os.remove('fake.yaml') - os.remove('fake2.yaml') - os.remove('fake3.yaml') - os.remove('fake4.yaml') - os.remove('fake5.yaml') - os.remove('fake6.yaml') - shutil.rmtree('./saved', ignore_errors=True) - shutil.rmtree('runs', ignore_errors=True) - shutil.rmtree('nc_workspace', ignore_errors=True) + os.remove("fake.yaml") + os.remove("fake2.yaml") + os.remove("fake3.yaml") + os.remove("fake4.yaml") + os.remove("fake5.yaml") + os.remove("fake6.yaml") + shutil.rmtree("./saved", ignore_errors=True) + shutil.rmtree("runs", ignore_errors=True) + shutil.rmtree("nc_workspace", ignore_errors=True) def test_pruning(self): from neural_compressor.experimental import Pruning, common - prune = Pruning('fake.yaml') + + prune = Pruning("fake.yaml") scheduler = Scheduler() scheduler.model = self.model - datasets = Datasets('pytorch') - dummy_dataset = datasets['dummy'](shape=(16, 3, 224, 224), low=0., high=1., label=True) + datasets = Datasets("pytorch") + dummy_dataset = datasets["dummy"](shape=(16, 3, 224, 224), low=0.0, high=1.0, label=True) dummy_dataloader = PyTorchDataLoader(dummy_dataset) def training_func_for_nc(model): @@ -341,7 +348,7 @@ def training_func_for_nc(model): prune.on_epoch_begin(nepoch) for image, target in dummy_dataloader: prune.on_step_begin(cnt) - print('.', end='') + print(".", end="") cnt += 1 output = model(image) loss = criterion(output, target) @@ -361,26 +368,26 @@ def training_func_for_nc(model): def test_pure_yaml_pruning(self): from neural_compressor.experimental import Pruning, common - prune = Pruning('fake2.yaml') + + prune = Pruning("fake2.yaml") scheduler = Scheduler() scheduler.model = self.model scheduler.append(prune) opt_model = scheduler.fit() opt_model.report_sparsity() try: - conv_weight = opt_model.model.layer1[0].conv1.weight.dequantize() + conv_weight = opt_model.model.layer1[0].conv1.weight.dequantize() except: - conv_weight = opt_model.model.layer1[0].conv1.weight - self.assertAlmostEqual((conv_weight == 0).sum().item() / conv_weight.numel(), - 0.64, - delta=0.05) + conv_weight = opt_model.model.layer1[0].conv1.weight + self.assertAlmostEqual((conv_weight == 0).sum().item() / conv_weight.numel(), 0.64, delta=0.05) def test_scheduler_qat_distillation(self): - from neural_compressor.experimental import Quantization, common, Distillation + from neural_compressor.experimental import Distillation, Quantization, common + self.q_model = 
torchvision.models.quantization.resnet18() self.q_model.fuse_model() - quantizer = Quantization('./fake3.yaml') - distiller = Distillation('./fake6.yaml') + quantizer = Quantization("./fake3.yaml") + distiller = Distillation("./fake6.yaml") scheduler = Scheduler() scheduler.model = self.q_model distiller.teacher_model = self.q_model_teacher @@ -389,20 +396,18 @@ def test_scheduler_qat_distillation(self): opt_model = scheduler.fit() opt_model.report_sparsity() try: - conv_weight = opt_model.model.layer1[0].conv1.weight().dequantize() + conv_weight = opt_model.model.layer1[0].conv1.weight().dequantize() except: - conv_weight = opt_model.model.layer1[0].conv1.weight - self.assertAlmostEqual((conv_weight == 0).sum().item() / conv_weight.numel(), - 0.01, - delta=0.01) - + conv_weight = opt_model.model.layer1[0].conv1.weight + self.assertAlmostEqual((conv_weight == 0).sum().item() / conv_weight.numel(), 0.01, delta=0.01) def test_combine_qat_pruning(self): - from neural_compressor.experimental import Pruning, common, Quantization + from neural_compressor.experimental import Pruning, Quantization, common + self.q_model = torchvision.models.quantization.resnet18() self.q_model.fuse_model() - quantizer = Quantization('./fake3.yaml') - prune = Pruning('./fake2.yaml') + quantizer = Quantization("./fake3.yaml") + prune = Pruning("./fake2.yaml") scheduler = Scheduler() scheduler.model = self.q_model combination = scheduler.combine(prune, quantizer) @@ -410,19 +415,18 @@ def test_combine_qat_pruning(self): opt_model = scheduler.fit() opt_model.report_sparsity() try: - conv_weight = opt_model.model.layer1[0].conv1.weight().dequantize() + conv_weight = opt_model.model.layer1[0].conv1.weight().dequantize() except: - conv_weight = opt_model.model.layer1[0].conv1.weight - self.assertAlmostEqual((conv_weight == 0).sum().item() / conv_weight.numel(), - 0.64, - delta=0.05) - self.assertEqual(combination.__repr__().lower(), 'combination of pruning,quantization') + conv_weight = opt_model.model.layer1[0].conv1.weight + self.assertAlmostEqual((conv_weight == 0).sum().item() / conv_weight.numel(), 0.64, delta=0.05) + self.assertEqual(combination.__repr__().lower(), "combination of pruning,quantization") def test_combine_qat_distillation(self): - from neural_compressor.experimental import Quantization, common, Distillation + from neural_compressor.experimental import Distillation, Quantization, common + self.q_model.fuse_model() - quantizer = Quantization('./fake3.yaml') - distiller = Distillation('./fake6.yaml') + quantizer = Quantization("./fake3.yaml") + distiller = Distillation("./fake6.yaml") scheduler = Scheduler() scheduler.model = self.q_model distiller.teacher_model = self.q_model_teacher @@ -431,20 +435,21 @@ def test_combine_qat_distillation(self): opt_model = scheduler.fit() opt_model.report_sparsity() try: - conv_weight = opt_model.model.layer1[0].conv1.weight().dequantize() + conv_weight = opt_model.model.layer1[0].conv1.weight().dequantize() except: - conv_weight = opt_model.model.layer1[0].conv1.weight - self.assertAlmostEqual((conv_weight == 0).sum().item() / conv_weight.numel(), - 0.01, - delta=0.01) - self.assertEqual(combination.__repr__().lower(), 'combination of distillation,quantization') - - @unittest.skipIf(PT_VERSION < Version("1.9.0-rc1"), - "Please use PyTroch 1.9 or higher version for Quantization & Pruning with pytorch_fx backend") + conv_weight = opt_model.model.layer1[0].conv1.weight + self.assertAlmostEqual((conv_weight == 0).sum().item() / conv_weight.numel(), 0.01, delta=0.01) + 
self.assertEqual(combination.__repr__().lower(), "combination of distillation,quantization") + + @unittest.skipIf( + PT_VERSION < Version("1.9.0-rc1"), + "Please use PyTroch 1.9 or higher version for Quantization & Pruning with pytorch_fx backend", + ) def test_combine_fx(self): - from neural_compressor.experimental import Pruning, common, Quantization - quantizer = Quantization('./fake5.yaml') - prune = Pruning('./fake4.yaml') + from neural_compressor.experimental import Pruning, Quantization, common + + quantizer = Quantization("./fake5.yaml") + prune = Pruning("./fake4.yaml") scheduler = Scheduler() scheduler.model = self.model combination = scheduler.combine(prune, quantizer) @@ -452,13 +457,12 @@ def test_combine_fx(self): opt_model = scheduler.fit() opt_model.report_sparsity() try: - conv_weight = dict(opt_model.model.layer1.named_modules())['0'].conv1.weight().dequantize() + conv_weight = dict(opt_model.model.layer1.named_modules())["0"].conv1.weight().dequantize() except: - conv_weight = dict(opt_model.model.layer1.named_modules())['0'].conv1.weight - self.assertAlmostEqual((conv_weight == 0).sum().item() / conv_weight.numel(), - 0.64, - delta=0.05) - self.assertEqual(combination.__repr__().lower(), 'combination of pruning,quantization') + conv_weight = dict(opt_model.model.layer1.named_modules())["0"].conv1.weight + self.assertAlmostEqual((conv_weight == 0).sum().item() / conv_weight.numel(), 0.64, delta=0.05) + self.assertEqual(combination.__repr__().lower(), "combination of pruning,quantization") + if __name__ == "__main__": unittest.main() diff --git a/test/strategy/test_basic.py b/test/strategy/test_basic.py index 335cc86ad8e..aa5b75a08a7 100644 --- a/test/strategy/test_basic.py +++ b/test/strategy/test_basic.py @@ -1,51 +1,62 @@ -"""Tests for quantization""" -import numpy as np -import unittest -import shutil +"""Tests for quantization.""" import os +import shutil +import unittest + +import numpy as np from neural_compressor.config import options def build_fake_model(): import tensorflow as tf + try: graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') - y = tf.constant(np.random.random((2,2,1,1)).astype(np.float32), name='y') - z = tf.constant(np.random.random((1,1,1,1)).astype(np.float32), name='z') - op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') - op2 = tf.nn.conv2d(input=op, filters=z, strides=[1,1,1,1], padding='VALID', ) - last_identity = tf.identity(op2, name='op2_to_store') + x = tf.compat.v1.placeholder(tf.float32, shape=(1, 3, 3, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)).astype(np.float32), name="y") + z = tf.constant(np.random.random((1, 1, 1, 1)).astype(np.float32), name="z") + op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") + op2 = tf.nn.conv2d( + input=op, + filters=z, + strides=[1, 1, 1, 1], + padding="VALID", + ) + last_identity = tf.identity(op2, name="op2_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op2_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op2_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") except: graph = 
tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') - y = tf.constant(np.random.random((2,2,1,1)).astype(np.float32), name='y') - z = tf.constant(np.random.random((1,1,1,1)).astype(np.float32), name='z') - op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') - op2 = tf.nn.conv2d(input=op, filters=z, strides=[1,1,1,1], padding='VALID') - last_identity = tf.identity(op2, name='op2_to_store') + x = tf.compat.v1.placeholder(tf.float32, shape=(1, 3, 3, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)).astype(np.float32), name="y") + z = tf.constant(np.random.random((1, 1, 1, 1)).astype(np.float32), name="z") + op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") + op2 = tf.nn.conv2d(input=op, filters=z, strides=[1, 1, 1, 1], padding="VALID") + last_identity = tf.identity(op2, name="op2_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op2_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op2_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") return graph -class TestBasicTuningStrategy(unittest.TestCase): +class TestBasicTuningStrategy(unittest.TestCase): @classmethod def setUpClass(self): self.constant_graph = build_fake_model() @@ -53,67 +64,71 @@ def setUpClass(self): @classmethod def tearDownClass(self): - shutil.rmtree('saved', ignore_errors=True) + shutil.rmtree("saved", ignore_errors=True) shutil.rmtree(self.workspace) - + def test_run_basic_one_trial_new_api(self): - from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig - from neural_compressor.data import Datasets, DATALOADERS - + from neural_compressor.data import DATALOADERS, Datasets + from neural_compressor.quantization import fit + # dataset and dataloader dataset = Datasets("tensorflow")["dummy"](((100, 3, 3, 1))) dataloader = DATALOADERS["tensorflow"](dataset) - + def fake_eval(model): return 1 - + # tuning and accuracy criterion conf = PostTrainingQuantConfig() q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader=dataloader, eval_func=fake_eval) self.assertIsNotNone(q_model) - def test_diagnosis(self): - from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig - from neural_compressor.data import Datasets, DATALOADERS - + from neural_compressor.data import DATALOADERS, Datasets + from neural_compressor.quantization import fit + # dataset and dataloader dataset = Datasets("tensorflow")["dummy"](((100, 3, 3, 1))) dataloader = DATALOADERS["tensorflow"](dataset) - + # tuning and accuracy criterion conf = PostTrainingQuantConfig(diagnosis=True) - q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader= dataloader,\ - eval_func=lambda model: 1) - self.assertEqual(os.path.exists(os.path.join(self.workspace, 'inspect_saved/fp32/inspect_result.pkl')), True) - self.assertEqual(os.path.exists(os.path.join(self.workspace, 'inspect_saved/quan/inspect_result.pkl')), True) - - + q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader=dataloader, eval_func=lambda model: 1) + 
self.assertEqual(os.path.exists(os.path.join(self.workspace, "inspect_saved/fp32/inspect_result.pkl")), True) + self.assertEqual(os.path.exists(os.path.join(self.workspace, "inspect_saved/quan/inspect_result.pkl")), True) def test_run_create_eval_from_metric_and_dataloader(self): - from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig - from neural_compressor.data import Datasets, DATALOADERS - + from neural_compressor.data import DATALOADERS, Datasets + from neural_compressor.quantization import fit + # dataset and dataloader dataset = Datasets("tensorflow")["dummy"](((100, 3, 3, 1))) dataloader = DATALOADERS["tensorflow"](dataset) from neural_compressor.metric import METRICS - metrics = METRICS('tensorflow') - top1 = metrics['topk']() - + + metrics = METRICS("tensorflow") + top1 = metrics["topk"]() + # tuning and accuracy criterion conf = PostTrainingQuantConfig() - q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader= dataloader,\ - eval_dataloader=dataloader, eval_metric=top1) + q_model = fit( + model=self.constant_graph, + conf=conf, + calib_dataloader=dataloader, + eval_dataloader=dataloader, + eval_metric=top1, + ) def test_no_tuning(self): import torchvision - from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig - from neural_compressor.data import Datasets, DATALOADERS + from neural_compressor.data import DATALOADERS, Datasets + from neural_compressor.quantization import fit + conf = PostTrainingQuantConfig() conf.performance_only = True # test performance_only without eval_func @@ -122,29 +137,30 @@ def test_no_tuning(self): dataloader = DATALOADERS["pytorch"](dataset) # model model = torchvision.models.resnet18() - #tuning and accuracy criterion + # tuning and accuracy criterion conf = PostTrainingQuantConfig(quant_level=1) # fit q_model = fit(model=model, conf=conf, calib_dataloader=dataloader) self.assertIsNotNone(q_model) - def test_block_wise_tuining_stock_pt(self): - from neural_compressor.quantization import fit + from transformers import BertModel, BertTokenizer + from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.quantization import fit - from transformers import BertTokenizer, BertModel for backend in ["default"]: model_name = "bert-base-uncased" model = BertModel.from_pretrained(model_name) model.eval() + # dataset and dataloader class DummyNLPDataloader(object): def __init__(self, model_name): self.tokenizer = BertTokenizer.from_pretrained(model_name) self.sequence_a = "intel-extension-for-transformers is based in SH" self.sequence_b = "Where is intel-extension-for-transformers based? 
NYC or SH" - self.encoded_dict = self.tokenizer(self.sequence_a, self.sequence_b, return_tensors='pt') + self.encoded_dict = self.tokenizer(self.sequence_a, self.sequence_b, return_tensors="pt") self.batch_size = 1 def __iter__(self): @@ -156,8 +172,9 @@ def __next__(self): dataloader = DummyNLPDataloader(model_name) # tuning and accuracy criterion conf = PostTrainingQuantConfig(backend=backend) - q_model = fit(model=model, conf=conf, calib_dataloader= dataloader, eval_func=lambda model : 1) + q_model = fit(model=model, conf=conf, calib_dataloader=dataloader, eval_func=lambda model: 1) assert q_model is not None + if __name__ == "__main__": unittest.main() diff --git a/test/strategy/test_basic_1.x.py b/test/strategy/test_basic_1.x.py index 8cbb002894c..f38c1f986ac 100644 --- a/test/strategy/test_basic_1.x.py +++ b/test/strategy/test_basic_1.x.py @@ -1,12 +1,14 @@ -"""Tests for quantization""" -import numpy as np -import unittest -import shutil +"""Tests for quantization.""" import os +import shutil +import unittest + +import numpy as np import yaml + def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -24,14 +26,15 @@ def build_fake_yaml(): relative: 0.01 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: + yaml.dump(y, f) f.close() + def build_fake_yaml_recipe(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -54,14 +57,15 @@ def build_fake_yaml_recipe(): absolute: -1 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml_recipe.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) + with open("fake_yaml_recipe.yaml", "w", encoding="utf-8") as f: + yaml.dump(y, f) f.close() + def build_fake_yaml2(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -81,14 +85,15 @@ def build_fake_yaml2(): relative: -0.01 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml2.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) + with open("fake_yaml2.yaml", "w", encoding="utf-8") as f: + yaml.dump(y, f) f.close() + def build_fake_yaml3(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -111,14 +116,15 @@ def build_fake_yaml3(): relative: -0.01 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml3.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) + with open("fake_yaml3.yaml", "w", encoding="utf-8") as f: + yaml.dump(y, f) f.close() + def build_fake_yaml4(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -142,51 +148,62 @@ def build_fake_yaml4(): relative: -0.01 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml4.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) + with open("fake_yaml4.yaml", "w", encoding="utf-8") as f: + yaml.dump(y, f) f.close() + def build_fake_model(): import tensorflow as tf + try: graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') - y = tf.constant(np.random.random((2,2,1,1)).astype(np.float32), name='y') - z = tf.constant(np.random.random((1,1,1,1)).astype(np.float32), name='z') - op = tf.nn.conv2d(input=x, 
filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') - op2 = tf.nn.conv2d(input=op, filters=z, strides=[1,1,1,1], padding='VALID', ) - last_identity = tf.identity(op2, name='op2_to_store') + x = tf.compat.v1.placeholder(tf.float32, shape=(1, 3, 3, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)).astype(np.float32), name="y") + z = tf.constant(np.random.random((1, 1, 1, 1)).astype(np.float32), name="z") + op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") + op2 = tf.nn.conv2d( + input=op, + filters=z, + strides=[1, 1, 1, 1], + padding="VALID", + ) + last_identity = tf.identity(op2, name="op2_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op2_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op2_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") except: graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') - y = tf.constant(np.random.random((2,2,1,1)).astype(np.float32), name='y') - z = tf.constant(np.random.random((1,1,1,1)).astype(np.float32), name='z') - op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') - op2 = tf.nn.conv2d(input=op, filters=z, strides=[1,1,1,1], padding='VALID') - last_identity = tf.identity(op2, name='op2_to_store') + x = tf.compat.v1.placeholder(tf.float32, shape=(1, 3, 3, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)).astype(np.float32), name="y") + z = tf.constant(np.random.random((1, 1, 1, 1)).astype(np.float32), name="z") + op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") + op2 = tf.nn.conv2d(input=op, filters=z, strides=[1, 1, 1, 1], padding="VALID") + last_identity = tf.identity(op2, name="op2_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op2_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op2_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") return graph -class TestBasicTuningStrategy(unittest.TestCase): +class TestBasicTuningStrategy(unittest.TestCase): @classmethod def setUpClass(self): self.constant_graph = build_fake_model() @@ -198,42 +215,42 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') - os.remove('fake_yaml2.yaml') - os.remove('fake_yaml3.yaml') - os.remove('fake_yaml4.yaml') - os.remove('fake_yaml_recipe.yaml') - shutil.rmtree('saved', ignore_errors=True) + os.remove("fake_yaml.yaml") + os.remove("fake_yaml2.yaml") + os.remove("fake_yaml3.yaml") + os.remove("fake_yaml4.yaml") + os.remove("fake_yaml_recipe.yaml") + shutil.rmtree("saved", ignore_errors=True) def test_run_basic_one_trial(self): from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + quantizer = 
Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", (100, 3, 3, 1), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph quantizer.fit() # resume tuning history - quantizer.conf.usr_cfg.tuning.workspace.resume = 'saved/history.snapshot' + quantizer.conf.usr_cfg.tuning.workspace.resume = "saved/history.snapshot" quantizer.fit() def test_run_basic_max_trials(self): from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml2.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + quantizer = Quantization("fake_yaml2.yaml") + dataset = quantizer.dataset("dummy", (100, 3, 3, 1), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph quantizer.fit() - + def test_run_basic_recipe(self): from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml_recipe.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + quantizer = Quantization("fake_yaml_recipe.yaml") + dataset = quantizer.dataset("dummy", (100, 3, 3, 1), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph @@ -242,8 +259,8 @@ def test_run_basic_recipe(self): def test_run_basic_max_trials_multimetric(self): from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml3.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + quantizer = Quantization("fake_yaml3.yaml") + dataset = quantizer.dataset("dummy", (100, 3, 3, 1), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph @@ -252,12 +269,13 @@ def test_run_basic_max_trials_multimetric(self): def test_run_basic_max_trials_multimetric_weight(self): from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml4.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + quantizer = Quantization("fake_yaml4.yaml") + dataset = quantizer.dataset("dummy", (100, 3, 3, 1), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph quantizer.fit() + if __name__ == "__main__": unittest.main() diff --git a/test/strategy/test_bayesian.py b/test/strategy/test_bayesian.py index 2dee3595dd5..44c2269e048 100644 --- a/test/strategy/test_bayesian.py +++ b/test/strategy/test_bayesian.py @@ -1,191 +1,218 @@ -"""Tests for quantization""" -import numpy as np -import unittest +"""Tests for quantization.""" import shutil +import unittest + +import numpy as np + def build_fake_model(): import tensorflow as tf + try: graph = tf.Graph() graph_def = tf.GraphDef() with tf.Session() as sess: - x = tf.placeholder(tf.float64, shape=(1,3,3,1), name='x') - y = tf.constant(np.random.random((2,2,1,1)), name='y') - op = tf.nn.conv2d(input=x, filter=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + x = tf.placeholder(tf.float64, shape=(1, 3, 3, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)), name="y") + op = tf.nn.conv2d(input=x, filter=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") 
sess.run(tf.global_variables_initializer()) - constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ["op_to_store"]) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") except: graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float64, shape=(1,3,3,1), name='x') - y = tf.compat.v1.constant(np.random.random((2,2,1,1)), name='y') - op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + x = tf.compat.v1.placeholder(tf.float64, shape=(1, 3, 3, 1), name="x") + y = tf.compat.v1.constant(np.random.random((2, 2, 1, 1)), name="y") + op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") return graph + def create_test_graph(): - from tensorflow.core.framework import attr_value_pb2 - from tensorflow.core.framework import graph_pb2 - from tensorflow.core.framework import node_def_pb2 - from tensorflow.python.framework import tensor_util - from tensorflow.python.framework import dtypes + from tensorflow.core.framework import attr_value_pb2, graph_pb2, node_def_pb2 + from tensorflow.python.framework import dtypes, tensor_util + input_node = node_def_pb2.NodeDef() input_node.name = "input" input_node.op = "Placeholder" - input_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + input_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv1_weight_node = node_def_pb2.NodeDef() conv1_weight_node.name = "conv1_weights" conv1_weight_node.op = "Const" - conv1_weight_value = np.float32(np.abs(np.random.randn(3,3,3,32))) - conv1_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - conv1_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - conv1_weight_value, conv1_weight_value.dtype.type, conv1_weight_value.shape))) + conv1_weight_value = np.float32(np.abs(np.random.randn(3, 3, 3, 32))) + conv1_weight_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv1_weight_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv1_weight_value, conv1_weight_value.dtype.type, conv1_weight_value.shape + ) + ) + ) conv1_node = node_def_pb2.NodeDef() conv1_node.name = "conv1" conv1_node.op = "Conv2D" - conv1_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + conv1_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv1_node.input.extend([input_node.name, conv1_weight_node.name]) - conv1_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - 
conv1_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv1_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) - conv1_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + conv1_node.attr["strides"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv1_node.attr["dilations"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv1_node.attr["padding"].CopyFrom(attr_value_pb2.AttrValue(s=b"SAME")) + conv1_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) bias_node = node_def_pb2.NodeDef() bias_node.name = "conv1_bias" bias_node.op = "Const" bias_value = np.float32(np.abs(np.random.randn(32))) - bias_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - bias_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - bias_value, bias_value.dtype.type, bias_value.shape))) + bias_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(bias_value, bias_value.dtype.type, bias_value.shape) + ) + ) bias_add_node = node_def_pb2.NodeDef() bias_add_node.name = "conv1_bias_add" bias_add_node.op = "BiasAdd" - bias_add_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_add_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) bias_add_node.input.extend([conv1_node.name, bias_node.name]) - bias_add_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + bias_add_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) relu_node = node_def_pb2.NodeDef() relu_node.op = "Relu" relu_node.name = "relu" - relu_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + relu_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) relu_node.input.extend([bias_add_node.name]) conv2_weight_node = node_def_pb2.NodeDef() conv2_weight_node.name = "conv2_weights" conv2_weight_node.op = "Const" - conv2_weight_value = np.float32(np.abs(np.random.randn(3,3,32,32))) - conv2_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - conv2_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - conv2_weight_value, conv2_weight_value.dtype.type, conv2_weight_value.shape))) + conv2_weight_value = np.float32(np.abs(np.random.randn(3, 3, 32, 32))) + conv2_weight_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv2_weight_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv2_weight_value, conv2_weight_value.dtype.type, conv2_weight_value.shape + ) + ) + ) conv2_node = node_def_pb2.NodeDef() conv2_node.name = "conv2" conv2_node.op = "Conv2D" - conv2_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + conv2_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv2_node.input.extend([relu_node.name, conv2_weight_node.name]) - conv2_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - 
conv2_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv2_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) - conv2_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + conv2_node.attr["strides"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv2_node.attr["dilations"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv2_node.attr["padding"].CopyFrom(attr_value_pb2.AttrValue(s=b"SAME")) + conv2_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) bias_node2 = node_def_pb2.NodeDef() bias_node2.name = "conv2_bias" bias_node2.op = "Const" bias_value2 = np.float32(np.abs(np.random.randn(32))) - bias_node2.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - bias_node2.attr['value'].CopyFrom(attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - bias_value2, bias_value2.dtype.type, bias_value2.shape))) + bias_node2.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_node2.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(bias_value2, bias_value2.dtype.type, bias_value2.shape) + ) + ) bias_add_node2 = node_def_pb2.NodeDef() bias_add_node2.name = "conv2_bias_add" bias_add_node2.op = "BiasAdd" - bias_add_node2.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_add_node2.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) bias_add_node2.input.extend([conv2_node.name, bias_node2.name]) - bias_add_node2.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + bias_add_node2.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) relu_node2 = node_def_pb2.NodeDef() relu_node2.op = "Relu" relu_node2.name = "relu2" - relu_node2.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + relu_node2.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) relu_node2.input.extend([bias_add_node2.name]) conv3_weight_node = node_def_pb2.NodeDef() conv3_weight_node.name = "conv3_weights" conv3_weight_node.op = "Const" - conv3_weight_value = np.float32(np.abs(np.random.randn(3,3,32,32))) - conv3_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - conv3_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - conv3_weight_value, conv3_weight_value.dtype.type, conv3_weight_value.shape))) + conv3_weight_value = np.float32(np.abs(np.random.randn(3, 3, 32, 32))) + conv3_weight_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv3_weight_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv3_weight_value, conv3_weight_value.dtype.type, conv3_weight_value.shape + ) + ) + ) conv3_node = node_def_pb2.NodeDef() conv3_node.name = "conv3" conv3_node.op = "Conv2D" - conv3_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + conv3_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv3_node.input.extend([relu_node2.name, conv3_weight_node.name]) - conv3_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) 
- conv3_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv3_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) - conv3_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + conv3_node.attr["strides"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv3_node.attr["dilations"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv3_node.attr["padding"].CopyFrom(attr_value_pb2.AttrValue(s=b"SAME")) + conv3_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) identity_node = node_def_pb2.NodeDef() identity_node.name = "final" identity_node.op = "Identity" - identity_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + identity_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) identity_node.input.extend([conv3_node.name]) test_graph = graph_pb2.GraphDef() - test_graph.node.extend([input_node, - conv1_weight_node, - conv1_node, - bias_node, - bias_add_node, - relu_node, - conv2_weight_node, - conv2_node, - bias_node2, - bias_add_node2, - relu_node2, - conv3_weight_node, - conv3_node, - identity_node - ]) + test_graph.node.extend( + [ + input_node, + conv1_weight_node, + conv1_node, + bias_node, + bias_add_node, + relu_node, + conv2_weight_node, + conv2_node, + bias_node2, + bias_add_node2, + relu_node2, + conv3_weight_node, + conv3_node, + identity_node, + ] + ) return test_graph + def objective_func(params): - return params['x1']**2 + params['x2'] + return params["x1"] ** 2 + params["x2"] -class TestBayesianStrategy(unittest.TestCase): +class TestBayesianStrategy(unittest.TestCase): @classmethod def setUpClass(self): self.constant_graph = build_fake_model() @@ -196,77 +223,74 @@ def tearDownClass(self): shutil.rmtree("saved", ignore_errors=True) def test_run_bayesian_one_trial(self): + from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig, TuningCriterion + from neural_compressor.data import DATALOADERS, Datasets from neural_compressor.quantization import fit - from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion - from neural_compressor.data import Datasets, DATALOADERS # dataset and dataloader dataset = Datasets("tensorflow")["dummy"]((100, 3, 3, 1), label=True) dataloader = DATALOADERS["tensorflow"](dataset) # tuning and accuracy criterion - tune_cri = TuningCriterion(strategy='bayesian', max_trials=1) + tune_cri = TuningCriterion(strategy="bayesian", max_trials=1) acc_cri = AccuracyCriterion(tolerable_loss=0.01) conf = PostTrainingQuantConfig(quant_level=1, tuning_criterion=tune_cri, accuracy_criterion=acc_cri) + def fake_eval(model): return 1 - q_model = fit(model=self.constant_graph, - conf=conf, - calib_dataloader=dataloader, - eval_func=fake_eval) + q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader=dataloader, eval_func=fake_eval) self.assertNotEqual(q_model, None) def test_run_bayesian_max_trials(self): + from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig, TuningCriterion + from neural_compressor.data import DATALOADERS, Datasets from neural_compressor.quantization import fit - from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion - from neural_compressor.data import Datasets, DATALOADERS # dataset and dataloader dataset = 
Datasets("tensorflow")["dummy"]((100, 3, 3, 1), label=True) dataloader = DATALOADERS["tensorflow"](dataset) # tuning and accuracy criterion - tune_cri = TuningCriterion(strategy='bayesian', max_trials=3) + tune_cri = TuningCriterion(strategy="bayesian", max_trials=3) acc_cri = AccuracyCriterion(tolerable_loss=0.01) op_name_dict = { "conv1": { - "activation": {"dtype": ["fp32"]}, - }, - } + "activation": {"dtype": ["fp32"]}, + }, + } acc = [0, 1, 0.9, 1] + def fake_eval(model): acc.pop(0) return acc[0] - conf = PostTrainingQuantConfig(quant_level=1, op_name_dict = op_name_dict,\ - tuning_criterion=tune_cri, accuracy_criterion=acc_cri) - q_model = fit(model=self.constant_graph, - conf=conf, - calib_dataloader=dataloader, - eval_func=fake_eval) + conf = PostTrainingQuantConfig( + quant_level=1, op_name_dict=op_name_dict, tuning_criterion=tune_cri, accuracy_criterion=acc_cri + ) + q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader=dataloader, eval_func=fake_eval) self.assertNotEqual(q_model, None) - def test_bayesian_opt_class(self): from neural_compressor.strategy.bayesian import BayesianOptimization + pbounds = {} - pbounds['x1'] = (0, 1) - pbounds['x2'] = (0, 1) + pbounds["x1"] = (0, 1) + pbounds["x2"] = (0, 1) np.random.seed(9527) - bayes_opt = BayesianOptimization(pbounds=pbounds, - random_seed=9527) + bayes_opt = BayesianOptimization(pbounds=pbounds, random_seed=9527) for i in range(10): params = bayes_opt.gen_next_params() try: bayes_opt._space.register(params, objective_func(params)) except KeyError: pass - self.assertTrue(bayes_opt._space.max()['target'] == 2.0) + self.assertTrue(bayes_opt._space.max()["target"] == 2.0) self.assertTrue(len(bayes_opt._space.res()) == 8) + if __name__ == "__main__": unittest.main() diff --git a/test/strategy/test_bayesian_1.x.py b/test/strategy/test_bayesian_1.x.py index 4864b840763..c0bd52b8105 100644 --- a/test/strategy/test_bayesian_1.x.py +++ b/test/strategy/test_bayesian_1.x.py @@ -1,12 +1,14 @@ -"""Tests for quantization""" -import numpy as np -import unittest +"""Tests for quantization.""" import os import shutil +import unittest + +import numpy as np import yaml + def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -29,14 +31,15 @@ def build_fake_yaml(): relative: 0.01 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: + yaml.dump(y, f) f.close() + def build_fake_yaml2(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -64,194 +67,220 @@ def build_fake_yaml2(): relative: 0.01 workspace: path: saved - ''' - with open('fake_yaml2.yaml',"w",encoding="utf-8") as f: + """ + with open("fake_yaml2.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) f.close() + def build_fake_model(): import tensorflow as tf + try: graph = tf.Graph() graph_def = tf.GraphDef() with tf.Session() as sess: - x = tf.placeholder(tf.float64, shape=(1,3,3,1), name='x') - y = tf.constant(np.random.random((2,2,1,1)), name='y') - op = tf.nn.conv2d(input=x, filter=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + x = tf.placeholder(tf.float64, shape=(1, 3, 3, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)), name="y") + op = tf.nn.conv2d(input=x, filter=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.global_variables_initializer()) - constant_graph = 
tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ["op_to_store"]) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") except: graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float64, shape=(1,3,3,1), name='x') - y = tf.compat.v1.constant(np.random.random((2,2,1,1)), name='y') - op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + x = tf.compat.v1.placeholder(tf.float64, shape=(1, 3, 3, 1), name="x") + y = tf.compat.v1.constant(np.random.random((2, 2, 1, 1)), name="y") + op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") return graph + def create_test_graph(): - from tensorflow.core.framework import attr_value_pb2 - from tensorflow.core.framework import graph_pb2 - from tensorflow.core.framework import node_def_pb2 - from tensorflow.python.framework import tensor_util - from tensorflow.python.framework import dtypes + from tensorflow.core.framework import attr_value_pb2, graph_pb2, node_def_pb2 + from tensorflow.python.framework import dtypes, tensor_util + input_node = node_def_pb2.NodeDef() input_node.name = "input" input_node.op = "Placeholder" - input_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + input_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv1_weight_node = node_def_pb2.NodeDef() conv1_weight_node.name = "conv1_weights" conv1_weight_node.op = "Const" - conv1_weight_value = np.float32(np.abs(np.random.randn(3,3,3,32))) - conv1_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - conv1_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - conv1_weight_value, conv1_weight_value.dtype.type, conv1_weight_value.shape))) + conv1_weight_value = np.float32(np.abs(np.random.randn(3, 3, 3, 32))) + conv1_weight_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv1_weight_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv1_weight_value, conv1_weight_value.dtype.type, conv1_weight_value.shape + ) + ) + ) conv1_node = node_def_pb2.NodeDef() conv1_node.name = "conv1" conv1_node.op = "Conv2D" - conv1_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + conv1_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv1_node.input.extend([input_node.name, conv1_weight_node.name]) - conv1_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv1_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( - 
list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv1_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) - conv1_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + conv1_node.attr["strides"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv1_node.attr["dilations"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv1_node.attr["padding"].CopyFrom(attr_value_pb2.AttrValue(s=b"SAME")) + conv1_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) bias_node = node_def_pb2.NodeDef() bias_node.name = "conv1_bias" bias_node.op = "Const" bias_value = np.float32(np.abs(np.random.randn(32))) - bias_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - bias_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - bias_value, bias_value.dtype.type, bias_value.shape))) + bias_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(bias_value, bias_value.dtype.type, bias_value.shape) + ) + ) bias_add_node = node_def_pb2.NodeDef() bias_add_node.name = "conv1_bias_add" bias_add_node.op = "BiasAdd" - bias_add_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_add_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) bias_add_node.input.extend([conv1_node.name, bias_node.name]) - bias_add_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + bias_add_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) relu_node = node_def_pb2.NodeDef() relu_node.op = "Relu" relu_node.name = "relu" - relu_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + relu_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) relu_node.input.extend([bias_add_node.name]) conv2_weight_node = node_def_pb2.NodeDef() conv2_weight_node.name = "conv2_weights" conv2_weight_node.op = "Const" - conv2_weight_value = np.float32(np.abs(np.random.randn(3,3,32,32))) - conv2_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - conv2_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - conv2_weight_value, conv2_weight_value.dtype.type, conv2_weight_value.shape))) + conv2_weight_value = np.float32(np.abs(np.random.randn(3, 3, 32, 32))) + conv2_weight_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv2_weight_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv2_weight_value, conv2_weight_value.dtype.type, conv2_weight_value.shape + ) + ) + ) conv2_node = node_def_pb2.NodeDef() conv2_node.name = "conv2" conv2_node.op = "Conv2D" - conv2_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + conv2_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv2_node.input.extend([relu_node.name, conv2_weight_node.name]) - conv2_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv2_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( - 
list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv2_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) - conv2_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + conv2_node.attr["strides"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv2_node.attr["dilations"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv2_node.attr["padding"].CopyFrom(attr_value_pb2.AttrValue(s=b"SAME")) + conv2_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) bias_node2 = node_def_pb2.NodeDef() bias_node2.name = "conv2_bias" bias_node2.op = "Const" bias_value2 = np.float32(np.abs(np.random.randn(32))) - bias_node2.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - bias_node2.attr['value'].CopyFrom(attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - bias_value2, bias_value2.dtype.type, bias_value2.shape))) + bias_node2.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_node2.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(bias_value2, bias_value2.dtype.type, bias_value2.shape) + ) + ) bias_add_node2 = node_def_pb2.NodeDef() bias_add_node2.name = "conv2_bias_add" bias_add_node2.op = "BiasAdd" - bias_add_node2.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_add_node2.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) bias_add_node2.input.extend([conv2_node.name, bias_node2.name]) - bias_add_node2.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + bias_add_node2.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) relu_node2 = node_def_pb2.NodeDef() relu_node2.op = "Relu" relu_node2.name = "relu2" - relu_node2.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + relu_node2.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) relu_node2.input.extend([bias_add_node2.name]) conv3_weight_node = node_def_pb2.NodeDef() conv3_weight_node.name = "conv3_weights" conv3_weight_node.op = "Const" - conv3_weight_value = np.float32(np.abs(np.random.randn(3,3,32,32))) - conv3_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - conv3_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - conv3_weight_value, conv3_weight_value.dtype.type, conv3_weight_value.shape))) + conv3_weight_value = np.float32(np.abs(np.random.randn(3, 3, 32, 32))) + conv3_weight_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv3_weight_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv3_weight_value, conv3_weight_value.dtype.type, conv3_weight_value.shape + ) + ) + ) conv3_node = node_def_pb2.NodeDef() conv3_node.name = "conv3" conv3_node.op = "Conv2D" - conv3_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + conv3_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv3_node.input.extend([relu_node2.name, conv3_weight_node.name]) - conv3_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv3_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( 
- list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv3_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) - conv3_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + conv3_node.attr["strides"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv3_node.attr["dilations"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv3_node.attr["padding"].CopyFrom(attr_value_pb2.AttrValue(s=b"SAME")) + conv3_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) identity_node = node_def_pb2.NodeDef() identity_node.name = "final" identity_node.op = "Identity" - identity_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + identity_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) identity_node.input.extend([conv3_node.name]) test_graph = graph_pb2.GraphDef() - test_graph.node.extend([input_node, - conv1_weight_node, - conv1_node, - bias_node, - bias_add_node, - relu_node, - conv2_weight_node, - conv2_node, - bias_node2, - bias_add_node2, - relu_node2, - conv3_weight_node, - conv3_node, - identity_node - ]) + test_graph.node.extend( + [ + input_node, + conv1_weight_node, + conv1_node, + bias_node, + bias_add_node, + relu_node, + conv2_weight_node, + conv2_node, + bias_node2, + bias_add_node2, + relu_node2, + conv3_weight_node, + conv3_node, + identity_node, + ] + ) return test_graph + def objective_func(params): - return params['x1']**2 + params['x2'] + return params["x1"] ** 2 + params["x2"] -class TestQuantization(unittest.TestCase): +class TestQuantization(unittest.TestCase): @classmethod def setUpClass(self): self.constant_graph = build_fake_model() @@ -261,16 +290,16 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') - os.remove('fake_yaml2.yaml') + os.remove("fake_yaml.yaml") + os.remove("fake_yaml2.yaml") shutil.rmtree("saved", ignore_errors=True) def test_run_bayesian_one_trial(self): - from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 3, 3, 1), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 3, 3, 1), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph @@ -278,10 +307,10 @@ def test_run_bayesian_one_trial(self): self.assertNotEqual(output_graph, None) def test_run_bayesian_max_trials(self): - from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml2.yaml') - dataset = quantizer.dataset('dummy', shape=(1, 224, 224, 3), label=True) + + quantizer = Quantization("fake_yaml2.yaml") + dataset = quantizer.dataset("dummy", shape=(1, 224, 224, 3), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = self.test_graph @@ -290,20 +319,21 @@ def test_run_bayesian_max_trials(self): def test_bayesian_opt_class(self): from neural_compressor.experimental.strategy.bayesian import BayesianOptimization + pbounds = {} - pbounds['x1'] = (0, 1) - pbounds['x2'] = (0, 1) + pbounds["x1"] = (0, 1) + pbounds["x2"] = (0, 1) np.random.seed(9527) - bayes_opt = BayesianOptimization(pbounds=pbounds, - random_seed=9527) + bayes_opt = 
BayesianOptimization(pbounds=pbounds, random_seed=9527) for i in range(10): params = bayes_opt.gen_next_params() try: bayes_opt._space.register(params, objective_func(params)) except KeyError: pass - self.assertTrue(bayes_opt._space.max()['target'] == 2.0) + self.assertTrue(bayes_opt._space.max()["target"] == 2.0) self.assertTrue(len(bayes_opt._space.res()) == 8) + if __name__ == "__main__": unittest.main() diff --git a/test/strategy/test_distributed_tuning.py b/test/strategy/test_distributed_tuning.py index 9dca860b9eb..1f1ce1828d8 100644 --- a/test/strategy/test_distributed_tuning.py +++ b/test/strategy/test_distributed_tuning.py @@ -1,21 +1,24 @@ -"""Tests for distributed tuning strategy""" +"""Tests for distributed tuning strategy.""" +import importlib import os -import sys -import cpuinfo -import signal +import re import shutil +import signal import subprocess +import sys import unittest -import re + +import cpuinfo from neural_compressor.utils import logger -import importlib + if importlib.util.find_spec("mpi4py") is None: CONDITION = True else: # from mpi4py import MPI CONDITION = False + def build_fake_ut(): fake_ut = """ import shutil @@ -289,9 +292,10 @@ def test_pt_met_wait_before_met(self): unittest.main() """ - with open('fake_ut.py', 'w', encoding="utf-8") as f: + with open("fake_ut.py", "w", encoding="utf-8") as f: f.write(fake_ut) + class TestDistributedTuning(unittest.TestCase): @classmethod def setUpClass(cls): @@ -299,9 +303,9 @@ def setUpClass(cls): @classmethod def tearDownClass(cls): - os.remove('fake_ut.py') - shutil.rmtree('./saved', ignore_errors = True) - shutil.rmtree('runs', ignore_errors = True) + os.remove("fake_ut.py") + shutil.rmtree("./saved", ignore_errors=True) + shutil.rmtree("runs", ignore_errors=True) def setUp(self): logger.info(f"CPU: {cpuinfo.get_cpu_info()['brand_raw']}") @@ -313,33 +317,34 @@ def tearDown(self): @unittest.skipIf(CONDITION, "missing the mpi4py package") def test_distributed_tuning(self): distributed_cmds = [ - 'mpirun -np 3 python fake_ut.py TestDistributedTuning.test_mpi4py_installation', \ - 'mpirun -np 3 python fake_ut.py TestDistributedTuning.test_pt_stage_1_met', \ - 'mpirun -np 3 python fake_ut.py TestDistributedTuning.test_pt_stage_3_fp32_met', \ - 'mpirun -np 3 python fake_ut.py TestDistributedTuning.test_pt_stage_4_fp32_met', \ - 'mpirun -np 3 python fake_ut.py TestDistributedTuning.test_pt_stage_not_met', \ - 'mpirun -np 18 python fake_ut.py TestDistributedTuning.test_pt_num_of_nodes_more_than_len_of_tune_cfg_lst_met', + "mpirun -np 3 python fake_ut.py TestDistributedTuning.test_mpi4py_installation", + "mpirun -np 3 python fake_ut.py TestDistributedTuning.test_pt_stage_1_met", + "mpirun -np 3 python fake_ut.py TestDistributedTuning.test_pt_stage_3_fp32_met", + "mpirun -np 3 python fake_ut.py TestDistributedTuning.test_pt_stage_4_fp32_met", + "mpirun -np 3 python fake_ut.py TestDistributedTuning.test_pt_stage_not_met", + "mpirun -np 18 python fake_ut.py TestDistributedTuning.test_pt_num_of_nodes_more_than_len_of_tune_cfg_lst_met", ] for i, distributed_cmd in enumerate(distributed_cmds): - p = subprocess.Popen(distributed_cmd, preexec_fn = os.setsid, stdout = subprocess.PIPE, - stderr = subprocess.PIPE, shell=True) # nosec + p = subprocess.Popen( + distributed_cmd, preexec_fn=os.setsid, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True + ) # nosec try: out, error = p.communicate() logger.info(f"Test command: {distributed_cmd}") - logger.info(out.decode('utf-8')) - logger.info(error.decode('utf-8')) - matches = 
re.findall(r'FAILED', error.decode('utf-8')) + logger.info(out.decode("utf-8")) + logger.info(error.decode("utf-8")) + matches = re.findall(r"FAILED", error.decode("utf-8")) self.assertEqual(matches, []) - matches = re.findall(r'OK', error.decode('utf-8')) + matches = re.findall(r"OK", error.decode("utf-8")) if i == len(distributed_cmds) - 1: self.assertTrue(len(matches) == 18) elif i == 0: - rank_match = re.findall("rank (\d+) of", error.decode('utf-8')) - size_match = re.findall("of (\d+) processes", error.decode('utf-8')) - self.assertEqual(sorted(rank_match), ['0', '1', '2']) - self.assertEqual(size_match, ['3'] * 3) + rank_match = re.findall(r"rank (\d+) of", error.decode("utf-8")) + size_match = re.findall(r"of (\d+) processes", error.decode("utf-8")) + self.assertEqual(sorted(rank_match), ["0", "1", "2"]) + self.assertEqual(size_match, ["3"] * 3) else: self.assertTrue(len(matches) == 3) diff --git a/test/strategy/test_exhaustive.py b/test/strategy/test_exhaustive.py index bf1fd5457c3..e8afee504ef 100644 --- a/test/strategy/test_exhaustive.py +++ b/test/strategy/test_exhaustive.py @@ -1,42 +1,47 @@ -"""Tests for quantization""" -import numpy as np -import unittest +"""Tests for quantization.""" import shutil +import unittest + +import numpy as np + def build_fake_model(): import tensorflow as tf + try: graph = tf.Graph() graph_def = tf.GraphDef() with tf.Session() as sess: - x = tf.placeholder(tf.float64, shape=(1,3,3,1), name='x') - y = tf.constant(np.random.random((2,2,1,1)), name='y') - op = tf.nn.conv2d(input=x, filter=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + x = tf.placeholder(tf.float64, shape=(1, 3, 3, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)), name="y") + op = tf.nn.conv2d(input=x, filter=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.global_variables_initializer()) - constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ["op_to_store"]) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") except: graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float64, shape=(1,3,3,1), name='x') - y = tf.compat.v1.constant(np.random.random((2,2,1,1)), name='y') - op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + x = tf.compat.v1.placeholder(tf.float64, shape=(1, 3, 3, 1), name="x") + y = tf.compat.v1.constant(np.random.random((2, 2, 1, 1)), name="y") + op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") return graph -class TestExhaustiveStrategy(unittest.TestCase): +class TestExhaustiveStrategy(unittest.TestCase): @classmethod def setUpClass(self): self.constant_graph = build_fake_model() @@ -46,52 +51,49 @@ def tearDownClass(self): shutil.rmtree("saved", 
ignore_errors=True) def test_ru_exhaustive_one_trial(self): + from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig, TuningCriterion + from neural_compressor.data import DATALOADERS, Datasets from neural_compressor.quantization import fit - from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion - from neural_compressor.data import Datasets, DATALOADERS # dataset and dataloader dataset = Datasets("tensorflow")["dummy"]((100, 3, 3, 1), label=True) dataloader = DATALOADERS["tensorflow"](dataset) # tuning and accuracy criterion - tune_cri = TuningCriterion(strategy='exhaustive', max_trials=1) + tune_cri = TuningCriterion(strategy="exhaustive", max_trials=1) acc_cri = AccuracyCriterion(tolerable_loss=0.01) conf = PostTrainingQuantConfig(quant_level=1, tuning_criterion=tune_cri, accuracy_criterion=acc_cri) + def fake_eval(model): return 1 - q_model = fit(model=self.constant_graph, - conf=conf, - calib_dataloader=dataloader, - eval_func=fake_eval) + q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader=dataloader, eval_func=fake_eval) self.assertNotEqual(q_model, None) def test_ru_exhaustive_max_trials(self): + from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig, TuningCriterion + from neural_compressor.data import DATALOADERS, Datasets from neural_compressor.quantization import fit - from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion - from neural_compressor.data import Datasets, DATALOADERS # dataset and dataloader dataset = Datasets("tensorflow")["dummy"]((100, 3, 3, 1), label=True) dataloader = DATALOADERS["tensorflow"](dataset) # tuning and accuracy criterion - tune_cri = TuningCriterion(strategy='exhaustive', max_trials=3) + tune_cri = TuningCriterion(strategy="exhaustive", max_trials=3) acc_cri = AccuracyCriterion(tolerable_loss=0.01) acc = [0, 1, 0.9, 0.9, 1] + def fake_eval(model): acc.pop(0) return acc[0] conf = PostTrainingQuantConfig(quant_level=1, tuning_criterion=tune_cri, accuracy_criterion=acc_cri) - q_model = fit(model=self.constant_graph, - conf=conf, - calib_dataloader=dataloader, - eval_func=fake_eval) + q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader=dataloader, eval_func=fake_eval) self.assertNotEqual(q_model, None) + if __name__ == "__main__": unittest.main() diff --git a/test/strategy/test_exhaustive_1.x.py b/test/strategy/test_exhaustive_1.x.py index 40e3160be4b..d05e36ab2fc 100644 --- a/test/strategy/test_exhaustive_1.x.py +++ b/test/strategy/test_exhaustive_1.x.py @@ -1,12 +1,14 @@ -"""Tests for quantization""" -import numpy as np -import unittest +"""Tests for quantization.""" import os import shutil +import unittest + +import numpy as np import yaml + def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -24,14 +26,15 @@ def build_fake_yaml(): relative: 0.01 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: + yaml.dump(y, f) f.close() + def build_fake_yaml2(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -51,46 +54,50 @@ def build_fake_yaml2(): relative: -0.01 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml2.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) + with 
open("fake_yaml2.yaml", "w", encoding="utf-8") as f: + yaml.dump(y, f) f.close() + def build_fake_model(): import tensorflow as tf + try: graph = tf.Graph() graph_def = tf.GraphDef() with tf.Session() as sess: - x = tf.placeholder(tf.float64, shape=(1,3,3,1), name='x') - y = tf.constant(np.random.random((2,2,1,1)), name='y') - op = tf.nn.conv2d(input=x, filter=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + x = tf.placeholder(tf.float64, shape=(1, 3, 3, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)), name="y") + op = tf.nn.conv2d(input=x, filter=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.global_variables_initializer()) - constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ["op_to_store"]) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") except: graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float64, shape=(1,3,3,1), name='x') - y = tf.compat.v1.constant(np.random.random((2,2,1,1)), name='y') - op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + x = tf.compat.v1.placeholder(tf.float64, shape=(1, 3, 3, 1), name="x") + y = tf.compat.v1.constant(np.random.random((2, 2, 1, 1)), name="y") + op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") return graph -class TestQuantization(unittest.TestCase): +class TestQuantization(unittest.TestCase): @classmethod def setUpClass(self): self.constant_graph = build_fake_model() @@ -99,16 +106,16 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') - os.remove('fake_yaml2.yaml') + os.remove("fake_yaml.yaml") + os.remove("fake_yaml2.yaml") shutil.rmtree("saved", ignore_errors=True) def test_ru_exhaustive_one_trial(self): from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", (100, 3, 3, 1), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph @@ -117,12 +124,13 @@ def test_ru_exhaustive_one_trial(self): def test_ru_exhaustive_max_trials(self): from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml2.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + quantizer = Quantization("fake_yaml2.yaml") + dataset = quantizer.dataset("dummy", (100, 3, 3, 1), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph quantizer.fit() 
+ if __name__ == "__main__": unittest.main() diff --git a/test/strategy/test_hawq_v2_2.x.py b/test/strategy/test_hawq_v2_2.x.py index 5b02c0e7fbd..b19c69c89ad 100644 --- a/test/strategy/test_hawq_v2_2.x.py +++ b/test/strategy/test_hawq_v2_2.x.py @@ -1,4 +1,4 @@ -"""Tests for HAWQ v2 strategy""" +"""Tests for HAWQ v2 strategy.""" import copy import shutil @@ -6,35 +6,38 @@ from neural_compressor.utils import logger + # loss function for hawq-v2 def hawq_v2_loss(output, target): import torch + return torch.nn.CrossEntropyLoss()(output, target) -class TestHAWQV2TuningStrategy(unittest.TestCase): +class TestHAWQV2TuningStrategy(unittest.TestCase): @classmethod def setUpClass(self): import torchvision + self.model = torchvision.models.resnet18() @classmethod def tearDownClass(self): - shutil.rmtree('saved', ignore_errors=True) - shutil.rmtree('nc_workspace', ignore_errors=True) - + shutil.rmtree("saved", ignore_errors=True) + shutil.rmtree("nc_workspace", ignore_errors=True) def test_hawq_v2_pipeline(self): logger.info("*** Test: HAWQ v2 with pytorch model.") - from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion - from neural_compressor.data import Datasets, DATALOADERS + from neural_compressor.data import DATALOADERS, Datasets + from neural_compressor.quantization import fit # model model = copy.deepcopy(self.model) # fake evaluation function self.test_hawq_v2_pipeline_fake_acc = 0 + def _fake_eval(model): self.test_hawq_v2_pipeline_fake_acc -= 1 return self.test_hawq_v2_pipeline_fake_acc @@ -42,21 +45,18 @@ def _fake_eval(model): # dataset and dataloader dataset = Datasets("pytorch")["dummy"](((1, 3, 224, 224))) dataloader = DATALOADERS["pytorch"](dataset) - - #tuning and accuracy criterion - strategy_kwargs = {'hawq_v2_loss': hawq_v2_loss} - tuning_criterion = TuningCriterion(strategy='hawq_v2', strategy_kwargs=strategy_kwargs, max_trials=5) - conf = PostTrainingQuantConfig(approach="static", - quant_level=1, - tuning_criterion=tuning_criterion) + + # tuning and accuracy criterion + strategy_kwargs = {"hawq_v2_loss": hawq_v2_loss} + tuning_criterion = TuningCriterion(strategy="hawq_v2", strategy_kwargs=strategy_kwargs, max_trials=5) + conf = PostTrainingQuantConfig(approach="static", quant_level=1, tuning_criterion=tuning_criterion) # fit - q_model = fit(model=model, - conf=conf, - calib_dataloader=dataloader, - eval_dataloader=dataloader, - eval_func=_fake_eval) + q_model = fit( + model=model, conf=conf, calib_dataloader=dataloader, eval_dataloader=dataloader, eval_func=_fake_eval + ) self.assertIsNone(q_model) + if __name__ == "__main__": unittest.main() diff --git a/test/strategy/test_lower_bit_sampler.py b/test/strategy/test_lower_bit_sampler.py index eb0984d31e2..b9fcfd42077 100644 --- a/test/strategy/test_lower_bit_sampler.py +++ b/test/strategy/test_lower_bit_sampler.py @@ -1,11 +1,12 @@ -"""Tests for new data type""" -import unittest -import shutil +"""Tests for new data type.""" import os +import shutil +import unittest def build_model(): import torch + class M(torch.nn.Module): def __init__(self): super().__init__() @@ -17,45 +18,48 @@ def forward(self, x): x = x.view(1, -1) x = self.linear(x) return x + return M() def add_cap(filename): import yaml + int4_cap = { - 'static': { - 'Conv2d': { - 'weight': { - 'dtype': ['int4'], - 'scheme': ['sym'], - 'granularity': ['per_channel'], - 'algorithm': ['minmax'] - }, - 'activation': { - 'dtype': ['uint4'], - 'scheme': ['sym'], - 'granularity': ['per_tensor'], - 
'algorithm': ['kl', 'minmax'] - }, - }, + "static": { + "Conv2d": { + "weight": { + "dtype": ["int4"], + "scheme": ["sym"], + "granularity": ["per_channel"], + "algorithm": ["minmax"], + }, + "activation": { + "dtype": ["uint4"], + "scheme": ["sym"], + "granularity": ["per_tensor"], + "algorithm": ["kl", "minmax"], + }, + }, } } with open(filename) as f: con = yaml.safe_load(f) - con[0]['int4'] = int4_cap - with open(filename, 'w') as out: + con[0]["int4"] = int4_cap + with open(filename, "w") as out: yaml.dump(con, out) -class TestLowerBitQuant(unittest.TestCase): +class TestLowerBitQuant(unittest.TestCase): @classmethod def setUpClass(self): - import shutil import importlib - nc_path = os.path.dirname(importlib.util.find_spec('neural_compressor').origin) - self.src = os.path.join(nc_path, 'adaptor/pytorch_cpu.yaml') - self.dst = os.path.join(nc_path, 'adaptor/pytorch_cpu_backup.yaml') + import shutil + + nc_path = os.path.dirname(importlib.util.find_spec("neural_compressor").origin) + self.src = os.path.join(nc_path, "adaptor/pytorch_cpu.yaml") + self.dst = os.path.join(nc_path, "adaptor/pytorch_cpu_backup.yaml") shutil.copyfile(self.src, self.dst) add_cap(self.src) @@ -63,12 +67,12 @@ def setUpClass(self): def tearDownClass(self): shutil.copyfile(self.dst, self.src) os.remove(self.dst) - shutil.rmtree('saved', ignore_errors=True) + shutil.rmtree("saved", ignore_errors=True) def test_add_int4(self): - from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion - from neural_compressor.data import Datasets, DATALOADERS + from neural_compressor.data import DATALOADERS, Datasets + from neural_compressor.quantization import fit # dataset and dataloader dataset = Datasets("pytorch")["dummy"](((100, 3, 224, 224))) @@ -76,6 +80,7 @@ def test_add_int4(self): model = build_model() acc_lst = [1, 1.1, 0.9, 1.1, 1.0] + def fake_eval(model): res = acc_lst.pop(0) return res diff --git a/test/strategy/test_mse.py b/test/strategy/test_mse.py index 9a03ca40b41..b571d0fa222 100644 --- a/test/strategy/test_mse.py +++ b/test/strategy/test_mse.py @@ -1,191 +1,218 @@ -"""Tests for quantization""" -import numpy as np -import unittest +"""Tests for quantization.""" import shutil +import unittest + +import numpy as np + def build_fake_model(): import tensorflow as tf + try: graph = tf.Graph() graph_def = tf.GraphDef() with tf.Session() as sess: - x = tf.placeholder(tf.float64, shape=(1,3,3,1), name='x') - y = tf.constant(np.random.random((2,2,1,1)), name='y') - op = tf.nn.conv2d(input=x, filter=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + x = tf.placeholder(tf.float64, shape=(1, 3, 3, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)), name="y") + op = tf.nn.conv2d(input=x, filter=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.global_variables_initializer()) - constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ["op_to_store"]) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") except: graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float64, shape=(1,3,3,1), name='x') - y = tf.compat.v1.constant(np.random.random((2,2,1,1)), name='y') - op = tf.nn.conv2d(input=x, 
filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + x = tf.compat.v1.placeholder(tf.float64, shape=(1, 3, 3, 1), name="x") + y = tf.compat.v1.constant(np.random.random((2, 2, 1, 1)), name="y") + op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") return graph + def create_test_graph(): - from tensorflow.core.framework import attr_value_pb2 - from tensorflow.core.framework import graph_pb2 - from tensorflow.core.framework import node_def_pb2 - from tensorflow.python.framework import tensor_util - from tensorflow.python.framework import dtypes + from tensorflow.core.framework import attr_value_pb2, graph_pb2, node_def_pb2 + from tensorflow.python.framework import dtypes, tensor_util + input_node = node_def_pb2.NodeDef() input_node.name = "input" input_node.op = "Placeholder" - input_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + input_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv1_weight_node = node_def_pb2.NodeDef() conv1_weight_node.name = "conv1_weights" conv1_weight_node.op = "Const" - conv1_weight_value = np.float32(np.abs(np.random.randn(3,3,3,32))) - conv1_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - conv1_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - conv1_weight_value, conv1_weight_value.dtype.type, conv1_weight_value.shape))) + conv1_weight_value = np.float32(np.abs(np.random.randn(3, 3, 3, 32))) + conv1_weight_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv1_weight_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv1_weight_value, conv1_weight_value.dtype.type, conv1_weight_value.shape + ) + ) + ) conv1_node = node_def_pb2.NodeDef() conv1_node.name = "conv1" conv1_node.op = "Conv2D" - conv1_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + conv1_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv1_node.input.extend([input_node.name, conv1_weight_node.name]) - conv1_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv1_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv1_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) - conv1_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + conv1_node.attr["strides"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv1_node.attr["dilations"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv1_node.attr["padding"].CopyFrom(attr_value_pb2.AttrValue(s=b"SAME")) + conv1_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) bias_node = 
node_def_pb2.NodeDef() bias_node.name = "conv1_bias" bias_node.op = "Const" bias_value = np.float32(np.abs(np.random.randn(32))) - bias_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - bias_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - bias_value, bias_value.dtype.type, bias_value.shape))) + bias_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(bias_value, bias_value.dtype.type, bias_value.shape) + ) + ) bias_add_node = node_def_pb2.NodeDef() bias_add_node.name = "conv1_bias_add" bias_add_node.op = "BiasAdd" - bias_add_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_add_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) bias_add_node.input.extend([conv1_node.name, bias_node.name]) - bias_add_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + bias_add_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) relu_node = node_def_pb2.NodeDef() relu_node.op = "Relu" relu_node.name = "relu" - relu_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + relu_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) relu_node.input.extend([bias_add_node.name]) conv2_weight_node = node_def_pb2.NodeDef() conv2_weight_node.name = "conv2_weights" conv2_weight_node.op = "Const" - conv2_weight_value = np.float32(np.abs(np.random.randn(3,3,32,32))) - conv2_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - conv2_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - conv2_weight_value, conv2_weight_value.dtype.type, conv2_weight_value.shape))) + conv2_weight_value = np.float32(np.abs(np.random.randn(3, 3, 32, 32))) + conv2_weight_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv2_weight_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv2_weight_value, conv2_weight_value.dtype.type, conv2_weight_value.shape + ) + ) + ) conv2_node = node_def_pb2.NodeDef() conv2_node.name = "conv2" conv2_node.op = "Conv2D" - conv2_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + conv2_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv2_node.input.extend([relu_node.name, conv2_weight_node.name]) - conv2_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv2_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv2_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) - conv2_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + conv2_node.attr["strides"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv2_node.attr["dilations"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv2_node.attr["padding"].CopyFrom(attr_value_pb2.AttrValue(s=b"SAME")) + conv2_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) bias_node2 = node_def_pb2.NodeDef() 
bias_node2.name = "conv2_bias" bias_node2.op = "Const" bias_value2 = np.float32(np.abs(np.random.randn(32))) - bias_node2.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - bias_node2.attr['value'].CopyFrom(attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - bias_value2, bias_value2.dtype.type, bias_value2.shape))) + bias_node2.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_node2.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(bias_value2, bias_value2.dtype.type, bias_value2.shape) + ) + ) bias_add_node2 = node_def_pb2.NodeDef() bias_add_node2.name = "conv2_bias_add" bias_add_node2.op = "BiasAdd" - bias_add_node2.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_add_node2.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) bias_add_node2.input.extend([conv2_node.name, bias_node2.name]) - bias_add_node2.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + bias_add_node2.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) relu_node2 = node_def_pb2.NodeDef() relu_node2.op = "Relu" relu_node2.name = "relu2" - relu_node2.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + relu_node2.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) relu_node2.input.extend([bias_add_node2.name]) conv3_weight_node = node_def_pb2.NodeDef() conv3_weight_node.name = "conv3_weights" conv3_weight_node.op = "Const" - conv3_weight_value = np.float32(np.abs(np.random.randn(3,3,32,32))) - conv3_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - conv3_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - conv3_weight_value, conv3_weight_value.dtype.type, conv3_weight_value.shape))) + conv3_weight_value = np.float32(np.abs(np.random.randn(3, 3, 32, 32))) + conv3_weight_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv3_weight_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv3_weight_value, conv3_weight_value.dtype.type, conv3_weight_value.shape + ) + ) + ) conv3_node = node_def_pb2.NodeDef() conv3_node.name = "conv3" conv3_node.op = "Conv2D" - conv3_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + conv3_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv3_node.input.extend([relu_node2.name, conv3_weight_node.name]) - conv3_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv3_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv3_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) - conv3_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + conv3_node.attr["strides"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv3_node.attr["dilations"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv3_node.attr["padding"].CopyFrom(attr_value_pb2.AttrValue(s=b"SAME")) + conv3_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) identity_node = 
node_def_pb2.NodeDef() identity_node.name = "final" identity_node.op = "Identity" - identity_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + identity_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) identity_node.input.extend([conv3_node.name]) test_graph = graph_pb2.GraphDef() - test_graph.node.extend([input_node, - conv1_weight_node, - conv1_node, - bias_node, - bias_add_node, - relu_node, - conv2_weight_node, - conv2_node, - bias_node2, - bias_add_node2, - relu_node2, - conv3_weight_node, - conv3_node, - identity_node - ]) + test_graph.node.extend( + [ + input_node, + conv1_weight_node, + conv1_node, + bias_node, + bias_add_node, + relu_node, + conv2_weight_node, + conv2_node, + bias_node2, + bias_add_node2, + relu_node2, + conv3_weight_node, + conv3_node, + identity_node, + ] + ) return test_graph + def objective_func(params): - return params['x1']**2 + params['x2'] + return params["x1"] ** 2 + params["x2"] -class TestQuantization(unittest.TestCase): +class TestQuantization(unittest.TestCase): @classmethod def setUpClass(self): self.constant_graph = build_fake_model() @@ -196,58 +223,55 @@ def tearDownClass(self): shutil.rmtree("saved", ignore_errors=True) def test_run_mse_one_trial(self): + from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig, TuningCriterion + from neural_compressor.data import DATALOADERS, Datasets from neural_compressor.quantization import fit - from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion - from neural_compressor.data import Datasets, DATALOADERS # dataset and dataloader dataset = Datasets("tensorflow")["dummy"]((100, 3, 3, 1), label=True) dataloader = DATALOADERS["tensorflow"](dataset) # tuning and accuracy criterion - tune_cri = TuningCriterion(strategy='mse', max_trials=1) + tune_cri = TuningCriterion(strategy="mse", max_trials=1) acc_cri = AccuracyCriterion(tolerable_loss=0.01) conf = PostTrainingQuantConfig(quant_level=1, tuning_criterion=tune_cri, accuracy_criterion=acc_cri) + def fake_eval(model): return 1 - q_model = fit(model=self.constant_graph, - conf=conf, - calib_dataloader=dataloader, - eval_func=fake_eval) + q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader=dataloader, eval_func=fake_eval) self.assertNotEqual(q_model, None) def test_run_mse_max_trials(self): + from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig, TuningCriterion + from neural_compressor.data import DATALOADERS, Datasets from neural_compressor.quantization import fit - from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion - from neural_compressor.data import Datasets, DATALOADERS # dataset and dataloader dataset = Datasets("tensorflow")["dummy"]((100, 3, 3, 1), label=True) dataloader = DATALOADERS["tensorflow"](dataset) # tuning and accuracy criterion - tune_cri = TuningCriterion(strategy='mse', max_trials=3) + tune_cri = TuningCriterion(strategy="mse", max_trials=3) acc_cri = AccuracyCriterion(tolerable_loss=0.01) op_name_dict = { "conv1": { - "activation": {"dtype": ["fp32"]}, - }, - } + "activation": {"dtype": ["fp32"]}, + }, + } acc = [0, 1, 0.9, 1] + def fake_eval(model): acc.pop(0) return acc[0] - conf = PostTrainingQuantConfig(quant_level=1, op_name_dict = op_name_dict,\ - tuning_criterion=tune_cri, accuracy_criterion=acc_cri) - q_model = fit(model=self.constant_graph, - conf=conf, - calib_dataloader=dataloader, - 
eval_func=fake_eval) + conf = PostTrainingQuantConfig( + quant_level=1, op_name_dict=op_name_dict, tuning_criterion=tune_cri, accuracy_criterion=acc_cri + ) + q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader=dataloader, eval_func=fake_eval) self.assertNotEqual(q_model, None) diff --git a/test/strategy/test_mse_1.x.py b/test/strategy/test_mse_1.x.py index eab15e5fc7a..102358cf658 100644 --- a/test/strategy/test_mse_1.x.py +++ b/test/strategy/test_mse_1.x.py @@ -1,12 +1,14 @@ -"""Tests for quantization""" -import numpy as np -import unittest +"""Tests for quantization.""" import os import shutil +import unittest + +import numpy as np import yaml + def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -29,14 +31,15 @@ def build_fake_yaml(): relative: 0.01 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: + yaml.dump(y, f) f.close() + def build_fake_yaml2(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -64,194 +67,220 @@ def build_fake_yaml2(): relative: 0.01 workspace: path: saved - ''' - with open('fake_yaml2.yaml',"w",encoding="utf-8") as f: + """ + with open("fake_yaml2.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) f.close() + def build_fake_model(): import tensorflow as tf + try: graph = tf.Graph() graph_def = tf.GraphDef() with tf.Session() as sess: - x = tf.placeholder(tf.float64, shape=(1,3,3,1), name='x') - y = tf.constant(np.random.random((2,2,1,1)), name='y') - op = tf.nn.conv2d(input=x, filter=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + x = tf.placeholder(tf.float64, shape=(1, 3, 3, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)), name="y") + op = tf.nn.conv2d(input=x, filter=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.global_variables_initializer()) - constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ["op_to_store"]) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") except: graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float64, shape=(1,3,3,1), name='x') - y = tf.compat.v1.constant(np.random.random((2,2,1,1)), name='y') - op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + x = tf.compat.v1.placeholder(tf.float64, shape=(1, 3, 3, 1), name="x") + y = tf.compat.v1.constant(np.random.random((2, 2, 1, 1)), name="y") + op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") return graph + def create_test_graph(): - from tensorflow.core.framework import attr_value_pb2 - from 
tensorflow.core.framework import graph_pb2 - from tensorflow.core.framework import node_def_pb2 - from tensorflow.python.framework import tensor_util - from tensorflow.python.framework import dtypes + from tensorflow.core.framework import attr_value_pb2, graph_pb2, node_def_pb2 + from tensorflow.python.framework import dtypes, tensor_util + input_node = node_def_pb2.NodeDef() input_node.name = "input" input_node.op = "Placeholder" - input_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + input_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv1_weight_node = node_def_pb2.NodeDef() conv1_weight_node.name = "conv1_weights" conv1_weight_node.op = "Const" - conv1_weight_value = np.float32(np.abs(np.random.randn(3,3,3,32))) - conv1_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - conv1_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - conv1_weight_value, conv1_weight_value.dtype.type, conv1_weight_value.shape))) + conv1_weight_value = np.float32(np.abs(np.random.randn(3, 3, 3, 32))) + conv1_weight_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv1_weight_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv1_weight_value, conv1_weight_value.dtype.type, conv1_weight_value.shape + ) + ) + ) conv1_node = node_def_pb2.NodeDef() conv1_node.name = "conv1" conv1_node.op = "Conv2D" - conv1_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + conv1_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv1_node.input.extend([input_node.name, conv1_weight_node.name]) - conv1_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv1_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv1_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) - conv1_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + conv1_node.attr["strides"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv1_node.attr["dilations"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv1_node.attr["padding"].CopyFrom(attr_value_pb2.AttrValue(s=b"SAME")) + conv1_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) bias_node = node_def_pb2.NodeDef() bias_node.name = "conv1_bias" bias_node.op = "Const" bias_value = np.float32(np.abs(np.random.randn(32))) - bias_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - bias_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - bias_value, bias_value.dtype.type, bias_value.shape))) + bias_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(bias_value, bias_value.dtype.type, bias_value.shape) + ) + ) bias_add_node = node_def_pb2.NodeDef() bias_add_node.name = "conv1_bias_add" bias_add_node.op = "BiasAdd" - bias_add_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + 
bias_add_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) bias_add_node.input.extend([conv1_node.name, bias_node.name]) - bias_add_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + bias_add_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) relu_node = node_def_pb2.NodeDef() relu_node.op = "Relu" relu_node.name = "relu" - relu_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + relu_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) relu_node.input.extend([bias_add_node.name]) conv2_weight_node = node_def_pb2.NodeDef() conv2_weight_node.name = "conv2_weights" conv2_weight_node.op = "Const" - conv2_weight_value = np.float32(np.abs(np.random.randn(3,3,32,32))) - conv2_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - conv2_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - conv2_weight_value, conv2_weight_value.dtype.type, conv2_weight_value.shape))) + conv2_weight_value = np.float32(np.abs(np.random.randn(3, 3, 32, 32))) + conv2_weight_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv2_weight_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv2_weight_value, conv2_weight_value.dtype.type, conv2_weight_value.shape + ) + ) + ) conv2_node = node_def_pb2.NodeDef() conv2_node.name = "conv2" conv2_node.op = "Conv2D" - conv2_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + conv2_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv2_node.input.extend([relu_node.name, conv2_weight_node.name]) - conv2_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv2_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv2_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) - conv2_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + conv2_node.attr["strides"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv2_node.attr["dilations"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv2_node.attr["padding"].CopyFrom(attr_value_pb2.AttrValue(s=b"SAME")) + conv2_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) bias_node2 = node_def_pb2.NodeDef() bias_node2.name = "conv2_bias" bias_node2.op = "Const" bias_value2 = np.float32(np.abs(np.random.randn(32))) - bias_node2.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - bias_node2.attr['value'].CopyFrom(attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - bias_value2, bias_value2.dtype.type, bias_value2.shape))) + bias_node2.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_node2.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(bias_value2, bias_value2.dtype.type, bias_value2.shape) + ) + ) bias_add_node2 = node_def_pb2.NodeDef() bias_add_node2.name = "conv2_bias_add" bias_add_node2.op = "BiasAdd" - bias_add_node2.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + 
bias_add_node2.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) bias_add_node2.input.extend([conv2_node.name, bias_node2.name]) - bias_add_node2.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + bias_add_node2.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) relu_node2 = node_def_pb2.NodeDef() relu_node2.op = "Relu" relu_node2.name = "relu2" - relu_node2.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + relu_node2.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) relu_node2.input.extend([bias_add_node2.name]) conv3_weight_node = node_def_pb2.NodeDef() conv3_weight_node.name = "conv3_weights" conv3_weight_node.op = "Const" - conv3_weight_value = np.float32(np.abs(np.random.randn(3,3,32,32))) - conv3_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - conv3_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - conv3_weight_value, conv3_weight_value.dtype.type, conv3_weight_value.shape))) + conv3_weight_value = np.float32(np.abs(np.random.randn(3, 3, 32, 32))) + conv3_weight_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv3_weight_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv3_weight_value, conv3_weight_value.dtype.type, conv3_weight_value.shape + ) + ) + ) conv3_node = node_def_pb2.NodeDef() conv3_node.name = "conv3" conv3_node.op = "Conv2D" - conv3_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + conv3_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv3_node.input.extend([relu_node2.name, conv3_weight_node.name]) - conv3_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv3_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv3_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) - conv3_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + conv3_node.attr["strides"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv3_node.attr["dilations"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv3_node.attr["padding"].CopyFrom(attr_value_pb2.AttrValue(s=b"SAME")) + conv3_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) identity_node = node_def_pb2.NodeDef() identity_node.name = "final" identity_node.op = "Identity" - identity_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + identity_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) identity_node.input.extend([conv3_node.name]) test_graph = graph_pb2.GraphDef() - test_graph.node.extend([input_node, - conv1_weight_node, - conv1_node, - bias_node, - bias_add_node, - relu_node, - conv2_weight_node, - conv2_node, - bias_node2, - bias_add_node2, - relu_node2, - conv3_weight_node, - conv3_node, - identity_node - ]) + test_graph.node.extend( + [ + input_node, + conv1_weight_node, + conv1_node, + bias_node, + bias_add_node, + relu_node, + conv2_weight_node, + conv2_node, + bias_node2, + bias_add_node2, + relu_node2, + conv3_weight_node, + conv3_node, + 
identity_node, + ] + ) return test_graph + def objective_func(params): - return params['x1']**2 + params['x2'] + return params["x1"] ** 2 + params["x2"] -class TestQuantization(unittest.TestCase): +class TestQuantization(unittest.TestCase): @classmethod def setUpClass(self): self.constant_graph = build_fake_model() @@ -261,16 +290,16 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') - os.remove('fake_yaml2.yaml') + os.remove("fake_yaml.yaml") + os.remove("fake_yaml2.yaml") shutil.rmtree("saved", ignore_errors=True) def test_run_mse_one_trial(self): - from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 3, 3, 1), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 3, 3, 1), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph @@ -278,10 +307,10 @@ def test_run_mse_one_trial(self): self.assertNotEqual(output_graph, None) def test_run_mse_max_trials(self): - from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml2.yaml') - dataset = quantizer.dataset('dummy', shape=(1, 224, 224, 3), label=True) + + quantizer = Quantization("fake_yaml2.yaml") + dataset = quantizer.dataset("dummy", shape=(1, 224, 224, 3), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = self.test_graph diff --git a/test/strategy/test_mse_v2.py b/test/strategy/test_mse_v2.py index e28adba79ce..dd5d1bc2c82 100644 --- a/test/strategy/test_mse_v2.py +++ b/test/strategy/test_mse_v2.py @@ -2,14 +2,16 @@ import os import shutil import unittest -import tensorflow as tf + import numpy as np +import tensorflow as tf import torchvision + from neural_compressor.experimental import Quantization, common def build_mse_yaml_tf(): - mse_yaml = ''' + mse_yaml = """ model: name: fake_yaml framework: tensorflow @@ -29,12 +31,13 @@ def build_mse_yaml_tf(): max_trials: 10 timeout: 3600 random_seed: 9527 - ''' - with open('mse_yaml_tf.yaml', 'w', encoding="utf-8") as f: + """ + with open("mse_yaml_tf.yaml", "w", encoding="utf-8") as f: f.write(mse_yaml) - + + def build_mse_yaml_pytorch(): - mse_yaml = ''' + mse_yaml = """ model: name: resnet18 framework: pytorch_fx @@ -46,45 +49,57 @@ def build_mse_yaml_pytorch(): relative: 0.01 exit_policy: timeout: 0 - ''' - with open('mse_yaml_pytorch.yaml', 'w', encoding="utf-8") as f: + """ + with open("mse_yaml_pytorch.yaml", "w", encoding="utf-8") as f: f.write(mse_yaml) + def build_fake_model(): try: graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') - y = tf.constant(np.random.random((2,2,1,1)).astype(np.float32), name='y') - z = tf.constant(np.random.random((1,1,1,1)).astype(np.float32), name='z') - op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') - op2 = tf.nn.conv2d(input=op, filters=z, strides=[1,1,1,1], padding='VALID', ) - last_identity = tf.identity(op2, name='op2_to_store') + x = tf.compat.v1.placeholder(tf.float32, shape=(1, 3, 3, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)).astype(np.float32), name="y") + z = tf.constant(np.random.random((1, 1, 1, 1)).astype(np.float32), name="z") + 
op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") + op2 = tf.nn.conv2d( + input=op, + filters=z, + strides=[1, 1, 1, 1], + padding="VALID", + ) + last_identity = tf.identity(op2, name="op2_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op2_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op2_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") except: graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') - y = tf.constant(np.random.random((2,2,1,1)).astype(np.float32), name='y') - z = tf.constant(np.random.random((1,1,1,1)).astype(np.float32), name='z') - op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') - op2 = tf.nn.conv2d(input=op, filters=z, strides=[1,1,1,1], padding='VALID') - last_identity = tf.identity(op2, name='op2_to_store') + x = tf.compat.v1.placeholder(tf.float32, shape=(1, 3, 3, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)).astype(np.float32), name="y") + z = tf.constant(np.random.random((1, 1, 1, 1)).astype(np.float32), name="z") + op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") + op2 = tf.nn.conv2d(input=op, filters=z, strides=[1, 1, 1, 1], padding="VALID") + last_identity = tf.identity(op2, name="op2_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op2_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op2_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") return graph + + class Test_MSEV2Strategy_Tensorflow(unittest.TestCase): @classmethod def setUpClass(self): @@ -93,29 +108,31 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('mse_yaml_tf.yaml') - shutil.rmtree('./saved', ignore_errors=True) - shutil.rmtree('runs', ignore_errors=True) - shutil.rmtree('nc_workspace', ignore_errors=True) + os.remove("mse_yaml_tf.yaml") + shutil.rmtree("./saved", ignore_errors=True) + shutil.rmtree("runs", ignore_errors=True) + shutil.rmtree("nc_workspace", ignore_errors=True) def test_quantization_saved(self): - i = [0] # use a mutable type (list) to wrap the int object + i = [0] # use a mutable type (list) to wrap the int object + def fake_eval_func(_): # 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 eval_list = [0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1] i[0] += 1 return eval_list[i[0]] - - quantizer = Quantization('mse_yaml_tf.yaml') - + + quantizer = Quantization("mse_yaml_tf.yaml") + quantizer.model = self.model - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + dataset = quantizer.dataset("dummy", (100, 3, 3, 1), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.eval_func = fake_eval_func q_model = quantizer.fit() self.assertIsNotNone(q_model) - q_model.save('./saved') + q_model.save("./saved") + class 
Test_MSEV2Strategy_PyTorch(unittest.TestCase): @classmethod @@ -125,28 +142,30 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('mse_yaml_pytorch.yaml') - shutil.rmtree('./saved', ignore_errors=True) - shutil.rmtree('runs', ignore_errors=True) - shutil.rmtree('nc_workspace', ignore_errors=True) + os.remove("mse_yaml_pytorch.yaml") + shutil.rmtree("./saved", ignore_errors=True) + shutil.rmtree("runs", ignore_errors=True) + shutil.rmtree("nc_workspace", ignore_errors=True) def test_quantization_saved(self): i = [0] + def fake_eval_func(model): acc_lst = [1, 1, 0, 0, 0, 0, 1, 1.1, 1.5, 1.1] - + i[0] += 1 return acc_lst[i[0]] - + model = copy.deepcopy(self.model) - quantizer = Quantization('mse_yaml_pytorch.yaml') - dataset = quantizer.dataset('dummy', (1, 3, 224, 224)) + quantizer = Quantization("mse_yaml_pytorch.yaml") + dataset = quantizer.dataset("dummy", (1, 3, 224, 224)) quantizer.model = model quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_func = fake_eval_func q_model = quantizer.fit() self.assertIsNotNone(q_model) - q_model.save('./saved') + q_model.save("./saved") + if __name__ == "__main__": unittest.main() diff --git a/test/strategy/test_mse_v2_2.x.py b/test/strategy/test_mse_v2_2.x.py index 83507edcb59..d2a24010306 100644 --- a/test/strategy/test_mse_v2_2.x.py +++ b/test/strategy/test_mse_v2_2.x.py @@ -2,92 +2,106 @@ import os import shutil import unittest -import tensorflow as tf + import numpy as np -import torchvision -import torch import onnx +import tensorflow as tf +import torch +import torchvision +from onnx import TensorProto, helper, numpy_helper from onnx import onnx_pb as onnx_proto -from onnx import helper, TensorProto, numpy_helper + def build_ox_model(): - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 5, 5]) - C = helper.make_tensor_value_info('C', TensorProto.FLOAT, [1, 5, 2]) - D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [1, 5, 2]) - H = helper.make_tensor_value_info('H', TensorProto.FLOAT, [1, 5, 2]) + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 5, 5]) + C = helper.make_tensor_value_info("C", TensorProto.FLOAT, [1, 5, 2]) + D = helper.make_tensor_value_info("D", TensorProto.FLOAT, [1, 5, 2]) + H = helper.make_tensor_value_info("H", TensorProto.FLOAT, [1, 5, 2]) e_value = np.random.randint(2, size=(10)).astype(np.float32) - B_init = helper.make_tensor('B', TensorProto.FLOAT, [5, 2], e_value.reshape(10).tolist()) - E_init = helper.make_tensor('E', TensorProto.FLOAT, [1, 5, 2], e_value.reshape(10).tolist()) + B_init = helper.make_tensor("B", TensorProto.FLOAT, [5, 2], e_value.reshape(10).tolist()) + E_init = helper.make_tensor("E", TensorProto.FLOAT, [1, 5, 2], e_value.reshape(10).tolist()) - matmul_node = onnx.helper.make_node('MatMul', ['A', 'B'], ['C'], name='Matmul') - add = onnx.helper.make_node('Add', ['C', 'E'], ['D'], name='add') + matmul_node = onnx.helper.make_node("MatMul", ["A", "B"], ["C"], name="Matmul") + add = onnx.helper.make_node("Add", ["C", "E"], ["D"], name="add") f_value = np.random.randint(2, size=(10)).astype(np.float32) - F_init = helper.make_tensor('F', TensorProto.FLOAT, [1, 5, 2], e_value.reshape(10).tolist()) - add2 = onnx.helper.make_node('Add', ['D', 'F'], ['H'], name='add2') + F_init = helper.make_tensor("F", TensorProto.FLOAT, [1, 5, 2], e_value.reshape(10).tolist()) + add2 = onnx.helper.make_node("Add", ["D", "F"], ["H"], name="add2") - graph = helper.make_graph([matmul_node, add, add2], 'test_graph_1', [A], [H], [B_init, E_init, 
F_init]) + graph = helper.make_graph([matmul_node, add, add2], "test_graph_1", [A], [H], [B_init, E_init, F_init]) model = helper.make_model(graph) - model = helper.make_model(graph, **{'opset_imports': [helper.make_opsetid('', 13)]}) + model = helper.make_model(graph, **{"opset_imports": [helper.make_opsetid("", 13)]}) return model + def build_ox_model2(): - A = helper.make_tensor_value_info('A', TensorProto.FLOAT, [1, 5, 5]) - D = helper.make_tensor_value_info('D', TensorProto.FLOAT, [1, 5, 2]) - H = helper.make_tensor_value_info('H', TensorProto.FLOAT, [1, 5, 2]) - F = helper.make_tensor_value_info('F', TensorProto.FLOAT, [1, 5, 2]) + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 5, 5]) + D = helper.make_tensor_value_info("D", TensorProto.FLOAT, [1, 5, 2]) + H = helper.make_tensor_value_info("H", TensorProto.FLOAT, [1, 5, 2]) + F = helper.make_tensor_value_info("F", TensorProto.FLOAT, [1, 5, 2]) e_value = np.random.randint(2, size=(10)).astype(np.float32) - B_init = helper.make_tensor('B', TensorProto.FLOAT, [5, 2], e_value.reshape(10).tolist()) - E_init = helper.make_tensor('E', TensorProto.FLOAT, [1, 5, 2], e_value.reshape(10).tolist()) + B_init = helper.make_tensor("B", TensorProto.FLOAT, [5, 2], e_value.reshape(10).tolist()) + E_init = helper.make_tensor("E", TensorProto.FLOAT, [1, 5, 2], e_value.reshape(10).tolist()) - matmul_node = onnx.helper.make_node('MatMul', ['A', 'B'], ['C'], name='Matmul') - add = onnx.helper.make_node('Add', ['C', 'E'], ['D'], name='add') + matmul_node = onnx.helper.make_node("MatMul", ["A", "B"], ["C"], name="Matmul") + add = onnx.helper.make_node("Add", ["C", "E"], ["D"], name="add") - add2 = onnx.helper.make_node('Add', ['D', 'F'], ['H'], name='add2') + add2 = onnx.helper.make_node("Add", ["D", "F"], ["H"], name="add2") - graph = helper.make_graph([matmul_node, add, add2], 'test_graph_1', [A, F], [H], [B_init, E_init]) + graph = helper.make_graph([matmul_node, add, add2], "test_graph_1", [A, F], [H], [B_init, E_init]) model = helper.make_model(graph) - model = helper.make_model(graph, **{'opset_imports': [helper.make_opsetid('', 13)]}) + model = helper.make_model(graph, **{"opset_imports": [helper.make_opsetid("", 13)]}) return model + def build_fake_model(): try: graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') - y = tf.constant(np.random.random((2,2,1,1)).astype(np.float32), name='y') - z = tf.constant(np.random.random((1,1,1,1)).astype(np.float32), name='z') - op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') - op2 = tf.nn.conv2d(input=op, filters=z, strides=[1,1,1,1], padding='VALID', ) - last_identity = tf.identity(op2, name='op2_to_store') + x = tf.compat.v1.placeholder(tf.float32, shape=(1, 3, 3, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)).astype(np.float32), name="y") + z = tf.constant(np.random.random((1, 1, 1, 1)).astype(np.float32), name="z") + op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") + op2 = tf.nn.conv2d( + input=op, + filters=z, + strides=[1, 1, 1, 1], + padding="VALID", + ) + last_identity = tf.identity(op2, name="op2_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op2_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, 
["op2_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") except: graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') - y = tf.constant(np.random.random((2,2,1,1)).astype(np.float32), name='y') - z = tf.constant(np.random.random((1,1,1,1)).astype(np.float32), name='z') - op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') - op2 = tf.nn.conv2d(input=op, filters=z, strides=[1,1,1,1], padding='VALID') - last_identity = tf.identity(op2, name='op2_to_store') + x = tf.compat.v1.placeholder(tf.float32, shape=(1, 3, 3, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)).astype(np.float32), name="y") + z = tf.constant(np.random.random((1, 1, 1, 1)).astype(np.float32), name="z") + op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") + op2 = tf.nn.conv2d(input=op, filters=z, strides=[1, 1, 1, 1], padding="VALID") + last_identity = tf.identity(op2, name="op2_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op2_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op2_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") return graph + class Test_MSEV2Strategy(unittest.TestCase): @classmethod def setUpClass(self): @@ -98,49 +112,53 @@ def setUpClass(self): @classmethod def tearDownClass(self): - shutil.rmtree('./saved', ignore_errors=True) - shutil.rmtree('nc_workspace', ignore_errors=True) + shutil.rmtree("./saved", ignore_errors=True) + shutil.rmtree("nc_workspace", ignore_errors=True) def test_mse_v2_tf(self): - i = [0] # use a mutable type (list) to wrap the int object + i = [0] # use a mutable type (list) to wrap the int object + def fake_eval_func(_): # 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 eval_list = [0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1] i[0] += 1 return eval_list[i[0]] + from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion + from neural_compressor.data import DATALOADERS, Datasets from neural_compressor.quantization import fit - from neural_compressor.config import TuningCriterion, PostTrainingQuantConfig - from neural_compressor.data import Datasets, DATALOADERS + dataset = Datasets("tensorflow")["dummy"](((100, 3, 3, 1))) - dataloader = DATALOADERS['tensorflow'](dataset) + dataloader = DATALOADERS["tensorflow"](dataset) conf = PostTrainingQuantConfig( - approach="static", - quant_level=1, - tuning_criterion=TuningCriterion(strategy="mse_v2")) + approach="static", quant_level=1, tuning_criterion=TuningCriterion(strategy="mse_v2") + ) q_model = fit( model=self.tf_model, conf=conf, calib_dataloader=dataloader, eval_dataloader=dataloader, - eval_func=fake_eval_func) + eval_func=fake_eval_func, + ) self.assertIsNotNone(q_model) def test_mse_v2_tf_with_confidence_batches(self): - i = [0] # use a mutable type (list) to wrap the int object + i = [0] # use a mutable type (list) to wrap the int object + def fake_eval_func(_): # 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 eval_list = [0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1] i[0] += 1 return eval_list[i[0]] + from 
neural_compressor.config import PostTrainingQuantConfig, TuningCriterion + from neural_compressor.data import DATALOADERS, Datasets from neural_compressor.quantization import fit - from neural_compressor.config import TuningCriterion, PostTrainingQuantConfig - from neural_compressor.data import Datasets, DATALOADERS + dataset = Datasets("tensorflow")["dummy"](((100, 3, 3, 1))) - dataloader = DATALOADERS['tensorflow'](dataset) + dataloader = DATALOADERS["tensorflow"](dataset) conf = PostTrainingQuantConfig( approach="static", @@ -149,78 +167,87 @@ def fake_eval_func(_): strategy="mse_v2", strategy_kwargs={ "confidence_batches": 5, - })) + }, + ), + ) q_model = fit( model=self.tf_model, conf=conf, calib_dataloader=dataloader, eval_dataloader=dataloader, - eval_func=fake_eval_func) + eval_func=fake_eval_func, + ) self.assertIsNotNone(q_model) def test_mse_v2_saved_torch(self): i = [0] + def fake_eval_func(model): acc_lst = [1, 1, 0, 0, 0, 0, 1, 1.1, 1.5, 1.1] i[0] += 1 return acc_lst[i[0]] + from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion + from neural_compressor.data import DATALOADERS, Datasets from neural_compressor.quantization import fit - from neural_compressor.config import TuningCriterion, PostTrainingQuantConfig - from neural_compressor.data import Datasets, DATALOADERS + dataset = Datasets("pytorch")["dummy"](((1, 3, 224, 224))) - dataloader = DATALOADERS['pytorch'](dataset) + dataloader = DATALOADERS["pytorch"](dataset) conf = PostTrainingQuantConfig( - approach="static", - quant_level=1, - tuning_criterion=TuningCriterion(strategy="mse_v2")) + approach="static", quant_level=1, tuning_criterion=TuningCriterion(strategy="mse_v2") + ) q_model = fit( model=self.torch_model, conf=conf, calib_dataloader=dataloader, eval_dataloader=dataloader, - eval_func=fake_eval_func) + eval_func=fake_eval_func, + ) self.assertIsNotNone(q_model) def test_mse_v2_saved_onnx(self): i = [0] + def fake_eval_func(model): acc_lst = [1, 1, 0, 0, 0, 0, 1, 1.1, 1.5, 1.1] i[0] += 1 return acc_lst[i[0]] + from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion + from neural_compressor.data import DATALOADERS, Datasets from neural_compressor.quantization import fit - from neural_compressor.config import TuningCriterion, PostTrainingQuantConfig - from neural_compressor.data import Datasets, DATALOADERS - dataset = Datasets("onnxrt_qdq")["dummy_v2"]((5,5), (5,1)) + + dataset = Datasets("onnxrt_qdq")["dummy_v2"]((5, 5), (5, 1)) dataloader = DATALOADERS["onnxrt_qdq"](dataset) conf = PostTrainingQuantConfig( - approach="static", - quant_level=1, - tuning_criterion=TuningCriterion(strategy="mse_v2", max_trials=9)) + approach="static", quant_level=1, tuning_criterion=TuningCriterion(strategy="mse_v2", max_trials=9) + ) q_model = fit( model=self.onnx_model, conf=conf, calib_dataloader=dataloader, eval_dataloader=dataloader, - eval_func=fake_eval_func) + eval_func=fake_eval_func, + ) self.assertIsNotNone(q_model) i = [0] - dataset = Datasets("onnxrt_qdq")["dummy_v2"]([(5,5), (5,2)], [(5,1), (5,1)]) + dataset = Datasets("onnxrt_qdq")["dummy_v2"]([(5, 5), (5, 2)], [(5, 1), (5, 1)]) dataloader = DATALOADERS["onnxrt_qdq"](dataset) q_model = fit( model=self.onnx_model2, conf=conf, calib_dataloader=dataloader, eval_dataloader=dataloader, - eval_func=fake_eval_func) + eval_func=fake_eval_func, + ) self.assertIsNotNone(q_model) + if __name__ == "__main__": unittest.main() diff --git a/test/strategy/test_new_datatype.py b/test/strategy/test_new_datatype.py index 
02e2d48d1f2..d0c7c20c542 100644 --- a/test/strategy/test_new_datatype.py +++ b/test/strategy/test_new_datatype.py @@ -1,11 +1,12 @@ -"""Tests for new data type""" -import unittest -import shutil +"""Tests for new data type.""" import os +import shutil +import unittest def build_model(): import torch + class M(torch.nn.Module): def __init__(self): super().__init__() @@ -17,57 +18,60 @@ def forward(self, x): x = x.view(1, -1) x = self.linear(x) return x + return M() def add_cap(filename): import yaml + int4_cap = { - 'static': { - 'Conv2d': { - 'weight': { - 'dtype': ['int4'], - 'scheme': ['sym'], - 'granularity': ['per_channel'], - 'algorithm': ['minmax'] - }, - 'activation': { - 'dtype': ['uint4'], - 'scheme': ['sym'], - 'granularity': ['per_tensor'], - 'algorithm': ['kl', 'minmax'] - }, - }, + "static": { + "Conv2d": { + "weight": { + "dtype": ["int4"], + "scheme": ["sym"], + "granularity": ["per_channel"], + "algorithm": ["minmax"], + }, + "activation": { + "dtype": ["uint4"], + "scheme": ["sym"], + "granularity": ["per_tensor"], + "algorithm": ["kl", "minmax"], + }, + }, } } with open(filename) as f: con = yaml.safe_load(f) - con[0]['int4'] = int4_cap - with open(filename, 'w') as out: + con[0]["int4"] = int4_cap + with open(filename, "w") as out: yaml.dump(con, out) -class TestAddNewDataType(unittest.TestCase): +class TestAddNewDataType(unittest.TestCase): @classmethod def setUpClass(self): pass @classmethod def tearDownClass(self): - shutil.rmtree('saved', ignore_errors=True) + shutil.rmtree("saved", ignore_errors=True) def test_add_int4(self): - import shutil import importlib - nc_path = os.path.dirname(importlib.util.find_spec('neural_compressor').origin) - src = os.path.join(nc_path, 'adaptor/pytorch_cpu.yaml') - dst = os.path.join(nc_path, 'adaptor/pytorch_cpu_backup.yaml') + import shutil + + nc_path = os.path.dirname(importlib.util.find_spec("neural_compressor").origin) + src = os.path.join(nc_path, "adaptor/pytorch_cpu.yaml") + dst = os.path.join(nc_path, "adaptor/pytorch_cpu_backup.yaml") shutil.copyfile(src, dst) add_cap(src) - from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion - from neural_compressor.data import Datasets, DATALOADERS + from neural_compressor.data import DATALOADERS, Datasets + from neural_compressor.quantization import fit # dataset and dataloader dataset = Datasets("pytorch")["dummy"](((100, 3, 224, 224))) @@ -85,5 +89,6 @@ def fake_eval(model): self.assertIsNotNone(q_model) self.assertEqual(q_model._model.conv.zero_point, 7) + if __name__ == "__main__": unittest.main() diff --git a/test/strategy/test_quant_level.py b/test/strategy/test_quant_level.py index c7390c585da..c31caaf9c6d 100644 --- a/test/strategy/test_quant_level.py +++ b/test/strategy/test_quant_level.py @@ -1,66 +1,75 @@ -"""Tests for optimization level & conservative strategy""" +"""Tests for optimization level & conservative strategy.""" import shutil import unittest -import numpy as np from copy import deepcopy -from onnx import helper, TensorProto, numpy_helper -from neural_compressor.data import Datasets, DATALOADERS -from neural_compressor.utils import logger +import numpy as np +from onnx import TensorProto, helper, numpy_helper + from neural_compressor import PostTrainingQuantConfig +from neural_compressor.data import DATALOADERS, Datasets from neural_compressor.quantization import fit +from neural_compressor.utils import logger + def build_conv_model(): initializers = [] - input = 
helper.make_tensor_value_info('input', TensorProto.FLOAT, [1, 3, 224, 224]) + input = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 224, 224]) conv1_weight_initializer = numpy_helper.from_array( - np.random.randint(-1, 2, [3, 3, 3, 3]).astype(np.float32), name='conv1_weight') - conv1_node = helper.make_node('Conv', ['input', 'conv1_weight'], ['conv1_output'], name='conv1') + np.random.randint(-1, 2, [3, 3, 3, 3]).astype(np.float32), name="conv1_weight" + ) + conv1_node = helper.make_node("Conv", ["input", "conv1_weight"], ["conv1_output"], name="conv1") conv2_weight_initializer = numpy_helper.from_array( - np.random.randint(-1, 2, [5, 3, 3, 3]).astype(np.float32), name='conv2_weight') - conv2_node = helper.make_node('Conv', ['conv1_output', 'conv2_weight'], ['conv2_output'], name='conv2') + np.random.randint(-1, 2, [5, 3, 3, 3]).astype(np.float32), name="conv2_weight" + ) + conv2_node = helper.make_node("Conv", ["conv1_output", "conv2_weight"], ["conv2_output"], name="conv2") - avg_args = {'kernel_shape': [3, 3]} - avgpool_node = helper.make_node('AveragePool', ['conv1_output'], ['avg_output'], name='AveragePool', **avg_args) + avg_args = {"kernel_shape": [3, 3]} + avgpool_node = helper.make_node("AveragePool", ["conv1_output"], ["avg_output"], name="AveragePool", **avg_args) - concat_node = helper.make_node('Concat', ['avg_output', 'conv2_output'], - ['concat_output'], name='Concat', axis=1) - output = helper.make_tensor_value_info('concat_output', TensorProto.FLOAT, [1, 8, 220, 220]) + concat_node = helper.make_node("Concat", ["avg_output", "conv2_output"], ["concat_output"], name="Concat", axis=1) + output = helper.make_tensor_value_info("concat_output", TensorProto.FLOAT, [1, 8, 220, 220]) initializers = [conv1_weight_initializer, conv2_weight_initializer] - graph = helper.make_graph([conv1_node, conv2_node, concat_node, avgpool_node], - 'test', [input], [output], initializer=initializers) + graph = helper.make_graph( + [conv1_node, conv2_node, concat_node, avgpool_node], "test", [input], [output], initializer=initializers + ) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) return model + def build_ort_data(): - datasets = Datasets('onnxrt_qlinearops') - cv_dataset = datasets['dummy'](shape=(10, 3, 224, 224), low=0., high=1., label=True) - cv_dataloader = DATALOADERS['onnxrt_qlinearops'](cv_dataset) + datasets = Datasets("onnxrt_qlinearops") + cv_dataset = datasets["dummy"](shape=(10, 3, 224, 224), low=0.0, high=1.0, label=True) + cv_dataloader = DATALOADERS["onnxrt_qlinearops"](cv_dataset) return cv_dataloader def export_onnx_model(model, path, opset=12): import torch + x = torch.randn(100, 3, 224, 224, requires_grad=True) torch_out = model(x) # Export the model - torch.onnx.export(model, # model being run - x, # model input (or a tuple for multiple inputs) - path, # where to save the model (can be a file or file-like object) - export_params=True, # store the trained parameter weights inside the model file - opset_version=opset, # the ONNX version to export the model to, please ensure at least 11. 
- do_constant_folding=True, # whether to execute constant folding for optimization - input_names = ["input"], # the model"s input names - output_names = ["output"], # the model"s output names - dynamic_axes={"input" : {0 : "batch_size"}, # variable length axes - "output" : {0 : "batch_size"}}) + torch.onnx.export( + model, # model being run + x, # model input (or a tuple for multiple inputs) + path, # where to save the model (can be a file or file-like object) + export_params=True, # store the trained parameter weights inside the model file + opset_version=opset, # the ONNX version to export the model to, please ensure at least 11. + do_constant_folding=True, # whether to execute constant folding for optimization + input_names=["input"], # the model"s input names + output_names=["output"], # the model"s output names + dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}}, # variable length axes + ) + def build_resnet18(): import onnx import torchvision + rn18_model = torchvision.models.resnet18() rn18_export_path = "rn18.onnx" export_onnx_model(rn18_model, rn18_export_path, 12) @@ -70,44 +79,55 @@ def build_resnet18(): def build_fake_model(): import tensorflow as tf + try: graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') - y = tf.constant(np.random.random((2,2,1,1)).astype(np.float32), name='y') - z = tf.constant(np.random.random((1,1,1,1)).astype(np.float32), name='z') - op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') - op2 = tf.nn.conv2d(input=op, filters=z, strides=[1,1,1,1], padding='VALID', ) - last_identity = tf.identity(op2, name='op2_to_store') + x = tf.compat.v1.placeholder(tf.float32, shape=(1, 3, 3, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)).astype(np.float32), name="y") + z = tf.constant(np.random.random((1, 1, 1, 1)).astype(np.float32), name="z") + op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") + op2 = tf.nn.conv2d( + input=op, + filters=z, + strides=[1, 1, 1, 1], + padding="VALID", + ) + last_identity = tf.identity(op2, name="op2_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op2_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op2_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") except: graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') - y = tf.constant(np.random.random((2,2,1,1)).astype(np.float32), name='y') - z = tf.constant(np.random.random((1,1,1,1)).astype(np.float32), name='z') - op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') - op2 = tf.nn.conv2d(input=op, filters=z, strides=[1,1,1,1], padding='VALID') - last_identity = tf.identity(op2, name='op2_to_store') + x = tf.compat.v1.placeholder(tf.float32, shape=(1, 3, 3, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)).astype(np.float32), name="y") + z = tf.constant(np.random.random((1, 1, 1, 1)).astype(np.float32), name="z") + op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], 
padding="VALID", name="op_to_store") + op2 = tf.nn.conv2d(input=op, filters=z, strides=[1, 1, 1, 1], padding="VALID") + last_identity = tf.identity(op2, name="op2_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op2_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op2_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") return graph def get_torch_demo_model(): import torch + class DemoModel(torch.nn.Module): def __init__(self): super(DemoModel, self).__init__() @@ -124,10 +144,11 @@ def forward(self, x): x = self.fc4(x) x = self.fc5(x) return x + return DemoModel() -class TestQuantLevel(unittest.TestCase): +class TestQuantLevel(unittest.TestCase): @classmethod def setUpClass(self): self.tf_graph = build_fake_model() @@ -137,147 +158,162 @@ def setUpClass(self): @classmethod def tearDownClass(self): - shutil.rmtree('saved', ignore_errors=True) - shutil.rmtree('nc_workspace', ignore_errors=True) + shutil.rmtree("saved", ignore_errors=True) + shutil.rmtree("nc_workspace", ignore_errors=True) def test_quant_level_auto(self): from neural_compressor import PostTrainingQuantConfig from neural_compressor.quantization import fit acc_lst = [1.0, 0.9, 1.1, 0.8] + def fake_eval(model): result = acc_lst[0] del acc_lst[0] return result - conf = PostTrainingQuantConfig(approach='static') + conf = PostTrainingQuantConfig(approach="static") - q_model = fit(model=self.ort_cv_model, conf=conf, \ - calib_dataloader=self.ort_cv_dataloader, eval_func=fake_eval) + q_model = fit(model=self.ort_cv_model, conf=conf, calib_dataloader=self.ort_cv_dataloader, eval_func=fake_eval) node_names = [i.name for i in q_model.nodes()] # All conv will be quantized for node_name in node_names: - if 'conv' in node_name: - self.assertTrue('quant' in node_name or 'Quant' in node_name) + if "conv" in node_name: + self.assertTrue("quant" in node_name or "Quant" in node_name) def test2_quant_level_auto(self): # All conv will be quantized but matmul not acc_lst = [1.0, 0.9, 1.1, 0.8] + def fake_eval(model): result = acc_lst[0] del acc_lst[0] return result - conf = PostTrainingQuantConfig(approach='static') - q_model = fit(model=deepcopy(self.ort_resnet18), conf=conf, \ - calib_dataloader=self.ort_cv_dataloader, eval_func=fake_eval) + conf = PostTrainingQuantConfig(approach="static") + q_model = fit( + model=deepcopy(self.ort_resnet18), conf=conf, calib_dataloader=self.ort_cv_dataloader, eval_func=fake_eval + ) node_names = [i.name for i in q_model.nodes()] for node_name in node_names: - if 'conv' in node_name: - self.assertTrue('quant' in node_name or 'Quant' in node_name) - if 'MatMul' in node_name: - self.assertTrue('quant' not in node_name and 'Quant' not in node_name) + if "conv" in node_name: + self.assertTrue("quant" in node_name or "Quant" in node_name) + if "MatMul" in node_name: + self.assertTrue("quant" not in node_name and "Quant" not in node_name) def test3_quant_level_auto(self): # All conv/matmul will be quantized acc_lst = [1.0, 0.9, 1.1, 1.2] + def fake_eval3(model): result = acc_lst[0] del acc_lst[0] return result - conf = PostTrainingQuantConfig(approach='static') - q_model = fit(model=deepcopy(self.ort_resnet18), conf=conf, \ - calib_dataloader=self.ort_cv_dataloader, eval_func=fake_eval3) + + conf = 
PostTrainingQuantConfig(approach="static") + q_model = fit( + model=deepcopy(self.ort_resnet18), conf=conf, calib_dataloader=self.ort_cv_dataloader, eval_func=fake_eval3 + ) node_names = [i.name for i in q_model.nodes()] for node_name in node_names: - if 'conv' in node_name or 'MatMul' in node_name: - self.assertTrue('quant' in node_name or 'Quant' in node_name) + if "conv" in node_name or "MatMul" in node_name: + self.assertTrue("quant" in node_name or "Quant" in node_name) def test4_quant_level_auto(self): # All matmul will be quantized but conv not acc_lst = [1.0, 0.9, 0.8, 1.1] + def fake_eval4(model): result = acc_lst[0] del acc_lst[0] return result - conf = PostTrainingQuantConfig(approach='static') - q_model = fit(model=deepcopy(self.ort_resnet18), conf=conf, \ - calib_dataloader=self.ort_cv_dataloader, eval_func=fake_eval4) + conf = PostTrainingQuantConfig(approach="static") + q_model = fit( + model=deepcopy(self.ort_resnet18), conf=conf, calib_dataloader=self.ort_cv_dataloader, eval_func=fake_eval4 + ) node_names = [i.name for i in q_model.nodes()] for node_name in node_names: - if 'MatMul' in node_name: - self.assertTrue('quant' in node_name or 'Quant' in node_name) - if 'conv' in node_name: - self.assertTrue('quant' not in node_name and 'Quant' not in node_name) + if "MatMul" in node_name: + self.assertTrue("quant" in node_name or "Quant" in node_name) + if "conv" in node_name: + self.assertTrue("quant" not in node_name and "Quant" not in node_name) def test5_quant_level_auto(self): # All matmul and conv will be quantized, return with all int8. acc_lst = [1.0, 1.2, 0.8, 1.1] + def fake_eval5(model): result = acc_lst[0] del acc_lst[0] return result - conf = PostTrainingQuantConfig(approach='static') - q_model = fit(model=deepcopy(self.ort_resnet18), conf=conf, \ - calib_dataloader=self.ort_cv_dataloader, eval_func=fake_eval5) + conf = PostTrainingQuantConfig(approach="static") + q_model = fit( + model=deepcopy(self.ort_resnet18), conf=conf, calib_dataloader=self.ort_cv_dataloader, eval_func=fake_eval5 + ) node_names = [i.name for i in q_model.nodes()] for node_name in node_names: - if 'MatMul' in node_name: - self.assertTrue('quant' in node_name or 'Quant' in node_name) - if 'conv' in node_name: - self.assertTrue('quant' in node_name or 'Quant' in node_name) + if "MatMul" in node_name: + self.assertTrue("quant" in node_name or "Quant" in node_name) + if "conv" in node_name: + self.assertTrue("quant" in node_name or "Quant" in node_name) def test6_quant_level_auto(self): # start with basic acc_lst = [1.0, 0.7, 0.9, 0.9, 1.1] + def fake_eval6(model): result = acc_lst[0] del acc_lst[0] return result - conf = PostTrainingQuantConfig(approach='static') - q_model = fit(model=deepcopy(self.ort_resnet18), conf=conf, \ - calib_dataloader=self.ort_cv_dataloader, eval_func=fake_eval6) + conf = PostTrainingQuantConfig(approach="static") + q_model = fit( + model=deepcopy(self.ort_resnet18), conf=conf, calib_dataloader=self.ort_cv_dataloader, eval_func=fake_eval6 + ) node_names = [i.name for i in q_model.nodes()] for node_name in node_names: - if 'MatMul' in node_name: - self.assertTrue('quant' not in node_name) + if "MatMul" in node_name: + self.assertTrue("quant" not in node_name) def test7_quant_level_auto(self): # start with basic and return at the 3th of basic stage acc_lst = [1.0, 0.7, 0.8, 0.9, 0.95, 0.98, 1.1] + def fake_eval7(model): result = acc_lst[0] del acc_lst[0] return result - conf = PostTrainingQuantConfig(approach='static') - q_model = fit(model=deepcopy(self.ort_resnet18), 
conf=conf, \ - calib_dataloader=self.ort_cv_dataloader, eval_func=fake_eval7) + conf = PostTrainingQuantConfig(approach="static") + q_model = fit( + model=deepcopy(self.ort_resnet18), conf=conf, calib_dataloader=self.ort_cv_dataloader, eval_func=fake_eval7 + ) node_names = [i.name for i in q_model.nodes()] for node_name in node_names: - if 'MatMul' in node_name: - self.assertTrue('quant' in node_name or 'Quant' in node_name) - if 'conv' in node_name: - self.assertTrue('quant' in node_name or 'Quant' in node_name) + if "MatMul" in node_name: + self.assertTrue("quant" in node_name or "Quant" in node_name) + if "conv" in node_name: + self.assertTrue("quant" in node_name or "Quant" in node_name) def test_pt_quant_level_auto(self): logger.info("*** Test: quantization level is auto with pytorch model.") import torchvision - from neural_compressor.data import Datasets, DATALOADERS + from neural_compressor import PostTrainingQuantConfig + from neural_compressor.data import DATALOADERS, Datasets from neural_compressor.quantization import fit resnet18 = torchvision.models.resnet18() - acc_lst = [2.0, 1.0, 1.1, 2.2, 2.3] + acc_lst = [2.0, 1.0, 1.1, 2.2, 2.3] + def _fake_eval(model): result = acc_lst[0] del acc_lst[0] @@ -286,17 +322,18 @@ def _fake_eval(model): dataset = Datasets("pytorch")["dummy"](((4, 3, 3, 1))) dataloader = DATALOADERS["pytorch"](dataset) conf = PostTrainingQuantConfig() - q_model = fit(model=resnet18, conf=conf, calib_dataloader= dataloader, \ - eval_dataloader=dataloader, eval_func=_fake_eval) + q_model = fit( + model=resnet18, conf=conf, calib_dataloader=dataloader, eval_dataloader=dataloader, eval_func=_fake_eval + ) self.assertIsNotNone(q_model) fc_layer = q_model._model.fc - self.assertTrue('Quant' in str(fc_layer)) + self.assertTrue("Quant" in str(fc_layer)) def test_tf_quant_level_0(self): logger.info("*** Test: quantization level 0 with tensorflow model.") - from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig - from neural_compressor.data import Datasets, DATALOADERS + from neural_compressor.data import DATALOADERS, Datasets + from neural_compressor.quantization import fit # fake evaluation function def _fake_eval(model): @@ -310,21 +347,24 @@ def _fake_eval(model): conf = PostTrainingQuantConfig(quant_level=0) # fit - q_model = fit(model=self.tf_graph, - conf=conf, - calib_dataloader= dataloader, - eval_dataloader=dataloader, - eval_func=_fake_eval) + q_model = fit( + model=self.tf_graph, + conf=conf, + calib_dataloader=dataloader, + eval_dataloader=dataloader, + eval_func=_fake_eval, + ) self.assertIsNotNone(q_model) def test_tf_quant_level_1(self): logger.info("*** Test: quantization level 1 with tensorflow model.") - from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig - from neural_compressor.data import Datasets, DATALOADERS + from neural_compressor.data import DATALOADERS, Datasets + from neural_compressor.quantization import fit # fake evaluation function self._fake_acc = 10 + def _fake_eval(model): self._fake_acc -= 1 return self._fake_acc @@ -337,27 +377,31 @@ def _fake_eval(model): conf = PostTrainingQuantConfig(quant_level=1) # fit - q_model = fit(model=self.tf_graph, - conf=conf, - calib_dataloader= dataloader, - eval_dataloader=dataloader, - eval_func=_fake_eval) + q_model = fit( + model=self.tf_graph, + conf=conf, + calib_dataloader=dataloader, + eval_dataloader=dataloader, + eval_func=_fake_eval, + ) self.assertIsNone(q_model) def 
test_pt_quant_level_1_with_perf_obj(self): logger.info("*** Test: quantization level 1 with perf obj [pytorch model].") - from neural_compressor.quantization import fit - from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion - from neural_compressor.data import Datasets, DATALOADERS import time + from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion + from neural_compressor.data import DATALOADERS, Datasets + from neural_compressor.quantization import fit + # model model = get_torch_demo_model() # fake evaluation function - acc_lst = [2.0, 1.0, 2.1, 2.2, 2.3, 2.1, 2.1, 2.2] + acc_lst = [2.0, 1.0, 2.1, 2.2, 2.3, 2.1, 2.1, 2.2] perf_lst = [2.0, 1.5, 1.0, 0.5, 0.1, 1.0, 1.0, 1.0] self._internal_index = -1 + def _fake_eval(model): self._internal_index += 1 perf = perf_lst[self._internal_index] @@ -368,32 +412,32 @@ def _fake_eval(model): dataset = Datasets("pytorch")["dummy"](((16, 2, 3))) dataloader = DATALOADERS["pytorch"](dataset) - tuning_criterion = TuningCriterion(timeout=10000, max_trials=6, objective='performance') + tuning_criterion = TuningCriterion(timeout=10000, max_trials=6, objective="performance") conf = PostTrainingQuantConfig(quant_level=1, tuning_criterion=tuning_criterion) # fit - q_model = fit(model=model, - conf=conf, - calib_dataloader= dataloader, - eval_dataloader=dataloader, - eval_func=_fake_eval) + q_model = fit( + model=model, conf=conf, calib_dataloader=dataloader, eval_dataloader=dataloader, eval_func=_fake_eval + ) self.assertIsNotNone(q_model) - self.assertEqual(q_model.q_config.get('trial_number', -1), 4) - + self.assertEqual(q_model.q_config.get("trial_number", -1), 4) + def test_pt_quant_level_1_with_perf_obj2(self): logger.info("*** Test: quantization level 1 with perf obj [pytorch model].") - from neural_compressor.quantization import fit - from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion - from neural_compressor.data import Datasets, DATALOADERS import time + from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion + from neural_compressor.data import DATALOADERS, Datasets + from neural_compressor.quantization import fit + # model model = get_torch_demo_model() # fake evaluation function - acc_lst = [2.0, 1.0, 2.1, 2.2, 2.3, 2.1, 2.1, 2.2] + acc_lst = [2.0, 1.0, 2.1, 2.2, 2.3, 2.1, 2.1, 2.2] perf_lst = [2.0, 1.5, 1.0, 0.5, 0.1, 1.0, 1.0, 1.0] self._internal_index = -1 + def _fake_eval(model): self._internal_index += 1 perf = perf_lst[self._internal_index] @@ -404,33 +448,34 @@ def _fake_eval(model): dataset = Datasets("pytorch")["dummy"](((16, 2, 3))) dataloader = DATALOADERS["pytorch"](dataset) - tuning_criterion = TuningCriterion(timeout=10000, max_trials=6, objective=['performance']) + tuning_criterion = TuningCriterion(timeout=10000, max_trials=6, objective=["performance"]) conf = PostTrainingQuantConfig(quant_level=1, tuning_criterion=tuning_criterion) # fit - q_model = fit(model=model, - conf=conf, - calib_dataloader= dataloader, - eval_dataloader=dataloader, - eval_func=_fake_eval) + q_model = fit( + model=model, conf=conf, calib_dataloader=dataloader, eval_dataloader=dataloader, eval_func=_fake_eval + ) self.assertIsNotNone(q_model) - self.assertEqual(q_model.q_config.get('trial_number', -1), 4) + self.assertEqual(q_model.q_config.get("trial_number", -1), 4) def test_pt_quant_level_0(self): logger.info("*** Test: quantization level 0 with pytorch model.") - from neural_compressor.quantization import fit - from neural_compressor.config import 
PostTrainingQuantConfig - from neural_compressor.data import Datasets, DATALOADERS - import torchvision import time + import torchvision + + from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.data import DATALOADERS, Datasets + from neural_compressor.quantization import fit + # model resnet18 = torchvision.models.resnet18() # fake evaluation function - acc_lst = [2.0, 1.0, 2.1, 2.2, 2.3] + acc_lst = [2.0, 1.0, 2.1, 2.2, 2.3] perf_lst = [2.0, 1.5, 1.0, 0.5, 0.1] self.test_pt_opt_level_0_index = -1 + def _fake_eval(model): self.test_pt_opt_level_0_index += 1 perf = perf_lst[self.test_pt_opt_level_0_index] @@ -445,30 +490,30 @@ def _fake_eval(model): conf = PostTrainingQuantConfig(quant_level=0) # fit - q_model = fit(model=resnet18, - conf=conf, - calib_dataloader= dataloader, - eval_dataloader=dataloader, - eval_func=_fake_eval) + q_model = fit( + model=resnet18, conf=conf, calib_dataloader=dataloader, eval_dataloader=dataloader, eval_func=_fake_eval + ) self.assertIsNotNone(q_model) - def test_quant_level_auto_ort(self): # All conv/matmul will be quantized acc_lst = [1.0, 0.9, 0.9, 0.9, 1.1] + def fake_eval3(model): result = acc_lst[0] del acc_lst[0] return result - conf = PostTrainingQuantConfig(approach='static') - q_model = fit(model=deepcopy(self.ort_resnet18), conf=conf, \ - calib_dataloader=self.ort_cv_dataloader, eval_func=fake_eval3) + + conf = PostTrainingQuantConfig(approach="static") + q_model = fit( + model=deepcopy(self.ort_resnet18), conf=conf, calib_dataloader=self.ort_cv_dataloader, eval_func=fake_eval3 + ) node_names = [i.name for i in q_model.nodes()] found_fp32_conv = False for node_name in node_names: - if 'MatMul' in node_name: - self.assertTrue('quant' not in node_name) - if 'conv' in node_name and ('quant' in node_name or 'Quant' in node_name): + if "MatMul" in node_name: + self.assertTrue("quant" not in node_name) + if "conv" in node_name and ("quant" in node_name or "Quant" in node_name): found_fp32_conv = True self.assertTrue(found_fp32_conv) @@ -476,15 +521,19 @@ def test_quant_level_auto_with_max_trial(self): # maxt_trails = 1: even if the accuracy does not meet the requirements, # the tuning process ends after the first trial. 
from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion + acc_lst = [1.0, 0.9, 1.1, 1.2] + def fake_eval3(model): result = acc_lst[0] del acc_lst[0] return result + tuning_criterion = TuningCriterion(max_trials=1) - conf = PostTrainingQuantConfig(approach='static', tuning_criterion=tuning_criterion) - q_model = fit(model=deepcopy(self.ort_resnet18), conf=conf, \ - calib_dataloader=self.ort_cv_dataloader, eval_func=fake_eval3) + conf = PostTrainingQuantConfig(approach="static", tuning_criterion=tuning_criterion) + q_model = fit( + model=deepcopy(self.ort_resnet18), conf=conf, calib_dataloader=self.ort_cv_dataloader, eval_func=fake_eval3 + ) self.assertIsNone(q_model) diff --git a/test/strategy/test_random.py b/test/strategy/test_random.py index a9aef8b8cc5..ced559d5518 100644 --- a/test/strategy/test_random.py +++ b/test/strategy/test_random.py @@ -1,48 +1,48 @@ -"""Tests for quantization""" -import numpy as np -import unittest +"""Tests for quantization.""" import os import shutil +import unittest + +import numpy as np + def build_fake_model(): import tensorflow as tf + try: graph = tf.Graph() graph_def = tf.GraphDef() with tf.Session() as sess: - x = tf.placeholder(tf.float64, shape=(1, 3, 3, 1), name='x') - y = tf.constant(np.random.random((2, 2, 1, 1)), name='y') - op = tf.nn.conv2d(input=x, filter=y, strides=[ - 1, 1, 1, 1], padding='VALID', name='op_to_store') + x = tf.placeholder(tf.float64, shape=(1, 3, 3, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)), name="y") + op = tf.nn.conv2d(input=x, filter=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.global_variables_initializer()) - constant_graph = tf.graph_util.convert_variables_to_constants( - sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ["op_to_store"]) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") except: graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float64, shape=(1, 3, 3, 1), name='x') - y = tf.compat.v1.constant(np.random.random((2, 2, 1, 1)), name='y') - op = tf.nn.conv2d(input=x, filters=y, strides=[ - 1, 1, 1, 1], padding='VALID', name='op_to_store') + x = tf.compat.v1.placeholder(tf.float64, shape=(1, 3, 3, 1), name="x") + y = tf.compat.v1.constant(np.random.random((2, 2, 1, 1)), name="y") + op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, [ - 'op_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") return graph class TestRandomStrategy(unittest.TestCase): - @classmethod def setUpClass(self): self.constant_graph = build_fake_model() @@ -52,51 +52,47 @@ def tearDownClass(self): shutil.rmtree("saved", ignore_errors=True) def test_ru_random_one_trial(self): + from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig, TuningCriterion + from neural_compressor.data import DATALOADERS, Datasets from 
neural_compressor.quantization import fit - from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion - from neural_compressor.data import Datasets, DATALOADERS # dataset and dataloader dataset = Datasets("tensorflow")["dummy"]((100, 3, 3, 1), label=True) dataloader = DATALOADERS["tensorflow"](dataset) # tuning and accuracy criterion - tune_cri = TuningCriterion(strategy='random', max_trials=1) + tune_cri = TuningCriterion(strategy="random", max_trials=1) acc_cri = AccuracyCriterion(tolerable_loss=0.01) conf = PostTrainingQuantConfig(quant_level=1, tuning_criterion=tune_cri, accuracy_criterion=acc_cri) + def fake_eval(model): return 1 - q_model = fit(model=self.constant_graph, - conf=conf, - calib_dataloader=dataloader, - eval_func=fake_eval) + q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader=dataloader, eval_func=fake_eval) self.assertNotEqual(q_model, None) def test_ru_random_max_trials(self): + from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig, TuningCriterion + from neural_compressor.data import DATALOADERS, Datasets from neural_compressor.quantization import fit - from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion - from neural_compressor.data import Datasets, DATALOADERS # dataset and dataloader dataset = Datasets("tensorflow")["dummy"]((100, 3, 3, 1), label=True) dataloader = DATALOADERS["tensorflow"](dataset) # tuning and accuracy criterion - tune_cri = TuningCriterion(strategy='random', max_trials=3) + tune_cri = TuningCriterion(strategy="random", max_trials=3) acc_cri = AccuracyCriterion(tolerable_loss=0.01) acc = [0, 1, 0.9, 1] + def fake_eval(model): acc.pop(0) return acc[0] conf = PostTrainingQuantConfig(quant_level=1, tuning_criterion=tune_cri, accuracy_criterion=acc_cri) - q_model = fit(model=self.constant_graph, - conf=conf, - calib_dataloader=dataloader, - eval_func=fake_eval) + q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader=dataloader, eval_func=fake_eval) self.assertNotEqual(q_model, None) diff --git a/test/strategy/test_random_1.x.py b/test/strategy/test_random_1.x.py index d0f007fd7f2..d6493da60eb 100644 --- a/test/strategy/test_random_1.x.py +++ b/test/strategy/test_random_1.x.py @@ -1,13 +1,14 @@ -"""Tests for quantization""" -import numpy as np -import unittest +"""Tests for quantization.""" import os import shutil +import unittest + +import numpy as np import yaml def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -25,15 +26,15 @@ def build_fake_yaml(): relative: 0.01 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() def build_fake_yaml2(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -53,52 +54,50 @@ def build_fake_yaml2(): relative: -0.01 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml2.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml2.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() def build_fake_model(): import tensorflow as tf + try: graph = tf.Graph() graph_def = tf.GraphDef() with tf.Session() as sess: - x = tf.placeholder(tf.float64, shape=(1, 3, 3, 1), name='x') - y = tf.constant(np.random.random((2, 2, 1, 1)), name='y') - op = tf.nn.conv2d(input=x, 
filter=y, strides=[ - 1, 1, 1, 1], padding='VALID', name='op_to_store') + x = tf.placeholder(tf.float64, shape=(1, 3, 3, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)), name="y") + op = tf.nn.conv2d(input=x, filter=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.global_variables_initializer()) - constant_graph = tf.graph_util.convert_variables_to_constants( - sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ["op_to_store"]) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") except: graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float64, shape=(1, 3, 3, 1), name='x') - y = tf.compat.v1.constant(np.random.random((2, 2, 1, 1)), name='y') - op = tf.nn.conv2d(input=x, filters=y, strides=[ - 1, 1, 1, 1], padding='VALID', name='op_to_store') + x = tf.compat.v1.placeholder(tf.float64, shape=(1, 3, 3, 1), name="x") + y = tf.compat.v1.constant(np.random.random((2, 2, 1, 1)), name="y") + op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, [ - 'op_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") return graph class TestQuantization(unittest.TestCase): - @classmethod def setUpClass(self): self.constant_graph = build_fake_model() @@ -107,16 +106,16 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') - os.remove('fake_yaml2.yaml') + os.remove("fake_yaml.yaml") + os.remove("fake_yaml2.yaml") shutil.rmtree("saved", ignore_errors=True) def test_ru_random_one_trial(self): from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", (100, 3, 3, 1), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph @@ -125,8 +124,8 @@ def test_ru_random_one_trial(self): def test_ru_random_max_trials(self): from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml2.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + quantizer = Quantization("fake_yaml2.yaml") + dataset = quantizer.dataset("dummy", (100, 3, 3, 1), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph diff --git a/test/strategy/test_sigopt.py b/test/strategy/test_sigopt.py index 4fd5e3ac271..4c956a67b6a 100644 --- a/test/strategy/test_sigopt.py +++ b/test/strategy/test_sigopt.py @@ -1,109 +1,124 @@ -"""Tests for quantization""" -import numpy as np -import unittest -import shutil +"""Tests for quantization.""" import os -if os.getenv('SIGOPT_API_TOKEN') is None or os.getenv('SIGOPT_PROJECT_ID') is 
None: +import shutil +import unittest + +import numpy as np + +if os.getenv("SIGOPT_API_TOKEN") is None or os.getenv("SIGOPT_PROJECT_ID") is None: CONDITION = True else: CONDITION = False + def build_fake_model(): import tensorflow as tf + try: graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') - y = tf.constant(np.random.random((2,2,1,1)).astype(np.float32), name='y') - z = tf.constant(np.random.random((1,1,1,1)).astype(np.float32), name='z') - op = tf.nn.conv2d(input=tf.nn.relu(x), filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') - op2 = tf.nn.conv2d(input=tf.nn.relu(op), filters=z, strides=[1,1,1,1], padding='VALID', name='op2_to_store') + x = tf.compat.v1.placeholder(tf.float32, shape=(1, 3, 3, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)).astype(np.float32), name="y") + z = tf.constant(np.random.random((1, 1, 1, 1)).astype(np.float32), name="z") + op = tf.nn.conv2d(input=tf.nn.relu(x), filters=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") + op2 = tf.nn.conv2d( + input=tf.nn.relu(op), filters=z, strides=[1, 1, 1, 1], padding="VALID", name="op2_to_store" + ) sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op2_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op2_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") except: graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') - y = tf.constant(np.random.random((2,2,1,1)).astype(np.float32), name='y') - z = tf.constant(np.random.random((1,1,1,1)).astype(np.float32), name='z') - op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') - op2 = tf.nn.conv2d(input=op, filters=z, strides=[1,1,1,1], padding='VALID', name='op2_to_store') + x = tf.compat.v1.placeholder(tf.float32, shape=(1, 3, 3, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)).astype(np.float32), name="y") + z = tf.constant(np.random.random((1, 1, 1, 1)).astype(np.float32), name="z") + op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") + op2 = tf.nn.conv2d(input=op, filters=z, strides=[1, 1, 1, 1], padding="VALID", name="op2_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op2_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op2_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") return graph class TestSigoptTuningStrategy(unittest.TestCase): - @classmethod def setUpClass(self): - sigopt_api_token = os.getenv('SIGOPT_API_TOKEN') - sigopt_project_id = os.getenv('SIGOPT_PROJECT_ID') + sigopt_api_token = os.getenv("SIGOPT_API_TOKEN") + sigopt_project_id = os.getenv("SIGOPT_PROJECT_ID") self.constant_graph = build_fake_model() @classmethod def tearDownClass(self): - shutil.rmtree('saved', ignore_errors=True) - - 
@unittest.skipIf(CONDITION , "missing the env variables 'SIGOPT_API_TOKEN' or 'SIGOPT_PROJECT_ID'") + shutil.rmtree("saved", ignore_errors=True) + + @unittest.skipIf(CONDITION, "missing the env variables 'SIGOPT_API_TOKEN' or 'SIGOPT_PROJECT_ID'") def test_run_sigopt_one_trial_new_api(self): - from neural_compressor.quantization import fit from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig, TuningCriterion - from neural_compressor.data import Datasets, DATALOADERS - + from neural_compressor.data import DATALOADERS, Datasets + from neural_compressor.quantization import fit + # dataset and dataloader dataset = Datasets("tensorflow")["dummy"](((100, 3, 3, 1))) dataloader = DATALOADERS["tensorflow"](dataset) - + # tuning and accuracy criterion - accuracy_criterion = AccuracyCriterion(criterion='relative') - strategy_kwargs = {'sigopt_api_token': 'sigopt_api_token_test', - 'sigopt_project_id': 'sigopt_project_id_test', - 'sigopt_experiment_name': 'nc-tune'} - tuning_criterion = TuningCriterion(strategy='sigopt', strategy_kwargs=strategy_kwargs, max_trials=3) - conf = PostTrainingQuantConfig(quant_level=1, - approach="static", - tuning_criterion=tuning_criterion, - accuracy_criterion=accuracy_criterion) + accuracy_criterion = AccuracyCriterion(criterion="relative") + strategy_kwargs = { + "sigopt_api_token": "sigopt_api_token_test", + "sigopt_project_id": "sigopt_project_id_test", + "sigopt_experiment_name": "nc-tune", + } + tuning_criterion = TuningCriterion(strategy="sigopt", strategy_kwargs=strategy_kwargs, max_trials=3) + conf = PostTrainingQuantConfig( + quant_level=1, approach="static", tuning_criterion=tuning_criterion, accuracy_criterion=accuracy_criterion + ) self.assertEqual(conf.tuning_criterion.strategy_kwargs, strategy_kwargs) + def fake_eval(model): return 1 + q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader=dataloader, eval_func=fake_eval) - + def test_run_sigopt_one_trial_fake_token(self): - from neural_compressor.quantization import fit from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig, TuningCriterion - from neural_compressor.data import Datasets, DATALOADERS - + from neural_compressor.data import DATALOADERS, Datasets + from neural_compressor.quantization import fit + # dataset and dataloader dataset = Datasets("tensorflow")["dummy"](((100, 3, 3, 1))) dataloader = DATALOADERS["tensorflow"](dataset) - + # tuning and accuracy criterion - accuracy_criterion = AccuracyCriterion(criterion='relative') - strategy_kwargs = {'sigopt_api_token': 'sigopt_api_token_test', - 'sigopt_project_id': 'sigopt_project_id_test', - 'sigopt_experiment_name': 'nc-tune'} - tuning_criterion = TuningCriterion(strategy='sigopt', strategy_kwargs=strategy_kwargs, max_trials=3) - conf = PostTrainingQuantConfig(quant_level=1, - approach="static", - tuning_criterion=tuning_criterion, - accuracy_criterion=accuracy_criterion) + accuracy_criterion = AccuracyCriterion(criterion="relative") + strategy_kwargs = { + "sigopt_api_token": "sigopt_api_token_test", + "sigopt_project_id": "sigopt_project_id_test", + "sigopt_experiment_name": "nc-tune", + } + tuning_criterion = TuningCriterion(strategy="sigopt", strategy_kwargs=strategy_kwargs, max_trials=3) + conf = PostTrainingQuantConfig( + quant_level=1, approach="static", tuning_criterion=tuning_criterion, accuracy_criterion=accuracy_criterion + ) self.assertEqual(conf.tuning_criterion.strategy_kwargs, strategy_kwargs) + def fake_eval(model): return 1 + q_model = 
fit(model=self.constant_graph, conf=conf, calib_dataloader=dataloader, eval_func=fake_eval) diff --git a/test/strategy/test_sigopt_1.x.py b/test/strategy/test_sigopt_1.x.py index 4a2e077dbfd..5b7f643358f 100644 --- a/test/strategy/test_sigopt_1.x.py +++ b/test/strategy/test_sigopt_1.x.py @@ -1,16 +1,19 @@ -"""Tests for quantization""" -import numpy as np -import unittest -import shutil +"""Tests for quantization.""" import os +import shutil +import unittest + +import numpy as np import yaml -if os.getenv('SIGOPT_API_TOKEN') is None or os.getenv('SIGOPT_PROJECT_ID') is None: + +if os.getenv("SIGOPT_API_TOKEN") is None or os.getenv("SIGOPT_PROJECT_ID") is None: CONDITION = False else: CONDITION = False -def build_fake_yaml(sigopt_api_token,sigopt_project_id): - fake_yaml = ''' + +def build_fake_yaml(sigopt_api_token, sigopt_project_id): + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -31,14 +34,17 @@ def build_fake_yaml(sigopt_api_token,sigopt_project_id): relative: 0.01 workspace: path: saved - '''.format(sigopt_api_token, sigopt_project_id) + """.format( + sigopt_api_token, sigopt_project_id + ) y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: + yaml.dump(y, f) f.close() -def build_fake_yaml2(sigopt_api_token,sigopt_project_id): - fake_yaml = ''' + +def build_fake_yaml2(sigopt_api_token, sigopt_project_id): + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -61,81 +67,90 @@ def build_fake_yaml2(sigopt_api_token,sigopt_project_id): relative: -0.01 workspace: path: saved - '''.format(sigopt_api_token, sigopt_project_id) + """.format( + sigopt_api_token, sigopt_project_id + ) y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml2.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) + with open("fake_yaml2.yaml", "w", encoding="utf-8") as f: + yaml.dump(y, f) f.close() + def build_fake_model(): import tensorflow as tf + try: graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') - y = tf.constant(np.random.random((2,2,1,1)).astype(np.float32), name='y') - z = tf.constant(np.random.random((1,1,1,1)).astype(np.float32), name='z') - op = tf.nn.conv2d(input=tf.nn.relu(x), filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') - op2 = tf.nn.conv2d(input=tf.nn.relu(op), filters=z, strides=[1,1,1,1], padding='VALID', name='op2_to_store') + x = tf.compat.v1.placeholder(tf.float32, shape=(1, 3, 3, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)).astype(np.float32), name="y") + z = tf.constant(np.random.random((1, 1, 1, 1)).astype(np.float32), name="z") + op = tf.nn.conv2d(input=tf.nn.relu(x), filters=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") + op2 = tf.nn.conv2d( + input=tf.nn.relu(op), filters=z, strides=[1, 1, 1, 1], padding="VALID", name="op2_to_store" + ) sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op2_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op2_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") except: graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() 
with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') - y = tf.constant(np.random.random((2,2,1,1)).astype(np.float32), name='y') - z = tf.constant(np.random.random((1,1,1,1)).astype(np.float32), name='z') - op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') - op2 = tf.nn.conv2d(input=op, filters=z, strides=[1,1,1,1], padding='VALID', name='op2_to_store') + x = tf.compat.v1.placeholder(tf.float32, shape=(1, 3, 3, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)).astype(np.float32), name="y") + z = tf.constant(np.random.random((1, 1, 1, 1)).astype(np.float32), name="z") + op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") + op2 = tf.nn.conv2d(input=op, filters=z, strides=[1, 1, 1, 1], padding="VALID", name="op2_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op2_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op2_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") return graph -@unittest.skipIf(CONDITION , "missing the env variables 'SIGOPT_API_TOKEN' or 'SIGOPT_PROJECT_ID'") -class TestSigoptTuningStrategy(unittest.TestCase): +@unittest.skipIf(CONDITION, "missing the env variables 'SIGOPT_API_TOKEN' or 'SIGOPT_PROJECT_ID'") +class TestSigoptTuningStrategy(unittest.TestCase): @classmethod def setUpClass(self): - sigopt_api_token = os.getenv('SIGOPT_API_TOKEN') - sigopt_project_id = os.getenv('SIGOPT_PROJECT_ID') + sigopt_api_token = os.getenv("SIGOPT_API_TOKEN") + sigopt_project_id = os.getenv("SIGOPT_PROJECT_ID") self.constant_graph = build_fake_model() - build_fake_yaml(sigopt_api_token,sigopt_project_id) - build_fake_yaml2(sigopt_api_token,sigopt_project_id) + build_fake_yaml(sigopt_api_token, sigopt_project_id) + build_fake_yaml2(sigopt_api_token, sigopt_project_id) @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') - os.remove('fake_yaml2.yaml') - shutil.rmtree('saved', ignore_errors=True) + os.remove("fake_yaml.yaml") + os.remove("fake_yaml2.yaml") + shutil.rmtree("saved", ignore_errors=True) def test_run_basic_one_trial(self): from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", (100, 3, 3, 1), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph quantizer.fit() - def test_run_basic_max_trials(self): from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml2.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + quantizer = Quantization("fake_yaml2.yaml") + dataset = quantizer.dataset("dummy", (100, 3, 3, 1), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph diff --git a/test/strategy/test_tpe.py b/test/strategy/test_tpe.py index 9c8e4e6c1c0..b73b58e46d8 100644 --- a/test/strategy/test_tpe.py +++ b/test/strategy/test_tpe.py 
@@ -1,45 +1,49 @@ -"""Tests for quantization""" -import numpy as np -import unittest +"""Tests for quantization.""" import os import shutil +import unittest + +import numpy as np def build_fake_model(): import tensorflow as tf + try: graph = tf.Graph() graph_def = tf.GraphDef() with tf.Session() as sess: - x = tf.placeholder(tf.float32, shape=(1,3,3,1), name='x') - y = tf.constant(np.random.random((2,2,1,1)), name='y', dtype=tf.float32) - op = tf.nn.conv2d(input=x, filter=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + x = tf.placeholder(tf.float32, shape=(1, 3, 3, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)), name="y", dtype=tf.float32) + op = tf.nn.conv2d(input=x, filter=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.global_variables_initializer()) - constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ["op_to_store"]) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") except: graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') - y = tf.compat.v1.constant(np.random.random((2,2,1,1)), name='y', dtype=tf.float32) - op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + x = tf.compat.v1.placeholder(tf.float32, shape=(1, 3, 3, 1), name="x") + y = tf.compat.v1.constant(np.random.random((2, 2, 1, 1)), name="y", dtype=tf.float32) + op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") return graph -class TestTpeStrategy(unittest.TestCase): +class TestTpeStrategy(unittest.TestCase): @classmethod def setUpClass(self): self.constant_graph = build_fake_model() @@ -49,47 +53,50 @@ def tearDownClass(self): shutil.rmtree("saved", ignore_errors=True) def test_run_tpe_one_trial(self): + from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig, TuningCriterion + from neural_compressor.data import DATALOADERS, Datasets from neural_compressor.quantization import fit - from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion - from neural_compressor.data import Datasets, DATALOADERS # dataset and dataloader dataset = Datasets("tensorflow")["dummy"]((100, 3, 3, 1), label=True) dataloader = DATALOADERS["tensorflow"](dataset) # tuning and accuracy criterion - tune_cri = TuningCriterion(strategy='tpe', max_trials=200) + tune_cri = TuningCriterion(strategy="tpe", max_trials=200) acc_cri = AccuracyCriterion(tolerable_loss=0.01) + def eval_func(model): return 1 + conf = PostTrainingQuantConfig(quant_level=1, tuning_criterion=tune_cri, accuracy_criterion=acc_cri) - q_model = fit(model=self.constant_graph, - conf=conf, - calib_dataloader=dataloader, - eval_func=eval_func) + q_model = 
fit(model=self.constant_graph, conf=conf, calib_dataloader=dataloader, eval_func=eval_func) def test_run_tpe_max_trials(self): + from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig, TuningCriterion + from neural_compressor.data import DATALOADERS, Datasets from neural_compressor.quantization import fit - from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion - from neural_compressor.data import Datasets, DATALOADERS # dataset and dataloader dataset = Datasets("tensorflow")["dummy"]((100, 3, 3, 1), label=True) dataloader = DATALOADERS["tensorflow"](dataset) # tuning and accuracy criterion - tune_cri = TuningCriterion(strategy='tpe', max_trials=5) + tune_cri = TuningCriterion(strategy="tpe", max_trials=5) acc_cri = AccuracyCriterion(tolerable_loss=0.01) from neural_compressor.metric import METRICS - metrics = METRICS('tensorflow') - top1 = metrics['topk']() + + metrics = METRICS("tensorflow") + top1 = metrics["topk"]() conf = PostTrainingQuantConfig(quant_level=1, tuning_criterion=tune_cri, accuracy_criterion=acc_cri) - q_model = fit(model=self.constant_graph, - conf=conf, - calib_dataloader=dataloader, - eval_dataloader=dataloader, - eval_metric=top1) + q_model = fit( + model=self.constant_graph, + conf=conf, + calib_dataloader=dataloader, + eval_dataloader=dataloader, + eval_metric=top1, + ) + if __name__ == "__main__": unittest.main() diff --git a/test/strategy/test_tpe_1.x.py b/test/strategy/test_tpe_1.x.py index c6b921a4b31..17de380cc72 100644 --- a/test/strategy/test_tpe_1.x.py +++ b/test/strategy/test_tpe_1.x.py @@ -1,12 +1,14 @@ -"""Tests for quantization""" -import numpy as np -import unittest +"""Tests for quantization.""" import os import shutil +import unittest + +import numpy as np import yaml + def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -24,14 +26,15 @@ def build_fake_yaml(): relative: 0.01 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: + yaml.dump(y, f) f.close() + def build_fake_yaml2(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -51,47 +54,51 @@ def build_fake_yaml2(): relative: -0.01 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml2.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) + with open("fake_yaml2.yaml", "w", encoding="utf-8") as f: + yaml.dump(y, f) f.close() + def build_fake_model(): import tensorflow as tf + try: graph = tf.Graph() graph_def = tf.GraphDef() with tf.Session() as sess: - x = tf.placeholder(tf.float32, shape=(1,3,3,1), name='x') - y = tf.constant(np.random.random((2,2,1,1)), name='y', dtype=tf.float32) - op = tf.nn.conv2d(input=x, filter=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + x = tf.placeholder(tf.float32, shape=(1, 3, 3, 1), name="x") + y = tf.constant(np.random.random((2, 2, 1, 1)), name="y", dtype=tf.float32) + op = tf.nn.conv2d(input=x, filter=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.global_variables_initializer()) - constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ["op_to_store"]) graph_def.ParseFromString(constant_graph.SerializeToString()) with 
graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") except: graph = tf.Graph() graph_def = tf.compat.v1.GraphDef() with tf.compat.v1.Session() as sess: - x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') - y = tf.compat.v1.constant(np.random.random((2,2,1,1)), name='y', dtype=tf.float32) - op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + x = tf.compat.v1.placeholder(tf.float32, shape=(1, 3, 3, 1), name="x") + y = tf.compat.v1.constant(np.random.random((2, 2, 1, 1)), name="y", dtype=tf.float32) + op = tf.nn.conv2d(input=x, filters=y, strides=[1, 1, 1, 1], padding="VALID", name="op_to_store") sess.run(tf.compat.v1.global_variables_initializer()) - constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ["op_to_store"] + ) graph_def.ParseFromString(constant_graph.SerializeToString()) with graph.as_default(): - tf.import_graph_def(graph_def, name='') + tf.import_graph_def(graph_def, name="") return graph -class TestQuantization(unittest.TestCase): +class TestQuantization(unittest.TestCase): @classmethod def setUpClass(self): self.constant_graph = build_fake_model() @@ -101,8 +108,8 @@ def setUpClass(self): @classmethod def tearDownClass(self): try: - os.remove('fake_yaml.yaml') - os.remove('fake_yaml2.yaml') + os.remove("fake_yaml.yaml") + os.remove("fake_yaml2.yaml") shutil.rmtree("saved", ignore_errors=True) except: @@ -111,8 +118,8 @@ def tearDownClass(self): def test_run_tpe_one_trial(self): from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", (100, 3, 3, 1), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph @@ -121,19 +128,19 @@ def test_run_tpe_one_trial(self): def test_run_tpe_max_trials(self): from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml2.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + quantizer = Quantization("fake_yaml2.yaml") + dataset = quantizer.dataset("dummy", (100, 3, 3, 1), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph quantizer.fit() def test_loss_calculation(self): - from neural_compressor.experimental.contrib.strategy.tpe import TpeTuneStrategy from neural_compressor.experimental import Quantization, common + from neural_compressor.experimental.contrib.strategy.tpe import TpeTuneStrategy - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", (100, 3, 3, 1), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph @@ -149,5 +156,6 @@ def test_loss_calculation(self): tmp_val2 = testObject.calculate_loss(0.03, 2, testObject.loss_function_config) self.assertTrue(True if int(tmp_val2 - tmp_val) == 10 else False) + if __name__ == "__main__": unittest.main() diff 
--git a/test/strategy/test_tuning_sampler.py b/test/strategy/test_tuning_sampler.py index bd07570eec3..f9022301f05 100644 --- a/test/strategy/test_tuning_sampler.py +++ b/test/strategy/test_tuning_sampler.py @@ -1,183 +1,121 @@ +import unittest +from collections import OrderedDict +from copy import deepcopy + from neural_compressor.strategy.utils.tuning_sampler import ( - OpTypeWiseTuningSampler, + BlockFallbackTuningSampler, + FallbackTuningSampler, ModelWiseTuningSampler, + OpTypeWiseTuningSampler, OpWiseTuningSampler, - FallbackTuningSampler, - BlockFallbackTuningSampler - ) -from neural_compressor.strategy.utils.tuning_structs import OpTuningConfig +) from neural_compressor.strategy.utils.tuning_space import TuningSpace -from collections import OrderedDict -from copy import deepcopy -import unittest +from neural_compressor.strategy.utils.tuning_structs import OpTuningConfig op_cap = { - ('op_name1', 'op_type1'): [ - { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'static', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } - }, + ("op_name1", "op_type1"): [ { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'dynamic', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } + "activation": { + "dtype": ["int8"], + "quant_mode": "static", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, { - 'activation': - { - 'dtype': 'fp32' - }, - 'weight': - { - 'dtype': 'fp32' - } + "activation": { + "dtype": ["int8"], + "quant_mode": "dynamic", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, + {"activation": {"dtype": "fp32"}, "weight": {"dtype": "fp32"}}, ], - ('op_name2', 'op_type1'): [ + ("op_name2", "op_type1"): [ { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'static', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } + "activation": { + "dtype": ["int8"], + "quant_mode": "static", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'dynamic', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } - }, - { - 'activation': - { - 'dtype': 'fp32' - }, - 'weight': - { - 'dtype': 'fp32' - } + "activation": { + "dtype": ["int8"], + "quant_mode": "dynamic", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, + {"activation": {"dtype": "fp32"}, "weight": 
{"dtype": "fp32"}}, ], - ('op_name3', 'op_type2'): [ - { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'static', - 'scheme': ['sym'], - 'granularity': ['per_channel'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel'] - } - }, + ("op_name3", "op_type2"): [ { - 'activation': - { - 'dtype': 'fp32' - }, - 'weight': - { - 'dtype': 'fp32' - } + "activation": { + "dtype": ["int8"], + "quant_mode": "static", + "scheme": ["sym"], + "granularity": ["per_channel"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel"]}, }, + {"activation": {"dtype": "fp32"}, "weight": {"dtype": "fp32"}}, ], - ('op_name4', 'op_type3'): [ + ("op_name4", "op_type3"): [ { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'static', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - }, + "activation": { + "dtype": ["int8"], + "quant_mode": "static", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + }, }, { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'dynamic', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - }, + "activation": { + "dtype": ["int8"], + "quant_mode": "dynamic", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + }, }, { - 'activation': - { - 'dtype': 'fp32' - }, + "activation": {"dtype": "fp32"}, }, - ] + ], } -block_wise = [[('op_name2', 'op_type1'),('op_name4', 'op_type3')], - [('op_name4', 'op_type3'), ('op_name1', 'op_type1')]] - +block_wise = [ + [("op_name2", "op_type1"), ("op_name4", "op_type3")], + [("op_name4", "op_type3"), ("op_name1", "op_type1")], +] class TestTuningSampler(unittest.TestCase): def test_tuning_sampler(self): - capability = { - 'calib': {'calib_sampling_size': [1, 10, 50]}, - 'op': deepcopy(op_cap) - } + capability = {"calib": {"calib_sampling_size": [1, 10, 50]}, "op": deepcopy(op_cap)} conf = None tuning_space = TuningSpace(capability, conf) initial_op_tuning_cfg = {} for item in tuning_space.root_item.options: - if item.item_type == 'op': + if item.item_type == "op": op_name, op_type = item.name - initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space) + initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, "fp32", tuning_space) print(initial_op_tuning_cfg[item.name]) quant_mode_wise_items = OrderedDict() from neural_compressor.strategy.utils.constant import auto_query_order as query_order + pre_items = set() for quant_mode in query_order: items = tuning_space.query_items_by_quant_mode(quant_mode) @@ -192,27 +130,30 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): op_item_dtype_dict = OrderedDict() for quant_mode, quant_mode_items in quant_mode_wise_items.items(): initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) - - op_wise_tuning_sampler = OpWiseTuningSampler(deepcopy(tuning_space), [], [], - op_item_dtype_dict, initial_op_tuning_cfg) + + op_wise_tuning_sampler = OpWiseTuningSampler( + deepcopy(tuning_space), [], [], op_item_dtype_dict, initial_op_tuning_cfg + ) self.assertEqual(len(list(op_wise_tuning_sampler)), 128) - optype_wise_tuning_sampler = OpTypeWiseTuningSampler(deepcopy(tuning_space), [], [], - op_item_dtype_dict, initial_op_tuning_cfg) + optype_wise_tuning_sampler = OpTypeWiseTuningSampler( + deepcopy(tuning_space), [], [], op_item_dtype_dict, initial_op_tuning_cfg + ) cfg_lst = list(optype_wise_tuning_sampler) self.assertEqual(len(cfg_lst), 16) - model_wise_tuning_sampler = 
ModelWiseTuningSampler(deepcopy(tuning_space), [], [], - op_item_dtype_dict, initial_op_tuning_cfg) + model_wise_tuning_sampler = ModelWiseTuningSampler( + deepcopy(tuning_space), [], [], op_item_dtype_dict, initial_op_tuning_cfg + ) model_wise_pool = [] best_tune_cfg = None for tune_cfg in model_wise_tuning_sampler: best_tune_cfg = tune_cfg model_wise_pool.append(tune_cfg) self.assertEqual(len(model_wise_pool), 8) - + # fallback test - quant_ops = quant_mode_wise_items.get('static', []) - quant_ops += quant_mode_wise_items.get('dynamic', []) - target_dtype = 'fp32' + quant_ops = quant_mode_wise_items.get("static", []) + quant_ops += quant_mode_wise_items.get("dynamic", []) + target_dtype = "fp32" target_type_lst = tuning_space.query_items_by_quant_mode(target_dtype) fallback_items_lst = [item for item in quant_ops if item in target_type_lst] if fallback_items_lst: @@ -220,53 +161,60 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): fallback_items_name_lst = [item.name for item in fallback_items_lst] op_dtypes = OrderedDict(zip(fallback_items_name_lst[::-1], [target_dtype] * len(fallback_items_name_lst))) initial_op_tuning_cfg = deepcopy(best_tune_cfg) - fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=False) + fallback_sampler = FallbackTuningSampler( + tuning_space, + tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, + accumulate=False, + ) fallback_cnt = [] fp32_lst = [] for op_cfgs in fallback_sampler: cnt = 0 for op_name, op_cfg in op_cfgs.items(): op_state = op_cfg.get_state() - if 'fp32' == op_state['activation']['dtype'] and\ - ('fp32' == op_state['weight']['dtype'] if 'weight' in op_state else True): + if "fp32" == op_state["activation"]["dtype"] and ( + "fp32" == op_state["weight"]["dtype"] if "weight" in op_state else True + ): cnt = cnt + 1 fp32_lst.append(op_name) fallback_cnt.append(cnt) self.assertListEqual(fallback_cnt, [1, 1, 1, 1]) self.assertListEqual(fp32_lst, fallback_items_name_lst[::-1]) - fallback_sampler_acc = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=True) + fallback_sampler_acc = FallbackTuningSampler( + tuning_space, + tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, + accumulate=True, + ) fallback_cnt = [] for op_cfgs in fallback_sampler_acc: cnt = 0 for op_name, op_cfg in op_cfgs.items(): op_state = op_cfg.get_state() - if 'fp32' == op_state['activation']['dtype'] and\ - ('fp32' == op_state['weight']['dtype'] if 'weight' in op_state else True): + if "fp32" == op_state["activation"]["dtype"] and ( + "fp32" == op_state["weight"]["dtype"] if "weight" in op_state else True + ): cnt = cnt + 1 fallback_cnt.append(cnt) self.assertListEqual(fallback_cnt, [2, 3, 4]) - + def test_block_sampler(self): - capability = { - 'calib': {'calib_sampling_size': [1, 10, 50]}, - 'op': deepcopy(op_cap), - 'block_wise': block_wise - } + capability = {"calib": {"calib_sampling_size": [1, 10, 50]}, "op": deepcopy(op_cap), "block_wise": block_wise} conf = None tuning_space = TuningSpace(capability, conf) initial_op_tuning_cfg = {} for item in tuning_space.root_item.options: - if item.item_type == 'op': + if item.item_type == "op": op_name, op_type = item.name - initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space) + 
initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, "fp32", tuning_space) print(initial_op_tuning_cfg[item.name]) quant_mode_wise_items = OrderedDict() from neural_compressor.strategy.utils.constant import auto_query_order as query_order + pre_items = set() for quant_mode in query_order: items = tuning_space.query_items_by_quant_mode(quant_mode) @@ -281,22 +229,25 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): op_item_dtype_dict = OrderedDict() for quant_mode, quant_mode_items in quant_mode_wise_items.items(): initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) - - op_block_lst = capability.get('block_wise', []) + + op_block_lst = capability.get("block_wise", []) if op_block_lst: # Fallback block by block - target_type_lst = set(tuning_space.query_items_by_quant_mode('fp32')) + target_type_lst = set(tuning_space.query_items_by_quant_mode("fp32")) fallback_items_lst = [item for item in target_type_lst] op_block_fallback_lst = [] for op_block_index, op_block in enumerate(op_block_lst): op_block_fallback_lst.append(op_block) - block_fallback_sampler = BlockFallbackTuningSampler(tuning_space=tuning_space, - tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_block_lst=op_block_fallback_lst, - accumulate=False, - target_dtype='fp32') + block_fallback_sampler = BlockFallbackTuningSampler( + tuning_space=tuning_space, + tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_block_lst=op_block_fallback_lst, + accumulate=False, + target_dtype="fp32", + ) self.assertEqual(2, len(list(block_fallback_sampler))) - + + if __name__ == "__main__": unittest.main() diff --git a/test/strategy/test_tuning_sampler_1.x.py b/test/strategy/test_tuning_sampler_1.x.py index 46adf411317..d17188c5b10 100644 --- a/test/strategy/test_tuning_sampler_1.x.py +++ b/test/strategy/test_tuning_sampler_1.x.py @@ -1,178 +1,114 @@ +import unittest +from collections import OrderedDict +from copy import deepcopy + from neural_compressor.experimental.strategy.utils.tuning_sampler import ( - OpTypeWiseTuningSampler, + FallbackTuningSampler, ModelWiseTuningSampler, + OpTypeWiseTuningSampler, OpWiseTuningSampler, - FallbackTuningSampler - ) -from neural_compressor.experimental.strategy.utils.tuning_structs import OpTuningConfig +) from neural_compressor.experimental.strategy.utils.tuning_space import TuningSpace -from collections import OrderedDict -from copy import deepcopy -import unittest - +from neural_compressor.experimental.strategy.utils.tuning_structs import OpTuningConfig op_cap = { - ('op_name1', 'op_type1'): [ + ("op_name1", "op_type1"): [ { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'static', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } + "activation": { + "dtype": ["int8"], + "quant_mode": "static", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'dynamic', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } - }, - { - 'activation': - { - 'dtype': 'fp32' - }, - 
'weight': - { - 'dtype': 'fp32' - } + "activation": { + "dtype": ["int8"], + "quant_mode": "dynamic", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, + {"activation": {"dtype": "fp32"}, "weight": {"dtype": "fp32"}}, ], - ('op_name2', 'op_type1'): [ - { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'static', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } - }, + ("op_name2", "op_type1"): [ { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'dynamic', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } + "activation": { + "dtype": ["int8"], + "quant_mode": "static", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, { - 'activation': - { - 'dtype': 'fp32' - }, - 'weight': - { - 'dtype': 'fp32' - } + "activation": { + "dtype": ["int8"], + "quant_mode": "dynamic", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, + {"activation": {"dtype": "fp32"}, "weight": {"dtype": "fp32"}}, ], - ('op_name3', 'op_type2'): [ - { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'static', - 'scheme': ['sym'], - 'granularity': ['per_channel'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel'] - } - }, + ("op_name3", "op_type2"): [ { - 'activation': - { - 'dtype': 'fp32' - }, - 'weight': - { - 'dtype': 'fp32' - } + "activation": { + "dtype": ["int8"], + "quant_mode": "static", + "scheme": ["sym"], + "granularity": ["per_channel"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel"]}, }, + {"activation": {"dtype": "fp32"}, "weight": {"dtype": "fp32"}}, ], - ('op_name4', 'op_type3'): [ + ("op_name4", "op_type3"): [ { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'static', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - }, + "activation": { + "dtype": ["int8"], + "quant_mode": "static", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + }, }, { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'dynamic', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - }, + "activation": { + "dtype": ["int8"], + "quant_mode": "dynamic", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + }, }, { - 'activation': - { - 'dtype': 'fp32' - }, + "activation": {"dtype": "fp32"}, }, - ] + ], } class TestTuningSampler(unittest.TestCase): def test_tuning_sampler(self): - capability = { - 'calib': {'calib_sampling_size': [1, 10, 50]}, - 'op': op_cap - } + capability = {"calib": {"calib_sampling_size": [1, 10, 50]}, "op": op_cap} conf = None tuning_space = TuningSpace(capability, conf) initial_op_tuning_cfg = {} for item in tuning_space.root_item.options: - if item.item_type == 'op': + if item.item_type == "op": op_name, op_type = item.name - 
initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space) + initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, "fp32", tuning_space) print(initial_op_tuning_cfg[item.name]) quant_mode_wise_items = OrderedDict() from neural_compressor.experimental.strategy.utils.constant import auto_query_order as query_order + pre_items = set() for quant_mode in query_order: items = tuning_space.query_items_by_quant_mode(quant_mode) @@ -187,27 +123,30 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): op_item_dtype_dict = OrderedDict() for quant_mode, quant_mode_items in quant_mode_wise_items.items(): initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) - - op_wise_tuning_sampler = OpWiseTuningSampler(deepcopy(tuning_space), [], [], - op_item_dtype_dict, initial_op_tuning_cfg) + + op_wise_tuning_sampler = OpWiseTuningSampler( + deepcopy(tuning_space), [], [], op_item_dtype_dict, initial_op_tuning_cfg + ) self.assertEqual(len(list(op_wise_tuning_sampler)), 128) - optype_wise_tuning_sampler = OpTypeWiseTuningSampler(deepcopy(tuning_space), [], [], - op_item_dtype_dict, initial_op_tuning_cfg) + optype_wise_tuning_sampler = OpTypeWiseTuningSampler( + deepcopy(tuning_space), [], [], op_item_dtype_dict, initial_op_tuning_cfg + ) cfg_lst = list(optype_wise_tuning_sampler) self.assertEqual(len(cfg_lst), 16) - model_wise_tuning_sampler = ModelWiseTuningSampler(deepcopy(tuning_space), [], [], - op_item_dtype_dict, initial_op_tuning_cfg) + model_wise_tuning_sampler = ModelWiseTuningSampler( + deepcopy(tuning_space), [], [], op_item_dtype_dict, initial_op_tuning_cfg + ) model_wise_pool = [] best_tune_cfg = None for tune_cfg in model_wise_tuning_sampler: best_tune_cfg = tune_cfg model_wise_pool.append(tune_cfg) self.assertEqual(len(model_wise_pool), 8) - + # fallback test - quant_ops = quant_mode_wise_items.get('static', []) - quant_ops += quant_mode_wise_items.get('dynamic', []) - target_dtype = 'fp32' + quant_ops = quant_mode_wise_items.get("static", []) + quant_ops += quant_mode_wise_items.get("dynamic", []) + target_dtype = "fp32" target_type_lst = tuning_space.query_items_by_quant_mode(target_dtype) fallback_items_lst = [item for item in quant_ops if item in target_type_lst] if fallback_items_lst: @@ -215,36 +154,47 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): fallback_items_name_lst = [item.name for item in fallback_items_lst] op_dtypes = OrderedDict(zip(fallback_items_name_lst[::-1], [target_dtype] * len(fallback_items_name_lst))) initial_op_tuning_cfg = deepcopy(best_tune_cfg) - fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=False) + fallback_sampler = FallbackTuningSampler( + tuning_space, + tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, + accumulate=False, + ) fallback_cnt = [] fp32_lst = [] for op_cfgs in fallback_sampler: cnt = 0 for op_name, op_cfg in op_cfgs.items(): op_state = op_cfg.get_state() - if 'fp32' == op_state['activation']['dtype'] and\ - ('fp32' == op_state['weight']['dtype'] if 'weight' in op_state else True): + if "fp32" == op_state["activation"]["dtype"] and ( + "fp32" == op_state["weight"]["dtype"] if "weight" in op_state else True + ): cnt = cnt + 1 fp32_lst.append(op_name) fallback_cnt.append(cnt) self.assertListEqual(fallback_cnt, [1, 1, 1, 1]) self.assertListEqual(fp32_lst, fallback_items_name_lst[::-1]) 
- fallback_sampler_acc = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=True) + fallback_sampler_acc = FallbackTuningSampler( + tuning_space, + tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, + accumulate=True, + ) fallback_cnt = [] for op_cfgs in fallback_sampler_acc: cnt = 0 for op_name, op_cfg in op_cfgs.items(): op_state = op_cfg.get_state() - if 'fp32' == op_state['activation']['dtype'] and\ - ('fp32' == op_state['weight']['dtype'] if 'weight' in op_state else True): + if "fp32" == op_state["activation"]["dtype"] and ( + "fp32" == op_state["weight"]["dtype"] if "weight" in op_state else True + ): cnt = cnt + 1 fallback_cnt.append(cnt) self.assertListEqual(fallback_cnt, [2, 3, 4]) - + + if __name__ == "__main__": unittest.main() diff --git a/test/strategy/test_tuning_space.py b/test/strategy/test_tuning_space.py index 310fb5f3988..9c4dbff6ecb 100644 --- a/test/strategy/test_tuning_space.py +++ b/test/strategy/test_tuning_space.py @@ -1,272 +1,203 @@ -from neural_compressor.strategy.utils.tuning_space import TuningItem, TuningSpace +import unittest +from copy import deepcopy + from neural_compressor.conf.dotdict import DotDict +from neural_compressor.strategy.utils.tuning_space import TuningItem, TuningSpace from neural_compressor.utils import logger -from copy import deepcopy -import unittest op_cap = { # op have both weight and activation and support static/dynamic/fp32 - ('op_name1', 'op_type1'): [ + ("op_name1", "op_type1"): [ { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'static', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } + "activation": { + "dtype": ["int8"], + "quant_mode": "static", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'dynamic', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } - }, - { - 'activation': - { - 'dtype': 'fp32' - }, - 'weight': - { - 'dtype': 'fp32' - } + "activation": { + "dtype": ["int8"], + "quant_mode": "dynamic", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, + {"activation": {"dtype": "fp32"}, "weight": {"dtype": "fp32"}}, ], # op have both weight and activation and support static/dynamic/fp32 - ('op_name2', 'op_type1'): [ - { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'static', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } - }, + ("op_name2", "op_type1"): [ { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'dynamic', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 
'per_tensor'] - } + "activation": { + "dtype": ["int8"], + "quant_mode": "static", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, { - 'activation': - { - 'dtype': 'fp32' - }, - 'weight': - { - 'dtype': 'fp32' - } + "activation": { + "dtype": ["int8"], + "quant_mode": "dynamic", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, + {"activation": {"dtype": "fp32"}, "weight": {"dtype": "fp32"}}, ], # op have both weight and activation and support static/fp32 - ('op_name3', 'op_type2'): [ - { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'static', - 'scheme': ['sym'], - 'granularity': ['per_channel'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel'], - 'algorithm': ['minmax', 'kl'] - } - }, + ("op_name3", "op_type2"): [ { - 'activation': - { - 'dtype': 'fp32' - }, - 'weight': - { - 'dtype': 'fp32' - } + "activation": { + "dtype": ["int8"], + "quant_mode": "static", + "scheme": ["sym"], + "granularity": ["per_channel"], + "algorithm": ["minmax", "kl"], + }, + "weight": { + "dtype": ["int8"], + "scheme": ["sym"], + "granularity": ["per_channel"], + "algorithm": ["minmax", "kl"], + }, }, + {"activation": {"dtype": "fp32"}, "weight": {"dtype": "fp32"}}, ], # op only have activation and support dynamic/fp32 - ('op_name4', 'op_type3'): [ + ("op_name4", "op_type3"): [ { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'static', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, + "activation": { + "dtype": ["int8"], + "quant_mode": "static", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, }, { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'dynamic', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax'] - }, + "activation": { + "dtype": ["int8"], + "quant_mode": "dynamic", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax"], + }, }, { - 'activation': - { - 'dtype': 'fp32' - }, + "activation": {"dtype": "fp32"}, }, - ] + ], } op_cap2 = { # The granularity of op activation do not support per_tensor. 
- ('op_name4', 'op_type1'): [ + ("op_name4", "op_type1"): [ { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'static', - 'scheme': ['sym'], - 'granularity': ['per_channel'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } - },] + "activation": { + "dtype": ["int8"], + "quant_mode": "static", + "scheme": ["sym"], + "granularity": ["per_channel"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, + }, + ] } class TestTuningSpace(unittest.TestCase): def setUp(self) -> None: - self.capability = { - 'calib': {'calib_sampling_size': [1, 10, 50]}, - 'op': deepcopy(op_cap) - } + self.capability = {"calib": {"calib_sampling_size": [1, 10, 50]}, "op": deepcopy(op_cap)} # for optype1,'algorithm': ['minmax', 'kl'] -> ['minmax'] self.optype_wise_user_config = { - 'op_type1': { - 'activation': { - 'algorithm': ['minmax'], - 'granularity': ['per_channel', 'per_tensor'], + "op_type1": { + "activation": { + "algorithm": ["minmax"], + "granularity": ["per_channel", "per_tensor"], } } } # fallback op_name4 self.op_wise_user_config = { - 'op_name4': { - 'activation': { - 'dtype': ['fp32'], + "op_name4": { + "activation": { + "dtype": ["fp32"], } } } self.op_wise_user_config2 = { - 'op_name4': { - 'activation': { - 'granularity': ['per_tensor'], + "op_name4": { + "activation": { + "granularity": ["per_tensor"], } } } - self.capability2 = { - 'calib': {'calib_sampling_size': [1, 10]}, - 'op': deepcopy(op_cap2) - } + self.capability2 = {"calib": {"calib_sampling_size": [1, 10]}, "op": deepcopy(op_cap2)} def test_tuning_space_merge_op_wise_not_exist(self): # op-wise conf = { - 'op_type_dict': deepcopy(self.op_wise_user_config2), + "op_type_dict": deepcopy(self.op_wise_user_config2), } conf = DotDict(conf) tuning_space2 = TuningSpace(deepcopy(self.capability2), deepcopy(conf)) logger.debug(tuning_space2.root_item.get_details()) - def test_tuning_space_creation(self): conf = None # Test the creation of tuning space tuning_space = TuningSpace(self.capability, conf) logger.debug(tuning_space.root_item.get_details()) # ops supported static - static_items = tuning_space.query_items_by_quant_mode('static') + static_items = tuning_space.query_items_by_quant_mode("static") static_items_name = [item.name for item in static_items] self.assertEqual(set(static_items_name), set(op_cap.keys())) # ops supported dynamic - dynamic_items = tuning_space.query_items_by_quant_mode('dynamic') + dynamic_items = tuning_space.query_items_by_quant_mode("dynamic") dynamic_items_name = [item.name for item in dynamic_items] all_items_name = list(op_cap.keys()) - all_items_name.remove(('op_name3', 'op_type2')) + all_items_name.remove(("op_name3", "op_type2")) self.assertEqual(set(dynamic_items_name), set(all_items_name)) # ops supported fp32 - fp32_items = tuning_space.query_items_by_quant_mode('fp32') + fp32_items = tuning_space.query_items_by_quant_mode("fp32") fp32_items_name = [item.name for item in fp32_items] self.assertEqual(set(fp32_items_name), set(op_cap.keys())) # all optype - self.assertEqual(list(tuning_space.op_type_wise_items.keys()), ['op_type1', 'op_type2', 'op_type3']) + self.assertEqual(list(tuning_space.op_type_wise_items.keys()), ["op_type1", "op_type2", "op_type3"]) def test_tuning_space_merge_optype_wise(self): # optype-wise conf = { - 'op_type_dict': self.optype_wise_user_config, + "op_type_dict": self.optype_wise_user_config, } 
conf = DotDict(conf) tuning_space2 = TuningSpace(deepcopy(self.capability), deepcopy(conf)) logger.debug(tuning_space2.root_item.get_details()) found_act_algo_kl_optype1 = False found_act_algo_kl_others = False - for quant_mode in ['static', 'dynamic']: + for quant_mode in ["static", "dynamic"]: for op_item in tuning_space2.query_items_by_quant_mode(quant_mode): for path in tuning_space2.ops_path_set[op_item.name]: mode_item = tuning_space2.query_quant_mode_item_by_full_path(op_item.name, path) - act_algo_item = mode_item.get_option_by_name(('activation', 'algorithm')) - if act_algo_item and op_item.name[1] == 'op_type1' and 'kl' in act_algo_item.options: + act_algo_item = mode_item.get_option_by_name(("activation", "algorithm")) + if act_algo_item and op_item.name[1] == "op_type1" and "kl" in act_algo_item.options: found_act_algo_kl_optype1 = True break - if act_algo_item and op_item.name[1] != 'op_type1' and 'kl' in act_algo_item.options: + if act_algo_item and op_item.name[1] != "op_type1" and "kl" in act_algo_item.options: found_act_algo_kl_others = True self.assertFalse(found_act_algo_kl_optype1) self.assertTrue(found_act_algo_kl_others) @@ -274,22 +205,21 @@ def test_tuning_space_merge_optype_wise(self): def test_tuning_space_merge_op_wise(self): # op-wise conf = { - 'op_name_dict': self.op_wise_user_config, - + "op_name_dict": self.op_wise_user_config, } conf = DotDict(conf) tuning_space2 = TuningSpace(deepcopy(self.capability), deepcopy(conf)) logger.debug(tuning_space2.root_item.get_details()) found_quant_op_name4 = False found_fp32_op_name4 = False - for quant_mode in ['static', 'dynamic']: + for quant_mode in ["static", "dynamic"]: for item in tuning_space2.query_items_by_quant_mode(quant_mode): - if 'op_name4' in item.name: + if "op_name4" in item.name: found_quant_op_name4 = True break - for item in tuning_space2.query_items_by_quant_mode('fp32'): - if 'op_name4' in item.name: + for item in tuning_space2.query_items_by_quant_mode("fp32"): + if "op_name4" in item.name: found_fp32_op_name4 = True break self.assertFalse(found_quant_op_name4) diff --git a/test/strategy/test_tuning_space_1.x.py b/test/strategy/test_tuning_space_1.x.py index 28086048758..ab7fb6ee3c6 100644 --- a/test/strategy/test_tuning_space_1.x.py +++ b/test/strategy/test_tuning_space_1.x.py @@ -1,232 +1,164 @@ -from neural_compressor.experimental.strategy.utils.tuning_space import TuningSpace +import unittest +from copy import deepcopy + from neural_compressor.conf.dotdict import DotDict +from neural_compressor.experimental.strategy.utils.tuning_space import TuningSpace from neural_compressor.utils import logger -from copy import deepcopy -import unittest op_cap = { # op have both weight and activation and support static/dynamic/fp32 - ('op_name1', 'op_type1'): [ + ("op_name1", "op_type1"): [ { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'static', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } + "activation": { + "dtype": ["int8"], + "quant_mode": "static", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'dynamic', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, 
- 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } - }, - { - 'activation': - { - 'dtype': 'fp32' - }, - 'weight': - { - 'dtype': 'fp32' - } + "activation": { + "dtype": ["int8"], + "quant_mode": "dynamic", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, + {"activation": {"dtype": "fp32"}, "weight": {"dtype": "fp32"}}, ], # op have both weight and activation and support static/dynamic/fp32 - ('op_name2', 'op_type1'): [ - { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'static', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } - }, + ("op_name2", "op_type1"): [ { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'dynamic', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } + "activation": { + "dtype": ["int8"], + "quant_mode": "static", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, { - 'activation': - { - 'dtype': 'fp32' - }, - 'weight': - { - 'dtype': 'fp32' - } + "activation": { + "dtype": ["int8"], + "quant_mode": "dynamic", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, + {"activation": {"dtype": "fp32"}, "weight": {"dtype": "fp32"}}, ], # op have both weight and activation and support static/fp32 - ('op_name3', 'op_type2'): [ + ("op_name3", "op_type2"): [ { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'static', - 'scheme': ['sym'], - 'granularity': ['per_channel'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel'], - 'algorithm': ['minmax', 'kl'] - } - }, - { - 'activation': - { - 'dtype': 'fp32' - }, - 'weight': - { - 'dtype': 'fp32' - } + "activation": { + "dtype": ["int8"], + "quant_mode": "static", + "scheme": ["sym"], + "granularity": ["per_channel"], + "algorithm": ["minmax", "kl"], + }, + "weight": { + "dtype": ["int8"], + "scheme": ["sym"], + "granularity": ["per_channel"], + "algorithm": ["minmax", "kl"], + }, }, + {"activation": {"dtype": "fp32"}, "weight": {"dtype": "fp32"}}, ], # op only have activation and support dynamic/fp32 - ('op_name4', 'op_type3'): [ + ("op_name4", "op_type3"): [ { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'static', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, + "activation": { + "dtype": ["int8"], + "quant_mode": "static", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, }, { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'dynamic', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax'] - }, + "activation": { + "dtype": ["int8"], + "quant_mode": "dynamic", + "scheme": ["sym"], + "granularity": ["per_channel", 
"per_tensor"], + "algorithm": ["minmax"], + }, }, { - 'activation': - { - 'dtype': 'fp32' - }, + "activation": {"dtype": "fp32"}, }, - ] + ], } op_cap2 = { # The granularity of op activation do not support per_tensor. - ('op_name4', 'op_type1'): [ + ("op_name4", "op_type1"): [ { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'static', - 'scheme': ['sym'], - 'granularity': ['per_channel'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } - },] + "activation": { + "dtype": ["int8"], + "quant_mode": "static", + "scheme": ["sym"], + "granularity": ["per_channel"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, + }, + ] } class TestTuningSampler(unittest.TestCase): def setUp(self) -> None: - self.capability = { - 'calib': {'calib_sampling_size': [1, 10, 50]}, - 'op': deepcopy(op_cap) - } + self.capability = {"calib": {"calib_sampling_size": [1, 10, 50]}, "op": deepcopy(op_cap)} # for optype1,'algorithm': ['minmax', 'kl'] -> ['minmax'] self.optype_wise_user_config = { - 'op_type1': { - 'activation': { - 'algorithm': ['minmax'], - 'granularity': ['per_channel', 'per_tensor'], + "op_type1": { + "activation": { + "algorithm": ["minmax"], + "granularity": ["per_channel", "per_tensor"], } } } self.model_wise_user_config = { - 'activation': { - 'granularity': ['per_channel'], + "activation": { + "granularity": ["per_channel"], } } # fallback op_name4 self.op_wise_user_config = { - 'op_name4': { - 'activation': { - 'dtype': ['fp32'], + "op_name4": { + "activation": { + "dtype": ["fp32"], } } } self.op_wise_user_config2 = { - 'op_name4': { - 'activation': { - 'granularity': ['per_tensor'], + "op_name4": { + "activation": { + "granularity": ["per_tensor"], } } } - - self.capability2 = { - 'calib': {'calib_sampling_size': [1, 10]}, - 'op': deepcopy(op_cap2) - } - + + self.capability2 = {"calib": {"calib_sampling_size": [1, 10]}, "op": deepcopy(op_cap2)} + def test_tuning_space_merge_op_wise_not_exist(self): # op-wise conf = { - 'usr_cfg': { - 'quantization': { - 'op_wise': deepcopy(self.op_wise_user_config2), + "usr_cfg": { + "quantization": { + "op_wise": deepcopy(self.op_wise_user_config2), } } } @@ -234,40 +166,36 @@ def test_tuning_space_merge_op_wise_not_exist(self): tuning_space2 = TuningSpace(deepcopy(self.capability2), deepcopy(conf)) logger.debug(tuning_space2.root_item.get_details()) - def test_tuning_space_creation(self): conf = None - # Test the creation of tuning space + # Test the creation of tuning space tuning_space = TuningSpace(self.capability, conf) logger.debug(tuning_space.root_item.get_details()) - # ops supported static - static_items = tuning_space.query_items_by_quant_mode('static') + # ops supported static + static_items = tuning_space.query_items_by_quant_mode("static") static_items_name = [item.name for item in static_items] self.assertEqual(set(static_items_name), set(op_cap.keys())) - # ops supported dynamic - dynamic_items = tuning_space.query_items_by_quant_mode('dynamic') + # ops supported dynamic + dynamic_items = tuning_space.query_items_by_quant_mode("dynamic") dynamic_items_name = [item.name for item in dynamic_items] all_items_name = list(op_cap.keys()) - all_items_name.remove(('op_name3', 'op_type2')) + all_items_name.remove(("op_name3", "op_type2")) self.assertEqual(set(dynamic_items_name), set(all_items_name)) - # ops supported fp32 - fp32_items = 
tuning_space.query_items_by_quant_mode('fp32') + # ops supported fp32 + fp32_items = tuning_space.query_items_by_quant_mode("fp32") fp32_items_name = [item.name for item in fp32_items] self.assertEqual(set(fp32_items_name), set(op_cap.keys())) # all optype - self.assertEqual(list(tuning_space.op_type_wise_items.keys()), ['op_type1', 'op_type2', 'op_type3']) + self.assertEqual(list(tuning_space.op_type_wise_items.keys()), ["op_type1", "op_type2", "op_type3"]) def test_tuning_space_merge_model_wise(self): - # Test merge with user config, model-wise, optype-wise, op-wise + # Test merge with user config, model-wise, optype-wise, op-wise # model-wise - self.capability = { - 'calib': {'calib_sampling_size': [1, 10, 50]}, - 'op': op_cap - } + self.capability = {"calib": {"calib_sampling_size": [1, 10, 50]}, "op": op_cap} conf = { - 'usr_cfg': { - 'quantization': { - 'model_wise': self.model_wise_user_config, + "usr_cfg": { + "quantization": { + "model_wise": self.model_wise_user_config, } } } @@ -275,12 +203,12 @@ def test_tuning_space_merge_model_wise(self): tuning_space2 = TuningSpace(deepcopy(self.capability), deepcopy(conf)) logger.debug(tuning_space2.root_item.get_details()) found_per_tensor = False - for quant_mode in ['static', 'dynamic']: + for quant_mode in ["static", "dynamic"]: for op_item in tuning_space2.query_items_by_quant_mode(quant_mode): for path in tuning_space2.ops_path_set[op_item.name]: mode_item = tuning_space2.query_quant_mode_item_by_full_path(op_item.name, path) - act_algo_item = mode_item.get_option_by_name(('activation', 'granularity')) - if act_algo_item and 'per_tensor' in act_algo_item.options: + act_algo_item = mode_item.get_option_by_name(("activation", "granularity")) + if act_algo_item and "per_tensor" in act_algo_item.options: found_per_tensor = True break self.assertFalse(found_per_tensor) @@ -288,9 +216,9 @@ def test_tuning_space_merge_model_wise(self): def test_tuning_space_merge_optype_wise(self): # optype-wise conf = { - 'usr_cfg': { - 'quantization': { - 'optype_wise': self.optype_wise_user_config, + "usr_cfg": { + "quantization": { + "optype_wise": self.optype_wise_user_config, } } } @@ -299,15 +227,15 @@ def test_tuning_space_merge_optype_wise(self): logger.debug(tuning_space2.root_item.get_details()) found_act_algo_kl_optype1 = False found_act_algo_kl_others = False - for quant_mode in ['static', 'dynamic']: + for quant_mode in ["static", "dynamic"]: for op_item in tuning_space2.query_items_by_quant_mode(quant_mode): for path in tuning_space2.ops_path_set[op_item.name]: mode_item = tuning_space2.query_quant_mode_item_by_full_path(op_item.name, path) - act_algo_item = mode_item.get_option_by_name(('activation', 'algorithm')) - if act_algo_item and op_item.name[1] == 'op_type1' and 'kl' in act_algo_item.options: + act_algo_item = mode_item.get_option_by_name(("activation", "algorithm")) + if act_algo_item and op_item.name[1] == "op_type1" and "kl" in act_algo_item.options: found_act_algo_kl_optype1 = True break - if act_algo_item and op_item.name[1] != 'op_type1' and 'kl' in act_algo_item.options: + if act_algo_item and op_item.name[1] != "op_type1" and "kl" in act_algo_item.options: found_act_algo_kl_others = True self.assertFalse(found_act_algo_kl_optype1) self.assertTrue(found_act_algo_kl_others) @@ -315,26 +243,25 @@ def test_tuning_space_merge_optype_wise(self): def test_tuning_space_merge_op_wise(self): # op-wise conf = { - 'usr_cfg': { - 'quantization': { - 'op_wise': self.op_wise_user_config, + "usr_cfg": { + "quantization": { + "op_wise": 
self.op_wise_user_config, } } - } conf = DotDict(conf) tuning_space2 = TuningSpace(deepcopy(self.capability), deepcopy(conf)) logger.debug(tuning_space2.root_item.get_details()) found_quant_op_name4 = False found_fp32_op_name4 = False - for quant_mode in ['static', 'dynamic']: + for quant_mode in ["static", "dynamic"]: for item in tuning_space2.query_items_by_quant_mode(quant_mode): - if 'op_name4' in item.name: + if "op_name4" in item.name: found_quant_op_name4 = True break - for item in tuning_space2.query_items_by_quant_mode('fp32'): - if 'op_name4' in item.name: + for item in tuning_space2.query_items_by_quant_mode("fp32"): + if "op_name4" in item.name: found_fp32_op_name4 = True break self.assertFalse(found_quant_op_name4) @@ -343,25 +270,24 @@ def test_tuning_space_merge_op_wise(self): def test_tuning_space_merge_model_wise_and_opty_wise(self): # Test mode-wise + optype-wise conf = { - 'usr_cfg': { - 'quantization': { - 'model_wise': self.model_wise_user_config, - 'optype_wise': self.optype_wise_user_config, + "usr_cfg": { + "quantization": { + "model_wise": self.model_wise_user_config, + "optype_wise": self.optype_wise_user_config, } } - } # the optype_wise config will overwrite the model-wise config conf = DotDict(conf) tuning_space2 = TuningSpace(deepcopy(self.capability), deepcopy(conf)) logger.debug(tuning_space2.root_item.get_details()) found_per_tensor = False - for quant_mode in ['static', 'dynamic']: + for quant_mode in ["static", "dynamic"]: for op_item in tuning_space2.query_items_by_quant_mode(quant_mode): for path in tuning_space2.ops_path_set[op_item.name]: mode_item = tuning_space2.query_quant_mode_item_by_full_path(op_item.name, path) - act_algo_item = mode_item.get_option_by_name(('activation', 'granularity')) - if act_algo_item and 'per_tensor' in act_algo_item.options: + act_algo_item = mode_item.get_option_by_name(("activation", "granularity")) + if act_algo_item and "per_tensor" in act_algo_item.options: found_per_tensor = True break self.assertTrue(found_per_tensor) diff --git a/test/strategy/test_tuning_space_v2.py b/test/strategy/test_tuning_space_v2.py index 6cd4b86dc10..cafcd6221c0 100644 --- a/test/strategy/test_tuning_space_v2.py +++ b/test/strategy/test_tuning_space_v2.py @@ -1,288 +1,151 @@ -from neural_compressor.strategy.utils.tuning_space import TuningItem, TuningSpace +import unittest +from copy import deepcopy + from neural_compressor.conf.dotdict import DotDict +from neural_compressor.strategy.utils.tuning_space import TuningItem, TuningSpace from neural_compressor.utils import logger -from copy import deepcopy -import unittest op_cap = { # op1 have both weight and activation and support static/dynamic/fp32/b16 - ('op_name1', 'op_type1'): [ - { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'static', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } - }, - { - 'activation': - { - 'dtype': ['int4'], - 'quant_mode': 'static', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['uint4'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } - }, - { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'dynamic', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 
'granularity': ['per_channel', 'per_tensor'] - } + ("op_name1", "op_type1"): [ + { + "activation": { + "dtype": ["int8"], + "quant_mode": "static", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, { - 'activation': - { - 'dtype': 'bf16' - }, - 'weight': - { - 'dtype': 'bf16' - } + "activation": { + "dtype": ["int4"], + "quant_mode": "static", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["uint4"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, { - 'activation': - { - 'dtype': 'fp32' - }, - 'weight': - { - 'dtype': 'fp32' - } + "activation": { + "dtype": ["int8"], + "quant_mode": "dynamic", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, + {"activation": {"dtype": "bf16"}, "weight": {"dtype": "bf16"}}, + {"activation": {"dtype": "fp32"}, "weight": {"dtype": "fp32"}}, ], # op2 have both weight and activation and support static/dynamic/fp32 - ('op_name2', 'op_type1'): [ - { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'static', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } - }, - { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'dynamic', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } + ("op_name2", "op_type1"): [ + { + "activation": { + "dtype": ["int8"], + "quant_mode": "static", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, { - 'activation': - { - 'dtype': 'fp32' - }, - 'weight': - { - 'dtype': 'fp32' - } + "activation": { + "dtype": ["int8"], + "quant_mode": "dynamic", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, + {"activation": {"dtype": "fp32"}, "weight": {"dtype": "fp32"}}, ], # op3 have both weight and activation and support int4 - ('op_name3', 'op_type3'): [ - { - 'activation': - { - 'dtype': ['int4'], - 'quant_mode': 'static', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int4'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } - }, - { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'static', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } + ("op_name3", "op_type3"): [ + { + "activation": { + "dtype": ["int4"], + "quant_mode": "static", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int4"], "scheme": ["sym"], "granularity": 
["per_channel", "per_tensor"]}, }, { - 'activation': - { - 'dtype': 'fp32' - }, - 'weight': - { - 'dtype': 'fp32' - } + "activation": { + "dtype": ["int8"], + "quant_mode": "static", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, + {"activation": {"dtype": "fp32"}, "weight": {"dtype": "fp32"}}, ], # op4 have tuple name as IPEX - (('op_name4', 0), 'op_type4'): [ - { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'static', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } - }, - { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'dynamic', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } + (("op_name4", 0), "op_type4"): [ + { + "activation": { + "dtype": ["int8"], + "quant_mode": "static", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, { - 'activation': - { - 'dtype': 'fp32' - }, - 'weight': - { - 'dtype': 'fp32' - } + "activation": { + "dtype": ["int8"], + "quant_mode": "dynamic", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, + {"activation": {"dtype": "fp32"}, "weight": {"dtype": "fp32"}}, ], - # op5, weight only - ('op_name5', 'op_type5'): [ + ("op_name5", "op_type5"): [ { - 'activation': - { - 'dtype': ['fp32'], - 'quant_mode': 'static', - }, - 'weight': - { - 'dtype': ['int4'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } - }, - { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'static', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } + "activation": { + "dtype": ["fp32"], + "quant_mode": "static", + }, + "weight": {"dtype": ["int4"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, { - 'activation': - { - 'dtype': 'fp32' - }, - 'weight': - { - 'dtype': 'fp32' - } + "activation": { + "dtype": ["int8"], + "quant_mode": "static", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, + {"activation": {"dtype": "fp32"}, "weight": {"dtype": "fp32"}}, ], } + class TestTuningSpaceV2(unittest.TestCase): def setUp(self) -> None: - self.capability = { - 'calib': {'calib_sampling_size': [1, 10, 50]}, - 'op': deepcopy(op_cap) - } + self.capability = {"calib": {"calib_sampling_size": [1, 10, 50]}, "op": deepcopy(op_cap)} self.op_wise_user_cfg_for_fallback = { - 'op_name1': { - 'activation': { - 'dtype': ['fp32'] - }, - 'weight': { - 'dtype': ['fp32'] - } - }, - ('op_name4', 0): { - 'activation': { - 'dtype': ['fp32'] - }, - 'weight': { - 'dtype': ['fp32'] - } - }, + "op_name1": {"activation": {"dtype": ["fp32"]}, 
"weight": {"dtype": ["fp32"]}}, + ("op_name4", 0): {"activation": {"dtype": ["fp32"]}, "weight": {"dtype": ["fp32"]}}, } - def test_tuning_sampler_int4(self): # op-wise conf = {} @@ -292,24 +155,28 @@ def test_tuning_sampler_int4(self): logger.debug(tuning_space.root_item.get_details()) found_int4_activation = False found_int4_weight = False - op3_act_item = tuning_space.query_quant_mode_item_by_full_path(('op_name3', 'op_type3'),\ - ('static', 'activation')) + op3_act_item = tuning_space.query_quant_mode_item_by_full_path( + ("op_name3", "op_type3"), ("static", "activation") + ) for dtype_item in op3_act_item.options: - if dtype_item.name == 'int4': + if dtype_item.name == "int4": found_int4_activation = True self.assertTrue(found_int4_activation) - op3_weight_item = tuning_space.query_quant_mode_item_by_full_path(('op_name3', 'op_type3'), \ - ('static', 'weight')) + op3_weight_item = tuning_space.query_quant_mode_item_by_full_path( + ("op_name3", "op_type3"), ("static", "weight") + ) for dtype_item in op3_weight_item.options: - if dtype_item.name == 'int4': + if dtype_item.name == "int4": found_int4_weight = True self.assertTrue(found_int4_weight) def test_sampler_int4(self): # test sampler from collections import OrderedDict + + from neural_compressor.strategy.utils.tuning_sampler import LowerBitsSampler, OpWiseTuningSampler from neural_compressor.strategy.utils.tuning_structs import OpTuningConfig - from neural_compressor.strategy.utils.tuning_sampler import OpWiseTuningSampler, LowerBitsSampler + # op-wise conf = {} conf = DotDict(conf) @@ -318,11 +185,12 @@ def test_sampler_int4(self): logger.debug(tuning_space.root_item.get_details()) initial_op_tuning_cfg = {} for item in tuning_space.root_item.options: - if item.item_type == 'op': + if item.item_type == "op": op_name, op_type = item.name - initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space) + initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, "fp32", tuning_space) quant_mode_wise_items = OrderedDict() from neural_compressor.strategy.utils.constant import auto_query_order as query_order + pre_items = set() for quant_mode in query_order: items = tuning_space.query_items_by_quant_mode(quant_mode) @@ -338,33 +206,34 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): for quant_mode, quant_mode_items in quant_mode_wise_items.items(): initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) - op_wise_tuning_sampler = OpWiseTuningSampler(deepcopy(tuning_space), [], [], - op_item_dtype_dict, initial_op_tuning_cfg) - op3 = ('op_name3', 'op_type3') + op_wise_tuning_sampler = OpWiseTuningSampler( + deepcopy(tuning_space), [], [], op_item_dtype_dict, initial_op_tuning_cfg + ) + op3 = ("op_name3", "op_type3") for tune_cfg in op_wise_tuning_sampler: op_cfg = tune_cfg[op3].get_state() - act_dtype = op_cfg['activation']['dtype'] - weight_dtype = op_cfg['weight']['dtype'] - self.assertTrue(act_dtype == weight_dtype == 'int4') + act_dtype = op_cfg["activation"]["dtype"] + weight_dtype = op_cfg["weight"]["dtype"] + self.assertTrue(act_dtype == weight_dtype == "int4") - - int4_ops = tuning_space.collect_op_by_quant_bits('int4') + int4_ops = tuning_space.collect_op_by_quant_bits("int4") for op in int4_ops: - op_item_dtype_dict[op.name] = 'int4' - lower_bits_sampler = LowerBitsSampler(deepcopy(tuning_space), [], initial_op_tuning_cfg, op_item_dtype_dict, - accumulate=False, skip_first=True) - op3 = ('op_name5', 'op_type5') + op_item_dtype_dict[op.name] = 
"int4" + lower_bits_sampler = LowerBitsSampler( + deepcopy(tuning_space), [], initial_op_tuning_cfg, op_item_dtype_dict, accumulate=False, skip_first=True + ) + op3 = ("op_name5", "op_type5") for tune_cfg in lower_bits_sampler: op_cfg = tune_cfg[op3].get_state() - act_dtype = op_cfg['activation']['dtype'] - weight_dtype = op_cfg['weight']['dtype'] + act_dtype = op_cfg["activation"]["dtype"] + weight_dtype = op_cfg["weight"]["dtype"] logger.debug(op_cfg) - self.assertTrue((weight_dtype == 'int4' and act_dtype == 'fp32') or (act_dtype == weight_dtype == 'fp32')) + self.assertTrue((weight_dtype == "int4" and act_dtype == "fp32") or (act_dtype == weight_dtype == "fp32")) def test_tuning_space_merge_op_wise(self): # op-wise conf = { - 'op_name_dict': self.op_wise_user_cfg_for_fallback, + "op_name_dict": self.op_wise_user_cfg_for_fallback, } conf = DotDict(conf) # test fallback @@ -372,16 +241,15 @@ def test_tuning_space_merge_op_wise(self): logger.debug(tuning_space2.root_item.get_details()) op_name1_only_fp32 = True op_name4_only_fp32 = True - for quant_mode in ['static', 'dynamic']: + for quant_mode in ["static", "dynamic"]: for item in tuning_space2.query_items_by_quant_mode(quant_mode): - if item.name[0] == 'op_name1': + if item.name[0] == "op_name1": op_name1_only_fp32 = False - if item.name[0] == ('op_name4', 0): + if item.name[0] == ("op_name4", 0): op_name4_only_fp32 = False self.assertTrue(op_name1_only_fp32) self.assertTrue(op_name4_only_fp32) - if __name__ == "__main__": unittest.main() diff --git a/test/strategy/test_tuning_space_v2_1.x.py b/test/strategy/test_tuning_space_v2_1.x.py index 6bef6c4689a..feab5fed6c1 100644 --- a/test/strategy/test_tuning_space_v2_1.x.py +++ b/test/strategy/test_tuning_space_v2_1.x.py @@ -1,232 +1,150 @@ -from neural_compressor.experimental.strategy.utils.tuning_space import TuningSpace +import unittest +from copy import deepcopy + from neural_compressor.conf.dotdict import DotDict +from neural_compressor.experimental.strategy.utils.tuning_space import TuningSpace from neural_compressor.utils import logger -from copy import deepcopy -import unittest op_cap = { # op1 have both weight and activation and support static/dynamic/fp32/b16 - ('op_name1', 'op_type1'): [ - { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'static', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } - }, - { - 'activation': - { - 'dtype': ['int4'], - 'quant_mode': 'static', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['uint4'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } - }, + ("op_name1", "op_type1"): [ { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'dynamic', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } + "activation": { + "dtype": ["int8"], + "quant_mode": "static", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, { - 'activation': - { - 'dtype': 'bf16' - }, - 'weight': - { - 'dtype': 'bf16' - } + "activation": { + "dtype": ["int4"], + "quant_mode": 
"static", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["uint4"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, { - 'activation': - { - 'dtype': 'fp32' - }, - 'weight': - { - 'dtype': 'fp32' - } + "activation": { + "dtype": ["int8"], + "quant_mode": "dynamic", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, + {"activation": {"dtype": "bf16"}, "weight": {"dtype": "bf16"}}, + {"activation": {"dtype": "fp32"}, "weight": {"dtype": "fp32"}}, ], # op2 have both weight and activation and support static/dynamic/fp32 - ('op_name2', 'op_type1'): [ - { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'static', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } - }, + ("op_name2", "op_type1"): [ { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'dynamic', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } + "activation": { + "dtype": ["int8"], + "quant_mode": "static", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, { - 'activation': - { - 'dtype': 'fp32' - }, - 'weight': - { - 'dtype': 'fp32' - } + "activation": { + "dtype": ["int8"], + "quant_mode": "dynamic", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, + {"activation": {"dtype": "fp32"}, "weight": {"dtype": "fp32"}}, ], # op3 have both weight and activation and support int4 - ('op_name3', 'op_type3'): [ - { - 'activation': - { - 'dtype': ['int4'], - 'quant_mode': 'static', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int4'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } - }, + ("op_name3", "op_type3"): [ { - 'activation': - { - 'dtype': ['int8'], - 'quant_mode': 'static', - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'], - 'algorithm': ['minmax', 'kl'] - }, - 'weight': - { - 'dtype': ['int8'], - 'scheme': ['sym'], - 'granularity': ['per_channel', 'per_tensor'] - } + "activation": { + "dtype": ["int4"], + "quant_mode": "static", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int4"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, { - 'activation': - { - 'dtype': 'fp32' - }, - 'weight': - { - 'dtype': 'fp32' - } + "activation": { + "dtype": ["int8"], + "quant_mode": "static", + "scheme": ["sym"], + "granularity": ["per_channel", "per_tensor"], + "algorithm": ["minmax", "kl"], + }, + "weight": {"dtype": ["int8"], "scheme": ["sym"], "granularity": ["per_channel", "per_tensor"]}, }, + {"activation": {"dtype": "fp32"}, "weight": {"dtype": "fp32"}}, ], } + class TestTuningSpaceV2(unittest.TestCase): 
def setUp(self) -> None: - self.capability = { - 'calib': {'calib_sampling_size': [1, 10, 50]}, - 'op': deepcopy(op_cap) - } - + self.capability = {"calib": {"calib_sampling_size": [1, 10, 50]}, "op": deepcopy(op_cap)} + self.op_wise_user_cfg_for_fallback = { - 'op_name1': { - 'activation': { - 'dtype': ['fp32'] - }, - 'weight': { - 'dtype': ['fp32'] - } - }, + "op_name1": {"activation": {"dtype": ["fp32"]}, "weight": {"dtype": ["fp32"]}}, } - - + def test_tuning_sampler_int4(self): # op-wise - conf = {'usr_cfg': { } } + conf = {"usr_cfg": {}} conf = DotDict(conf) # test space construction tuning_space = TuningSpace(deepcopy(self.capability), deepcopy(conf)) logger.debug(tuning_space.root_item.get_details()) found_int4_activation = False found_int4_weight = False - op3_act_item = tuning_space.query_quant_mode_item_by_full_path(('op_name3', 'op_type3'),\ - ('static', 'activation')) + op3_act_item = tuning_space.query_quant_mode_item_by_full_path( + ("op_name3", "op_type3"), ("static", "activation") + ) for dtype_item in op3_act_item.options: - if dtype_item.name == 'int4': + if dtype_item.name == "int4": found_int4_activation = True self.assertTrue(found_int4_activation) - op3_weight_item = tuning_space.query_quant_mode_item_by_full_path(('op_name3', 'op_type3'), \ - ('static', 'weight')) + op3_weight_item = tuning_space.query_quant_mode_item_by_full_path( + ("op_name3", "op_type3"), ("static", "weight") + ) for dtype_item in op3_weight_item.options: - if dtype_item.name == 'int4': + if dtype_item.name == "int4": found_int4_weight = True self.assertTrue(found_int4_weight) - + def test_sampler_int4(self): # test sampler from collections import OrderedDict - from neural_compressor.strategy.utils.tuning_structs import OpTuningConfig + from neural_compressor.strategy.utils.tuning_sampler import OpWiseTuningSampler + from neural_compressor.strategy.utils.tuning_structs import OpTuningConfig + # op-wise - conf = {'usr_cfg': { } } + conf = {"usr_cfg": {}} conf = DotDict(conf) # test space construction tuning_space = TuningSpace(deepcopy(self.capability), deepcopy(conf)) logger.debug(tuning_space.root_item.get_details()) initial_op_tuning_cfg = {} for item in tuning_space.root_item.options: - if item.item_type == 'op': + if item.item_type == "op": op_name, op_type = item.name - initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space) + initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, "fp32", tuning_space) quant_mode_wise_items = OrderedDict() from neural_compressor.strategy.utils.constant import auto_query_order as query_order + pre_items = set() for quant_mode in query_order: items = tuning_space.query_items_by_quant_mode(quant_mode) @@ -241,39 +159,37 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): op_item_dtype_dict = OrderedDict() for quant_mode, quant_mode_items in quant_mode_wise_items.items(): initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) - - op_wise_tuning_sampler = OpWiseTuningSampler(deepcopy(tuning_space), [], [], - op_item_dtype_dict, initial_op_tuning_cfg) - op3 = ('op_name3', 'op_type3') + + op_wise_tuning_sampler = OpWiseTuningSampler( + deepcopy(tuning_space), [], [], op_item_dtype_dict, initial_op_tuning_cfg + ) + op3 = ("op_name3", "op_type3") for tune_cfg in op_wise_tuning_sampler: op_cfg = tune_cfg[op3].get_state() - act_dtype = op_cfg['activation']['dtype'] - weight_dtype = op_cfg['weight']['dtype'] - self.assertTrue(act_dtype == weight_dtype == 'int4') - + act_dtype = 
op_cfg["activation"]["dtype"] + weight_dtype = op_cfg["weight"]["dtype"] + self.assertTrue(act_dtype == weight_dtype == "int4") def test_tuning_space_merge_op_wise(self): # op-wise conf = { - 'usr_cfg': { - 'quantization': { - 'op_wise': self.op_wise_user_cfg_for_fallback, + "usr_cfg": { + "quantization": { + "op_wise": self.op_wise_user_cfg_for_fallback, } } - } conf = DotDict(conf) # test fallback tuning_space2 = TuningSpace(deepcopy(self.capability), deepcopy(conf)) logger.debug(tuning_space2.root_item.get_details()) op_name1_only_fp32 = True - for quant_mode in ['static', 'dynamic']: + for quant_mode in ["static", "dynamic"]: for item in tuning_space2.query_items_by_quant_mode(quant_mode): - if item.name[0] == 'op_name1': + if item.name[0] == "op_name1": op_name1_only_fp32 = False self.assertTrue(op_name1_only_fp32) - if __name__ == "__main__": unittest.main() diff --git a/test/strategy/test_utility.py b/test/strategy/test_utility.py index d6173c32105..bb356f05713 100644 --- a/test/strategy/test_utility.py +++ b/test/strategy/test_utility.py @@ -1,7 +1,9 @@ """Tests for strategy utility.""" -from neural_compressor.strategy.utils.utility import build_slave_faker_model import unittest +from neural_compressor.strategy.utils.utility import build_slave_faker_model + + class TestUtils(unittest.TestCase): def test_build_slave_faker_model(self): faker_model = build_slave_faker_model() @@ -9,5 +11,6 @@ def test_build_slave_faker_model(self): faker_model.some_attr faker_model.some_attr.another_attr[0].some_method() + if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/test/tfnewapi/test_smooth_quant_newapi.py b/test/tfnewapi/test_smooth_quant_newapi.py index b3bf60d9d74..ed549e79cab 100644 --- a/test/tfnewapi/test_smooth_quant_newapi.py +++ b/test/tfnewapi/test_smooth_quant_newapi.py @@ -1,12 +1,14 @@ import unittest -import tensorflow as tf + import numpy as np +import tensorflow as tf +from tensorflow.compat.v1 import graph_util + from neural_compressor.adaptor.tf_utils.util import disable_random +from neural_compressor.config import PostTrainingQuantConfig from neural_compressor.data.dataloaders.dataloader import DataLoader from neural_compressor.quantization import fit -from neural_compressor.config import PostTrainingQuantConfig from neural_compressor.utils.utility import set_random_seed -from tensorflow.compat.v1 import graph_util class TestSmoothQuantTFNewApi(unittest.TestCase): @@ -24,48 +26,53 @@ def test_newapi_conv_sq(self): top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) - conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(top_relu, conv_weights2, strides=[1, 2, 2, 1], padding="SAME") normed2 = tf.compat.v1.layers.batch_normalization(conv2) - add = tf.raw_ops.Add(x=normed, y=normed2, name='addv2') + add = tf.raw_ops.Add(x=normed, y=normed2, 
name="addv2") relu = tf.nn.relu(add) - relu6 = tf.nn.relu6(relu, name='op_to_store') + relu6 = tf.nn.relu6(relu, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) set_random_seed(9527) config = PostTrainingQuantConfig( quant_level=1, - recipes={"smooth_quant": True, "smooth_quant_args": {'alpha': 0.5}}, - calibration_sampling_size=[500]) + recipes={"smooth_quant": True, "smooth_quant_args": {"alpha": 0.5}}, + calibration_sampling_size=[500], + ) from neural_compressor.data import Datasets - dataset = Datasets('tensorflow')['dummy'](shape=(100, 56, 56, 16), label=True) - dataloader = DataLoader(framework='tensorflow', dataset=dataset, batch_size=1) + + dataset = Datasets("tensorflow")["dummy"](shape=(100, 56, 56, 16), label=True) + dataloader = DataLoader(framework="tensorflow", dataset=dataset, batch_size=1) from neural_compressor import Metric + top1 = Metric(name="topk", k=1) output_graph = fit( model=output_graph_def, conf=config, calib_dataloader=dataloader, eval_dataloader=dataloader, - eval_metric=top1) + eval_metric=top1, + ) mul_count = 0 for i in output_graph.graph_def.node: - if i.op == 'Mul': + if i.op == "Mul": mul_count += 1 self.assertEqual(mul_count, 2) @@ -75,12 +82,13 @@ def test_newapi_sq_matmul(self): x_data = np.random.rand(1024, 1024).astype(np.float32) y_data = np.random.rand(1024, 1024).astype(np.float32) import tensorflow.compat.v1 as tf - x = tf.placeholder(tf.float32, shape=[1024, 1024], name='x') + + x = tf.placeholder(tf.float32, shape=[1024, 1024], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[1024, 1024]) z = tf.matmul(x, y) bias = np.random.rand(1024).astype(np.float32) z = tf.nn.bias_add(z, bias) - z = tf.nn.relu(z, name='op_to_store') + z = tf.nn.relu(z, name="op_to_store") with tf.Session() as sess: sess.run(z, feed_dict={x: x_data, y: y_data}) @@ -89,24 +97,28 @@ def test_newapi_sq_matmul(self): set_random_seed(9527) config = PostTrainingQuantConfig( quant_level=1, - recipes={"smooth_quant": True, "smooth_quant_args": {'alpha': 0.5}}, - calibration_sampling_size=[1024]) + recipes={"smooth_quant": True, "smooth_quant_args": {"alpha": 0.5}}, + calibration_sampling_size=[1024], + ) from neural_compressor.data import Datasets - dataset = Datasets('tensorflow')['dummy'](shape=(1024, 1024), label=True) - dataloader = DataLoader(framework='tensorflow', dataset=dataset, batch_size=1024) + + dataset = Datasets("tensorflow")["dummy"](shape=(1024, 1024), label=True) + dataloader = DataLoader(framework="tensorflow", dataset=dataset, batch_size=1024) from neural_compressor import Metric + top1 = Metric(name="topk", k=1) output_graph = fit( model=output_graph_def, conf=config, calib_dataloader=dataloader, eval_dataloader=dataloader, - eval_metric=top1) + eval_metric=top1, + ) mul_count = 0 for i in output_graph.graph_def.node: - if i.op == 'Mul': + if i.op == "Mul": mul_count += 1 self.assertEqual(mul_count, 1) @@ -117,51 +129,58 @@ def test_newapi_sq_conv_matmul(self): top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv1_weights = tf.compat.v1.get_variable("weight_conv1", [3, 3, 16, 16], - 
initializer=tf.compat.v1.random_normal_initializer()) + conv1_weights = tf.compat.v1.get_variable( + "weight_conv1", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv1 = tf.nn.conv2d(x_pad, conv1_weights, strides=[1, 2, 2, 1], padding="VALID") - matmul_weights = tf.compat.v1.get_variable("weight_matmul", [28*28*16, 7*7*32], - initializer=tf.compat.v1.random_normal_initializer()) - conv1_reshaped = tf.reshape(conv1, shape=[-1, 28*28*16]) + matmul_weights = tf.compat.v1.get_variable( + "weight_matmul", [28 * 28 * 16, 7 * 7 * 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv1_reshaped = tf.reshape(conv1, shape=[-1, 28 * 28 * 16]) matmul = tf.matmul(conv1_reshaped, matmul_weights) reshape = tf.reshape(matmul, (1, 7, 7, 32)) - conv2_weights = tf.compat.v1.get_variable("weight_conv2", [7, 7, 32, 1], - initializer=tf.compat.v1.random_normal_initializer()) + conv2_weights = tf.compat.v1.get_variable( + "weight_conv2", [7, 7, 32, 1], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(reshape, conv2_weights, strides=[1, 2, 2, 1], padding="VALID") - leaky_relu = tf.nn.leaky_relu(conv2, name='op_to_store') + leaky_relu = tf.nn.leaky_relu(conv2, name="op_to_store") - out_name = leaky_relu.name.split(':')[0] + out_name = leaky_relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) set_random_seed(9527) config = PostTrainingQuantConfig( quant_level=1, - recipes={"smooth_quant": True, "smooth_quant_args": {'alpha': 0.6}}, - calibration_sampling_size=[500]) + recipes={"smooth_quant": True, "smooth_quant_args": {"alpha": 0.6}}, + calibration_sampling_size=[500], + ) from neural_compressor.data import Datasets - dataset = Datasets('tensorflow')['dummy'](shape=(100, 56, 56, 16), label=True) - dataloader = DataLoader(framework='tensorflow', dataset=dataset) + + dataset = Datasets("tensorflow")["dummy"](shape=(100, 56, 56, 16), label=True) + dataloader = DataLoader(framework="tensorflow", dataset=dataset) from neural_compressor import Metric + top1 = Metric(name="topk", k=1) output_graph = fit( model=output_graph_def, conf=config, calib_dataloader=dataloader, eval_dataloader=dataloader, - eval_metric=top1) + eval_metric=top1, + ) mul_count = 0 for i in output_graph.graph_def.node: - if i.op == 'Mul': + if i.op == "Mul": mul_count += 1 self.assertEqual(mul_count, 3) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/test/tfnewapi/test_tensorflow_bias_correction.py b/test/tfnewapi/test_tensorflow_bias_correction.py index 53850f415fb..45e5523705c 100644 --- a/test/tfnewapi/test_tensorflow_bias_correction.py +++ b/test/tfnewapi/test_tensorflow_bias_correction.py @@ -1,17 +1,19 @@ import os import unittest + +import tensorflow as tf import yaml -from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper +from tensorflow.compat.v1 import graph_util + +from neural_compressor.adaptor.tensorflow import TensorflowQuery from neural_compressor.adaptor.tf_utils.quantize_graph.quantize_graph_for_intel_cpu import QuantizeGraphForIntel +from neural_compressor.adaptor.tf_utils.quantize_graph_common import QuantizeGraphHelper from neural_compressor.adaptor.tf_utils.transform_graph.bias_correction import BiasCorrection 
-from neural_compressor.adaptor.tensorflow import TensorflowQuery from neural_compressor.adaptor.tf_utils.util import disable_random -import tensorflow as tf -from tensorflow.compat.v1 import graph_util def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -37,12 +39,13 @@ def build_fake_yaml(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() + class TestBiasCorrectionNewApi(unittest.TestCase): @classmethod def setUpClass(self): @@ -50,89 +53,168 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") @disable_random() def test_bias_correction_new_api(self): tf.compat.v1.disable_eager_execution() x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - if tf.version.VERSION <= '2.1.0': + if tf.version.VERSION <= "2.1.0": x = tf.nn.relu(x) - conv1_weights = tf.compat.v1.get_variable("weights1", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv1_weights = tf.compat.v1.get_variable( + "weights1", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv1 = tf.nn.conv2d(x, conv1_weights, strides=[1, 1, 1, 1], padding="SAME") - normed = tf.nn.bias_add(conv1, tf.constant([3.0, 1.2, 1.0, 2, 3, 4, 5, 6, 7, 8, 0, 1, - 4.0, 5.2, 8.1, 2, 4, 5, 8, 9, 10, 12, 11, 2, - 5.0, 7.2, 3.2, 3, 4, 5, 7, 8])) - relu1 = tf.nn.relu(normed, name='Relu_1') - op_wise_sequences = TensorflowQuery(local_config_file=os.path.join( - os.path.dirname(__file__), "../../neural_compressor/adaptor/tensorflow.yaml")).get_eightbit_patterns() + normed = tf.nn.bias_add( + conv1, + tf.constant( + [ + 3.0, + 1.2, + 1.0, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 0, + 1, + 4.0, + 5.2, + 8.1, + 2, + 4, + 5, + 8, + 9, + 10, + 12, + 11, + 2, + 5.0, + 7.2, + 3.2, + 3, + 4, + 5, + 7, + 8, + ] + ), + ) + relu1 = tf.nn.relu(normed, name="Relu_1") + op_wise_sequences = TensorflowQuery( + local_config_file=os.path.join(os.path.dirname(__file__), "../../neural_compressor/adaptor/tensorflow.yaml") + ).get_eightbit_patterns() with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[relu1.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[relu1.name.split(":")[0]] + ) output_graph_def = QuantizeGraphHelper.remove_training_nodes( - output_graph_def, protected_nodes=[relu1.name.split(':')[0]]) - inputs = [x.name.split(':')[0]] - outputs = [relu1.name.split(':')[0]] + output_graph_def, protected_nodes=[relu1.name.split(":")[0]] + ) + inputs = [x.name.split(":")[0]] + outputs = [relu1.name.split(":")[0]] op_wise_config = { - "Conv2D": (False, 'minmax', False, 7.0), + "Conv2D": (False, "minmax", False, 7.0), } from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 224, 224, 3), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 224, 224, 3), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def int8_output_graph = 
quantizer.fit() correct_graph_def = BiasCorrection( - int8_output_graph.graph_def, output_graph_def, 'weight_empirical', True).do_transformation() + int8_output_graph.graph_def, output_graph_def, "weight_empirical", True + ).do_transformation() self.assertEqual(len(correct_graph_def.node), len(int8_output_graph.graph_def.node)) + class TestBiasCorrectionOldApi(unittest.TestCase): @disable_random() def test_bias_correction_old_api(self): tf.compat.v1.disable_eager_execution() x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - if tf.version.VERSION <= '2.1.0': + if tf.version.VERSION <= "2.1.0": x = tf.nn.relu(x) - conv_weights = tf.compat.v1.get_variable("weights", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weights", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME") - normed = tf.nn.bias_add(conv, tf.constant([3.0, 1.2, 1.0, 2, 3, 4, 5, 6, 7, 8, 0, 1, - 4.0, 5.2, 8.1, 2, 4, 5, 8, 9, 10, 12, 11, 2, - 5.0, 7.2, 3.2, 3, 4, 5, 7, 8])) - relu = tf.nn.relu(normed, name='Relu_0') - op_wise_sequences = TensorflowQuery(local_config_file=os.path.join( - os.path.dirname(__file__), "../../neural_compressor/adaptor/tensorflow.yaml")).get_eightbit_patterns() + normed = tf.nn.bias_add( + conv, + tf.constant( + [ + 3.0, + 1.2, + 1.0, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 0, + 1, + 4.0, + 5.2, + 8.1, + 2, + 4, + 5, + 8, + 9, + 10, + 12, + 11, + 2, + 5.0, + 7.2, + 3.2, + 3, + 4, + 5, + 7, + 8, + ] + ), + ) + relu = tf.nn.relu(normed, name="Relu_0") + op_wise_sequences = TensorflowQuery( + local_config_file=os.path.join(os.path.dirname(__file__), "../../neural_compressor/adaptor/tensorflow.yaml") + ).get_eightbit_patterns() with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[relu.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[relu.name.split(":")[0]] + ) output_graph_def = QuantizeGraphHelper.remove_training_nodes( - output_graph_def, protected_nodes=[relu.name.split(':')[0]]) - inputs = [x.name.split(':')[0]] - outputs = [relu.name.split(':')[0]] + output_graph_def, protected_nodes=[relu.name.split(":")[0]] + ) + inputs = [x.name.split(":")[0]] + outputs = [relu.name.split(":")[0]] op_wise_config = { - "Conv2D": (False, 'minmax', False, 7.0), + "Conv2D": (False, "minmax", False, 7.0), } - int8_graph_def, _, _ = QuantizeGraphForIntel(output_graph_def, inputs, outputs, - op_wise_config, op_wise_sequences, - 'cpu').do_transform() + int8_graph_def, _, _ = QuantizeGraphForIntel( + output_graph_def, inputs, outputs, op_wise_config, op_wise_sequences, "cpu" + ).do_transform() - correct_graph_def = BiasCorrection( - int8_graph_def, output_graph_def).do_transformation() + correct_graph_def = BiasCorrection(int8_graph_def, output_graph_def).do_transformation() self.assertEqual(len(correct_graph_def.node), len(int8_graph_def.node)) - + + if __name__ == "__main__": unittest.main() diff --git a/test/tfnewapi/test_tensorflow_fuse_reshape_transpose.py b/test/tfnewapi/test_tensorflow_fuse_reshape_transpose.py index 21ac1564b38..b80dc785d97 100644 --- a/test/tfnewapi/test_tensorflow_fuse_reshape_transpose.py +++ b/test/tfnewapi/test_tensorflow_fuse_reshape_transpose.py @@ -1,18 +1,18 @@ - import imp -import unittest import os 
-from numpy.core.fromnumeric import squeeze -import yaml -import numpy as np -from neural_compressor.adaptor.tf_utils.util import disable_random +import unittest +import numpy as np import tensorflow.compat.v1 as tf +import yaml +from numpy.core.fromnumeric import squeeze from tensorflow.compat.v1 import graph_util +from neural_compressor.adaptor.tf_utils.util import disable_random + def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -39,9 +39,9 @@ def build_fake_yaml(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() @@ -53,21 +53,21 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") @disable_random() def test_fuse_enter_reshape_transpose(self): x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) - enter = tf.raw_ops.Enter(data=y, frame_name='test') - enter_perm = tf.raw_ops.Enter(data=[1, 0], frame_name='test', is_constant=True) + enter = tf.raw_ops.Enter(data=y, frame_name="test") + enter_perm = tf.raw_ops.Enter(data=[1, 0], frame_name="test", is_constant=True) transpose = tf.transpose(enter, perm=enter_perm) - enter_reshape = tf.raw_ops.Enter(data=[2, 2], frame_name='test', is_constant=True) + enter_reshape = tf.raw_ops.Enter(data=[2, 2], frame_name="test", is_constant=True) reshape = tf.reshape(transpose, enter_reshape) - x_enter = tf.raw_ops.Enter(data=x, frame_name='test') - z = tf.raw_ops.MatMul(a=x_enter, b=reshape, name='matmul_1') + x_enter = tf.raw_ops.Enter(data=x, frame_name="test") + z = tf.raw_ops.MatMul(a=x_enter, b=reshape, name="matmul_1") z = tf.raw_ops.Exit(data=z) found_quantized_matmul = True found_transpose = False @@ -78,18 +78,19 @@ def test_fuse_enter_reshape_transpose(self): float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == 'MatMul': + if i.op == "MatMul": found_quantized_matmul = False - if i.op == 'Transpose': + if i.op == "Transpose": found_transpose = True - if i.op == 'Reshape': + if i.op == "Reshape": found_reshape = True self.assertEqual(found_quantized_matmul, True) self.assertEqual(found_transpose, False) @@ -99,12 +100,12 @@ def test_fuse_enter_reshape_transpose(self): def test_fuse_reshape_transpose(self): x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) transpose = tf.transpose(y, perm=[1, 0]) reshape = tf.reshape(transpose, [2, 2]) - z = tf.raw_ops.MatMul(a=x, b=reshape, 
name='matmul_2') - z = tf.nn.bias_add(z, [1, 2], name='op_to_store') + z = tf.raw_ops.MatMul(a=x, b=reshape, name="matmul_2") + z = tf.nn.bias_add(z, [1, 2], name="op_to_store") found_quantized_matmul = True found_transpose = False found_reshape = False @@ -114,23 +115,25 @@ def test_fuse_reshape_transpose(self): float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == 'MatMul': + if i.op == "MatMul": found_quantized_matmul = False - if i.op == 'Transpose': + if i.op == "Transpose": found_transpose = True - if i.op == 'Reshape': + if i.op == "Reshape": found_reshape = True self.assertEqual(found_quantized_matmul, True) self.assertEqual(found_transpose, False) self.assertEqual(found_reshape, False) + if __name__ == "__main__": unittest.main() diff --git a/test/tfnewapi/test_tensorflow_graph_biasadd_add_fusion.py b/test/tfnewapi/test_tensorflow_graph_biasadd_add_fusion.py index b29322d0c6b..ef6b7745271 100644 --- a/test/tfnewapi/test_tensorflow_graph_biasadd_add_fusion.py +++ b/test/tfnewapi/test_tensorflow_graph_biasadd_add_fusion.py @@ -1,19 +1,20 @@ # # -*- coding: utf-8 -*- # -import unittest import os -import yaml +import unittest + import numpy as np import tensorflow as tf - +import yaml from tensorflow.compat.v1 import graph_util + from neural_compressor.adaptor.tensorflow import TensorflowQuery from neural_compressor.adaptor.tf_utils.util import disable_random def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -39,9 +40,9 @@ def build_fake_yaml(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() @@ -53,37 +54,40 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") @disable_random() def test_conv_biasadd_add_relu_fusion(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") top_relu = tf.nn.relu(x) - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(top_relu, conv_weights, strides=[1, 2, 2, 1], padding="SAME") - normed = tf.nn.bias_add(conv, tf.constant([3.0, 1.2,1,2,3,4,5,6,7,8,9,0,12,2,3,4])) + normed = tf.nn.bias_add(conv, tf.constant([3.0, 1.2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 12, 2, 3, 4])) add = normed + tf.constant([3.0]) relu = tf.nn.relu6(add) - mul1 = tf.math.multiply(relu, tf.constant([0.1]) ) - mul2 = tf.math.multiply(mul1, tf.constant([0.8]), name='op_to_store') + mul1 = tf.math.multiply(relu, tf.constant([0.1])) + mul2 = tf.math.multiply(mul1, tf.constant([0.8]), name="op_to_store") - out_name = mul2.name.split(':')[0] + out_name = mul2.name.split(":")[0] with tf.compat.v1.Session() as sess: 
sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) + + from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.fuse_biasadd_add import ( + FuseBiasAddAndAddOptimizer, + ) - from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.fuse_biasadd_add import FuseBiasAddAndAddOptimizer output_graph_def = FuseBiasAddAndAddOptimizer(output_graph_def).do_transformation() found_addv2 = False for i in output_graph_def.node: - if i.op.find('AddV2') != -1: + if i.op.find("AddV2") != -1: found_addv2 = True break @@ -93,36 +97,39 @@ def test_conv_biasadd_add_relu_no_fusion(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") top_relu = tf.nn.relu(x) - conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(top_relu, conv_weights2, strides=[1, 2, 2, 1], padding="SAME") - normed2 = tf.nn.bias_add(conv2, tf.constant([3.0, 1.2,1,2,3,4,5,6,7,8,9,0,12,2,3,4])) - add_y = tf.compat.v1.get_variable("add_y", [16], - initializer=tf.compat.v1.random_normal_initializer()) + normed2 = tf.nn.bias_add(conv2, tf.constant([3.0, 1.2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 12, 2, 3, 4])) + add_y = tf.compat.v1.get_variable("add_y", [16], initializer=tf.compat.v1.random_normal_initializer()) add = normed2 + add_y relu = tf.nn.relu6(add) - mul1 = tf.math.multiply(relu, tf.constant([0.1]) ) - mul2 = tf.math.multiply(mul1, tf.constant([0.8]), name='op_to_store') + mul1 = tf.math.multiply(relu, tf.constant([0.1])) + mul2 = tf.math.multiply(mul1, tf.constant([0.8]), name="op_to_store") - out_name = mul2.name.split(':')[0] + out_name = mul2.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) + + from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.fuse_biasadd_add import ( + FuseBiasAddAndAddOptimizer, + ) - from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.fuse_biasadd_add import FuseBiasAddAndAddOptimizer output_graph_def = FuseBiasAddAndAddOptimizer(output_graph_def).do_transformation() found_addv2 = False for i in output_graph_def.node: - if i.op.find('AddV2') != -1: + if i.op.find("AddV2") != -1: found_addv2 = True break self.assertEqual(found_addv2, True) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/test/tfnewapi/test_tensorflow_graph_conv_fusion.py b/test/tfnewapi/test_tensorflow_graph_conv_fusion.py index 43e11a0c285..19e38a7a8f8 100644 --- a/test/tfnewapi/test_tensorflow_graph_conv_fusion.py +++ b/test/tfnewapi/test_tensorflow_graph_conv_fusion.py @@ -1,24 +1,25 @@ # # -*- coding: utf-8 -*- # -import unittest import os -import yaml +import unittest + import numpy as np import tensorflow as tf - -from neural_compressor.adaptor.tf_utils.quantize_graph.qdq.optimize_qdq import OptimizeQDQGraph -from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.strip_unused_nodes import StripUnusedNodesOptimizer -from 
neural_compressor.adaptor.tf_utils.graph_rewriter.generic.fold_batch_norm import FoldBatchNormNodesOptimizer +import yaml +from pkg_resources import parse_version from tensorflow.compat.v1 import graph_util from tensorflow.python.framework import function + from neural_compressor.adaptor.tensorflow import TensorflowQuery +from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.fold_batch_norm import FoldBatchNormNodesOptimizer +from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.strip_unused_nodes import StripUnusedNodesOptimizer +from neural_compressor.adaptor.tf_utils.quantize_graph.qdq.optimize_qdq import OptimizeQDQGraph from neural_compressor.adaptor.tf_utils.util import disable_random -from pkg_resources import parse_version def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -44,12 +45,13 @@ def build_fake_yaml(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() + class TestConvBiasAddAddReluFusion(unittest.TestCase): @classmethod def setUpClass(self): @@ -57,7 +59,7 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") @disable_random() def test_conv_single_fusion(self): @@ -65,27 +67,30 @@ def test_conv_single_fusion(self): top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv1_weights = tf.compat.v1.get_variable("weight_conv1", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv1_weights = tf.compat.v1.get_variable( + "weight_conv1", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv1 = tf.nn.conv2d(x_pad, conv1_weights, strides=[1, 2, 2, 1], padding="VALID") - matmul_weights = tf.compat.v1.get_variable("weight_matmul", [1, 28, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + matmul_weights = tf.compat.v1.get_variable( + "weight_matmul", [1, 28, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) matmul = tf.linalg.matmul(conv1, matmul_weights) - conv2_weights = tf.compat.v1.get_variable("weight_conv2", [7, 7, 32, 1], - initializer=tf.compat.v1.random_normal_initializer()) + conv2_weights = tf.compat.v1.get_variable( + "weight_conv2", [7, 7, 32, 1], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(matmul, conv2_weights, strides=[1, 2, 2, 1], padding="VALID") - leaky_relu = tf.nn.leaky_relu(conv2, name='op_to_store') + leaky_relu = tf.nn.leaky_relu(conv2, name="op_to_store") - out_name = leaky_relu.name.split(':')[0] + out_name = leaky_relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = 
common.DataLoader(dataset) quantizer.model = output_graph_def @@ -93,30 +98,32 @@ def test_conv_single_fusion(self): find_single_qconv = [] for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv2D': - find_single_qconv.append(i.attr['fused_ops'].list.s == [b'Requantize']) + if i.op == "_FusedQuantizedConv2D": + find_single_qconv.append(i.attr["fused_ops"].list.s == [b"Requantize"]) self.assertEqual(find_single_qconv, [False, False]) + @disable_random() def test_spacetobatchnd_conv2d_batchtospacend_fusion(self): i = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") - x = tf.space_to_batch_nd(i, block_shape=[2,2], paddings=[[0, 0], [0, 0]]) - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + x = tf.space_to_batch_nd(i, block_shape=[2, 2], paddings=[[0, 0], [0, 0]]) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x, conv_weights, strides=[1, 2, 2, 1], padding="VALID") - y = tf.compat.v1.batch_to_space_nd(conv, block_shape=[2,2], crops=[[0, 0], [0, 0]]) + y = tf.compat.v1.batch_to_space_nd(conv, block_shape=[2, 2], crops=[[0, 0], [0, 0]]) out = tf.identity(y, name="op_to_store") - out_name = out.name.split(':')[0] + out_name = out.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -124,7 +131,7 @@ def test_spacetobatchnd_conv2d_batchtospacend_fusion(self): found_op = False for i in output_graph.graph_def.node: - if i.op == 'SpaceToBatchND' or i.op=='BatchToSpaceND': + if i.op == "SpaceToBatchND" or i.op == "BatchToSpaceND": found_op = True break @@ -136,23 +143,24 @@ def test_conv_relu_fusion(self): top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") relu = tf.nn.relu(conv) - relu6 = tf.nn.relu6(relu, name='op_to_store') + relu6 = tf.nn.relu6(relu, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset 
= quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -160,7 +168,7 @@ def test_conv_relu_fusion(self): found_conv_fusion = True for i in output_graph.graph_def.node: - if i.op == 'Relu': + if i.op == "Relu": found_conv_fusion = False break @@ -171,23 +179,24 @@ def test_conv_biasadd_relu6_fusion(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) - relu6 = tf.nn.relu6(normed, name='op_to_store') + relu6 = tf.nn.relu6(normed, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -195,7 +204,7 @@ def test_conv_biasadd_relu6_fusion(self): found_conv_fusion = True for i in output_graph.graph_def.node: - if i.op == 'Relu6': + if i.op == "Relu6": found_conv_fusion = False break self.assertEqual(found_conv_fusion, True) @@ -205,26 +214,28 @@ def test_conv_biasadd_swishf32_fusion(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) @function.Defun(tf.float32, func_name="swish_f32") def swish_f32(x): return tf.nn.silu(x, beta=1.0) + swish = swish_f32(normed, name="swish_f32_output_node") - out_name = swish.name.split(':')[0] + out_name = swish.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import 
Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -232,7 +243,7 @@ def swish_f32(x): found_conv_fusion = True for i in output_graph.graph_def.node: - if i.op == 'swish_f32': + if i.op == "swish_f32": found_conv_fusion = False break self.assertEqual(found_conv_fusion, True) @@ -240,24 +251,26 @@ def swish_f32(x): @disable_random() def test_conv_addv2_fusion(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") - conv1_weights = tf.compat.v1.get_variable("weight_conv1", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv1_weights = tf.compat.v1.get_variable( + "weight_conv1", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv1 = tf.nn.conv2d(x, conv1_weights, strides=[1, 2, 2, 1], padding="SAME") - conv2_weights = tf.compat.v1.get_variable("weight_conv2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv2_weights = tf.compat.v1.get_variable( + "weight_conv2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(x, conv2_weights, strides=[1, 2, 2, 1], padding="SAME") - sumadd = tf.raw_ops.AddV2(x=conv1, y=conv2, name='addv2') + sumadd = tf.raw_ops.AddV2(x=conv1, y=conv2, name="addv2") - out_name = sumadd.name.split(':')[0] + out_name = sumadd.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -265,7 +278,7 @@ def test_conv_addv2_fusion(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op.find('QuantizedConv2D') != -1: + if i.op.find("QuantizedConv2D") != -1: found_conv_fusion = True break @@ -275,24 +288,25 @@ def test_conv_addv2_fusion(self): def test_conv_biasadd_add_relu_fusion(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") top_relu = tf.nn.relu(x) - conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(top_relu, conv_weights2, strides=[1, 2, 2, 1], padding="SAME") - normed2 = tf.nn.bias_add(conv2, tf.constant([3.0, 1.2,1,2,3,4,5,6,7,8,9,0,12,2,3,4])) + normed2 = tf.nn.bias_add(conv2, tf.constant([3.0, 1.2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 12, 2, 3, 4])) relu = tf.nn.relu(normed2 + tf.constant([3.0])) - relu6 = tf.nn.relu6(relu, name='op_to_store') + relu6 = tf.nn.relu6(relu, 
name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -301,7 +315,7 @@ def test_conv_biasadd_add_relu_fusion(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op.find('QuantizedConv2D') != -1: + if i.op.find("QuantizedConv2D") != -1: found_conv_fusion = True break @@ -313,32 +327,34 @@ def test_conv_biasadd_addv2_relu_fallback_fusion_1(self): top_relu = tf.nn.leaky_relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) # relu = tf.nn.relu(normed) - conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(top_relu, conv_weights2, strides=[1, 2, 2, 1], padding="SAME") normed2 = tf.compat.v1.layers.batch_normalization(conv2) # relu2 = tf.nn.relu(normed2) - add = tf.raw_ops.AddV2(x=normed, y=normed2, name='addv2') + add = tf.raw_ops.AddV2(x=normed, y=normed2, name="addv2") relu = tf.nn.relu(add) - relu6 = tf.nn.relu6(relu, name='op_to_store') + relu6 = tf.nn.relu6(relu, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -347,8 +363,12 @@ def test_conv_biasadd_addv2_relu_fallback_fusion_1(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv2D' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'Sum', b'Relu', b'Requantize']: + if i.op == "_FusedQuantizedConv2D" and 
i.attr["fused_ops"].list.s == [ + b"BiasAdd", + b"Sum", + b"Relu", + b"Requantize", + ]: found_conv_fusion = True break self.assertEqual(found_conv_fusion, True) @@ -359,32 +379,34 @@ def test_conv_biasadd_addv2_relu_fallback_fusion_2(self): top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) # relu = tf.nn.relu(normed) - conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(top_relu, conv_weights2, strides=[1, 2, 2, 1], padding="SAME") normed2 = tf.compat.v1.layers.batch_normalization(conv2) # relu2 = tf.nn.relu(normed2) - add = tf.raw_ops.AddV2(x=normed, y=normed2, name='addv2') + add = tf.raw_ops.AddV2(x=normed, y=normed2, name="addv2") relu = tf.nn.relu(add) - relu6 = tf.nn.relu6(relu, name='op_to_store') + relu6 = tf.nn.relu6(relu, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -392,8 +414,7 @@ def test_conv_biasadd_addv2_relu_fallback_fusion_2(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv2D' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'Requantize']: + if i.op == "_FusedQuantizedConv2D" and i.attr["fused_ops"].list.s == [b"BiasAdd", b"Requantize"]: found_conv_fusion = True break @@ -405,8 +426,9 @@ def test_conv_fusion_with_last_matmul(self): top_relu = tf.nn.relu(x) # paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) # x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(top_relu, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) @@ -423,19 +445,19 @@ def test_conv_fusion_with_last_matmul(self): y_1 = tf.constant(y_data_1, dtype=tf.float32, shape=[1, 1]) z_2nd_matmul = tf.matmul(relu1, y_1) - relu6 = tf.nn.relu6(z_2nd_matmul, name='op_to_store') + relu6 = tf.nn.relu6(z_2nd_matmul, name="op_to_store") - out_name = 
relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -443,7 +465,7 @@ def test_conv_fusion_with_last_matmul(self): quantize_v2_count = 0 for i in output_graph.graph_def.node: - if i.op == 'QuantizeV2': + if i.op == "QuantizeV2": quantize_v2_count += 1 break @@ -453,35 +475,38 @@ def test_conv_fusion_with_last_matmul(self): def test_conv_fusion_with_last_conv(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") top_relu = tf.nn.relu(x) - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(top_relu, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) relu = tf.nn.relu(normed) pooling = tf.nn.max_pool(relu, ksize=1, strides=[1, 2, 2, 1], padding="SAME") - conv_weights_2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights_2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(pooling, conv_weights_2, strides=[1, 2, 2, 1], padding="VALID") - conv_weights_3 = tf.compat.v1.get_variable("weight3", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights_3 = tf.compat.v1.get_variable( + "weight3", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) relu2 = tf.nn.relu(conv2) conv3 = tf.nn.conv2d(relu2, conv_weights_3, strides=[1, 2, 2, 1], padding="VALID") relu3 = tf.nn.relu(conv3) - relu6 = tf.nn.relu6(relu3, name='op_to_store') + relu6 = tf.nn.relu6(relu3, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -489,7 +514,7 @@ def test_conv_fusion_with_last_conv(self): quantize_v2_count = 0 for i in output_graph.graph_def.node: - if i.op == 
'QuantizeV2': + if i.op == "QuantizeV2": quantize_v2_count += 1 break @@ -501,21 +526,22 @@ def test_conv_fusion_with_max_pooling(self): relu = tf.nn.relu(x) pooling = tf.nn.max_pool(relu, ksize=1, strides=[1, 2, 2, 1], padding="SAME") - conv_weights = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(pooling, conv_weights, strides=[1, 2, 2, 1], padding="VALID") - biasadd = tf.compat.v1.layers.batch_normalization(conv, name='op_to_store') - out_name = biasadd.name.split(':')[0] + biasadd = tf.compat.v1.layers.batch_normalization(conv, name="op_to_store") + out_name = biasadd.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -525,9 +551,9 @@ def test_conv_fusion_with_max_pooling(self): quantized_conv_data_type = None for i in output_graph.graph_def.node: if i.op.find("QuantizedMaxPool") != -1: - quantized_pool_data_type = i.attr['T'].type + quantized_pool_data_type = i.attr["T"].type if i.op.find("QuantizedConv2D") != -1: - quantized_conv_data_type = i.attr['Tinput'].type + quantized_conv_data_type = i.attr["Tinput"].type self.assertNotEqual(quantized_pool_data_type, None) self.assertEqual(quantized_pool_data_type, quantized_conv_data_type) @@ -536,25 +562,27 @@ def test_conv_fusion_with_max_pooling(self): def test_conv3d_addv2_relu_fusion(self): x = tf.compat.v1.placeholder(tf.float32, [1, 128, 64, 64, 16], name="input") top_relu = tf.nn.relu(x) - conv3d_1_weights = tf.compat.v1.get_variable("weight_conv3d_1", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_1_weights = tf.compat.v1.get_variable( + "weight_conv3d_1", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_1 = tf.nn.conv3d(top_relu, conv3d_1_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - add = tf.raw_ops.AddV2(x=conv3d_1, y=tf.constant(np.random.randn(32), dtype=tf.float32), name='addv2') + add = tf.raw_ops.AddV2(x=conv3d_1, y=tf.constant(np.random.randn(32), dtype=tf.float32), name="addv2") relu = tf.nn.relu(add) - conv3d_2_weights = tf.compat.v1.get_variable("weight_conv3d_2", [3, 3, 3, 32, 1], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_2_weights = tf.compat.v1.get_variable( + "weight_conv3d_2", [3, 3, 3, 32, 1], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_2 = tf.nn.conv3d(relu, conv3d_2_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - out_name = conv3d_2.name.split(':')[0] + out_name = conv3d_2.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - 
input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 128, 64, 64, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 128, 64, 64, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -563,10 +591,10 @@ def test_conv3d_addv2_relu_fusion(self): found_conv_sumadd_fusion = False found_conv_biasadd_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D': - if b'Sum' in i.attr['fused_ops'].list.s: + if i.op == "_FusedQuantizedConv3D": + if b"Sum" in i.attr["fused_ops"].list.s: found_conv_sumadd_fusion = True - if i.attr['fused_ops'].list.s == [b'BiasAdd', b'Relu', b'Requantize']: + if i.attr["fused_ops"].list.s == [b"BiasAdd", b"Relu", b"Requantize"]: found_conv_biasadd_fusion = True self.assertEqual(found_conv_sumadd_fusion, False) self.assertEqual(found_conv_biasadd_fusion, True) @@ -578,27 +606,30 @@ def test_conv_add_addn_non_const_fusion(self): paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") top_relu = tf.nn.relu(x_pad) - conv2d_1_weights = tf.compat.v1.get_variable("weight1", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv2d_1_weights = tf.compat.v1.get_variable( + "weight1", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2d_1 = tf.nn.conv2d(top_relu, conv2d_1_weights, strides=[1, 2, 2, 1], padding="SAME") - conv2d_2_weights = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv2d_2_weights = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2d_2 = tf.nn.conv2d(top_relu, conv2d_2_weights, strides=[1, 2, 2, 1], padding="SAME") - add_1 = tf.raw_ops.AddV2(x=conv2d_1, y=conv2d_2, name='addv2_1') - conv2d_3_weights = tf.compat.v1.get_variable("weight3", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + add_1 = tf.raw_ops.AddV2(x=conv2d_1, y=conv2d_2, name="addv2_1") + conv2d_3_weights = tf.compat.v1.get_variable( + "weight3", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2d_3 = tf.nn.conv2d(top_relu, conv2d_3_weights, strides=[1, 2, 2, 1], padding="SAME") - add = tf.raw_ops.AddV2(x=add_1, y=conv2d_3, name='addv2_2') - out_name = add.name.split(':')[0] + add = tf.raw_ops.AddV2(x=add_1, y=conv2d_3, name="addv2_2") + out_name = add.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = 
common.DataLoader(dataset) quantizer.model = output_graph_def @@ -606,35 +637,43 @@ def test_conv_add_addn_non_const_fusion(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv2D' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'Sum', b'Requantize']: + if i.op == "_FusedQuantizedConv2D" and i.attr["fused_ops"].list.s == [ + b"BiasAdd", + b"Sum", + b"Requantize", + ]: found_conv_fusion = True self.assertEqual(found_conv_fusion, True) - @disable_random() - @unittest.skipIf(tf.__version__ not in ["2.11.0202242", "2.11.0202250", '2.11.0202317', '2.11.0202323'], "deconv2d quantization only support 2.11") + @unittest.skipIf( + tf.__version__ not in ["2.11.0202242", "2.11.0202250", "2.11.0202317", "2.11.0202323"], + "deconv2d quantization only support 2.11", + ) def test_deconv2d_biasadd_fusion(self): - x = tf.compat.v1.placeholder(tf.float32, [1,2,2,1], name="input") - conv_weights2 = tf.compat.v1.get_variable("weight2", [3,3,1,1], - initializer=tf.compat.v1.random_normal_initializer()) + x = tf.compat.v1.placeholder(tf.float32, [1, 2, 2, 1], name="input") + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 1, 1], initializer=tf.compat.v1.random_normal_initializer() + ) - conv2 = tf.nn.conv2d_transpose(x, conv_weights2, output_shape=[1,2,2,1], strides=[1, 1, 1, 1], padding="SAME") + conv2 = tf.nn.conv2d_transpose( + x, conv_weights2, output_shape=[1, 2, 2, 1], strides=[1, 1, 1, 1], padding="SAME" + ) normed2 = tf.nn.bias_add(conv2, tf.constant([3.0])) out = tf.identity(normed2) - out_name = out.name.split(':')[0] + out_name = out.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 2, 2, 1), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 2, 2, 1), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -643,34 +682,40 @@ def test_deconv2d_biasadd_fusion(self): found_deconv2d_fusion = False for i in output_graph.graph_def.node: - if i.op.find('_FusedQuantizedDeconv2D') != -1: + if i.op.find("_FusedQuantizedDeconv2D") != -1: found_deconv2d_fusion = True break self.assertEqual(found_deconv2d_fusion, True) @disable_random() - @unittest.skipIf(tf.__version__ not in ["2.11.0202242", "2.11.0202250", '2.11.0202317', '2.11.0202323'], "deconv2d quantization only support 2.11") + @unittest.skipIf( + tf.__version__ not in ["2.11.0202242", "2.11.0202250", "2.11.0202317", "2.11.0202323"], + "deconv2d quantization only support 2.11", + ) def test_single_deconv2d_fusion(self): - x = tf.compat.v1.placeholder(tf.float32, [1,2,2,1], name="input") - conv_weights2 = tf.compat.v1.get_variable("weight2", [3,3,1,1], - initializer=tf.compat.v1.random_normal_initializer()) + x = tf.compat.v1.placeholder(tf.float32, [1, 2, 2, 1], name="input") + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 1, 1], initializer=tf.compat.v1.random_normal_initializer() + ) - conv2 = tf.nn.conv2d_transpose(x, conv_weights2, output_shape=[1,2,2,1], strides=[1, 1, 1, 
1], padding="SAME") + conv2 = tf.nn.conv2d_transpose( + x, conv_weights2, output_shape=[1, 2, 2, 1], strides=[1, 1, 1, 1], padding="SAME" + ) out = tf.identity(conv2) - out_name = out.name.split(':')[0] + out_name = out.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 2, 2, 1), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 2, 2, 1), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def output_graph = quantizer.fit() @@ -678,34 +723,40 @@ def test_single_deconv2d_fusion(self): found_deconv2d_fusion = False for i in output_graph.graph_def.node: - if i.op.find('_FusedQuantizedDeconv2D') != -1: + if i.op.find("_FusedQuantizedDeconv2D") != -1: found_deconv2d_fusion = True break self.assertEqual(found_deconv2d_fusion, True) @disable_random() - @unittest.skipIf(tf.__version__ not in ["2.11.0202242", "2.11.0202250", '2.11.0202317', '2.11.0202323'], "deconv2d quantization only support 2.11") + @unittest.skipIf( + tf.__version__ not in ["2.11.0202242", "2.11.0202250", "2.11.0202317", "2.11.0202323"], + "deconv2d quantization only support 2.11", + ) def test_deconv3d_biasadd_fusion(self): x = tf.compat.v1.placeholder(tf.float32, [1, 2, 2, 2, 1], name="input") - conv3d_weights = tf.compat.v1.get_variable("weight_conv3d_1", [3, 3, 3, 1, 1], - initializer=tf.compat.v1.random_normal_initializer()) - conv3d = tf.nn.conv3d_transpose(x, conv3d_weights, output_shape=[1,2,2,2,1], strides=[1, 1, 1, 1, 1], padding="SAME") + conv3d_weights = tf.compat.v1.get_variable( + "weight_conv3d_1", [3, 3, 3, 1, 1], initializer=tf.compat.v1.random_normal_initializer() + ) + conv3d = tf.nn.conv3d_transpose( + x, conv3d_weights, output_shape=[1, 2, 2, 2, 1], strides=[1, 1, 1, 1, 1], padding="SAME" + ) normed2 = tf.nn.bias_add(conv3d, tf.constant([3.0])) out = tf.identity(normed2) - out_name = out.name.split(':')[0] + out_name = out.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 2, 2, 2, 1), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 2, 2, 2, 1), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -714,33 +765,39 @@ def test_deconv3d_biasadd_fusion(self): found_deconv3d_fusion = False for i in output_graph.graph_def.node: - if i.op.find('_FusedQuantizedDeconv3D') != -1: + if i.op.find("_FusedQuantizedDeconv3D") != -1: found_deconv3d_fusion = True break self.assertEqual(found_deconv3d_fusion, True) @disable_random() - @unittest.skipIf(tf.__version__ not in ["2.11.0202242", 
"2.11.0202250", '2.11.0202317', '2.11.0202323'], "deconv2d quantization only support 2.11") + @unittest.skipIf( + tf.__version__ not in ["2.11.0202242", "2.11.0202250", "2.11.0202317", "2.11.0202323"], + "deconv2d quantization only support 2.11", + ) def test_single_deconv3d_fusion(self): x = tf.compat.v1.placeholder(tf.float32, [1, 2, 2, 2, 1], name="input") - conv3d_weights = tf.compat.v1.get_variable("weight_conv3d_1", [3, 3, 3, 1, 1], - initializer=tf.compat.v1.random_normal_initializer()) - conv3d = tf.nn.conv3d_transpose(x, conv3d_weights, output_shape=[1,2,2,2,1], strides=[1, 1, 1, 1, 1], padding="SAME") + conv3d_weights = tf.compat.v1.get_variable( + "weight_conv3d_1", [3, 3, 3, 1, 1], initializer=tf.compat.v1.random_normal_initializer() + ) + conv3d = tf.nn.conv3d_transpose( + x, conv3d_weights, output_shape=[1, 2, 2, 2, 1], strides=[1, 1, 1, 1, 1], padding="SAME" + ) out = tf.identity(conv3d) - out_name = out.name.split(':')[0] + out_name = out.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 2, 2, 2, 1), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 2, 2, 2, 1), label=True) # quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -749,11 +806,12 @@ def test_single_deconv3d_fusion(self): found_deconv3d_fusion = False for i in output_graph.graph_def.node: - if i.op.find('_FusedQuantizedDeconv3D') != -1: + if i.op.find("_FusedQuantizedDeconv3D") != -1: found_deconv3d_fusion = True break self.assertEqual(found_deconv3d_fusion, True) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/test/tfnewapi/test_tensorflow_graph_conv_requantize_fusion.py b/test/tfnewapi/test_tensorflow_graph_conv_requantize_fusion.py index a3eadfecdd1..55021a94eaa 100644 --- a/test/tfnewapi/test_tensorflow_graph_conv_requantize_fusion.py +++ b/test/tfnewapi/test_tensorflow_graph_conv_requantize_fusion.py @@ -1,19 +1,21 @@ # # -*- coding: utf-8 -*- # -import unittest +import logging import os -import yaml +import unittest + import numpy as np import tensorflow as tf -import logging - +import yaml from tensorflow.compat.v1 import graph_util + from neural_compressor.adaptor.tensorflow import TensorflowQuery from neural_compressor.adaptor.tf_utils.util import disable_random + def build_tensorflow_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: tensorflow_yaml framework: tensorflow @@ -39,12 +41,13 @@ def build_tensorflow_yaml(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('tensorflow_yaml.yaml', "w", encoding="utf-8") as f: + with open("tensorflow_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() + class TestConvRequantizedFusionNewAPI(unittest.TestCase): @classmethod def setUpClass(self): @@ -52,7 +55,7 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('tensorflow_yaml.yaml') + os.remove("tensorflow_yaml.yaml") @disable_random() def 
test_conv_biasadd_relu6_fusion(self): @@ -60,23 +63,24 @@ def test_conv_biasadd_relu6_fusion(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight0", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight0", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) - relu6 = tf.nn.relu6(normed, name='op_to_store') + relu6 = tf.nn.relu6(normed, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('tensorflow_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("tensorflow_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -84,7 +88,7 @@ def test_conv_biasadd_relu6_fusion(self): found_conv_fusion = True for i in output_graph.graph_def.node: - if i.op == 'Relu6': + if i.op == "Relu6": found_conv_fusion = False break self.assertEqual(found_conv_fusion, True) @@ -92,23 +96,24 @@ def test_conv_biasadd_relu6_fusion(self): @disable_random() def test_single_conv3d_fusion(self): logging.getLogger().info("test_single_conv3d_fusion") - x = tf.compat.v1.placeholder(tf.float32, [1,64,64,64,1], name="input") + x = tf.compat.v1.placeholder(tf.float32, [1, 64, 64, 64, 1], name="input") paddings = tf.constant([[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight1", [4, 4, 4, 1, 64], - initializer=tf.compat.v1.random_normal_initializer()) - conv = tf.nn.conv3d(x_pad, conv_weights, strides=[1,2,2,2,1], padding="VALID", name='op_to_store') + conv_weights = tf.compat.v1.get_variable( + "weight1", [4, 4, 4, 1, 64], initializer=tf.compat.v1.random_normal_initializer() + ) + conv = tf.nn.conv3d(x_pad, conv_weights, strides=[1, 2, 2, 2, 1], padding="VALID", name="op_to_store") - out_name = conv.name.split(':')[0] + out_name = conv.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('tensorflow_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100,64,64,64,1), label=True) + + quantizer = Quantization("tensorflow_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 64, 64, 64, 1), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = 
common.DataLoader(dataset) quantizer.model = output_graph_def @@ -116,32 +121,32 @@ def test_single_conv3d_fusion(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D': + if i.op == "_FusedQuantizedConv3D": found_conv_fusion = True break self.assertEqual(found_conv_fusion, True) - @disable_random() def test_conv3d_biasadd_fusion(self): logging.getLogger().info("test_conv3d_biasadd_fusion") - x = tf.compat.v1.placeholder(tf.float32, [1,64,64,64,1], name="input") + x = tf.compat.v1.placeholder(tf.float32, [1, 64, 64, 64, 1], name="input") paddings = tf.constant([[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight2", [4, 4, 4, 1, 64], - initializer=tf.compat.v1.random_normal_initializer()) - conv = tf.nn.conv3d(x_pad, conv_weights, strides=[1,2,2,2,1], padding="VALID") - relu6 = tf.nn.relu6(conv, name='op_to_store') - out_name = relu6.name.split(':')[0] + conv_weights = tf.compat.v1.get_variable( + "weight2", [4, 4, 4, 1, 64], initializer=tf.compat.v1.random_normal_initializer() + ) + conv = tf.nn.conv3d(x_pad, conv_weights, strides=[1, 2, 2, 2, 1], padding="VALID") + relu6 = tf.nn.relu6(conv, name="op_to_store") + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('tensorflow_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100,64,64,64,1), label=True) + + quantizer = Quantization("tensorflow_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 64, 64, 64, 1), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -149,7 +154,7 @@ def test_conv3d_biasadd_fusion(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D': + if i.op == "_FusedQuantizedConv3D": found_conv_fusion = True break self.assertEqual(found_conv_fusion, True) @@ -157,26 +162,28 @@ def test_conv3d_biasadd_fusion(self): @disable_random() def test_conv3d_add_relu_fusion(self): logging.getLogger().info("test_conv3d_add_relu_fusion") - x = tf.compat.v1.placeholder(tf.float32, [1,64,64,64,1], name="input") - conv_weights = tf.compat.v1.get_variable("weight6", [4, 4, 4, 1, 64], - initializer=tf.compat.v1.random_normal_initializer()) - conv1_weights = tf.compat.v1.get_variable("weight7", [4, 4, 4, 1, 64], - initializer=tf.compat.v1.random_normal_initializer()) - conv = tf.nn.conv3d(x, conv_weights, strides=[1,2,2,2,1], padding="VALID") - conv1 = tf.nn.conv3d(x, conv1_weights, strides=[1,2,2,2,1], padding="VALID") + x = tf.compat.v1.placeholder(tf.float32, [1, 64, 64, 64, 1], name="input") + conv_weights = tf.compat.v1.get_variable( + "weight6", [4, 4, 4, 1, 64], initializer=tf.compat.v1.random_normal_initializer() + ) + conv1_weights = tf.compat.v1.get_variable( + "weight7", [4, 4, 4, 1, 64], initializer=tf.compat.v1.random_normal_initializer() + ) + conv = tf.nn.conv3d(x, conv_weights, strides=[1, 2, 2, 2, 1], padding="VALID") + conv1 = tf.nn.conv3d(x, conv1_weights, strides=[1, 2, 2, 2, 1], padding="VALID") add = conv + conv1 relu = 
tf.nn.relu(add) - out_name = relu.name.split(':')[0] + out_name = relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('tensorflow_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100,64,64,64,1), label=True) + + quantizer = Quantization("tensorflow_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 64, 64, 64, 1), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -184,32 +191,31 @@ def test_conv3d_add_relu_fusion(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'Dequantize']: + if i.op == "_FusedQuantizedConv3D" and i.attr["fused_ops"].list.s == [b"BiasAdd", b"Dequantize"]: found_conv_fusion = True break self.assertEqual(found_conv_fusion, True) @disable_random() def test_conv2d_biasadd_elu_fusion(self): - input = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='input') - weight = tf.compat.v1.constant(np.random.random((2,2,1,1)).astype(np.float32), name='weight') - bias = tf.constant(np.random.random((1)), name='bias', dtype = tf.float32) - conv = tf.nn.conv2d(input=input, filters=weight, strides=[1,1,1,1], padding='VALID', name='conv') - bias_add = tf.nn.bias_add(conv, bias, name = 'bias_add') - res = tf.nn.elu(bias_add, name = 'res') - output = tf.nn.softmax(res, name = 'op_to_store') - - out_name = output.name.split(':')[0] + input = tf.compat.v1.placeholder(tf.float32, shape=(1, 3, 3, 1), name="input") + weight = tf.compat.v1.constant(np.random.random((2, 2, 1, 1)).astype(np.float32), name="weight") + bias = tf.constant(np.random.random((1)), name="bias", dtype=tf.float32) + conv = tf.nn.conv2d(input=input, filters=weight, strides=[1, 1, 1, 1], padding="VALID", name="conv") + bias_add = tf.nn.bias_add(conv, bias, name="bias_add") + res = tf.nn.elu(bias_add, name="res") + output = tf.nn.softmax(res, name="op_to_store") + + out_name = output.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('tensorflow_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 3, 3, 1), label=True) + + quantizer = Quantization("tensorflow_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 3, 3, 1), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -217,47 +223,106 @@ def test_conv2d_biasadd_elu_fusion(self): self.assertNotEqual(output_graph, None) elu_fused = False for node in output_graph.graph_def.node: - if node.name == 'conv_eightbit_requantize_dequantize': - if b'Elu' in node.attr['fused_ops'].list.s: + if node.name == "conv_eightbit_requantize_dequantize": + if b"Elu" in 
node.attr["fused_ops"].list.s: elu_fused = True self.assertEqual(elu_fused, True) @disable_random() def test_conv3d_add_const_fusion(self): logging.getLogger().info("test_conv3d_add_const_fusion") - x = tf.compat.v1.placeholder(tf.float32, [1,64,64,64,1], name="input") - conv_weights = tf.compat.v1.get_variable("weight11", [4, 4, 4, 1, 64], - initializer=tf.compat.v1.random_normal_initializer()) - conv = tf.nn.conv3d(x, conv_weights, strides=[1,2,2,2,1], padding="VALID") - add = conv + tf.constant([[[[[ - 0.000015179151887423359, 0.000022200847524800338, -0.000009995766049541999, -0.0000022956028260523453, - 0.000008830029400996864, 0.0000017190360495078494, 0.000019561824956326745, 0.00014721050683874637, - -0.000005871841494808905, 0.000004377178811409976, -0.000006191140982991783, 0.000009258330464945175, - -0.000009839599442784674, 0.000008547322067897767, 0.000004629391241905978, 2.345327061448188e-7, - 0.000015179151887423359, 0.000022200847524800338, -0.000009995766049541999, -0.0000022956028260523453, - 0.000008830029400996864, 0.0000017190360495078494, 0.000019561824956326745, 0.00014721050683874637, - -0.000005871841494808905, 0.000004377178811409976, -0.000006191140982991783, 0.000009258330464945175, - -0.000009839599442784674, 0.000008547322067897767, 0.000004629391241905978, 2.345327061448188e-7, - 0.000015179151887423359, 0.000022200847524800338, -0.000009995766049541999, -0.0000022956028260523453, - 0.000008830029400996864, 0.0000017190360495078494, 0.000019561824956326745, 0.00014721050683874637, - -0.000005871841494808905, 0.000004377178811409976, -0.000006191140982991783, 0.000009258330464945175, - -0.000009839599442784674, 0.000008547322067897767, 0.000004629391241905978, 2.345327061448188e-7, - 0.000015179151887423359, 0.000022200847524800338, -0.000009995766049541999, -0.0000022956028260523453, - 0.000008830029400996864, 0.0000017190360495078494, 0.000019561824956326745, 0.00014721050683874637, - -0.000005871841494808905, 0.000004377178811409976, -0.000006191140982991783, 0.000009258330464945175, - -0.000009839599442784674, 0.000008547322067897767, 0.000004629391241905978, 2.345327061448188e-7 - ]]]]]) - - out_name = add.name.split(':')[0] + x = tf.compat.v1.placeholder(tf.float32, [1, 64, 64, 64, 1], name="input") + conv_weights = tf.compat.v1.get_variable( + "weight11", [4, 4, 4, 1, 64], initializer=tf.compat.v1.random_normal_initializer() + ) + conv = tf.nn.conv3d(x, conv_weights, strides=[1, 2, 2, 2, 1], padding="VALID") + add = conv + tf.constant( + [ + [ + [ + [ + [ + 0.000015179151887423359, + 0.000022200847524800338, + -0.000009995766049541999, + -0.0000022956028260523453, + 0.000008830029400996864, + 0.0000017190360495078494, + 0.000019561824956326745, + 0.00014721050683874637, + -0.000005871841494808905, + 0.000004377178811409976, + -0.000006191140982991783, + 0.000009258330464945175, + -0.000009839599442784674, + 0.000008547322067897767, + 0.000004629391241905978, + 2.345327061448188e-7, + 0.000015179151887423359, + 0.000022200847524800338, + -0.000009995766049541999, + -0.0000022956028260523453, + 0.000008830029400996864, + 0.0000017190360495078494, + 0.000019561824956326745, + 0.00014721050683874637, + -0.000005871841494808905, + 0.000004377178811409976, + -0.000006191140982991783, + 0.000009258330464945175, + -0.000009839599442784674, + 0.000008547322067897767, + 0.000004629391241905978, + 2.345327061448188e-7, + 0.000015179151887423359, + 0.000022200847524800338, + -0.000009995766049541999, + -0.0000022956028260523453, + 0.000008830029400996864, + 
0.0000017190360495078494, + 0.000019561824956326745, + 0.00014721050683874637, + -0.000005871841494808905, + 0.000004377178811409976, + -0.000006191140982991783, + 0.000009258330464945175, + -0.000009839599442784674, + 0.000008547322067897767, + 0.000004629391241905978, + 2.345327061448188e-7, + 0.000015179151887423359, + 0.000022200847524800338, + -0.000009995766049541999, + -0.0000022956028260523453, + 0.000008830029400996864, + 0.0000017190360495078494, + 0.000019561824956326745, + 0.00014721050683874637, + -0.000005871841494808905, + 0.000004377178811409976, + -0.000006191140982991783, + 0.000009258330464945175, + -0.000009839599442784674, + 0.000008547322067897767, + 0.000004629391241905978, + 2.345327061448188e-7, + ] + ] + ] + ] + ] + ) + + out_name = add.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('tensorflow_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100,64,64,64,1), label=True) + + quantizer = Quantization("tensorflow_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 64, 64, 64, 1), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -265,7 +330,7 @@ def test_conv3d_add_const_fusion(self): found_conv_fusion = True for i in output_graph.graph_def.node: - if i.op == 'AddV2': + if i.op == "AddV2": found_conv_fusion = False break self.assertEqual(found_conv_fusion, True) @@ -276,23 +341,24 @@ def test_conv_add_add_fusion(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight12", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight12", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) add = normed + tf.constant(np.random.randn(16), dtype=tf.float32) - relu6 = tf.nn.relu6(add, name='op_to_store') + relu6 = tf.nn.relu6(add, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('tensorflow_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("tensorflow_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -300,7 +366,7 @@ def test_conv_add_add_fusion(self): found_conv_fusion = True for i in 
output_graph.graph_def.node: - if i.op == 'Add': + if i.op == "Add": found_conv_fusion = False break self.assertEqual(found_conv_fusion, True) @@ -311,19 +377,20 @@ def test_single_conv2d_fusion(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight13", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight13", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") - out_name = conv.name.split(':')[0] + out_name = conv.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('tensorflow_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("tensorflow_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -331,7 +398,7 @@ def test_single_conv2d_fusion(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv2D': + if i.op == "_FusedQuantizedConv2D": found_conv_fusion = True break self.assertEqual(found_conv_fusion, True) @@ -341,24 +408,26 @@ def test_conv3d_add_addn_const_relu_fusion(self): logging.getLogger().info("test_conv3d_add_addn_const_relu_fusion") x = tf.compat.v1.placeholder(tf.float32, [1, 128, 64, 64, 16], name="input") top_relu = tf.nn.relu(x) - conv3d_1_weights = tf.compat.v1.get_variable("weight14", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_1_weights = tf.compat.v1.get_variable( + "weight14", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_1 = tf.nn.conv3d(top_relu, conv3d_1_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=tf.constant(np.random.randn(32), dtype=tf.float32), name='addv2') - var = tf.compat.v1.get_variable("add_y", [1,64,32,32,32], - initializer=tf.compat.v1.random_normal_initializer()) - add = tf.raw_ops.AddV2(x=add_1, y=var, name='addv2_1') + add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=tf.constant(np.random.randn(32), dtype=tf.float32), name="addv2") + var = tf.compat.v1.get_variable( + "add_y", [1, 64, 32, 32, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + add = tf.raw_ops.AddV2(x=add_1, y=var, name="addv2_1") relu = tf.nn.relu(add) - out_name = relu.name.split(':')[0] + out_name = relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('tensorflow_yaml.yaml') - dataset = 
quantizer.dataset('dummy', shape=(100, 128, 64, 64, 16), label=True) + + quantizer = Quantization("tensorflow_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 128, 64, 64, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -367,10 +436,10 @@ def test_conv3d_add_addn_const_relu_fusion(self): found_conv_sumadd_fusion = False found_conv_biasadd_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D': - if str(b'Sum') in str(i.attr['fused_ops'].list.s): + if i.op == "_FusedQuantizedConv3D": + if str(b"Sum") in str(i.attr["fused_ops"].list.s): found_conv_sumadd_fusion = True - if str(i.attr['fused_ops'].list.s) == str([b'BiasAdd', b'Sum', b'Relu']): + if str(i.attr["fused_ops"].list.s) == str([b"BiasAdd", b"Sum", b"Relu"]): found_conv_biasadd_fusion = True self.assertEqual(found_conv_sumadd_fusion, False) self.assertEqual(found_conv_biasadd_fusion, False) @@ -382,25 +451,27 @@ def test_conv3d_add_const_addn_relu_fusion(self): paddings = tf.constant([[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") top_relu = tf.nn.relu(x_pad) - conv3d_1_weights = tf.compat.v1.get_variable("weight15", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_1_weights = tf.compat.v1.get_variable( + "weight15", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_1 = tf.nn.conv3d(top_relu, conv3d_1_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=tf.constant(np.random.randn(32), dtype=tf.float32), name='addv2_2') - conv3d_2_weights = tf.compat.v1.get_variable("weight16", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=tf.constant(np.random.randn(32), dtype=tf.float32), name="addv2_2") + conv3d_2_weights = tf.compat.v1.get_variable( + "weight16", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_2 = tf.nn.conv3d(top_relu, conv3d_2_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - add = tf.raw_ops.AddV2(x=add_1, y=conv3d_2, name='addv2_3') + add = tf.raw_ops.AddV2(x=add_1, y=conv3d_2, name="addv2_3") relu = tf.nn.relu(add) - out_name = relu.name.split(':')[0] + out_name = relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('tensorflow_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 128, 64, 64, 16), label=True) + + quantizer = Quantization("tensorflow_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 128, 64, 64, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -409,10 +480,10 @@ def test_conv3d_add_const_addn_relu_fusion(self): found_conv_sumadd_fusion = False found_conv_biasadd_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D': - if str(b'Sum') in str(i.attr['fused_ops'].list.s): + if i.op == "_FusedQuantizedConv3D": + if str(b"Sum") in str(i.attr["fused_ops"].list.s): 
found_conv_sumadd_fusion = True - if str(i.attr['fused_ops'].list.s) == str([b'BiasAdd', b'Sum', b'Relu']): + if str(i.attr["fused_ops"].list.s) == str([b"BiasAdd", b"Sum", b"Relu"]): found_conv_biasadd_fusion = True self.assertEqual(found_conv_sumadd_fusion, True) self.assertEqual(found_conv_biasadd_fusion, False) @@ -424,24 +495,26 @@ def test_conv3d_add_addn_fusion(self): paddings = tf.constant([[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") top_relu = tf.nn.relu(x_pad) - conv3d_1_weights = tf.compat.v1.get_variable("weight15", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_1_weights = tf.compat.v1.get_variable( + "weight15", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_1 = tf.nn.conv3d(top_relu, conv3d_1_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=tf.constant(np.random.randn(32), dtype=tf.float32), name='addv2_4') - conv3d_2_weights = tf.compat.v1.get_variable("weight16", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=tf.constant(np.random.randn(32), dtype=tf.float32), name="addv2_4") + conv3d_2_weights = tf.compat.v1.get_variable( + "weight16", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_2 = tf.nn.conv3d(top_relu, conv3d_2_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - add = tf.raw_ops.AddV2(x=add_1, y=conv3d_2, name='addv2_5') - out_name = add.name.split(':')[0] + add = tf.raw_ops.AddV2(x=add_1, y=conv3d_2, name="addv2_5") + out_name = add.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('tensorflow_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 128, 64, 64, 16), label=True) + + quantizer = Quantization("tensorflow_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 128, 64, 64, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -449,7 +522,7 @@ def test_conv3d_add_addn_fusion(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D': + if i.op == "_FusedQuantizedConv3D": found_conv_fusion = True self.assertEqual(found_conv_fusion, True) @@ -460,28 +533,31 @@ def test_conv3d_add_addn_relu_fusion(self): paddings = tf.constant([[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") top_relu = tf.nn.relu(x_pad) - conv3d_1_weights = tf.compat.v1.get_variable("weight17", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv3d_2_weights = tf.compat.v1.get_variable("weight18", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_1_weights = tf.compat.v1.get_variable( + "weight17", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv3d_2_weights = tf.compat.v1.get_variable( + "weight18", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_1 = tf.nn.conv3d(top_relu, conv3d_1_weights, strides=[1, 2, 2, 2, 1], padding="SAME") conv3d_2 
= tf.nn.conv3d(top_relu, conv3d_2_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=conv3d_2, name='addv2_6') - conv3d_3_weights = tf.compat.v1.get_variable("weight19", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=conv3d_2, name="addv2_6") + conv3d_3_weights = tf.compat.v1.get_variable( + "weight19", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_2 = tf.nn.conv3d(top_relu, conv3d_3_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - add = tf.raw_ops.AddV2(x=add_1, y=conv3d_2, name='addv2_7') + add = tf.raw_ops.AddV2(x=add_1, y=conv3d_2, name="addv2_7") relu = tf.nn.relu(add) - out_name = relu.name.split(':')[0] + out_name = relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('tensorflow_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 128, 64, 64, 16), label=True) + + quantizer = Quantization("tensorflow_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 128, 64, 64, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -489,31 +565,32 @@ def test_conv3d_add_addn_relu_fusion(self): found_relu_fusion = False for i in output_graph.graph_def.node: - if i.op == 'Relu': + if i.op == "Relu": found_relu_fusion = True self.assertEqual(found_relu_fusion, True) @disable_random() def test_conv3d_relu_fusion(self): logging.getLogger().info("test_conv3d_relu_fusion") - x = tf.compat.v1.placeholder(tf.float32, [1,64,64,64,1], name="input") + x = tf.compat.v1.placeholder(tf.float32, [1, 64, 64, 64, 1], name="input") paddings = tf.constant([[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight20", [4, 4, 4, 1, 64], - initializer=tf.compat.v1.random_normal_initializer()) - conv = tf.nn.conv3d(x_pad, conv_weights, strides=[1,2,2,2,1], padding="VALID") + conv_weights = tf.compat.v1.get_variable( + "weight20", [4, 4, 4, 1, 64], initializer=tf.compat.v1.random_normal_initializer() + ) + conv = tf.nn.conv3d(x_pad, conv_weights, strides=[1, 2, 2, 2, 1], padding="VALID") relu = tf.nn.leaky_relu(conv) - out_name = relu.name.split(':')[0] + out_name = relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('tensorflow_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100,64,64,64,1), label=True) + + quantizer = Quantization("tensorflow_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 64, 64, 64, 1), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -521,7 +598,7 @@ def test_conv3d_relu_fusion(self): 
found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D': + if i.op == "_FusedQuantizedConv3D": found_conv_fusion = True break self.assertEqual(found_conv_fusion, True) @@ -533,23 +610,25 @@ def test_conv3d_add_fusion(self): paddings = tf.constant([[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") top_relu = tf.nn.relu(x_pad) - conv3d_1_weights = tf.compat.v1.get_variable("weight21", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_1_weights = tf.compat.v1.get_variable( + "weight21", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_1 = tf.nn.conv3d(top_relu, conv3d_1_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - conv3d_2_weights = tf.compat.v1.get_variable("weight22", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_2_weights = tf.compat.v1.get_variable( + "weight22", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_2 = tf.nn.conv3d(top_relu, conv3d_2_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - add = tf.raw_ops.AddV2(x=conv3d_1, y=conv3d_2, name='addv2_8') - out_name = add.name.split(':')[0] + add = tf.raw_ops.AddV2(x=conv3d_1, y=conv3d_2, name="addv2_8") + out_name = add.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('tensorflow_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 128, 64, 64, 16), label=True) + + quantizer = Quantization("tensorflow_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 128, 64, 64, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -557,7 +636,7 @@ def test_conv3d_add_fusion(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D': + if i.op == "_FusedQuantizedConv3D": found_conv_fusion = True self.assertEqual(found_conv_fusion, True) @@ -568,26 +647,28 @@ def test_conv3d_add_const_addn_relu_requantize_fusion(self): paddings = tf.constant([[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") top_relu = tf.nn.relu(x_pad) - conv3d_1_weights = tf.compat.v1.get_variable("weight23", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_1_weights = tf.compat.v1.get_variable( + "weight23", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_1 = tf.nn.conv3d(top_relu, conv3d_1_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - y_const = tf.constant(np.random.randn(1,1,1,1,32), dtype=tf.float32) - add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=y_const, name='addv2_9') - conv3d_2_weights = tf.compat.v1.get_variable("weight24", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + y_const = tf.constant(np.random.randn(1, 1, 1, 1, 32), dtype=tf.float32) + add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=y_const, name="addv2_9") + conv3d_2_weights = tf.compat.v1.get_variable( + "weight24", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_2 = tf.nn.conv3d(top_relu, 
conv3d_2_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - add_2 = tf.raw_ops.AddV2(x=add_1, y=conv3d_2, name='addv2_10') + add_2 = tf.raw_ops.AddV2(x=add_1, y=conv3d_2, name="addv2_10") relu = tf.nn.relu(add_2) - out_name = relu.name.split(':')[0] + out_name = relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('tensorflow_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 128, 64, 64, 16), label=True) + + quantizer = Quantization("tensorflow_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 128, 64, 64, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -596,10 +677,10 @@ def test_conv3d_add_const_addn_relu_requantize_fusion(self): found_conv_sumadd_fusion = False found_conv_biasadd_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D': - if str(b'Sum') in str(i.attr['fused_ops'].list.s): + if i.op == "_FusedQuantizedConv3D": + if str(b"Sum") in str(i.attr["fused_ops"].list.s): found_conv_sumadd_fusion = True - if str(i.attr['fused_ops'].list.s) == str([b'BiasAdd', b'Sum', b'Relu', b'Requantize']): + if str(i.attr["fused_ops"].list.s) == str([b"BiasAdd", b"Sum", b"Relu", b"Requantize"]): found_conv_biasadd_fusion = True self.assertEqual(found_conv_sumadd_fusion, True) self.assertEqual(found_conv_biasadd_fusion, True) @@ -611,25 +692,27 @@ def test_conv3d_add_const_addn_fusion(self): paddings = tf.constant([[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") top_relu = tf.nn.relu(x_pad) - conv3d_1_weights = tf.compat.v1.get_variable("weight25", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_1_weights = tf.compat.v1.get_variable( + "weight25", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_1 = tf.nn.conv3d(top_relu, conv3d_1_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - y_const = tf.constant(np.random.randn(1,1,1,1,32), dtype=tf.float32) - add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=y_const, name='addv2_11') - conv3d_2_weights = tf.compat.v1.get_variable("weight26", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + y_const = tf.constant(np.random.randn(1, 1, 1, 1, 32), dtype=tf.float32) + add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=y_const, name="addv2_11") + conv3d_2_weights = tf.compat.v1.get_variable( + "weight26", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_2 = tf.nn.conv3d(top_relu, conv3d_2_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - add_2 = tf.raw_ops.AddV2(x=add_1, y=conv3d_2, name='addv2_12') - out_name = add_2.name.split(':')[0] + add_2 = tf.raw_ops.AddV2(x=add_1, y=conv3d_2, name="addv2_12") + out_name = add_2.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental 
import Quantization, common - quantizer = Quantization('tensorflow_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 128, 64, 64, 16), label=True) + + quantizer = Quantization("tensorflow_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 128, 64, 64, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -637,7 +720,7 @@ def test_conv3d_add_const_addn_fusion(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D': + if i.op == "_FusedQuantizedConv3D": found_conv_fusion = True self.assertEqual(found_conv_fusion, True) @@ -648,29 +731,30 @@ def test_conv3d_add_no_relu_fusion(self): paddings = tf.constant([[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") top_relu = tf.nn.relu(x_pad) - conv3d_1_weights = tf.compat.v1.get_variable("weight27", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_1_weights = tf.compat.v1.get_variable( + "weight27", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_1 = tf.nn.conv3d(top_relu, conv3d_1_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - y_const = tf.constant(np.random.randn(1,1,1,1,32), dtype=tf.float32) - add = tf.raw_ops.AddV2(x=conv3d_1, y=y_const, name='addv2_13') + y_const = tf.constant(np.random.randn(1, 1, 1, 1, 32), dtype=tf.float32) + add = tf.raw_ops.AddV2(x=conv3d_1, y=y_const, name="addv2_13") pooling = tf.nn.max_pool(add, ksize=1, strides=[1, 2, 2, 2, 1], padding="SAME") - out_name = pooling.name.split(':')[0] + out_name = pooling.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('tensorflow_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 128, 64, 64, 16), label=True) + + quantizer = Quantization("tensorflow_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 128, 64, 64, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D': + if i.op == "_FusedQuantizedConv3D": found_conv_fusion = True break self.assertEqual(found_conv_fusion, True) @@ -682,33 +766,34 @@ def test_conv3d_add_const_relu_fusion(self): paddings = tf.constant([[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") top_relu = tf.nn.relu(x_pad) - conv3d_1_weights = tf.compat.v1.get_variable("weight28", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_1_weights = tf.compat.v1.get_variable( + "weight28", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_1 = tf.nn.conv3d(top_relu, conv3d_1_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - y_const = tf.constant(np.random.randn(1,1,1,1,32), dtype=tf.float32) - add = tf.raw_ops.AddV2(x=conv3d_1, y=y_const, name='addv2_10') + y_const = tf.constant(np.random.randn(1, 1, 1, 1, 32), dtype=tf.float32) + add = tf.raw_ops.AddV2(x=conv3d_1, y=y_const, 
name="addv2_10") relu = tf.nn.relu(add) - out_name = relu.name.split(':')[0] + out_name = relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('tensorflow_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 128, 64, 64, 16), label=True) + + quantizer = Quantization("tensorflow_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 128, 64, 64, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D': + if i.op == "_FusedQuantizedConv3D": found_conv_fusion = True break self.assertEqual(found_conv_fusion, True) - + @disable_random() def test_conv2d_add_const_leakyrelu_add_fusion(self): logging.getLogger().info("test_conv2d_add_const_leakyrelu_add_fusion") @@ -716,33 +801,35 @@ def test_conv2d_add_const_leakyrelu_add_fusion(self): paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") top_relu = tf.nn.relu(x_pad) - conv2d_1_weights = tf.compat.v1.get_variable("weight29", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv2d_1_weights = tf.compat.v1.get_variable( + "weight29", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2d_1 = tf.nn.conv2d(top_relu, conv2d_1_weights, strides=[1, 2, 2, 1], padding="SAME") y_const = tf.constant(np.random.randn(16), dtype=tf.float32) - add_1 = tf.raw_ops.AddV2(x=conv2d_1, y=y_const, name='addv2_11') + add_1 = tf.raw_ops.AddV2(x=conv2d_1, y=y_const, name="addv2_11") relu = tf.nn.leaky_relu(add_1) - conv2d_2_weights = tf.compat.v1.get_variable("weight30", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv2d_2_weights = tf.compat.v1.get_variable( + "weight30", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2d_2 = tf.nn.conv2d(top_relu, conv2d_2_weights, strides=[1, 2, 2, 1], padding="SAME") - add_2 = tf.raw_ops.AddV2(x=relu, y=conv2d_2, name='addv2_12') - out_name = add_2.name.split(':')[0] + add_2 = tf.raw_ops.AddV2(x=relu, y=conv2d_2, name="addv2_12") + out_name = add_2.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('tensorflow_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("tensorflow_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def output_graph = quantizer.fit() found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv2D': + if i.op == 
"_FusedQuantizedConv2D": found_conv_fusion = True self.assertEqual(found_conv_fusion, True) @@ -753,34 +840,36 @@ def test_conv3d_add_const_leakyrelu_add_fusion(self): paddings = tf.constant([[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") top_relu = tf.nn.relu(x_pad) - conv3d_1_weights = tf.compat.v1.get_variable("weight31", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_1_weights = tf.compat.v1.get_variable( + "weight31", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_1 = tf.nn.conv3d(top_relu, conv3d_1_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - y_const = tf.constant(np.random.randn(1,1,1,1,32), dtype=tf.float32) - add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=y_const, name='addv2_13') + y_const = tf.constant(np.random.randn(1, 1, 1, 1, 32), dtype=tf.float32) + add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=y_const, name="addv2_13") relu = tf.nn.leaky_relu(add_1) - conv3d_2_weights = tf.compat.v1.get_variable("weight32", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_2_weights = tf.compat.v1.get_variable( + "weight32", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_2 = tf.nn.conv3d(top_relu, conv3d_2_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - add_2 = tf.raw_ops.AddV2(x=relu, y=conv3d_2, name='addv2_14') - out_name = add_2.name.split(':')[0] + add_2 = tf.raw_ops.AddV2(x=relu, y=conv3d_2, name="addv2_14") + out_name = add_2.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('tensorflow_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 128, 64, 64, 16), label=True) + + quantizer = Quantization("tensorflow_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 128, 64, 64, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def output_graph = quantizer.fit() found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D': - found_conv_fusion = True + if i.op == "_FusedQuantizedConv3D": + found_conv_fusion = True self.assertEqual(found_conv_fusion, True) @disable_random() @@ -790,27 +879,30 @@ def test_conv3d_add_addn_non_const_fusion(self): paddings = tf.constant([[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") top_relu = tf.nn.relu(x_pad) - conv3d_1_weights = tf.compat.v1.get_variable("weight33", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_1_weights = tf.compat.v1.get_variable( + "weight33", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_1 = tf.nn.conv3d(top_relu, conv3d_1_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - conv3d_2_weights = tf.compat.v1.get_variable("weight34", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_2_weights = tf.compat.v1.get_variable( + "weight34", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_2 = tf.nn.conv3d(top_relu, conv3d_2_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - 
add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=conv3d_2, name='addv2_15') - conv3d_3_weights = tf.compat.v1.get_variable("weight35", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=conv3d_2, name="addv2_15") + conv3d_3_weights = tf.compat.v1.get_variable( + "weight35", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_3 = tf.nn.conv3d(top_relu, conv3d_3_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - add = tf.raw_ops.AddV2(x=add_1, y=conv3d_3, name='addv2_16') - out_name = add.name.split(':')[0] + add = tf.raw_ops.AddV2(x=add_1, y=conv3d_3, name="addv2_16") + out_name = add.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('tensorflow_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 128, 64, 64, 16), label=True) + + quantizer = Quantization("tensorflow_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 128, 64, 64, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -818,7 +910,7 @@ def test_conv3d_add_addn_non_const_fusion(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D': + if i.op == "_FusedQuantizedConv3D": found_conv_fusion = True self.assertEqual(found_conv_fusion, True) @@ -829,32 +921,34 @@ def test_conv3d_add_const_elu_add_fusion(self): paddings = tf.constant([[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") top_relu = tf.nn.relu(x_pad) - conv3d_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d = tf.nn.conv3d(top_relu, conv3d_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - y_const = tf.constant(np.random.randn(1,1,1,1,32), dtype=tf.float32) - add = tf.raw_ops.AddV2(x=conv3d, y=y_const, name='addv2') + y_const = tf.constant(np.random.randn(1, 1, 1, 1, 32), dtype=tf.float32) + add = tf.raw_ops.AddV2(x=conv3d, y=y_const, name="addv2") elu = tf.nn.elu(add) - output = tf.nn.softmax(elu, name = 'op_to_store') - out_name = output.name.split(':')[0] + output = tf.nn.softmax(elu, name="op_to_store") + out_name = output.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('tensorflow_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 128, 64, 64, 16), label=True) + + quantizer = Quantization("tensorflow_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 128, 64, 64, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = 
output_graph_def output_graph = quantizer.fit() found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D': - found_conv_fusion = True + if i.op == "_FusedQuantizedConv3D": + found_conv_fusion = True self.assertEqual(found_conv_fusion, True) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/test/tfnewapi/test_tensorflow_graph_depthwiseconv_fusion.py b/test/tfnewapi/test_tensorflow_graph_depthwiseconv_fusion.py index b30790a6eea..2eaf6efe3b1 100644 --- a/test/tfnewapi/test_tensorflow_graph_depthwiseconv_fusion.py +++ b/test/tfnewapi/test_tensorflow_graph_depthwiseconv_fusion.py @@ -1,23 +1,21 @@ # # -*- coding: utf-8 -*- # -import unittest import os -import yaml +import unittest + import numpy as np import tensorflow as tf -from tensorflow.core.framework import attr_value_pb2 -from tensorflow.core.framework import graph_pb2 -from tensorflow.core.framework import node_def_pb2 -from tensorflow.python.framework import tensor_util -from tensorflow.python.framework import dtypes +import yaml from tensorflow.compat.v1 import graph_util +from tensorflow.core.framework import attr_value_pb2, graph_pb2, node_def_pb2 +from tensorflow.python.framework import dtypes, tensor_util from neural_compressor.adaptor.tf_utils.util import disable_random def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -43,111 +41,130 @@ def build_fake_yaml(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() + def build_Conv2dBiasAddAddRelu6MulMul(): input_node = node_def_pb2.NodeDef() input_node.name = "input" input_node.op = "Placeholder" - input_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + input_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv1_weight_node = node_def_pb2.NodeDef() conv1_weight_node.name = "conv1_weights" conv1_weight_node.op = "Const" - conv1_weight_value = np.float32(np.abs(np.random.randn(3,3,3,32))) - conv1_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - conv1_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - conv1_weight_value, conv1_weight_value.dtype.type, conv1_weight_value.shape))) + conv1_weight_value = np.float32(np.abs(np.random.randn(3, 3, 3, 32))) + conv1_weight_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv1_weight_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv1_weight_value, conv1_weight_value.dtype.type, conv1_weight_value.shape + ) + ) + ) conv1_node = node_def_pb2.NodeDef() conv1_node.name = "conv1" conv1_node.op = "Conv2D" - conv1_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + conv1_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv1_node.input.extend([input_node.name, conv1_weight_node.name]) - conv1_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv1_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - 
conv1_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) - conv1_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + conv1_node.attr["strides"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv1_node.attr["dilations"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv1_node.attr["padding"].CopyFrom(attr_value_pb2.AttrValue(s=b"SAME")) + conv1_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) bias_node = node_def_pb2.NodeDef() bias_node.name = "conv1_bias" bias_node.op = "Const" bias_value = np.float32(np.abs(np.random.randn(32))) - bias_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - bias_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - bias_value, bias_value.dtype.type, bias_value.shape))) + bias_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(bias_value, bias_value.dtype.type, bias_value.shape) + ) + ) bias_add_node = node_def_pb2.NodeDef() bias_add_node.name = "conv1_bias_add" bias_add_node.op = "BiasAdd" - bias_add_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - bias_add_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) - bias_add_node.input.extend([conv1_node.name, bias_node.name]) - + bias_add_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_add_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) + bias_add_node.input.extend([conv1_node.name, bias_node.name]) + offset_node = node_def_pb2.NodeDef() offset_node.name = "offset" offset_node.op = "Const" offset_value = np.float32(np.abs(np.random.randn(1))) - offset_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - offset_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - offset_value, offset_value.dtype.type, offset_value.shape))) - + offset_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + offset_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(offset_value, offset_value.dtype.type, offset_value.shape) + ) + ) + add_node = node_def_pb2.NodeDef() add_node.op = "Add" add_node.name = "add/hard_swish" - add_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - add_node.input.extend([bias_add_node.name, offset_node.name]) - + add_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + add_node.input.extend([bias_add_node.name, offset_node.name]) + relu_node = node_def_pb2.NodeDef() relu_node.op = "Relu6" relu_node.name = "relu6/hard_swish" - relu_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - relu_node.input.extend([add_node.name]) - + relu_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + relu_node.input.extend([add_node.name]) + mul_node = node_def_pb2.NodeDef() mul_node.op = "Mul" mul_node.name = "mul/hard_swish" - mul_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - mul_node.input.extend([bias_add_node.name, relu_node.name]) - + 
mul_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + mul_node.input.extend([bias_add_node.name, relu_node.name]) + offset1_node = node_def_pb2.NodeDef() offset1_node.name = "mul1_offset" offset1_node.op = "Const" offset1_value = np.float32(np.abs(np.random.randn(1))) - offset1_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - offset1_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - offset1_value, offset1_value.dtype.type, offset1_value.shape))) - + offset1_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + offset1_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(offset1_value, offset1_value.dtype.type, offset1_value.shape) + ) + ) + mul1_node = node_def_pb2.NodeDef() mul1_node.op = "Mul" mul1_node.name = "mul1/hard_swish" - mul1_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - mul1_node.input.extend([mul_node.name,offset1_node.name]) - + mul1_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + mul1_node.input.extend([mul_node.name, offset1_node.name]) + test_graph = graph_pb2.GraphDef() - test_graph.node.extend([input_node, - conv1_weight_node, - conv1_node, - bias_node, - bias_add_node, - add_node, - relu_node, - offset_node, - offset1_node, - mul_node, - mul1_node - ]) - return test_graph + test_graph.node.extend( + [ + input_node, + conv1_weight_node, + conv1_node, + bias_node, + bias_add_node, + add_node, + relu_node, + offset_node, + offset1_node, + mul_node, + mul1_node, + ] + ) + return test_graph + + class TestConvBiasAddAddReluFusion(unittest.TestCase): @classmethod def setUpClass(self): @@ -155,28 +172,29 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") @disable_random() def test_depthwiseconv_biasadd_fusion(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.depthwise_conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="VALID") - normed = tf.compat.v1.layers.batch_normalization(conv, name='op_to_store') - out_name = normed.name.split(':')[0] + normed = tf.compat.v1.layers.batch_normalization(conv, name="op_to_store") + out_name = normed.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -184,7 +202,7 @@ def test_depthwiseconv_biasadd_fusion(self): found_conv_fusion = False for i in 
output_graph.graph_def.node: - if i.op == '_FusedQuantizedDepthwiseConv2D': + if i.op == "_FusedQuantizedDepthwiseConv2D": found_conv_fusion = True break @@ -195,8 +213,9 @@ def test_depthwiseConv2dNative_BiasAddAddRelu6MulMul_fusion(self): output_graph_def = build_Conv2dBiasAddAddRelu6MulMul() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 224, 224, 3), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 224, 224, 3), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -205,34 +224,35 @@ def test_depthwiseConv2dNative_BiasAddAddRelu6MulMul_fusion(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv2D': + if i.op == "_FusedQuantizedConv2D": found_conv_fusion = True break self.assertEqual(found_conv_fusion, True) - + @disable_random() def test_depthwiseconv_biasadd_leakyrelu_fusion(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.depthwise_conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="VALID") - normed = tf.compat.v1.layers.batch_normalization(conv, name='op_to_store') - + normed = tf.compat.v1.layers.batch_normalization(conv, name="op_to_store") + leakyrelu = tf.nn.leaky_relu(normed) - out_name = leakyrelu.name.split(':')[0] + out_name = leakyrelu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -240,11 +260,12 @@ def test_depthwiseconv_biasadd_leakyrelu_fusion(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedDepthwiseConv2D': + if i.op == "_FusedQuantizedDepthwiseConv2D": found_conv_fusion = True break self.assertEqual(found_conv_fusion, True) - + + if __name__ == "__main__": unittest.main() diff --git a/test/tfnewapi/test_tensorflow_graph_dequantize_cast_optimizer_newapi.py b/test/tfnewapi/test_tensorflow_graph_dequantize_cast_optimizer_newapi.py index fc26f9a8ce6..1bc1020ae4a 100644 --- a/test/tfnewapi/test_tensorflow_graph_dequantize_cast_optimizer_newapi.py +++ b/test/tfnewapi/test_tensorflow_graph_dequantize_cast_optimizer_newapi.py @@ -1,73 +1,68 @@ -import unittest import os -import yaml +import unittest + import numpy as np import tensorflow as tf +import yaml from tensorflow.python.framework import dtypes -from neural_compressor.adaptor.tf_utils.util import disable_random -from 
neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper + from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.dequantize_cast_optimizer import DequantizeCastOptimizer +from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper +from neural_compressor.adaptor.tf_utils.util import disable_random + def build_fake_graphdef(set_min_first=False, dq_multi_outputs=False): tf.compat.v1.disable_eager_execution() - input = tf.compat.v1.placeholder(tf.float32, shape=(32, 224, 224, 3), name='input') + input = tf.compat.v1.placeholder(tf.float32, shape=(32, 224, 224, 3), name="input") graph_def = tf.compat.v1.get_default_graph().as_graph_def(add_shapes=True) - min_input = Helper.create_constant_node( - 'test_min', - value=0., - dtype=dtypes.float32) + min_input = Helper.create_constant_node("test_min", value=0.0, dtype=dtypes.float32) - max_input = Helper.create_constant_node( - 'test_max', - value=[1], - dtype=dtypes.float32) + max_input = Helper.create_constant_node("test_max", value=[1], dtype=dtypes.float32) - quant_v2_node = Helper.create_node("QuantizeV2", 'test_quantize', - [input.name, min_input.name, max_input.name]) + quant_v2_node = Helper.create_node("QuantizeV2", "test_quantize", [input.name, min_input.name, max_input.name]) dequantize_node = Helper.create_node( - "Dequantize", 'test_dequantize', - [quant_v2_node.name, quant_v2_node.name + ':1', quant_v2_node.name + ':2']) + "Dequantize", "test_dequantize", [quant_v2_node.name, quant_v2_node.name + ":1", quant_v2_node.name + ":2"] + ) if set_min_first: - Helper.set_attr_string(dequantize_node, "mode", b'MIN_FIRST') + Helper.set_attr_string(dequantize_node, "mode", b"MIN_FIRST") - cast_node = Helper.create_node( - "Cast", 'test_cast', [dequantize_node.name]) + cast_node = Helper.create_node("Cast", "test_cast", [dequantize_node.name]) Helper.set_attr_dtype(cast_node, "DstT", dtypes.bfloat16) Helper.set_attr_dtype(cast_node, "SrcT", dtypes.float32) Helper.set_attr_bool(cast_node, "Truncate", False) - dentity_node = Helper.create_node( - "Identity", 'output', [cast_node.name]) + dentity_node = Helper.create_node("Identity", "output", [cast_node.name]) Helper.set_attr_dtype(dentity_node, "T", dtypes.bfloat16) - graph_def.node.extend([ - min_input, - max_input, - quant_v2_node, - dequantize_node, - cast_node, - dentity_node, - ]) + graph_def.node.extend( + [ + min_input, + max_input, + quant_v2_node, + dequantize_node, + cast_node, + dentity_node, + ] + ) if dq_multi_outputs: - dentity_node_2 = Helper.create_node( - "Identity", 'id_1', [dequantize_node.name]) + dentity_node_2 = Helper.create_node("Identity", "id_1", [dequantize_node.name]) Helper.set_attr_dtype(dentity_node_2, "T", dtypes.float32) graph_def.node.extend([dentity_node_2]) return graph_def -class TestDequantizeCastOptimizer(unittest.TestCase): +class TestDequantizeCastOptimizer(unittest.TestCase): @disable_random() def test_dequantize_cast_normal(self): graph_def = build_fake_graphdef() converted_graph_def = DequantizeCastOptimizer(graph_def).do_transformation() for i in converted_graph_def.node: - self.assertNotEqual(i.op, 'Cast') + self.assertNotEqual(i.op, "Cast") @disable_random() def test_dequantize_cast_min_first(self): @@ -75,7 +70,7 @@ def test_dequantize_cast_min_first(self): converted_graph_def = DequantizeCastOptimizer(graph_def).do_transformation() hasCast = False for i in converted_graph_def.node: - if i.op == 'Cast': + if i.op == "Cast": hasCast = True break self.assertEqual(hasCast, True) @@ -86,7 
+81,7 @@ def test_dequantize_cast_multiple_outputs(self): converted_graph_def = DequantizeCastOptimizer(graph_def).do_transformation() hasCast = False for i in converted_graph_def.node: - if i.op == 'Cast': + if i.op == "Cast": hasCast = True break self.assertEqual(hasCast, True) diff --git a/test/tfnewapi/test_tensorflow_graph_dq_cast_fusion.py b/test/tfnewapi/test_tensorflow_graph_dq_cast_fusion.py index 78b6d061cd3..85e9fb9aed7 100644 --- a/test/tfnewapi/test_tensorflow_graph_dq_cast_fusion.py +++ b/test/tfnewapi/test_tensorflow_graph_dq_cast_fusion.py @@ -1,14 +1,16 @@ -import unittest import os -import yaml +import unittest + import numpy as np import tensorflow as tf +import yaml from tensorflow.compat.v1 import graph_util + from neural_compressor.adaptor.tf_utils.util import disable_random def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -35,9 +37,9 @@ def build_fake_yaml(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() @@ -46,43 +48,42 @@ class TestDqCastFusion(unittest.TestCase): @classmethod def setUpClass(self): build_fake_yaml() - os.environ['FORCE_BF16'] = '1' + os.environ["FORCE_BF16"] = "1" @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") @disable_random() def test_dq_all_outputs_bf16(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") - conv_weights = tf.constant(np.random.random((1, 3, 16, 16)).astype(np.float32), name='y') + conv_weights = tf.constant(np.random.random((1, 3, 16, 16)).astype(np.float32), name="y") conv = tf.nn.conv2d(x, conv_weights, strides=[1, 2, 2, 1], padding="VALID") - conv_reshape1 = tf.reshape(conv, [1,28,27,16]) - conv_reshape2 = tf.reshape(conv, [1,28,27,16]) - out = tf.math.add(conv_reshape1, conv_reshape2, name='op_to_store') - out_name = out.name.split(':')[0] + conv_reshape1 = tf.reshape(conv, [1, 28, 27, 16]) + conv_reshape2 = tf.reshape(conv, [1, 28, 27, 16]) + out = tf.math.add(conv_reshape1, conv_reshape2, name="op_to_store") + out_name = out.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset( - 'dummy', shape=(100, 56, 56, 16)) + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16)) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def output_graph = quantizer.fit() found_cast = False for node in output_graph.graph_def.node: - if node.op == 'Cast': + if node.op == "Cast": found_cast = True break self.assertEqual(found_cast, False) - + + if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/test/tfnewapi/test_tensorflow_graph_fuse_gelu_newapi.py b/test/tfnewapi/test_tensorflow_graph_fuse_gelu_newapi.py index acfbc67d652..f14f7d70fbe 100644 --- a/test/tfnewapi/test_tensorflow_graph_fuse_gelu_newapi.py +++ 
b/test/tfnewapi/test_tensorflow_graph_fuse_gelu_newapi.py @@ -2,61 +2,73 @@ # -*- coding: utf-8 -*- # import unittest -from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.fuse_gelu import FuseGeluOptimizer -from neural_compressor.adaptor.tf_utils.util import disable_random import tensorflow as tf from tensorflow.compat.v1 import graph_util +from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.fuse_gelu import FuseGeluOptimizer +from neural_compressor.adaptor.tf_utils.util import disable_random + + class TestGeluFusion(unittest.TestCase): def gelu(self, input_tensor, mul_value=0.5, addv2_value=1.0, sqrt_value=2.0): cdf = mul_value * (addv2_value + tf.math.erf(input_tensor / tf.sqrt(sqrt_value))) return input_tensor * cdf - def gelu_enable_approximation(self, input_tensor, - another_mul_value=0.5, - mul1_value=0.044715, - addv2_value=1.0, - mul2_value=0.7978845608028654, - pow_value=3): + def gelu_enable_approximation( + self, + input_tensor, + another_mul_value=0.5, + mul1_value=0.044715, + addv2_value=1.0, + mul2_value=0.7978845608028654, + pow_value=3, + ): coeff = tf.cast(mul1_value, input_tensor.dtype) - return another_mul_value * input_tensor * ( - addv2_value + tf.tanh(mul2_value * - (input_tensor + coeff * tf.pow(input_tensor, pow_value)))) - - - def gelu_enable_approximation_varaint(self, input_tensor, - another_mul_value=0.5, - mul1_value=0.044715, - addv2_value=1.0, - mul2_value=0.7978845608028654, - pow_value=3): + return ( + another_mul_value + * input_tensor + * (addv2_value + tf.tanh(mul2_value * (input_tensor + coeff * tf.pow(input_tensor, pow_value)))) + ) + + def gelu_enable_approximation_varaint( + self, + input_tensor, + another_mul_value=0.5, + mul1_value=0.044715, + addv2_value=1.0, + mul2_value=0.7978845608028654, + pow_value=3, + ): coeff = tf.cast(mul1_value, input_tensor.dtype) cdf = another_mul_value * ( - addv2_value + tf.tanh(mul2_value * - (input_tensor + coeff * tf.pow(input_tensor, pow_value)))) + addv2_value + tf.tanh(mul2_value * (input_tensor + coeff * tf.pow(input_tensor, pow_value))) + ) return input_tensor * cdf - def gelu_disable_approximation(self, input_tensor, - another_add_value=0.5, - mul1_value=0.044715, - addv2_value=1.0, - mul2_value=0.7978845608028654, - pow_value=3): + def gelu_disable_approximation( + self, + input_tensor, + another_add_value=0.5, + mul1_value=0.044715, + addv2_value=1.0, + mul2_value=0.7978845608028654, + pow_value=3, + ): coeff = tf.cast(mul1_value, input_tensor.dtype) return (another_add_value + input_tensor) * ( - addv2_value + tf.tanh(mul2_value * - (input_tensor + coeff * tf.pow(input_tensor, pow_value)))) + addv2_value + tf.tanh(mul2_value * (input_tensor + coeff * tf.pow(input_tensor, pow_value))) + ) @disable_random() def test_gelu_disable_approximation_fusion(self): x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [32], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [32], initializer=tf.compat.v1.random_normal_initializer()) conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME") conv_bias = tf.math.add(conv1, conv_bias) @@ -65,15 +77,14 @@ def test_gelu_disable_approximation_fusion(self): with 
tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[relu.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[relu.name.split(":")[0]] + ) output_graph_def = FuseGeluOptimizer(output_graph_def).do_transformation() found_gelu = False for i in output_graph_def.node: - if i.op == 'Gelu': + if i.op == "Gelu": found_gelu = True break @@ -81,13 +92,12 @@ def test_gelu_disable_approximation_fusion(self): @disable_random() def test_gelu_approximation_fusion(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [32], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [32], initializer=tf.compat.v1.random_normal_initializer()) conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME") conv_bias = tf.math.add(conv1, conv_bias) @@ -96,15 +106,14 @@ def test_gelu_approximation_fusion(self): with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[relu.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[relu.name.split(":")[0]] + ) output_graph_def = FuseGeluOptimizer(output_graph_def).do_transformation() found_gelu = False for i in output_graph_def.node: - if i.op == 'Gelu': + if i.op == "Gelu": found_gelu = True break @@ -112,13 +121,12 @@ def test_gelu_approximation_fusion(self): @disable_random() def test_gelu_approximation_fusion_varaint(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [32], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [32], initializer=tf.compat.v1.random_normal_initializer()) conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME") conv_bias = tf.math.add(conv1, conv_bias) @@ -127,28 +135,27 @@ def test_gelu_approximation_fusion_varaint(self): with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[relu.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[relu.name.split(":")[0]] + ) output_graph_def = FuseGeluOptimizer(output_graph_def).do_transformation() found_gelu = False for i in output_graph_def.node: - if i.op == 'Gelu': + if i.op == "Gelu": found_gelu = True break self.assertEqual(found_gelu, True) + @disable_random() def test_gelu_approximation_fusion_with_invalid_pow_value(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - conv_weights = 
tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [32], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [32], initializer=tf.compat.v1.random_normal_initializer()) conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME") conv_bias = tf.math.add(conv1, conv_bias) @@ -157,15 +164,14 @@ def test_gelu_approximation_fusion_with_invalid_pow_value(self): with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[relu.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[relu.name.split(":")[0]] + ) output_graph_def = FuseGeluOptimizer(output_graph_def).do_transformation() found_gelu = False for i in output_graph_def.node: - if i.op == 'Gelu': + if i.op == "Gelu": found_gelu = True break @@ -173,13 +179,12 @@ def test_gelu_approximation_fusion_with_invalid_pow_value(self): @disable_random() def test_gelu_approximation_fusion_with_invalid_mul2_value(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [32], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [32], initializer=tf.compat.v1.random_normal_initializer()) conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME") conv_bias = tf.math.add(conv1, conv_bias) @@ -188,15 +193,14 @@ def test_gelu_approximation_fusion_with_invalid_mul2_value(self): with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[relu.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[relu.name.split(":")[0]] + ) output_graph_def = FuseGeluOptimizer(output_graph_def).do_transformation() found_gelu = False for i in output_graph_def.node: - if i.op == 'Gelu': + if i.op == "Gelu": found_gelu = True break @@ -204,13 +208,12 @@ def test_gelu_approximation_fusion_with_invalid_mul2_value(self): @disable_random() def test_gelu_approximation_fusion_with_invalid_addv2_value(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [32], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [32], initializer=tf.compat.v1.random_normal_initializer()) conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME") conv_bias = tf.math.add(conv1, conv_bias) @@ -219,15 +222,14 @@ def 
test_gelu_approximation_fusion_with_invalid_addv2_value(self): with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[relu.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[relu.name.split(":")[0]] + ) output_graph_def = FuseGeluOptimizer(output_graph_def).do_transformation() found_gelu = False for i in output_graph_def.node: - if i.op == 'Gelu': + if i.op == "Gelu": found_gelu = True break @@ -235,13 +237,12 @@ def test_gelu_approximation_fusion_with_invalid_addv2_value(self): @disable_random() def test_gelu_approximation_fusion_with_invalid_mul1_value(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [32], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [32], initializer=tf.compat.v1.random_normal_initializer()) conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME") conv_bias = tf.math.add(conv1, conv_bias) @@ -251,15 +252,14 @@ def test_gelu_approximation_fusion_with_invalid_mul1_value(self): with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[relu.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[relu.name.split(":")[0]] + ) output_graph_def = FuseGeluOptimizer(output_graph_def).do_transformation() found_gelu = False for i in output_graph_def.node: - if i.op == 'Gelu': + if i.op == "Gelu": found_gelu = True break @@ -267,13 +267,12 @@ def test_gelu_approximation_fusion_with_invalid_mul1_value(self): @disable_random() def test_gelu_approximation_fusion_with_invalid_another_mul(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [32], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [32], initializer=tf.compat.v1.random_normal_initializer()) conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME") conv_bias = tf.math.add(conv1, conv_bias) @@ -283,15 +282,14 @@ def test_gelu_approximation_fusion_with_invalid_another_mul(self): with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[relu.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[relu.name.split(":")[0]] + ) output_graph_def = FuseGeluOptimizer(output_graph_def).do_transformation() found_gelu = False for i in output_graph_def.node: - if i.op == 'Gelu': + if i.op == "Gelu": found_gelu = True break @@ -299,13 +297,12 @@ def 
test_gelu_approximation_fusion_with_invalid_another_mul(self): @disable_random() def test_gelu_fusion_with_invalid_sqrt(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [32], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [32], initializer=tf.compat.v1.random_normal_initializer()) conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME") conv_bias = tf.math.add(conv1, conv_bias) @@ -313,15 +310,14 @@ def test_gelu_fusion_with_invalid_sqrt(self): with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[gelu.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[gelu.name.split(":")[0]] + ) output_graph_def = FuseGeluOptimizer(output_graph_def).do_transformation() found_gelu = False for i in output_graph_def.node: - if i.op == 'Gelu': + if i.op == "Gelu": found_gelu = True break @@ -329,13 +325,12 @@ def test_gelu_fusion_with_invalid_sqrt(self): @disable_random() def test_gelu_fusion_with_invalid_addv2(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [32], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [32], initializer=tf.compat.v1.random_normal_initializer()) conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME") conv_bias = tf.math.add(conv1, conv_bias) @@ -343,15 +338,14 @@ def test_gelu_fusion_with_invalid_addv2(self): with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[gelu.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[gelu.name.split(":")[0]] + ) output_graph_def = FuseGeluOptimizer(output_graph_def).do_transformation() found_gelu = False for i in output_graph_def.node: - if i.op == 'Gelu': + if i.op == "Gelu": found_gelu = True break @@ -359,13 +353,12 @@ def test_gelu_fusion_with_invalid_addv2(self): @disable_random() def test_gelu_fusion_with_invalid_mul(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [32], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [32], initializer=tf.compat.v1.random_normal_initializer()) conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME") conv_bias = 
tf.math.add(conv1, conv_bias) @@ -373,15 +366,14 @@ def test_gelu_fusion_with_invalid_mul(self): with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[gelu.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[gelu.name.split(":")[0]] + ) output_graph_def = FuseGeluOptimizer(output_graph_def).do_transformation() found_gelu = False for i in output_graph_def.node: - if i.op == 'Gelu': + if i.op == "Gelu": found_gelu = True break @@ -389,13 +381,12 @@ def test_gelu_fusion_with_invalid_mul(self): @disable_random() def test_gelu_fusion(self): - x = tf.compat.v1.placeholder(tf.float32, [1, 224, 224, 3], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [32], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [32], initializer=tf.compat.v1.random_normal_initializer()) conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="SAME") conv_bias = tf.math.add(conv1, conv_bias) @@ -404,20 +395,19 @@ def test_gelu_fusion(self): with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[relu.name.split(':')[0]]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[relu.name.split(":")[0]] + ) output_graph_def = FuseGeluOptimizer(output_graph_def).do_transformation() found_gelu = False for i in output_graph_def.node: - if i.op == 'Gelu': + if i.op == "Gelu": found_gelu = True break self.assertEqual(found_gelu, True) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/tfnewapi/test_tensorflow_graph_fuse_pad_conv_fp32.py b/test/tfnewapi/test_tensorflow_graph_fuse_pad_conv_fp32.py index 100debc98df..6b1ac19ba02 100644 --- a/test/tfnewapi/test_tensorflow_graph_fuse_pad_conv_fp32.py +++ b/test/tfnewapi/test_tensorflow_graph_fuse_pad_conv_fp32.py @@ -1,13 +1,15 @@ -import unittest import os -import yaml +import unittest + import tensorflow as tf +import yaml from tensorflow.compat.v1 import graph_util + from neural_compressor.adaptor.tf_utils.util import disable_random def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -34,9 +36,9 @@ def build_fake_yaml(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() @@ -48,29 +50,30 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") @disable_random() def test_fold_pad_conv(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", 
[3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) - relu = tf.nn.relu(normed, name='op_to_store') - out_name = relu.name.split(':')[0] + relu = tf.nn.relu(normed, name="op_to_store") + out_name = relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -78,7 +81,7 @@ def test_fold_pad_conv(self): found_pad = False for i in output_graph.graph_def.node: - if i.op == 'Pad': + if i.op == "Pad": found_pad = True break self.assertEqual(found_pad, False) @@ -86,25 +89,26 @@ def test_fold_pad_conv(self): @disable_random() def test_fold_non_const_pad_conv(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") - paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) - vec = tf.raw_ops.DataFormatVecPermute(x=paddings, src_format='NHWC', dst_format='NHWC') + paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) + vec = tf.raw_ops.DataFormatVecPermute(x=paddings, src_format="NHWC", dst_format="NHWC") x_pad = tf.pad(x, vec, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) - relu = tf.nn.relu(normed, name='op_to_store') - out_name = relu.name.split(':')[0] + relu = tf.nn.relu(normed, name="op_to_store") + out_name = relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -112,10 +116,11 @@ def test_fold_non_const_pad_conv(self): found_pad = False for i in output_graph.graph_def.node: - if i.op == 'Pad': + if i.op == "Pad": found_pad = True break self.assertEqual(found_pad, False) + if __name__ == "__main__": unittest.main() diff --git a/test/tfnewapi/test_tensorflow_graph_qdq_bn_fusion.py 
b/test/tfnewapi/test_tensorflow_graph_qdq_bn_fusion.py index 856a1547fa5..02ee5d61bf8 100644 --- a/test/tfnewapi/test_tensorflow_graph_qdq_bn_fusion.py +++ b/test/tfnewapi/test_tensorflow_graph_qdq_bn_fusion.py @@ -1,22 +1,25 @@ # # -*- coding: utf-8 -*- # -import unittest +import logging import os -import yaml +import unittest + import numpy as np import tensorflow as tf -import logging +import yaml from tensorflow.compat.v1 import graph_util from tensorflow.core.framework import attr_value_pb2 from tensorflow.python.framework import dtypes + from neural_compressor.adaptor.tf_utils.util import disable_random -from neural_compressor.utils.utility import CpuInfo from neural_compressor.experimental import Quantization, common from neural_compressor.utils import logger +from neural_compressor.utils.utility import CpuInfo + def build_fake_yaml_1(): - fake_yaml_1 = ''' + fake_yaml_1 = """ model: name: fake_yaml_1 framework: tensorflow @@ -42,14 +45,15 @@ def build_fake_yaml_1(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml_1, Loader=yaml.SafeLoader) - with open('fake_yaml_1.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml_1.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() + def build_fake_yaml_2(): - fake_yaml_2 = ''' + fake_yaml_2 = """ model: name: fake_yaml_2 framework: tensorflow @@ -73,14 +77,14 @@ def build_fake_yaml_2(): relative: 0.1 workspace: path: saved - ''' + """ y = yaml.load(fake_yaml_2, Loader=yaml.SafeLoader) - with open('fake_yaml_2.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml_2.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() -class TestTensorflowQdqConvFusion(unittest.TestCase): +class TestTensorflowQdqConvFusion(unittest.TestCase): @classmethod def setUpClass(self): build_fake_yaml_1() @@ -88,30 +92,30 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml_1.yaml') - os.remove('fake_yaml_2.yaml') + os.remove("fake_yaml_1.yaml") + os.remove("fake_yaml_2.yaml") @disable_random() def test_bn_relu_depthwiseconv_biasadd_relu6_fusion(self): logger.info("test_bn_relu_depthwiseconv_biasadd_relu6_fusion") x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) normed_0 = tf.compat.v1.layers.batch_normalization(x) - relu = tf.nn.relu(normed_0, name='op_to_store_0') + relu = tf.nn.relu(normed_0, name="op_to_store_0") conv = tf.compat.v1.nn.depthwise_conv2d_native(relu, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed_1 = tf.compat.v1.layers.batch_normalization(conv) - relu6 = tf.nn.relu6(normed_1, name='op_to_store_1') - out_name = relu6.name.split(':')[0] + relu6 = tf.nn.relu6(normed_1, name="op_to_store_1") + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) - quantizer = Quantization('fake_yaml_1.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + quantizer = Quantization("fake_yaml_1.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) 
quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -121,16 +125,25 @@ def test_bn_relu_depthwiseconv_biasadd_relu6_fusion(self): qbn_num = 0 dq_num = 0 for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedDepthwiseConv2D' \ - and i.attr['Thost_inputs'].list.type != [11, 11, 1, 1, 1, 1, 1, 1, 1]: + if i.op == "_FusedQuantizedDepthwiseConv2D" and i.attr["Thost_inputs"].list.type != [ + 11, + 11, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + ]: conv_input_type = False break - if i.op in ['Relu', 'Relu6', 'FusedBatchNormV3']: + if i.op in ["Relu", "Relu6", "FusedBatchNormV3"]: found_fusion = False break - if i.op == '_QuantizedFusedBatchNorm': + if i.op == "_QuantizedFusedBatchNorm": qbn_num += 1 - if i.op == 'Dequantize': + if i.op == "Dequantize": dq_num += 1 self.assertEqual(conv_input_type, True) self.assertEqual(found_fusion, True) @@ -141,41 +154,41 @@ def test_bn_relu_depthwiseconv_biasadd_relu6_fusion(self): def test_training_bn_relu_depthwiseconv_biasadd_relu6_fusion(self): logger.info("test_training_bn_relu_depthwiseconv_biasadd_relu6_fusion") x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) normed_0 = tf.compat.v1.layers.batch_normalization(x, training=True) - relu = tf.nn.relu(normed_0, name='op_to_store_0') + relu = tf.nn.relu(normed_0, name="op_to_store_0") conv = tf.compat.v1.nn.depthwise_conv2d_native(relu, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed_1 = tf.compat.v1.layers.batch_normalization(conv) - relu6 = tf.nn.relu6(normed_1, name='op_to_store_1') - out_name = relu6.name.split(':')[0] + relu6 = tf.nn.relu6(normed_1, name="op_to_store_1") + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) - quantizer = Quantization('fake_yaml_1.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + quantizer = Quantization("fake_yaml_1.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def output_graph = quantizer.fit() bn_num, bf16_bn_num, qbn_num, dq_num = 0, 0, 0, 0 for i in output_graph.graph_def.node: - if i.op == 'FusedBatchNormV3': + if i.op == "FusedBatchNormV3": bn_num += 1 - if i.attr['T'].type == dtypes.bfloat16.as_datatype_enum: + if i.attr["T"].type == dtypes.bfloat16.as_datatype_enum: bf16_bn_num += 1 - if i.op == '_QuantizedFusedBatchNorm': + if i.op == "_QuantizedFusedBatchNorm": qbn_num += 1 - if i.op == 'Dequantize': + if i.op == "Dequantize": dq_num += 1 self.assertEqual(bn_num, 1) self.assertEqual(qbn_num, 0) self.assertEqual(dq_num, 0) - bf16_enabled = bool(CpuInfo().bf16 or os.getenv('FORCE_BF16') == '1') + bf16_enabled = bool(CpuInfo().bf16 or os.getenv("FORCE_BF16") == "1") if bf16_enabled: self.assertEqual(bf16_bn_num, 1) @@ -183,23 +196,23 @@ def 
test_training_bn_relu_depthwiseconv_biasadd_relu6_fusion(self): def test_bn_leakyrelu_conv_biasadd_relu(self): logger.info("test_bn_leakyrelu_conv_biasadd_relu") x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) normed_0 = tf.compat.v1.layers.batch_normalization(x) - leaky_relu = tf.nn.leaky_relu(normed_0, alpha=0.3, name='op_to_store_0') + leaky_relu = tf.nn.leaky_relu(normed_0, alpha=0.3, name="op_to_store_0") conv = tf.nn.conv2d(leaky_relu, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed_1 = tf.compat.v1.layers.batch_normalization(conv) - relu = tf.nn.relu(normed_1, name='op_to_store_1') - out_name = relu.name.split(':')[0] + relu = tf.nn.relu(normed_1, name="op_to_store_1") + out_name = relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) - quantizer = Quantization('fake_yaml_1.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + quantizer = Quantization("fake_yaml_1.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) @@ -209,16 +222,15 @@ def test_bn_leakyrelu_conv_biasadd_relu(self): found_fusion = True qbn_num = 0 dq_num = 0 - qbn_output_max_name = 'batch_normalization/FusedBatchNormV3_eightbit_quantized_bn/frozen_bn_output_max' + qbn_output_max_name = "batch_normalization/FusedBatchNormV3_eightbit_quantized_bn/frozen_bn_output_max" for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv2D' \ - and i.attr['Thost_inputs'].list.type != [11, 11, 1, 1, 1, 1, 1, 1, 1]: + if i.op == "_FusedQuantizedConv2D" and i.attr["Thost_inputs"].list.type != [11, 11, 1, 1, 1, 1, 1, 1, 1]: conv_input_type = False break - if i.op in ['Relu', 'LeakyRelu', 'FusedBatchNormV3']: + if i.op in ["Relu", "LeakyRelu", "FusedBatchNormV3"]: found_fusion = False break - if i.op == '_QuantizedFusedBatchNorm': + if i.op == "_QuantizedFusedBatchNorm": is_offset_const = i.attr["is_offset_const"].b is_mean_const = i.attr["is_mean_const"].b qbn_alpha = i.attr["alpha"].f @@ -226,7 +238,7 @@ def test_bn_leakyrelu_conv_biasadd_relu(self): qbn_num += 1 if i.name == qbn_output_max_name: frozen_qbn_output_max_value = i.attr["value"].tensor.float_val[0] - if i.op == 'Dequantize': + if i.op == "Dequantize": dq_num += 1 self.assertEqual(conv_input_type, True) self.assertEqual(found_fusion, True) @@ -242,23 +254,23 @@ def test_bn_leakyrelu_conv_biasadd_relu(self): def test_bn_relu_conv_biasadd_relu(self): logger.info("test_bn_relu_conv_biasadd_relu") x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) normed_0 = tf.compat.v1.layers.batch_normalization(x) - relu_0 = tf.nn.relu(normed_0, name='op_to_store_0') + relu_0 = 
tf.nn.relu(normed_0, name="op_to_store_0") conv = tf.nn.conv2d(relu_0, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed_1 = tf.compat.v1.layers.batch_normalization(conv) - relu_1 = tf.nn.relu(normed_1, name='op_to_store_1') - out_name = relu_1.name.split(':')[0] + relu_1 = tf.nn.relu(normed_1, name="op_to_store_1") + out_name = relu_1.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) - quantizer = Quantization('fake_yaml_1.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + quantizer = Quantization("fake_yaml_1.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) @@ -268,23 +280,22 @@ def test_bn_relu_conv_biasadd_relu(self): found_fusion = True qbn_num = 0 dq_num = 0 - qbn_output_max_name = 'batch_normalization/FusedBatchNormV3_eightbit_quantized_bn/frozen_bn_output_max' + qbn_output_max_name = "batch_normalization/FusedBatchNormV3_eightbit_quantized_bn/frozen_bn_output_max" for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv2D' \ - and i.attr['Thost_inputs'].list.type != [11, 11, 1, 1, 1, 1, 1, 1, 1]: + if i.op == "_FusedQuantizedConv2D" and i.attr["Thost_inputs"].list.type != [11, 11, 1, 1, 1, 1, 1, 1, 1]: conv_input_type = False break - if i.op in ['Relu', 'FusedBatchNormV3']: + if i.op in ["Relu", "FusedBatchNormV3"]: found_fusion = False break - if i.op == '_QuantizedFusedBatchNorm': + if i.op == "_QuantizedFusedBatchNorm": is_offset_const = i.attr["is_offset_const"].b is_mean_const = i.attr["is_mean_const"].b frozen_qbn_output_max = i.input[8] qbn_num += 1 if i.name == qbn_output_max_name: frozen_qbn_output_max_value = i.attr["value"].tensor.float_val[0] - if i.op == 'Dequantize': + if i.op == "Dequantize": dq_num += 1 self.assertEqual(conv_input_type, True) self.assertEqual(found_fusion, True) @@ -299,23 +310,23 @@ def test_bn_relu_conv_biasadd_relu(self): def test_bn_performance_only_false(self): logger.info("test_bn_performance_only_false") x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) normed_0 = tf.compat.v1.layers.batch_normalization(x) - relu_0 = tf.nn.relu(normed_0, name='op_to_store_0') + relu_0 = tf.nn.relu(normed_0, name="op_to_store_0") conv = tf.nn.conv2d(relu_0, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed_1 = tf.compat.v1.layers.batch_normalization(conv) - relu_1 = tf.nn.relu6(normed_1, name='op_to_store_1') - out_name = relu_1.name.split(':')[0] + relu_1 = tf.nn.relu6(normed_1, name="op_to_store_1") + out_name = relu_1.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) - quantizer = Quantization('fake_yaml_2.yaml') - dataset = 
quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + quantizer = Quantization("fake_yaml_2.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) @@ -326,14 +337,14 @@ def test_bn_performance_only_false(self): qbn_num = 0 dq_num = 0 for i in output_graph.graph_def.node: - if i.op in ['Relu6']: + if i.op in ["Relu6"]: found_fusion = False break - if i.op == '_FusedQuantizedConv2D': + if i.op == "_FusedQuantizedConv2D": qconv_num += 1 - if i.op == '_QuantizedFusedBatchNorm': + if i.op == "_QuantizedFusedBatchNorm": qbn_num += 1 - if i.op == 'Dequantize': + if i.op == "Dequantize": dq_num += 1 self.assertEqual(found_fusion, True) self.assertEqual(qconv_num, 1) @@ -344,21 +355,23 @@ def test_bn_performance_only_false(self): def test_bnex_performance_only_false(self): logger.info("test_bnex_performance_only_false") x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") - conv_weights_0 = tf.compat.v1.get_variable("weight_0", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights_0 = tf.compat.v1.get_variable( + "weight_0", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) normed_0 = tf.compat.v1.layers.batch_normalization(x) - relu_0 = tf.nn.relu(normed_0, name='op_to_store_0') + relu_0 = tf.nn.relu(normed_0, name="op_to_store_0") conv_0 = tf.nn.conv2d(relu_0, conv_weights_0, strides=[1, 2, 2, 1], padding="VALID") normed_1 = tf.compat.v1.layers.batch_normalization(conv_0) - conv_weights_1 = tf.compat.v1.get_variable("weight_1", [5, 5, 16, 2], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights_1 = tf.compat.v1.get_variable( + "weight_1", [5, 5, 16, 2], initializer=tf.compat.v1.random_normal_initializer() + ) conv_1 = tf.nn.conv2d(normed_1, conv_weights_1, strides=[1, 3, 3, 1], padding="VALID") - relu_1 = tf.nn.relu6(conv_1, name='op_to_store_1') - out_name = relu_1.name.split(':')[0] - """ - graph_def = tf.compat.v1.get_default_graph().as_graph_def() + relu_1 = tf.nn.relu6(conv_1, name="op_to_store_1") + out_name = relu_1.name.split(":")[0] + """graph_def = tf.compat.v1.get_default_graph().as_graph_def() for node in graph_def.node: - if node.name == "batch_normalization_1/FusedBatchNormV3": + + if node.name == "batch_normalization_1/FusedBatchNormV3": node.op = "_FusedBatchNormEx" with tf.Graph().as_default() as graph: tf.import_graph_def(graph_def, name='') @@ -366,16 +379,15 @@ def test_bnex_performance_only_false(self): with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) for node in output_graph_def.node: if node.name == "batch_normalization_1/FusedBatchNormV3": node.op = "_FusedBatchNormEx" node.attr["activation_mode"].CopyFrom(attr_value_pb2.AttrValue(s=b"Relu")) - quantizer = Quantization('fake_yaml_2.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + quantizer = Quantization("fake_yaml_2.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) @@ -386,14 +398,14 @@ def test_bnex_performance_only_false(self): qbn_num = 0 dq_num = 
0 for i in output_graph.graph_def.node: - if i.op in ['Relu6', '_FusedBatchNormEx']: + if i.op in ["Relu6", "_FusedBatchNormEx"]: found_fusion = False break - if i.op == '_FusedQuantizedConv2D': + if i.op == "_FusedQuantizedConv2D": qconv_num += 1 - if i.op == '_QuantizedFusedBatchNorm': + if i.op == "_QuantizedFusedBatchNorm": qbn_num += 1 - if i.op == 'Dequantize': + if i.op == "Dequantize": dq_num += 1 self.assertEqual(found_fusion, True) self.assertEqual(qconv_num, 2) @@ -401,5 +413,5 @@ def test_bnex_performance_only_false(self): self.assertEqual(dq_num, 1) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/tfnewapi/test_tensorflow_graph_qdq_concat_fusion.py b/test/tfnewapi/test_tensorflow_graph_qdq_concat_fusion.py index 5da89c33bb6..93d252e793a 100644 --- a/test/tfnewapi/test_tensorflow_graph_qdq_concat_fusion.py +++ b/test/tfnewapi/test_tensorflow_graph_qdq_concat_fusion.py @@ -1,20 +1,20 @@ # # # -*- coding: utf-8 -*- -import unittest import os +import unittest + import tensorflow as tf import yaml +from tensorflow.compat.v1 import graph_util -from neural_compressor.adaptor.tf_utils.util import read_graph -from neural_compressor.adaptor.tf_utils.quantize_graph.quantize_graph_for_intel_cpu import QuantizeGraphForIntel from neural_compressor.adaptor.tensorflow import TensorflowQuery -from neural_compressor.adaptor.tf_utils.util import disable_random -from tensorflow.compat.v1 import graph_util +from neural_compressor.adaptor.tf_utils.quantize_graph.quantize_graph_for_intel_cpu import QuantizeGraphForIntel +from neural_compressor.adaptor.tf_utils.util import disable_random, read_graph def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -41,45 +41,48 @@ def build_fake_yaml(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() class TestTensorflowQdqConcatFusion(unittest.TestCase): - mb_model_url = 'https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/inceptionv3_fp32_pretrained_model.pb' - pb_path = '/tmp/.neural_compressor/inceptionv3_fp32.pb' + mb_model_url = ( + "https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/inceptionv3_fp32_pretrained_model.pb" + ) + pb_path = "/tmp/.neural_compressor/inceptionv3_fp32.pb" @classmethod def setUpClass(self): if not os.path.exists(self.pb_path): - os.system( - "mkdir -p /tmp/.neural_compressor && wget {} -O {} ".format(self.mb_model_url, self.pb_path)) - self.op_wise_sequences = TensorflowQuery(local_config_file=os.path.join( - os.path.dirname(__file__), "../../neural_compressor/adaptor/tensorflow.yaml")).get_eightbit_patterns() + os.system("mkdir -p /tmp/.neural_compressor && wget {} -O {} ".format(self.mb_model_url, self.pb_path)) + self.op_wise_sequences = TensorflowQuery( + local_config_file=os.path.join(os.path.dirname(__file__), "../../neural_compressor/adaptor/tensorflow.yaml") + ).get_eightbit_patterns() build_fake_yaml() @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") def test_tensorflow_concat_quantization(self): output_graph_def = read_graph(self.pb_path) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset( - 'dummy', shape=(100, 299, 299, 3), label=True) + + quantizer = 
Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 299, 299, 3), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def output_graph = quantizer.fit() found_quantized_concat_node = False - target_concat_node_name = 'v0/cg/incept_v3_a0/concat_eightbit_quantized_concatv2' + target_concat_node_name = "v0/cg/incept_v3_a0/concat_eightbit_quantized_concatv2" from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer + cur_graph = GraphAnalyzer() cur_graph.graph = output_graph.graph_def graph_info = cur_graph.parse_graph() @@ -91,10 +94,8 @@ def test_tensorflow_concat_quantization(self): # print (input_conv_name, graph_info[input_conv_name].node.input) min_freezed_out_name = graph_info[input_conv_name].node.input[-2] max_freezed_out_name = graph_info[input_conv_name].node.input[-1] - min_freezed_out_value = ( - graph_info[min_freezed_out_name].node.attr['value'].tensor.float_val)[0] - max_freezed_out_value = ( - graph_info[max_freezed_out_name].node.attr['value'].tensor.float_val)[0] + min_freezed_out_value = (graph_info[min_freezed_out_name].node.attr["value"].tensor.float_val)[0] + max_freezed_out_value = (graph_info[max_freezed_out_name].node.attr["value"].tensor.float_val)[0] min_out.append(min_freezed_out_value) max_out.append(max_freezed_out_value) @@ -103,38 +104,33 @@ def test_tensorflow_concat_quantization(self): @disable_random() def test_concat_with_different_input_type(self): - x = tf.compat.v1.placeholder( - tf.float32, [1, 128, 128, 16], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [2, 2, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [16], - initializer=tf.compat.v1.random_normal_initializer()) + x = tf.compat.v1.placeholder(tf.float32, [1, 128, 128, 16], name="input") + conv_weights = tf.compat.v1.get_variable( + "weight", [2, 2, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [16], initializer=tf.compat.v1.random_normal_initializer()) x = tf.nn.relu(x) sqrt = tf.math.sqrt(x) relu_sqrt = tf.nn.relu(sqrt) - conv = tf.nn.conv2d(relu_sqrt, conv_weights, strides=[ - 1, 2, 2, 1], padding="SAME", name='last') + conv = tf.nn.conv2d(relu_sqrt, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name="last") normed = tf.compat.v1.layers.batch_normalization(conv) relu = tf.nn.relu(normed) - conv1 = tf.nn.conv2d(x, conv_weights, strides=[ - 1, 2, 2, 1], padding="SAME", name='last') + conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name="last") conv_bias = tf.nn.bias_add(conv1, conv_bias) concat = tf.concat([relu, conv_bias], 1) - pool = tf.nn.avg_pool(concat, ksize=1, strides=[1, 2, 2, 1], name='avgpool', padding="SAME") - final_node = tf.nn.relu(pool, name='op_to_store') - out_name = final_node.name.split(':')[0] + pool = tf.nn.avg_pool(concat, ksize=1, strides=[1, 2, 2, 1], name="avgpool", padding="SAME") + final_node = tf.nn.relu(pool, name="op_to_store") + out_name = final_node.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - 
quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset( - 'dummy', shape=(100, 128, 128, 16), label=True) + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 128, 128, 16), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -142,47 +138,41 @@ def test_concat_with_different_input_type(self): quantized_concat = False for i in output_graph.graph_def.node: - if i.op == 'QuantizedConcatV2': - quantized_concat = True + if i.op == "QuantizedConcatV2": + quantized_concat = True self.assertEqual(quantized_concat, False) @disable_random() def test_concat_with_same_input_type(self): - x = tf.compat.v1.placeholder( - tf.float32, [1, 128, 128, 16], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [2, 2, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [16], - initializer=tf.compat.v1.random_normal_initializer()) - conv1_bias = tf.compat.v1.get_variable("bias1", [16], - initializer=tf.compat.v1.random_normal_initializer()) + x = tf.compat.v1.placeholder(tf.float32, [1, 128, 128, 16], name="input") + conv_weights = tf.compat.v1.get_variable( + "weight", [2, 2, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [16], initializer=tf.compat.v1.random_normal_initializer()) + conv1_bias = tf.compat.v1.get_variable("bias1", [16], initializer=tf.compat.v1.random_normal_initializer()) x = tf.nn.relu(x) sqrt = tf.math.sqrt(x) relu_sqrt = tf.nn.relu(sqrt) - conv = tf.nn.conv2d(relu_sqrt, conv_weights, strides=[ - 1, 2, 2, 1], padding="SAME", name='last') + conv = tf.nn.conv2d(relu_sqrt, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name="last") conv_bias = tf.nn.bias_add(conv, conv_bias) - relu1 = tf.nn.relu(conv_bias) + relu1 = tf.nn.relu(conv_bias) - conv1 = tf.nn.conv2d(x, conv_weights, strides=[ - 1, 2, 2, 1], padding="SAME", name='last') + conv1 = tf.nn.conv2d(x, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name="last") conv1_bias = tf.nn.bias_add(conv1, conv1_bias) - relu2 = tf.nn.relu(conv1_bias) + relu2 = tf.nn.relu(conv1_bias) concat = tf.concat([relu1, relu2], 1) - pool = tf.nn.avg_pool(concat, ksize=1, strides=[1, 2, 2, 1], name='avgpool', padding="SAME") - final_node = tf.nn.relu(pool, name='op_to_store') - out_name = final_node.name.split(':')[0] + pool = tf.nn.avg_pool(concat, ksize=1, strides=[1, 2, 2, 1], name="avgpool", padding="SAME") + final_node = tf.nn.relu(pool, name="op_to_store") + out_name = final_node.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset( - 'dummy', shape=(100, 128, 128, 16), label=True) + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 128, 128, 16), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -190,34 +180,30 @@ def test_concat_with_same_input_type(self): quantized_concat = False 
for i in output_graph.graph_def.node: - if i.op == 'QuantizedConcatV2': - quantized_concat = True + if i.op == "QuantizedConcatV2": + quantized_concat = True self.assertEqual(quantized_concat, True) @disable_random() def test_concat_with_qint8_and_fp32_input_type(self): - x = tf.compat.v1.placeholder( - tf.float32, [1, 128, 128, 16], name="input") - bias = tf.compat.v1.get_variable("bias", [16], - initializer=tf.compat.v1.random_normal_initializer()) + x = tf.compat.v1.placeholder(tf.float32, [1, 128, 128, 16], name="input") + bias = tf.compat.v1.get_variable("bias", [16], initializer=tf.compat.v1.random_normal_initializer()) bias_add = tf.nn.bias_add(x, bias) - pool = tf.nn.avg_pool(x, ksize=1, strides=[1, 1, 1, 1], name='avgpool', padding="SAME") + pool = tf.nn.avg_pool(x, ksize=1, strides=[1, 1, 1, 1], name="avgpool", padding="SAME") concat = tf.concat([bias_add, pool], 1) - final_node = tf.nn.relu(concat , name='op_to_store') - out_name = final_node.name.split(':')[0] + final_node = tf.nn.relu(concat, name="op_to_store") + out_name = final_node.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset( - 'dummy', shape=(100, 128, 128, 16), label=True) + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 128, 128, 16), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -225,12 +211,14 @@ def test_concat_with_qint8_and_fp32_input_type(self): dtype = None quantized_concat = False from tensorflow.python.framework import dtypes + for i in output_graph.graph_def.node: - if i.op == 'QuantizedConcatV2': - dtype = dtypes.DType(i.attr['T'].type) - quantized_concat = True + if i.op == "QuantizedConcatV2": + dtype = dtypes.DType(i.attr["T"].type) + quantized_concat = True self.assertEqual(quantized_concat, True) self.assertEqual(dtype, dtypes.qint8) - + + if __name__ == "__main__": unittest.main() diff --git a/test/tfnewapi/test_tensorflow_graph_qdq_conv3d_fusion.py b/test/tfnewapi/test_tensorflow_graph_qdq_conv3d_fusion.py index cfd4545a0d0..cbcb63e95e2 100644 --- a/test/tfnewapi/test_tensorflow_graph_qdq_conv3d_fusion.py +++ b/test/tfnewapi/test_tensorflow_graph_qdq_conv3d_fusion.py @@ -1,24 +1,26 @@ # # -*- coding: utf-8 -*- # -import unittest +import logging import os -import yaml +import unittest + import numpy as np import tensorflow as tf -import logging - -from neural_compressor.adaptor.tf_utils.quantize_graph.quantize_graph_for_intel_cpu import QuantizeGraphForIntel -from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.strip_unused_nodes import StripUnusedNodesOptimizer -from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.fold_batch_norm import FoldBatchNormNodesOptimizer +import yaml +from pkg_resources import parse_version from tensorflow.compat.v1 import graph_util from tensorflow.python.framework import function + from neural_compressor.adaptor.tensorflow import TensorflowQuery +from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.fold_batch_norm import FoldBatchNormNodesOptimizer +from 
neural_compressor.adaptor.tf_utils.graph_rewriter.generic.strip_unused_nodes import StripUnusedNodesOptimizer +from neural_compressor.adaptor.tf_utils.quantize_graph.quantize_graph_for_intel_cpu import QuantizeGraphForIntel from neural_compressor.adaptor.tf_utils.util import disable_random -from pkg_resources import parse_version + def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -44,50 +46,51 @@ def build_fake_yaml(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() class TestTensorflowQdqConvFusion(unittest.TestCase): - @classmethod def setUpClass(self): build_fake_yaml() @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") @disable_random() def test_conv3d_addv2_relu_fusion(self): logging.getLogger().info("test_conv3d_addv2_relu_fusion") x = tf.compat.v1.placeholder(tf.float32, [1, 128, 64, 64, 16], name="input") top_relu = tf.nn.relu(x) - conv3d_1_weights = tf.compat.v1.get_variable("weight_conv3d_1", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_1_weights = tf.compat.v1.get_variable( + "weight_conv3d_1", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_1 = tf.nn.conv3d(top_relu, conv3d_1_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - add = tf.raw_ops.AddV2(x=conv3d_1, y=tf.constant(np.random.randn(32), dtype=tf.float32), name='addv2') + add = tf.raw_ops.AddV2(x=conv3d_1, y=tf.constant(np.random.randn(32), dtype=tf.float32), name="addv2") relu = tf.nn.relu(add) - conv3d_2_weights = tf.compat.v1.get_variable("weight_conv3d_2", [3, 3, 3, 32, 1], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_2_weights = tf.compat.v1.get_variable( + "weight_conv3d_2", [3, 3, 3, 32, 1], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_2 = tf.nn.conv3d(relu, conv3d_2_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - out_name = conv3d_2.name.split(':')[0] + out_name = conv3d_2.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 128, 64, 64, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 128, 64, 64, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -96,10 +99,10 @@ def test_conv3d_addv2_relu_fusion(self): found_conv_sumadd_fusion = False found_conv_biasadd_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D': - if b'Sum' in i.attr['fused_ops'].list.s: + if i.op == "_FusedQuantizedConv3D": + if b"Sum" in i.attr["fused_ops"].list.s: found_conv_sumadd_fusion = True - if i.attr['fused_ops'].list.s == [b'BiasAdd', b'Relu', b'Requantize']: + if i.attr["fused_ops"].list.s == [b"BiasAdd", b"Relu", b"Requantize"]: found_conv_biasadd_fusion = True 
self.assertEqual(found_conv_sumadd_fusion, False) self.assertEqual(found_conv_biasadd_fusion, True) @@ -107,23 +110,24 @@ def test_conv3d_addv2_relu_fusion(self): @disable_random() def test_single_conv3d_fusion(self): logging.getLogger().info("test_single_conv3d_fusion") - x = tf.compat.v1.placeholder(tf.float32, [1,64,64,64,1], name="input") + x = tf.compat.v1.placeholder(tf.float32, [1, 64, 64, 64, 1], name="input") paddings = tf.constant([[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [4, 4, 4, 1, 64], - initializer=tf.compat.v1.random_normal_initializer()) - conv = tf.nn.conv3d(x_pad, conv_weights, strides=[1,2,2,2,1], padding="VALID") - mul = tf.multiply(conv, 2.0, name='op_to_store') - out_name = mul.name.split(':')[0] + conv_weights = tf.compat.v1.get_variable( + "weight", [4, 4, 4, 1, 64], initializer=tf.compat.v1.random_normal_initializer() + ) + conv = tf.nn.conv3d(x_pad, conv_weights, strides=[1, 2, 2, 2, 1], padding="VALID") + mul = tf.multiply(conv, 2.0, name="op_to_store") + out_name = mul.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100,64,64,64,1), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 64, 64, 64, 1), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -132,9 +136,9 @@ def test_single_conv3d_fusion(self): found_dequantize_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D': + if i.op == "_FusedQuantizedConv3D": found_conv_fusion = True - if str(i.attr['fused_ops'].list.s) == str([b'Dequantize']): + if str(i.attr["fused_ops"].list.s) == str([b"Dequantize"]): found_dequantize_fusion = True self.assertEqual(found_conv_fusion, True) self.assertEqual(found_dequantize_fusion, True) @@ -142,24 +146,25 @@ def test_single_conv3d_fusion(self): @disable_random() def test_conv3d_biasadd_fusion(self): logging.getLogger().info("test_conv3d_biasadd_fusion") - x = tf.compat.v1.placeholder(tf.float32, [1,64,64,64,1], name="input") + x = tf.compat.v1.placeholder(tf.float32, [1, 64, 64, 64, 1], name="input") paddings = tf.constant([[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [4, 4, 4, 1, 64], - initializer=tf.compat.v1.random_normal_initializer()) - conv = tf.nn.conv3d(x_pad, conv_weights, strides=[1,2,2,2,1], padding="VALID") - y_const = tf.constant(np.random.randn(1,1,1,1,64), dtype=tf.float32) - add = tf.raw_ops.AddV2(x=conv, y=y_const, name='addv2') - out_name = add.name.split(':')[0] + conv_weights = tf.compat.v1.get_variable( + "weight", [4, 4, 4, 1, 64], initializer=tf.compat.v1.random_normal_initializer() + ) + conv = tf.nn.conv3d(x_pad, conv_weights, strides=[1, 2, 2, 2, 1], padding="VALID") + y_const = tf.constant(np.random.randn(1, 1, 1, 1, 64), dtype=tf.float32) + add = tf.raw_ops.AddV2(x=conv, y=y_const, name="addv2") + out_name = add.name.split(":")[0] with 
tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100,64,64,64,1), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 64, 64, 64, 1), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -168,32 +173,33 @@ def test_conv3d_biasadd_fusion(self): found_dequantize_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D': + if i.op == "_FusedQuantizedConv3D": found_conv_fusion = True - if str(i.attr['fused_ops'].list.s) == str([b'BiasAdd', b'Dequantize']): + if str(i.attr["fused_ops"].list.s) == str([b"BiasAdd", b"Dequantize"]): found_dequantize_fusion = True self.assertEqual(found_conv_fusion, True) self.assertEqual(found_dequantize_fusion, True) def test_conv3d_relu6_fusion(self): logging.getLogger().info("test_conv3d_biasadd_fusion") - x = tf.compat.v1.placeholder(tf.float32, [1,64,64,64,1], name="input") + x = tf.compat.v1.placeholder(tf.float32, [1, 64, 64, 64, 1], name="input") paddings = tf.constant([[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [4, 4, 4, 1, 64], - initializer=tf.compat.v1.random_normal_initializer()) - conv = tf.nn.conv3d(x_pad, conv_weights, strides=[1,2,2,2,1], padding="VALID") - relu6 = tf.nn.relu6(conv, name='op_to_store') - out_name = relu6.name.split(':')[0] + conv_weights = tf.compat.v1.get_variable( + "weight", [4, 4, 4, 1, 64], initializer=tf.compat.v1.random_normal_initializer() + ) + conv = tf.nn.conv3d(x_pad, conv_weights, strides=[1, 2, 2, 2, 1], padding="VALID") + relu6 = tf.nn.relu6(conv, name="op_to_store") + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100,64,64,64,1), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 64, 64, 64, 1), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -202,9 +208,9 @@ def test_conv3d_relu6_fusion(self): found_requantize_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D': + if i.op == "_FusedQuantizedConv3D": found_conv_fusion = True - if str(i.attr['fused_ops'].list.s) == str([b'BiasAdd', b'Relu', b'Dequantize']): + if str(i.attr["fused_ops"].list.s) == str([b"BiasAdd", b"Relu", b"Dequantize"]): found_requantize_fusion = True self.assertEqual(found_conv_fusion, True) self.assertEqual(found_requantize_fusion, True) @@ -212,26 +218,28 @@ def test_conv3d_relu6_fusion(self): 
@disable_random() def test_conv3d_add_relu_fusion(self): logging.getLogger().info("test_conv3d_add_relu_fusion") - x = tf.compat.v1.placeholder(tf.float32, [1,64,64,64,1], name="input") - conv_weights = tf.compat.v1.get_variable("weight1", [4, 4, 4, 1, 64], - initializer=tf.compat.v1.random_normal_initializer()) - conv1_weights = tf.compat.v1.get_variable("weight2", [4, 4, 4, 1, 64], - initializer=tf.compat.v1.random_normal_initializer()) - conv = tf.nn.conv3d(x, conv_weights, strides=[1,2,2,2,1], padding="VALID") - conv1 = tf.nn.conv3d(x, conv1_weights, strides=[1,2,2,2,1], padding="VALID") + x = tf.compat.v1.placeholder(tf.float32, [1, 64, 64, 64, 1], name="input") + conv_weights = tf.compat.v1.get_variable( + "weight1", [4, 4, 4, 1, 64], initializer=tf.compat.v1.random_normal_initializer() + ) + conv1_weights = tf.compat.v1.get_variable( + "weight2", [4, 4, 4, 1, 64], initializer=tf.compat.v1.random_normal_initializer() + ) + conv = tf.nn.conv3d(x, conv_weights, strides=[1, 2, 2, 2, 1], padding="VALID") + conv1 = tf.nn.conv3d(x, conv1_weights, strides=[1, 2, 2, 2, 1], padding="VALID") add = conv + conv1 relu = tf.nn.relu(add) - out_name = relu.name.split(':')[0] + out_name = relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100,64,64,64,1), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 64, 64, 64, 1), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -239,7 +247,7 @@ def test_conv3d_add_relu_fusion(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D': + if i.op == "_FusedQuantizedConv3D": found_conv_fusion = True break self.assertEqual(found_conv_fusion, True) @@ -247,39 +255,98 @@ def test_conv3d_add_relu_fusion(self): @disable_random() def test_conv3d_add_const_fusion(self): logging.getLogger().info("test_conv3d_add_const_fusion") - x = tf.compat.v1.placeholder(tf.float32, [1,64,64,64,1], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [4, 4, 4, 1, 64], - initializer=tf.compat.v1.random_normal_initializer()) - conv = tf.nn.conv3d(x, conv_weights, strides=[1,2,2,2,1], padding="VALID") - add = conv + tf.constant([[[[[ - 0.000015179151887423359, 0.000022200847524800338, -0.000009995766049541999, -0.0000022956028260523453, - 0.000008830029400996864, 0.0000017190360495078494, 0.000019561824956326745, 0.00014721050683874637, - -0.000005871841494808905, 0.000004377178811409976, -0.000006191140982991783, 0.000009258330464945175, - -0.000009839599442784674, 0.000008547322067897767, 0.000004629391241905978, 2.345327061448188e-7, - 0.000015179151887423359, 0.000022200847524800338, -0.000009995766049541999, -0.0000022956028260523453, - 0.000008830029400996864, 0.0000017190360495078494, 0.000019561824956326745, 0.00014721050683874637, - -0.000005871841494808905, 0.000004377178811409976, -0.000006191140982991783, 0.000009258330464945175, - -0.000009839599442784674, 0.000008547322067897767, 0.000004629391241905978, 
2.345327061448188e-7, - 0.000015179151887423359, 0.000022200847524800338, -0.000009995766049541999, -0.0000022956028260523453, - 0.000008830029400996864, 0.0000017190360495078494, 0.000019561824956326745, 0.00014721050683874637, - -0.000005871841494808905, 0.000004377178811409976, -0.000006191140982991783, 0.000009258330464945175, - -0.000009839599442784674, 0.000008547322067897767, 0.000004629391241905978, 2.345327061448188e-7, - 0.000015179151887423359, 0.000022200847524800338, -0.000009995766049541999, -0.0000022956028260523453, - 0.000008830029400996864, 0.0000017190360495078494, 0.000019561824956326745, 0.00014721050683874637, - -0.000005871841494808905, 0.000004377178811409976, -0.000006191140982991783, 0.000009258330464945175, - -0.000009839599442784674, 0.000008547322067897767, 0.000004629391241905978, 2.345327061448188e-7 - ]]]]]) - - out_name = add.name.split(':')[0] + x = tf.compat.v1.placeholder(tf.float32, [1, 64, 64, 64, 1], name="input") + conv_weights = tf.compat.v1.get_variable( + "weight", [4, 4, 4, 1, 64], initializer=tf.compat.v1.random_normal_initializer() + ) + conv = tf.nn.conv3d(x, conv_weights, strides=[1, 2, 2, 2, 1], padding="VALID") + add = conv + tf.constant( + [ + [ + [ + [ + [ + 0.000015179151887423359, + 0.000022200847524800338, + -0.000009995766049541999, + -0.0000022956028260523453, + 0.000008830029400996864, + 0.0000017190360495078494, + 0.000019561824956326745, + 0.00014721050683874637, + -0.000005871841494808905, + 0.000004377178811409976, + -0.000006191140982991783, + 0.000009258330464945175, + -0.000009839599442784674, + 0.000008547322067897767, + 0.000004629391241905978, + 2.345327061448188e-7, + 0.000015179151887423359, + 0.000022200847524800338, + -0.000009995766049541999, + -0.0000022956028260523453, + 0.000008830029400996864, + 0.0000017190360495078494, + 0.000019561824956326745, + 0.00014721050683874637, + -0.000005871841494808905, + 0.000004377178811409976, + -0.000006191140982991783, + 0.000009258330464945175, + -0.000009839599442784674, + 0.000008547322067897767, + 0.000004629391241905978, + 2.345327061448188e-7, + 0.000015179151887423359, + 0.000022200847524800338, + -0.000009995766049541999, + -0.0000022956028260523453, + 0.000008830029400996864, + 0.0000017190360495078494, + 0.000019561824956326745, + 0.00014721050683874637, + -0.000005871841494808905, + 0.000004377178811409976, + -0.000006191140982991783, + 0.000009258330464945175, + -0.000009839599442784674, + 0.000008547322067897767, + 0.000004629391241905978, + 2.345327061448188e-7, + 0.000015179151887423359, + 0.000022200847524800338, + -0.000009995766049541999, + -0.0000022956028260523453, + 0.000008830029400996864, + 0.0000017190360495078494, + 0.000019561824956326745, + 0.00014721050683874637, + -0.000005871841494808905, + 0.000004377178811409976, + -0.000006191140982991783, + 0.000009258330464945175, + -0.000009839599442784674, + 0.000008547322067897767, + 0.000004629391241905978, + 2.345327061448188e-7, + ] + ] + ] + ] + ] + ) + + out_name = add.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100,64,64,64,1), label=True) + + quantizer = 
Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 64, 64, 64, 1), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -287,7 +354,7 @@ def test_conv3d_add_const_fusion(self): found_conv_fusion = True for i in output_graph.graph_def.node: - if i.op == 'AddV2': + if i.op == "AddV2": found_conv_fusion = False break self.assertEqual(found_conv_fusion, True) @@ -297,24 +364,26 @@ def test_conv3d_add_addn_const_relu_fusion(self): logging.getLogger().info("test_conv3d_add_addn_const_relu_fusion") x = tf.compat.v1.placeholder(tf.float32, [1, 128, 64, 64, 16], name="input") top_relu = tf.nn.relu(x) - conv3d_1_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_1_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_1 = tf.nn.conv3d(top_relu, conv3d_1_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=tf.constant(np.random.randn(32), dtype=tf.float32), name='addv2_1') - var = tf.compat.v1.get_variable("add_y", [1,64,32,32,32], - initializer=tf.compat.v1.random_normal_initializer()) - add_2 = tf.raw_ops.AddV2(x=add_1, y=var, name='addv2_2') + add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=tf.constant(np.random.randn(32), dtype=tf.float32), name="addv2_1") + var = tf.compat.v1.get_variable( + "add_y", [1, 64, 32, 32, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + add_2 = tf.raw_ops.AddV2(x=add_1, y=var, name="addv2_2") relu = tf.nn.relu(add_2) - out_name = relu.name.split(':')[0] + out_name = relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 128, 64, 64, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 128, 64, 64, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -323,10 +392,10 @@ def test_conv3d_add_addn_const_relu_fusion(self): found_conv_sumadd_fusion = False found_conv_biasadd_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D': - if str(b'Sum') in str(i.attr['fused_ops'].list.s): + if i.op == "_FusedQuantizedConv3D": + if str(b"Sum") in str(i.attr["fused_ops"].list.s): found_conv_sumadd_fusion = True - if str(i.attr['fused_ops'].list.s) == str([b'BiasAdd', b'Sum', b'Relu']): + if str(i.attr["fused_ops"].list.s) == str([b"BiasAdd", b"Sum", b"Relu"]): found_conv_biasadd_fusion = True self.assertEqual(found_conv_sumadd_fusion, False) self.assertEqual(found_conv_biasadd_fusion, False) @@ -338,25 +407,27 @@ def test_conv3d_add_const_addn_relu_fusion(self): paddings = tf.constant([[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") top_relu = tf.nn.relu(x_pad) - conv3d_1_weights = tf.compat.v1.get_variable("weight1", [3, 3, 3, 16, 32], - 
initializer=tf.compat.v1.random_normal_initializer()) + conv3d_1_weights = tf.compat.v1.get_variable( + "weight1", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_1 = tf.nn.conv3d(top_relu, conv3d_1_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=tf.constant(np.random.randn(32), dtype=tf.float32), name='addv2_1') - conv3d_2_weights = tf.compat.v1.get_variable("weight2", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=tf.constant(np.random.randn(32), dtype=tf.float32), name="addv2_1") + conv3d_2_weights = tf.compat.v1.get_variable( + "weight2", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_2 = tf.nn.conv3d(top_relu, conv3d_2_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - add = tf.raw_ops.AddV2(x=add_1, y=conv3d_2, name='addv2_2') + add = tf.raw_ops.AddV2(x=add_1, y=conv3d_2, name="addv2_2") relu = tf.nn.relu(add) - out_name = relu.name.split(':')[0] + out_name = relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 128, 64, 64, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 128, 64, 64, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -365,13 +436,13 @@ def test_conv3d_add_const_addn_relu_fusion(self): found_conv_sumadd_fusion = False found_conv_biasadd_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D': - if str(b'Sum') in str(i.attr['fused_ops'].list.s): + if i.op == "_FusedQuantizedConv3D": + if str(b"Sum") in str(i.attr["fused_ops"].list.s): found_conv_sumadd_fusion = True - if str(i.attr['fused_ops'].list.s) == str([b'BiasAdd', b'Sum', b'Relu']): + if str(i.attr["fused_ops"].list.s) == str([b"BiasAdd", b"Sum", b"Relu"]): found_conv_biasadd_fusion = True self.assertEqual(found_conv_sumadd_fusion, True) - + @disable_random() def test_conv3d_add_addn_fusion(self): logging.getLogger().info("test_conv3d_add_addn_fusion") @@ -379,24 +450,26 @@ def test_conv3d_add_addn_fusion(self): paddings = tf.constant([[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") top_relu = tf.nn.relu(x_pad) - conv3d_1_weights = tf.compat.v1.get_variable("weight1", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_1_weights = tf.compat.v1.get_variable( + "weight1", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_1 = tf.nn.conv3d(top_relu, conv3d_1_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=tf.constant(np.random.randn(32), dtype=tf.float32), name='addv2_4') - conv3d_2_weights = tf.compat.v1.get_variable("weight2", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=tf.constant(np.random.randn(32), dtype=tf.float32), name="addv2_4") + conv3d_2_weights = 
tf.compat.v1.get_variable( + "weight2", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_2 = tf.nn.conv3d(top_relu, conv3d_2_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - add = tf.raw_ops.AddV2(x=add_1, y=conv3d_2, name='addv2') - out_name = add.name.split(':')[0] + add = tf.raw_ops.AddV2(x=add_1, y=conv3d_2, name="addv2") + out_name = add.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 128, 64, 64, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 128, 64, 64, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -404,8 +477,11 @@ def test_conv3d_add_addn_fusion(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'Sum', b'Requantize']: + if i.op == "_FusedQuantizedConv3D" and i.attr["fused_ops"].list.s == [ + b"BiasAdd", + b"Sum", + b"Requantize", + ]: found_conv_fusion = True self.assertEqual(found_conv_fusion, True) @@ -416,28 +492,31 @@ def test_conv3d_add_addn_relu_fusion(self): paddings = tf.constant([[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") top_relu = tf.nn.relu(x_pad) - conv3d_1_weights = tf.compat.v1.get_variable("weight1", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) - conv3d_2_weights = tf.compat.v1.get_variable("weight2", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_1_weights = tf.compat.v1.get_variable( + "weight1", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) + conv3d_2_weights = tf.compat.v1.get_variable( + "weight2", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_1 = tf.nn.conv3d(top_relu, conv3d_1_weights, strides=[1, 2, 2, 2, 1], padding="SAME") conv3d_2 = tf.nn.conv3d(top_relu, conv3d_2_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=conv3d_2, name='addv2_1') - conv3d_3_weights = tf.compat.v1.get_variable("weight3", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=conv3d_2, name="addv2_1") + conv3d_3_weights = tf.compat.v1.get_variable( + "weight3", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_2 = tf.nn.conv3d(top_relu, conv3d_3_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - add = tf.raw_ops.AddV2(x=add_1, y=conv3d_2, name='addv2_2') + add = tf.raw_ops.AddV2(x=add_1, y=conv3d_2, name="addv2_2") relu = tf.nn.relu(add) - out_name = relu.name.split(':')[0] + out_name = relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) 
from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 128, 64, 64, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 128, 64, 64, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -445,32 +524,37 @@ def test_conv3d_add_addn_relu_fusion(self): found_relu_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'Sum', b'Relu', b'Requantize']: + if i.op == "_FusedQuantizedConv3D" and i.attr["fused_ops"].list.s == [ + b"BiasAdd", + b"Sum", + b"Relu", + b"Requantize", + ]: found_relu_fusion = True self.assertEqual(found_relu_fusion, True) @disable_random() def test_conv3d_leakyrelu_fusion(self): logging.getLogger().info("test_conv3d_relu_fusion") - x = tf.compat.v1.placeholder(tf.float32, [1,64,64,64,1], name="input") + x = tf.compat.v1.placeholder(tf.float32, [1, 64, 64, 64, 1], name="input") paddings = tf.constant([[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [4, 4, 4, 1, 64], - initializer=tf.compat.v1.random_normal_initializer()) - conv = tf.nn.conv3d(x_pad, conv_weights, strides=[1,2,2,2,1], padding="VALID") + conv_weights = tf.compat.v1.get_variable( + "weight", [4, 4, 4, 1, 64], initializer=tf.compat.v1.random_normal_initializer() + ) + conv = tf.nn.conv3d(x_pad, conv_weights, strides=[1, 2, 2, 2, 1], padding="VALID") relu = tf.nn.leaky_relu(conv) - out_name = relu.name.split(':')[0] + out_name = relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100,64,64,64,1), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 64, 64, 64, 1), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -478,8 +562,11 @@ def test_conv3d_leakyrelu_fusion(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'LeakyRelu', b'Dequantize']: + if i.op == "_FusedQuantizedConv3D" and i.attr["fused_ops"].list.s == [ + b"BiasAdd", + b"LeakyRelu", + b"Dequantize", + ]: found_conv_fusion = True break self.assertEqual(found_conv_fusion, True) @@ -491,23 +578,25 @@ def test_conv3d_add_fusion(self): paddings = tf.constant([[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") top_relu = tf.nn.relu(x_pad) - conv3d_1_weights = tf.compat.v1.get_variable("weight1", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_1_weights = tf.compat.v1.get_variable( + "weight1", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_1 = tf.nn.conv3d(top_relu, conv3d_1_weights, strides=[1, 2, 2, 2, 1], 
padding="SAME") - conv3d_2_weights = tf.compat.v1.get_variable("weight2", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_2_weights = tf.compat.v1.get_variable( + "weight2", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_2 = tf.nn.conv3d(top_relu, conv3d_2_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - add = tf.raw_ops.AddV2(x=conv3d_1, y=conv3d_2, name='addv2') - out_name = add.name.split(':')[0] + add = tf.raw_ops.AddV2(x=conv3d_1, y=conv3d_2, name="addv2") + out_name = add.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 128, 64, 64, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 128, 64, 64, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -515,8 +604,11 @@ def test_conv3d_add_fusion(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'Sum', b'Requantize']: + if i.op == "_FusedQuantizedConv3D" and i.attr["fused_ops"].list.s == [ + b"BiasAdd", + b"Sum", + b"Requantize", + ]: found_conv_fusion = True self.assertEqual(found_conv_fusion, True) @@ -527,26 +619,28 @@ def test_conv3d_add_const_addn_relu_requantize_fusion(self): paddings = tf.constant([[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") top_relu = tf.nn.relu(x_pad) - conv3d_1_weights = tf.compat.v1.get_variable("weight1", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_1_weights = tf.compat.v1.get_variable( + "weight1", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_1 = tf.nn.conv3d(top_relu, conv3d_1_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - y_const = tf.constant(np.random.randn(1,1,1,1,32), dtype=tf.float32) - add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=y_const, name='addv2_1') - conv3d_2_weights = tf.compat.v1.get_variable("weight2", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + y_const = tf.constant(np.random.randn(1, 1, 1, 1, 32), dtype=tf.float32) + add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=y_const, name="addv2_1") + conv3d_2_weights = tf.compat.v1.get_variable( + "weight2", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_2 = tf.nn.conv3d(top_relu, conv3d_2_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - add_2 = tf.raw_ops.AddV2(x=add_1, y=conv3d_2, name='addv2_2') + add_2 = tf.raw_ops.AddV2(x=add_1, y=conv3d_2, name="addv2_2") relu = tf.nn.relu(add_2) - out_name = relu.name.split(':')[0] + out_name = relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from 
neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 128, 64, 64, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 128, 64, 64, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -555,10 +649,10 @@ def test_conv3d_add_const_addn_relu_requantize_fusion(self): found_conv_sumadd_fusion = False found_conv_biasadd_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D': - if str(b'Sum') in str(i.attr['fused_ops'].list.s): + if i.op == "_FusedQuantizedConv3D": + if str(b"Sum") in str(i.attr["fused_ops"].list.s): found_conv_sumadd_fusion = True - if str(i.attr['fused_ops'].list.s) == str([b'BiasAdd', b'Sum', b'Relu', b'Requantize']): + if str(i.attr["fused_ops"].list.s) == str([b"BiasAdd", b"Sum", b"Relu", b"Requantize"]): found_conv_biasadd_fusion = True self.assertEqual(found_conv_sumadd_fusion, True) self.assertEqual(found_conv_biasadd_fusion, True) @@ -570,25 +664,27 @@ def test_conv3d_add_const_addn_fusion(self): paddings = tf.constant([[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") top_relu = tf.nn.relu(x_pad) - conv3d_1_weights = tf.compat.v1.get_variable("weight1", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_1_weights = tf.compat.v1.get_variable( + "weight1", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_1 = tf.nn.conv3d(top_relu, conv3d_1_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - y_const = tf.constant(np.random.randn(1,1,1,1,32), dtype=tf.float32) - add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=y_const, name='addv2_1') - conv3d_2_weights = tf.compat.v1.get_variable("weight2", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + y_const = tf.constant(np.random.randn(1, 1, 1, 1, 32), dtype=tf.float32) + add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=y_const, name="addv2_1") + conv3d_2_weights = tf.compat.v1.get_variable( + "weight2", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_2 = tf.nn.conv3d(top_relu, conv3d_2_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - add_2 = tf.raw_ops.AddV2(x=add_1, y=conv3d_2, name='addv2_2') - out_name = add_2.name.split(':')[0] + add_2 = tf.raw_ops.AddV2(x=add_1, y=conv3d_2, name="addv2_2") + out_name = add_2.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 128, 64, 64, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 128, 64, 64, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -596,7 +692,7 @@ def test_conv3d_add_const_addn_fusion(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D': + if i.op == 
"_FusedQuantizedConv3D": found_conv_fusion = True self.assertEqual(found_conv_fusion, True) @@ -607,29 +703,30 @@ def test_conv3d_add_no_relu_fusion(self): paddings = tf.constant([[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") top_relu = tf.nn.relu(x_pad) - conv3d_1_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_1_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_1 = tf.nn.conv3d(top_relu, conv3d_1_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - y_const = tf.constant(np.random.randn(1,1,1,1,32), dtype=tf.float32) - add = tf.raw_ops.AddV2(x=conv3d_1, y=y_const, name='addv2') + y_const = tf.constant(np.random.randn(1, 1, 1, 1, 32), dtype=tf.float32) + add = tf.raw_ops.AddV2(x=conv3d_1, y=y_const, name="addv2") pooling = tf.nn.max_pool(add, ksize=1, strides=[1, 2, 2, 2, 1], padding="SAME") - out_name = pooling.name.split(':')[0] + out_name = pooling.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 128, 64, 64, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 128, 64, 64, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D': + if i.op == "_FusedQuantizedConv3D": found_conv_fusion = True break self.assertEqual(found_conv_fusion, True) @@ -641,29 +738,30 @@ def test_conv3d_add_const_relu_fusion(self): paddings = tf.constant([[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") top_relu = tf.nn.relu(x_pad) - conv3d_1_weights = tf.compat.v1.get_variable("weight", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_1_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_1 = tf.nn.conv3d(top_relu, conv3d_1_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - y_const = tf.constant(np.random.randn(1,1,1,1,32), dtype=tf.float32) - add = tf.raw_ops.AddV2(x=conv3d_1, y=y_const, name='addv2') + y_const = tf.constant(np.random.randn(1, 1, 1, 1, 32), dtype=tf.float32) + add = tf.raw_ops.AddV2(x=conv3d_1, y=y_const, name="addv2") relu = tf.nn.relu(add) - out_name = relu.name.split(':')[0] + out_name = relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 128, 64, 64, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + 
dataset = quantizer.dataset("dummy", shape=(100, 128, 64, 64, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D': + if i.op == "_FusedQuantizedConv3D": found_conv_fusion = True break self.assertEqual(found_conv_fusion, True) @@ -675,34 +773,36 @@ def test_conv3d_add_const_leakyrelu_add_fusion(self): paddings = tf.constant([[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") top_relu = tf.nn.relu(x_pad) - conv3d_1_weights = tf.compat.v1.get_variable("weight1", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_1_weights = tf.compat.v1.get_variable( + "weight1", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_1 = tf.nn.conv3d(top_relu, conv3d_1_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - y_const = tf.constant(np.random.randn(1,1,1,1,32), dtype=tf.float32) - add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=y_const, name='addv2_1') + y_const = tf.constant(np.random.randn(1, 1, 1, 1, 32), dtype=tf.float32) + add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=y_const, name="addv2_1") relu = tf.nn.leaky_relu(add_1) - conv3d_2_weights = tf.compat.v1.get_variable("weight2", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_2_weights = tf.compat.v1.get_variable( + "weight2", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_2 = tf.nn.conv3d(top_relu, conv3d_2_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - add_2 = tf.raw_ops.AddV2(x=relu, y=conv3d_2, name='addv2_2') - out_name = add_2.name.split(':')[0] + add_2 = tf.raw_ops.AddV2(x=relu, y=conv3d_2, name="addv2_2") + out_name = add_2.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 128, 64, 64, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 128, 64, 64, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def output_graph = quantizer.fit() found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D': - found_conv_fusion = True + if i.op == "_FusedQuantizedConv3D": + found_conv_fusion = True self.assertEqual(found_conv_fusion, True) @disable_random() @@ -712,27 +812,30 @@ def test_conv3d_add_addn_non_const_fusion(self): paddings = tf.constant([[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") top_relu = tf.nn.relu(x_pad) - conv3d_1_weights = tf.compat.v1.get_variable("weight1", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_1_weights = tf.compat.v1.get_variable( + "weight1", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_1 = tf.nn.conv3d(top_relu, conv3d_1_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - conv3d_2_weights = 
tf.compat.v1.get_variable("weight2", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + conv3d_2_weights = tf.compat.v1.get_variable( + "weight2", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_2 = tf.nn.conv3d(top_relu, conv3d_2_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=conv3d_2, name='addv2_1') - conv3d_3_weights = tf.compat.v1.get_variable("weight3", [3, 3, 3, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + add_1 = tf.raw_ops.AddV2(x=conv3d_1, y=conv3d_2, name="addv2_1") + conv3d_3_weights = tf.compat.v1.get_variable( + "weight3", [3, 3, 3, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) conv3d_3 = tf.nn.conv3d(top_relu, conv3d_3_weights, strides=[1, 2, 2, 2, 1], padding="SAME") - add = tf.raw_ops.AddV2(x=add_1, y=conv3d_3, name='addv2_2') - out_name = add.name.split(':')[0] + add = tf.raw_ops.AddV2(x=add_1, y=conv3d_3, name="addv2_2") + out_name = add.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 128, 64, 64, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 128, 64, 64, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -740,10 +843,12 @@ def test_conv3d_add_addn_non_const_fusion(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv3D' and \ - str(i.attr['fused_ops'].list.s) == str([b'BiasAdd', b'Sum', b'Requantize']): + if i.op == "_FusedQuantizedConv3D" and str(i.attr["fused_ops"].list.s) == str( + [b"BiasAdd", b"Sum", b"Requantize"] + ): found_conv_fusion = True self.assertEqual(found_conv_fusion, True) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/test/tfnewapi/test_tensorflow_graph_qdq_conv_fusion.py b/test/tfnewapi/test_tensorflow_graph_qdq_conv_fusion.py index fbe56954627..f10209114c6 100644 --- a/test/tfnewapi/test_tensorflow_graph_qdq_conv_fusion.py +++ b/test/tfnewapi/test_tensorflow_graph_qdq_conv_fusion.py @@ -1,19 +1,21 @@ # # -*- coding: utf-8 -*- # -import unittest +import logging import os -import yaml +import unittest + import numpy as np import tensorflow as tf -import logging - +import yaml from tensorflow.compat.v1 import graph_util from tensorflow.python.framework import function + from neural_compressor.adaptor.tf_utils.util import disable_random + def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -39,25 +41,24 @@ def build_fake_yaml(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() class TestTensorflowQdqConvFusion(unittest.TestCase): - @classmethod def setUpClass(self): build_fake_yaml() @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + 
os.remove("fake_yaml.yaml") @disable_random() def test_fold_pad_conv(self): @@ -65,22 +66,23 @@ def test_fold_pad_conv(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) - relu = tf.nn.relu(normed, name='op_to_store') - out_name = relu.name.split(':')[0] + relu = tf.nn.relu(normed, name="op_to_store") + out_name = relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -88,7 +90,7 @@ def test_fold_pad_conv(self): found_pad = False for i in output_graph.graph_def.node: - if i.op == 'Pad': + if i.op == "Pad": found_pad = True break self.assertEqual(found_pad, False) @@ -100,23 +102,24 @@ def test_conv_relu_fusion(self): top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") relu = tf.nn.relu(conv) - relu6 = tf.nn.relu6(relu, name='op_to_store') + relu6 = tf.nn.relu6(relu, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -124,7 +127,7 @@ def test_conv_relu_fusion(self): found_conv_fusion = True for i in output_graph.graph_def.node: - if i.op == 'Relu': + if i.op == "Relu": found_conv_fusion = False break @@ -136,23 +139,24 @@ def test_conv_biasadd_relu6_fusion(self): x 
= tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) - relu6 = tf.nn.relu6(normed, name='op_to_store') + relu6 = tf.nn.relu6(normed, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -160,7 +164,7 @@ def test_conv_biasadd_relu6_fusion(self): found_conv_fusion = True for i in output_graph.graph_def.node: - if i.op == 'Relu6': + if i.op == "Relu6": found_conv_fusion = False break self.assertEqual(found_conv_fusion, True) @@ -171,26 +175,28 @@ def test_conv_biasadd_swishf32_fusion(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) @function.Defun(tf.float32, func_name="swish_f32") def swish_f32(x): return tf.nn.silu(x, beta=1.0) + swish = swish_f32(normed, name="swish_f32_output_node") - out_name = swish.name.split(':')[0] + out_name = swish.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -198,7 +204,7 @@ def swish_f32(x): found_conv_fusion = True for i in output_graph.graph_def.node: - if i.op == 'swish_f32': + if i.op == "swish_f32": found_conv_fusion = False break 
self.assertEqual(found_conv_fusion, True) @@ -207,24 +213,26 @@ def swish_f32(x): def test_conv_addv2_fusion(self): logging.getLogger().info("test_conv_addv2_fusion") x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") - conv1_weights = tf.compat.v1.get_variable("weight_conv1", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv1_weights = tf.compat.v1.get_variable( + "weight_conv1", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv1 = tf.nn.conv2d(x, conv1_weights, strides=[1, 2, 2, 1], padding="SAME") - conv2_weights = tf.compat.v1.get_variable("weight_conv2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv2_weights = tf.compat.v1.get_variable( + "weight_conv2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(x, conv2_weights, strides=[1, 2, 2, 1], padding="SAME") - sumadd = tf.raw_ops.AddV2(x=conv1, y=conv2, name='addv2') + sumadd = tf.raw_ops.AddV2(x=conv1, y=conv2, name="addv2") - out_name = sumadd.name.split(':')[0] + out_name = sumadd.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -232,7 +240,7 @@ def test_conv_addv2_fusion(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op.find('QuantizedConv2D') != -1: + if i.op.find("QuantizedConv2D") != -1: found_conv_fusion = True break @@ -243,24 +251,25 @@ def test_conv_biasadd_add_relu_fusion(self): logging.getLogger().info("test_conv_biasadd_add_relu_fusion") x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") top_relu = tf.nn.relu(x) - conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(top_relu, conv_weights2, strides=[1, 2, 2, 1], padding="SAME") - normed2 = tf.nn.bias_add(conv2, tf.constant([3.0, 1.2,1,2,3,4,5,6,7,8,9,0,12,2,3,4])) + normed2 = tf.nn.bias_add(conv2, tf.constant([3.0, 1.2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 12, 2, 3, 4])) relu = tf.nn.relu(normed2 + tf.constant([3.0])) - relu6 = tf.nn.relu6(relu, name='op_to_store') + relu6 = tf.nn.relu6(relu, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = 
quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -269,7 +278,7 @@ def test_conv_biasadd_add_relu_fusion(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op.find('QuantizedConv2D') != -1: + if i.op.find("QuantizedConv2D") != -1: found_conv_fusion = True break @@ -282,32 +291,34 @@ def test_conv_biasadd_addv2_relu_fallback_fusion_1(self): top_relu = tf.nn.leaky_relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) # relu = tf.nn.relu(normed) - conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(top_relu, conv_weights2, strides=[1, 2, 2, 1], padding="SAME") normed2 = tf.compat.v1.layers.batch_normalization(conv2) # relu2 = tf.nn.relu(normed2) - add = tf.raw_ops.AddV2(x=normed, y=normed2, name='addv2') + add = tf.raw_ops.AddV2(x=normed, y=normed2, name="addv2") relu = tf.nn.relu(add) - relu6 = tf.nn.relu6(relu, name='op_to_store') + relu6 = tf.nn.relu6(relu, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -316,46 +327,53 @@ def test_conv_biasadd_addv2_relu_fallback_fusion_1(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv2D' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'Sum', b'Relu', b'Requantize']: + if i.op == "_FusedQuantizedConv2D" and i.attr["fused_ops"].list.s == [ + b"BiasAdd", + b"Sum", + b"Relu", + b"Requantize", + ]: found_conv_fusion = True break self.assertEqual(found_conv_fusion, True) - + @disable_random() def test_conv_fusion_with_last_conv(self): logging.getLogger().info("test_conv_fusion_with_last_conv") x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") top_relu = tf.nn.relu(x) - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = 
tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(top_relu, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) relu = tf.nn.relu(normed) pooling = tf.nn.max_pool(relu, ksize=1, strides=[1, 2, 2, 1], padding="SAME") - conv_weights_2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights_2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(pooling, conv_weights_2, strides=[1, 2, 2, 1], padding="VALID") - conv_weights_3 = tf.compat.v1.get_variable("weight3", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights_3 = tf.compat.v1.get_variable( + "weight3", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) relu2 = tf.nn.relu(conv2) conv3 = tf.nn.conv2d(relu2, conv_weights_3, strides=[1, 2, 2, 1], padding="VALID") relu3 = tf.nn.relu(conv3) - relu6 = tf.nn.relu6(relu3, name='op_to_store') + relu6 = tf.nn.relu6(relu3, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -363,7 +381,7 @@ def test_conv_fusion_with_last_conv(self): quantize_v2_count = 0 for i in output_graph.graph_def.node: - if i.op == 'QuantizeV2': + if i.op == "QuantizeV2": quantize_v2_count += 1 break @@ -376,21 +394,22 @@ def test_conv_fusion_with_max_pooling(self): relu = tf.nn.relu(x) pooling = tf.nn.max_pool(relu, ksize=1, strides=[1, 2, 2, 1], padding="SAME") - conv_weights = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(pooling, conv_weights, strides=[1, 2, 2, 1], padding="VALID") - biasadd = tf.compat.v1.layers.batch_normalization(conv, name='op_to_store') - out_name = biasadd.name.split(':')[0] + biasadd = tf.compat.v1.layers.batch_normalization(conv, name="op_to_store") + out_name = biasadd.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = 
quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -400,9 +419,9 @@ def test_conv_fusion_with_max_pooling(self): quantized_conv_data_type = None for i in output_graph.graph_def.node: if i.op.find("QuantizedMaxPool") != -1: - quantized_pool_data_type = i.attr['T'].type + quantized_pool_data_type = i.attr["T"].type if i.op.find("QuantizedConv2D") != -1: - quantized_conv_data_type = i.attr['Tinput'].type + quantized_conv_data_type = i.attr["Tinput"].type self.assertNotEqual(quantized_pool_data_type, None) self.assertEqual(quantized_pool_data_type, quantized_conv_data_type) @@ -414,21 +433,22 @@ def test_conv_biasadd_fusion(self): top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") - normed = tf.compat.v1.layers.batch_normalization(conv, name='op_to_store') + normed = tf.compat.v1.layers.batch_normalization(conv, name="op_to_store") - out_name = normed.name.split(':')[0] + out_name = normed.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -436,7 +456,7 @@ def test_conv_biasadd_fusion(self): found_conv_fusion = True for i in output_graph.graph_def.node: - if i.op == 'batch_normalization/FusedBatchNormV3': + if i.op == "batch_normalization/FusedBatchNormV3": found_conv_fusion = False break self.assertEqual(found_conv_fusion, True) @@ -446,22 +466,23 @@ def test_depthwiseconv_biasadd_fusion(self): logging.getLogger().info("test_depthwiseconv_biasadd_fusion") x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") top_relu = tf.nn.relu(x) - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.depthwise_conv2d(top_relu, conv_weights, strides=[1, 1, 1, 1], padding="VALID") - normed = tf.compat.v1.layers.batch_normalization(conv, name='op_to_store') + normed = tf.compat.v1.layers.batch_normalization(conv, name="op_to_store") - out_name = normed.name.split(':')[0] + out_name = normed.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - 
input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -469,7 +490,7 @@ def test_depthwiseconv_biasadd_fusion(self): found_conv_fusion = True for i in output_graph.graph_def.node: - if i.op == 'batch_normalization/FusedBatchNormV3': + if i.op == "batch_normalization/FusedBatchNormV3": found_conv_fusion = False break self.assertEqual(found_conv_fusion, True) @@ -478,23 +499,24 @@ def test_depthwiseconv_biasadd_fusion(self): def test_conv_biasadd_relu_fusion(self): logging.getLogger().info("test_conv_biasadd_relu_fusion") x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) - relu = tf.nn.relu(normed, name='op_to_store') + relu = tf.nn.relu(normed, name="op_to_store") - out_name = relu.name.split(':')[0] + out_name = relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -502,7 +524,7 @@ def test_conv_biasadd_relu_fusion(self): found_conv_fusion = True for i in output_graph.graph_def.node: - if i.op == 'Relu': + if i.op == "Relu": found_conv_fusion = False break self.assertEqual(found_conv_fusion, True) @@ -511,23 +533,24 @@ def test_conv_biasadd_relu_fusion(self): def test_conv_biasadd_leakyrelu_fusion(self): logging.getLogger().info("test_conv_biasadd_leakyrelu_fusion") x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) - leaky_relu = tf.nn.leaky_relu(normed, name='op_to_store') + leaky_relu = tf.nn.leaky_relu(normed, name="op_to_store") - out_name = leaky_relu.name.split(':')[0] + out_name = leaky_relu.name.split(":")[0] with 
tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -535,7 +558,7 @@ def test_conv_biasadd_leakyrelu_fusion(self): found_conv_fusion = True for i in output_graph.graph_def.node: - if i.op == 'Leaky_Relu': + if i.op == "Leaky_Relu": found_conv_fusion = False break self.assertEqual(found_conv_fusion, True) @@ -544,23 +567,24 @@ def test_conv_biasadd_leakyrelu_fusion(self): def test_depthwiseconv_biasadd_relu6_fusion(self): logging.getLogger().info("test_depthwiseconv_biasadd_relu6_fusion") x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.compat.v1.nn.depthwise_conv2d_native(x, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) - relu6 = tf.nn.relu6(normed, name='op_to_store') + relu6 = tf.nn.relu6(normed, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -568,7 +592,7 @@ def test_depthwiseconv_biasadd_relu6_fusion(self): found_conv_fusion = True for i in output_graph.graph_def.node: - if i.op == 'Relu6': + if i.op == "Relu6": found_conv_fusion = False break self.assertEqual(found_conv_fusion, True) @@ -577,23 +601,24 @@ def test_depthwiseconv_biasadd_relu6_fusion(self): def test_depthwiseconv_biasadd_relu_fusion(self): logging.getLogger().info("test_depthwiseconv_biasadd_relu_fusion") x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.compat.v1.nn.depthwise_conv2d_native(x, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) - 
relu6 = tf.nn.relu6(normed, name='op_to_store') + relu6 = tf.nn.relu6(normed, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -601,7 +626,7 @@ def test_depthwiseconv_biasadd_relu_fusion(self): found_conv_fusion = True for i in output_graph.graph_def.node: - if i.op == 'Relu': + if i.op == "Relu": found_conv_fusion = False break self.assertEqual(found_conv_fusion, True) @@ -613,27 +638,30 @@ def test_conv_single_fusion(self): top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv1_weights = tf.compat.v1.get_variable("weight_conv1", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv1_weights = tf.compat.v1.get_variable( + "weight_conv1", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv1 = tf.nn.conv2d(x_pad, conv1_weights, strides=[1, 2, 2, 1], padding="VALID") - matmul_weights = tf.compat.v1.get_variable("weight_matmul", [1, 28, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + matmul_weights = tf.compat.v1.get_variable( + "weight_matmul", [1, 28, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) matmul = tf.matmul(conv1, matmul_weights) - conv2_weights = tf.compat.v1.get_variable("weight_conv2", [7, 7, 32, 1], - initializer=tf.compat.v1.random_normal_initializer()) + conv2_weights = tf.compat.v1.get_variable( + "weight_conv2", [7, 7, 32, 1], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(matmul, conv2_weights, strides=[1, 2, 2, 1], padding="VALID") - leaky_relu = tf.nn.leaky_relu(conv2, name='op_to_store') + leaky_relu = tf.nn.leaky_relu(conv2, name="op_to_store") - out_name = leaky_relu.name.split(':')[0] + out_name = leaky_relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -642,8 +670,8 @@ def test_conv_single_fusion(self): find_single_qconv = [] for i in output_graph.graph_def.node: # BatchMatMul Quantization disabled - if i.op == '_FusedQuantizedConv2D': - 
find_single_qconv.append(i.attr['fused_ops'].list.s == [b'Requantize']) + if i.op == "_FusedQuantizedConv2D": + find_single_qconv.append(i.attr["fused_ops"].list.s == [b"Requantize"]) self.assertEqual(find_single_qconv, [False, False]) @@ -654,8 +682,9 @@ def test_conv_fusion_with_last_matmul(self): top_relu = tf.nn.relu(x) # paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) # x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(top_relu, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) @@ -666,25 +695,25 @@ def test_conv_fusion_with_last_matmul(self): y_data = np.random.random([3136, 1]) y = tf.constant(y_data, dtype=tf.float32, shape=[3136, 1]) - z = tf.raw_ops.MatMul(a=reshape, b=y, name='matmul_1') + z = tf.raw_ops.MatMul(a=reshape, b=y, name="matmul_1") relu1 = tf.nn.relu(z) y_data_1 = np.random.random([1, 1]) y_1 = tf.constant(y_data_1, dtype=tf.float32, shape=[1, 1]) - z_2nd_matmul = tf.raw_ops.MatMul(a=relu1, b=y_1, name='matmul_2') - relu6 = tf.nn.relu6(z_2nd_matmul, name='op_to_store') + z_2nd_matmul = tf.raw_ops.MatMul(a=relu1, b=y_1, name="matmul_2") + relu6 = tf.nn.relu6(z_2nd_matmul, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -692,7 +721,7 @@ def test_conv_fusion_with_last_matmul(self): quantize_v2_count = 0 for i in output_graph.graph_def.node: - if i.op == 'QuantizeV2': + if i.op == "QuantizeV2": quantize_v2_count += 1 break @@ -705,33 +734,35 @@ def test_conv2d_add_const_leakyrelu_add_fusion(self): paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(x, paddings, "CONSTANT") top_relu = tf.nn.relu(x_pad) - conv2d_1_weights = tf.compat.v1.get_variable("weight1", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv2d_1_weights = tf.compat.v1.get_variable( + "weight1", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2d_1 = tf.nn.conv2d(top_relu, conv2d_1_weights, strides=[1, 2, 2, 1], padding="SAME") y_const = tf.constant(np.random.randn(16), dtype=tf.float32) - add_1 = tf.raw_ops.AddV2(x=conv2d_1, y=y_const, name='addv2_1') + add_1 = tf.raw_ops.AddV2(x=conv2d_1, y=y_const, name="addv2_1") relu = tf.nn.leaky_relu(add_1) - conv2d_2_weights = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv2d_2_weights = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], 
initializer=tf.compat.v1.random_normal_initializer() + ) conv2d_2 = tf.nn.conv2d(top_relu, conv2d_2_weights, strides=[1, 2, 2, 1], padding="SAME") - add_2 = tf.raw_ops.AddV2(x=relu, y=conv2d_2, name='addv2_2') - out_name = add_2.name.split(':')[0] + add_2 = tf.raw_ops.AddV2(x=relu, y=conv2d_2, name="addv2_2") + out_name = add_2.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def output_graph = quantizer.fit() found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv2D': + if i.op == "_FusedQuantizedConv2D": found_conv_fusion = True self.assertEqual(found_conv_fusion, True) @@ -739,23 +770,24 @@ def test_conv2d_add_const_leakyrelu_add_fusion(self): def test_depthwiseconv_biasadd_leakyrelu_fusion(self): logging.getLogger().info("test_depthwiseconv_biasadd_leakyrelu_fusion") x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.compat.v1.nn.depthwise_conv2d_native(x, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) - leaky_relu = tf.nn.leaky_relu(normed, name='op_to_store') + leaky_relu = tf.nn.leaky_relu(normed, name="op_to_store") - out_name = leaky_relu.name.split(':')[0] + out_name = leaky_relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -763,7 +795,7 @@ def test_depthwiseconv_biasadd_leakyrelu_fusion(self): found_conv_fusion = True for i in output_graph.graph_def.node: - if i.op == 'Relu': + if i.op == "Relu": found_conv_fusion = False break self.assertEqual(found_conv_fusion, True) @@ -775,32 +807,34 @@ def test_conv_biasadd_addv2_relu_fallback_fusion_2(self): top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - 
initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) # relu = tf.nn.relu(normed) - conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights2 = tf.compat.v1.get_variable( + "weight2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(top_relu, conv_weights2, strides=[1, 2, 2, 1], padding="SAME") normed2 = tf.compat.v1.layers.batch_normalization(conv2) # relu2 = tf.nn.relu(normed2) - add = tf.raw_ops.AddV2(x=normed, y=normed2, name='addv2') + add = tf.raw_ops.AddV2(x=normed, y=normed2, name="addv2") relu = tf.nn.relu(add) - relu6 = tf.nn.relu6(relu, name='op_to_store') + relu6 = tf.nn.relu6(relu, name="op_to_store") - out_name = relu6.name.split(':')[0] + out_name = relu6.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -809,8 +843,7 @@ def test_conv_biasadd_addv2_relu_fallback_fusion_2(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv2D' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'Requantize']: + if i.op == "_FusedQuantizedConv2D" and i.attr["fused_ops"].list.s == [b"BiasAdd", b"Requantize"]: found_conv_fusion = True break @@ -820,23 +853,24 @@ def test_conv_biasadd_addv2_relu_fallback_fusion_2(self): @disable_random() def test_conv_biasadd_elu_fusion(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) - elu = tf.nn.elu(normed, name='op_to_store') + elu = tf.nn.elu(normed, name="op_to_store") - out_name = elu.name.split(':')[0] + out_name = elu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = 
quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -844,7 +878,7 @@ def test_conv_biasadd_elu_fusion(self): found_conv_fusion = True for i in output_graph.graph_def.node: - if i.op == 'Elu': + if i.op == "Elu": found_conv_fusion = False break self.assertEqual(found_conv_fusion, True) @@ -853,23 +887,24 @@ def test_conv_biasadd_elu_fusion(self): @disable_random() def test_conv_biasadd_sigmoid_fusion(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x, conv_weights, strides=[1, 2, 2, 1], padding="VALID") normed = tf.compat.v1.layers.batch_normalization(conv) - sigmoid = tf.math.sigmoid(normed, name='op_to_store') + sigmoid = tf.math.sigmoid(normed, name="op_to_store") - out_name = sigmoid.name.split(':')[0] + out_name = sigmoid.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -877,10 +912,11 @@ def test_conv_biasadd_sigmoid_fusion(self): found_conv_fusion = True for i in output_graph.graph_def.node: - if i.op == 'Sigmoid': + if i.op == "Sigmoid": found_conv_fusion = False break self.assertEqual(found_conv_fusion, True) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/test/tfnewapi/test_tensorflow_graph_qdq_depthwiseconv_fusion.py b/test/tfnewapi/test_tensorflow_graph_qdq_depthwiseconv_fusion.py index d94656599a4..32f1650068e 100644 --- a/test/tfnewapi/test_tensorflow_graph_qdq_depthwiseconv_fusion.py +++ b/test/tfnewapi/test_tensorflow_graph_qdq_depthwiseconv_fusion.py @@ -1,23 +1,21 @@ # # -*- coding: utf-8 -*- # -import unittest import os -import yaml +import unittest + import numpy as np import tensorflow as tf -from tensorflow.core.framework import attr_value_pb2 -from tensorflow.core.framework import graph_pb2 -from tensorflow.core.framework import node_def_pb2 -from tensorflow.python.framework import tensor_util -from tensorflow.python.framework import dtypes +import yaml from tensorflow.compat.v1 import graph_util +from tensorflow.core.framework import attr_value_pb2, graph_pb2, node_def_pb2 +from tensorflow.python.framework import dtypes, tensor_util from neural_compressor.adaptor.tf_utils.util import disable_random def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -43,112 +41,130 @@ def build_fake_yaml(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) 
- with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() + def build_conv2d_biasadd_add_relu6_mul_mul(): input_node = node_def_pb2.NodeDef() input_node.name = "input" input_node.op = "Placeholder" - input_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + input_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv1_weight_node = node_def_pb2.NodeDef() conv1_weight_node.name = "conv1_weights" conv1_weight_node.op = "Const" - conv1_weight_value = np.float32(np.abs(np.random.randn(3,3,3,32))) - conv1_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - conv1_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - conv1_weight_value, conv1_weight_value.dtype.type, conv1_weight_value.shape))) + conv1_weight_value = np.float32(np.abs(np.random.randn(3, 3, 3, 32))) + conv1_weight_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv1_weight_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv1_weight_value, conv1_weight_value.dtype.type, conv1_weight_value.shape + ) + ) + ) conv1_node = node_def_pb2.NodeDef() conv1_node.name = "conv1" conv1_node.op = "Conv2D" - conv1_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( - type=dtypes.float32.as_datatype_enum)) + conv1_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) conv1_node.input.extend([input_node.name, conv1_weight_node.name]) - conv1_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv1_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( - list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) - conv1_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) - conv1_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + conv1_node.attr["strides"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv1_node.attr["dilations"].CopyFrom( + attr_value_pb2.AttrValue(list=attr_value_pb2.AttrValue.ListValue(i=[1, 1, 1, 1])) + ) + conv1_node.attr["padding"].CopyFrom(attr_value_pb2.AttrValue(s=b"SAME")) + conv1_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) bias_node = node_def_pb2.NodeDef() bias_node.name = "conv1_bias" bias_node.op = "Const" bias_value = np.float32(np.abs(np.random.randn(32))) - bias_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - bias_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - bias_value, bias_value.dtype.type, bias_value.shape))) + bias_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(bias_value, bias_value.dtype.type, bias_value.shape) + ) + ) bias_add_node = node_def_pb2.NodeDef() bias_add_node.name = "conv1_bias_add" bias_add_node.op = "BiasAdd" - bias_add_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - bias_add_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) - bias_add_node.input.extend([conv1_node.name, bias_node.name]) - + 
bias_add_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_add_node.attr["data_format"].CopyFrom(attr_value_pb2.AttrValue(s=b"NHWC")) + bias_add_node.input.extend([conv1_node.name, bias_node.name]) + offset_node = node_def_pb2.NodeDef() offset_node.name = "offset" offset_node.op = "Const" offset_value = np.float32(np.abs(np.random.randn(1))) - offset_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - offset_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - offset_value, offset_value.dtype.type, offset_value.shape))) - + offset_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + offset_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(offset_value, offset_value.dtype.type, offset_value.shape) + ) + ) + add_node = node_def_pb2.NodeDef() add_node.op = "Add" add_node.name = "add/hard_swish" - add_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - add_node.input.extend([bias_add_node.name, offset_node.name]) - + add_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + add_node.input.extend([bias_add_node.name, offset_node.name]) + relu_node = node_def_pb2.NodeDef() relu_node.op = "Relu6" relu_node.name = "relu6/hard_swish" - relu_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - relu_node.input.extend([add_node.name]) - + relu_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + relu_node.input.extend([add_node.name]) + mul_node = node_def_pb2.NodeDef() mul_node.op = "Mul" mul_node.name = "mul/hard_swish" - mul_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - mul_node.input.extend([bias_add_node.name, relu_node.name]) - + mul_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + mul_node.input.extend([bias_add_node.name, relu_node.name]) + offset1_node = node_def_pb2.NodeDef() offset1_node.name = "mul1_offset" offset1_node.op = "Const" offset1_value = np.float32(np.abs(np.random.randn(1))) - offset1_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - offset1_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( - offset1_value, offset1_value.dtype.type, offset1_value.shape))) - + offset1_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + offset1_node.attr["value"].CopyFrom( + attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto(offset1_value, offset1_value.dtype.type, offset1_value.shape) + ) + ) + mul1_node = node_def_pb2.NodeDef() mul1_node.op = "Mul" mul1_node.name = "mul1/hard_swish" - mul1_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) - mul1_node.input.extend([mul_node.name,offset1_node.name]) - + mul1_node.attr["T"].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + mul1_node.input.extend([mul_node.name, offset1_node.name]) + test_graph = graph_pb2.GraphDef() - test_graph.node.extend([input_node, - conv1_weight_node, - conv1_node, - bias_node, - bias_add_node, - add_node, - relu_node, - offset_node, - offset1_node, - mul_node, - mul1_node - ]) + test_graph.node.extend( + [ + input_node, + conv1_weight_node, + conv1_node, + bias_node, + bias_add_node, + 
add_node, + relu_node, + offset_node, + offset1_node, + mul_node, + mul1_node, + ] + ) return test_graph - + + class TestConv2DBiasAddAddReluFusion(unittest.TestCase): @classmethod def setUpClass(self): @@ -156,26 +172,27 @@ def setUpClass(self): @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") @disable_random() def test_single_depthwiseconv2d_fusion(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.depthwise_conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="VALID") - out_name = conv.name.split(':')[0] + out_name = conv.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -184,9 +201,9 @@ def test_single_depthwiseconv2d_fusion(self): found_dequantize_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedDepthwiseConv2D': + if i.op == "_FusedQuantizedDepthwiseConv2D": found_conv_fusion = True - if str(i.attr['fused_ops'].list.s) == str([b'Dequantize']): + if str(i.attr["fused_ops"].list.s) == str([b"Dequantize"]): found_dequantize_fusion = True self.assertEqual(found_conv_fusion, True) self.assertEqual(found_dequantize_fusion, True) @@ -194,23 +211,24 @@ def test_single_depthwiseconv2d_fusion(self): @disable_random() def test_depthwiseconv2d_biasadd_fusion(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.depthwise_conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="VALID") - normed = tf.compat.v1.layers.batch_normalization(conv, name='op_to_store') - out_name = normed.name.split(':')[0] + normed = tf.compat.v1.layers.batch_normalization(conv, name="op_to_store") + out_name = normed.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) 
quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -219,9 +237,9 @@ def test_depthwiseconv2d_biasadd_fusion(self): found_dequantize_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedDepthwiseConv2D': + if i.op == "_FusedQuantizedDepthwiseConv2D": found_conv_fusion = True - if str(i.attr['fused_ops'].list.s) == str([b'BiasAdd', b'Dequantize']): + if str(i.attr["fused_ops"].list.s) == str([b"BiasAdd", b"Dequantize"]): found_dequantize_fusion = True self.assertEqual(found_conv_fusion, True) self.assertEqual(found_dequantize_fusion, True) @@ -231,8 +249,9 @@ def test_depthwiseconv2dnative_biasadd_add_relu6_mul_mul_fusion(self): output_graph_def = build_conv2d_biasadd_add_relu6_mul_mul() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 224, 224, 3), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 224, 224, 3), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -241,7 +260,7 @@ def test_depthwiseconv2dnative_biasadd_add_relu6_mul_mul_fusion(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedConv2D': + if i.op == "_FusedQuantizedConv2D": found_conv_fusion = True break self.assertEqual(found_conv_fusion, True) @@ -249,25 +268,26 @@ def test_depthwiseconv2dnative_biasadd_add_relu6_mul_mul_fusion(self): @disable_random() def test_depthwiseconv2d_biasadd_leakyrelu_fusion(self): x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.depthwise_conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="VALID") - normed = tf.compat.v1.layers.batch_normalization(conv, name='op_to_store') - + normed = tf.compat.v1.layers.batch_normalization(conv, name="op_to_store") + leakyrelu = tf.nn.leaky_relu(normed) - out_name = leakyrelu.name.split(':')[0] + out_name = leakyrelu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -275,11 +295,12 @@ def test_depthwiseconv2d_biasadd_leakyrelu_fusion(self): found_conv_fusion = False for i in output_graph.graph_def.node: - if i.op == '_FusedQuantizedDepthwiseConv2D': + if i.op == "_FusedQuantizedDepthwiseConv2D": found_conv_fusion = True break self.assertEqual(found_conv_fusion, True) - + + if __name__ == "__main__": 
unittest.main() diff --git a/test/tfnewapi/test_tensorflow_graph_qdq_matmul_fusion.py b/test/tfnewapi/test_tensorflow_graph_qdq_matmul_fusion.py index 165061742e6..0040c4d53a5 100644 --- a/test/tfnewapi/test_tensorflow_graph_qdq_matmul_fusion.py +++ b/test/tfnewapi/test_tensorflow_graph_qdq_matmul_fusion.py @@ -3,15 +3,18 @@ # import os import unittest -import yaml + import numpy as np import tensorflow.compat.v1 as tf +import yaml from tensorflow.python.framework import dtypes + from neural_compressor.adaptor.tensorflow import TensorflowQuery from neural_compressor.adaptor.tf_utils.util import disable_random + def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -38,9 +41,9 @@ def build_fake_yaml(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() @@ -49,12 +52,13 @@ class TestGraphMatMulFusion(unittest.TestCase): @classmethod def setUpClass(self): build_fake_yaml() - self.op_wise_sequences = TensorflowQuery(local_config_file=os.path.join( - os.path.dirname(__file__), "../../neural_compressor/adaptor/tensorflow.yaml")).get_eightbit_patterns(True) + self.op_wise_sequences = TensorflowQuery( + local_config_file=os.path.join(os.path.dirname(__file__), "../../neural_compressor/adaptor/tensorflow.yaml") + ).get_eightbit_patterns(True) @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") @disable_random() def test_matmul_biasadd_relu_requantize_fusion(self): @@ -62,27 +66,31 @@ def test_matmul_biasadd_relu_requantize_fusion(self): with g.as_default(): x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) z = tf.matmul(x, y) z = tf.nn.bias_add(z, [1, 2]) - z = tf.nn.relu(z, name='op_to_store') + z = tf.nn.relu(z, name="op_to_store") found_quantized_matmul = False with tf.Session() as sess: sess.run(z, feed_dict={x: x_data, y: y_data}) float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == '_QuantizedMatMul' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'Relu', b'Dequantize']: + if i.op == "_QuantizedMatMul" and i.attr["fused_ops"].list.s == [ + b"BiasAdd", + b"Relu", + b"Dequantize", + ]: found_quantized_matmul = True break self.assertEqual(found_quantized_matmul, True) @@ -91,19 +99,20 @@ def test_matmul_biasadd_relu_requantize_fusion(self): def test_first_matmul_biasadd_relu_fusion(self): x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) z = 
tf.matmul(x, y) z = tf.nn.bias_add(z, [1, 2]) - z = tf.nn.relu(z, name='op_to_store') + z = tf.nn.relu(z, name="op_to_store") with tf.Session() as sess: sess.run(z, feed_dict={x: x_data, y: y_data}) float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def @@ -111,73 +120,79 @@ def test_first_matmul_biasadd_relu_fusion(self): found_quantized_matmul = False for i in output_graph.graph_def.node: - if i.op == 'QuantizeV2' and i.name == 'MatMul_eightbit_quantize_x' and i.attr["T"].type == dtypes.quint8: + if ( + i.op == "QuantizeV2" + and i.name == "MatMul_eightbit_quantize_x" + and i.attr["T"].type == dtypes.quint8 + ): found_quantized_matmul = True break - + self.assertEqual(found_quantized_matmul, True) @disable_random() def test_matmul_biasadd_requantize_dequantize_fusion(self): g = tf.Graph() with g.as_default(): - x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) z = tf.matmul(x, y) z = tf.nn.bias_add(z, [1, 2]) - z = tf.identity(z, name='op_to_store') + z = tf.identity(z, name="op_to_store") found_quantized_matmul = False - + with tf.Session() as sess: sess.run(z, feed_dict={x: x_data, y: y_data}) float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == '_QuantizedMatMul' and \ - i.attr["fused_ops"].list.s == [b'BiasAdd', b'Dequantize']: + if i.op == "_QuantizedMatMul" and i.attr["fused_ops"].list.s == [b"BiasAdd", b"Dequantize"]: found_quantized_matmul = True break - + self.assertEqual(found_quantized_matmul, True) @disable_random() def test_matmul_biasadd_requantize_dequantize_last_fusion(self): g = tf.Graph() with g.as_default(): - x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) z = tf.matmul(x, y) - z = tf.nn.bias_add(z, [1, 2], name='op_to_store') + z = tf.nn.bias_add(z, [1, 2], name="op_to_store") found_quantized_matmul = False with tf.Session() as sess: sess.run(z, feed_dict={x: x_data, y: y_data}) float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", 
shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == '_QuantizedMatMul' and i.name == 'op_to_store' and \ - i.attr["fused_ops"].list.s == [b'BiasAdd', b'Dequantize']: + if ( + i.op == "_QuantizedMatMul" + and i.name == "op_to_store" + and i.attr["fused_ops"].list.s == [b"BiasAdd", b"Dequantize"] + ): found_quantized_matmul = True break self.assertEqual(found_quantized_matmul, True) @@ -186,13 +201,12 @@ def test_matmul_biasadd_requantize_dequantize_last_fusion(self): def test_matmul_fusion_with_transpose_b_true(self): g = tf.Graph() with g.as_default(): - x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) - z = tf.matmul(x, y, name='no_quant_matmul', transpose_b=True) - z = tf.nn.relu6(z, name='op_to_store') + z = tf.matmul(x, y, name="no_quant_matmul", transpose_b=True) + z = tf.nn.relu6(z, name="op_to_store") found_quantized_matmul = False with tf.Session() as sess: @@ -200,30 +214,30 @@ def test_matmul_fusion_with_transpose_b_true(self): float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == '_QuantizedMatMul': + if i.op == "_QuantizedMatMul": found_quantized_matmul = True break self.assertEqual(found_quantized_matmul, True) - + @disable_random() def test_matmul_dummybiasadd_relu6_fusion(self): g = tf.Graph() with g.as_default(): - x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) - z = tf.matmul(x, y, name='quant_matmul') - z = tf.nn.relu6(z, name='op_to_store') + z = tf.matmul(x, y, name="quant_matmul") + z = tf.nn.relu6(z, name="op_to_store") found_quantized_matmul = False with tf.Session() as sess: @@ -231,15 +245,16 @@ def test_matmul_dummybiasadd_relu6_fusion(self): float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == '_QuantizedMatMul' and i.name == 'op_to_store': + if i.op == "_QuantizedMatMul" and i.name == "op_to_store": found_quantized_matmul = True break 
self.assertEqual(found_quantized_matmul, True) @@ -248,15 +263,14 @@ def test_matmul_dummybiasadd_relu6_fusion(self): def test_matmul_with_reshape_transpose(self): g = tf.Graph() with g.as_default(): - x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) transpose = tf.transpose(y, perm=[1, 0]) reshape = tf.reshape(transpose, [2, 2]) - z = tf.matmul(x, reshape, name='no_quant_matmul') - z = tf.nn.bias_add(z, [1, 2], name='op_to_store') + z = tf.matmul(x, reshape, name="no_quant_matmul") + z = tf.nn.bias_add(z, [1, 2], name="op_to_store") found_quantized_matmul = True with tf.Session() as sess: @@ -264,14 +278,15 @@ def test_matmul_with_reshape_transpose(self): float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == 'MatMul': + if i.op == "MatMul": found_quantized_matmul = False break self.assertEqual(found_quantized_matmul, True) @@ -282,12 +297,12 @@ def test_matmul_with_add(self): with g.as_default(): x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) transpose = tf.transpose(y, perm=[1, 0]) reshape = tf.reshape(transpose, [2, 2]) - z = tf.matmul(x, reshape, name='no_quant_matmul') - z = tf.math.add(z, [1, 2], name='op_to_store') + z = tf.matmul(x, reshape, name="no_quant_matmul") + z = tf.math.add(z, [1, 2], name="op_to_store") found_quantized_matmul = True with tf.Session() as sess: @@ -295,14 +310,15 @@ def test_matmul_with_add(self): float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == 'MatMul': + if i.op == "MatMul": found_quantized_matmul = False break self.assertEqual(found_quantized_matmul, True) @@ -311,10 +327,9 @@ def test_matmul_with_add(self): def test_matmul_biasadd_requantize_dequantize_fusion_with_softmax(self): g = tf.Graph() with g.as_default(): - x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) z = tf.matmul(x, y) biasadd = tf.nn.bias_add(z, [1, 2]) @@ -325,7 +340,7 @@ def 
test_matmul_biasadd_requantize_dequantize_fusion_with_softmax(self): biasadd2 = tf.nn.bias_add(matmul1, [1, 1]) - z = tf.nn.softmax(biasadd2, name='op_to_store') + z = tf.nn.softmax(biasadd2, name="op_to_store") found_quantized_matmul = False if tf.version.VERSION < "2.2.0": found_quantized_matmul = False @@ -335,16 +350,17 @@ def test_matmul_biasadd_requantize_dequantize_fusion_with_softmax(self): float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() - count=0 + count = 0 for i in output_graph.model.as_graph_def().node: - if i.op == '_QuantizedMatMul': + if i.op == "_QuantizedMatMul": count += 1 found_quantized_matmul = bool(count > 1) self.assertEqual(found_quantized_matmul, False) @@ -352,10 +368,9 @@ def test_matmul_biasadd_requantize_dequantize_fusion_with_softmax(self): def test_matmul_biasadd_relu_non_const_weight(self): g = tf.Graph() with g.as_default(): - x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') - y = tf.matmul(x, x, name='quant_matmul_non_const_weight') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") + y = tf.matmul(x, x, name="quant_matmul_non_const_weight") biasadd = tf.nn.bias_add(y, [1, 2]) z = tf.nn.relu(biasadd) found_quantized_matmul = True @@ -365,15 +380,16 @@ def test_matmul_biasadd_relu_non_const_weight(self): float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == 'MatMul': + if i.op == "MatMul": found_quantized_matmul = False break self.assertEqual(found_quantized_matmul, True) @@ -381,10 +397,9 @@ def test_matmul_biasadd_relu_non_const_weight(self): def test_matmul_biasadd_non_const_weight(self): g = tf.Graph() with g.as_default(): - x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') - y = tf.matmul(x, x, name='quant_matmul_non_const_weight') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") + y = tf.matmul(x, x, name="quant_matmul_non_const_weight") z = tf.nn.bias_add(y, [1, 2]) found_quantized_matmul = True @@ -393,15 +408,16 @@ def test_matmul_biasadd_non_const_weight(self): float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) 
quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == 'MatMul': + if i.op == "MatMul": found_quantized_matmul = False break self.assertEqual(found_quantized_matmul, True) @@ -410,13 +426,12 @@ def test_matmul_biasadd_non_const_weight(self): def test_matmul_with_dummy_biasadd(self): g = tf.Graph() with g.as_default(): - x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) - z = tf.matmul(x, y, name='no_quant_matmul') - z = tf.identity(z, name='op_to_store') + z = tf.matmul(x, y, name="no_quant_matmul") + z = tf.identity(z, name="op_to_store") found_quantized_matmul = True with tf.Session() as sess: @@ -424,15 +439,16 @@ def test_matmul_with_dummy_biasadd(self): float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == 'MatMul': + if i.op == "MatMul": found_quantized_matmul = False break self.assertEqual(found_quantized_matmul, True) @@ -441,22 +457,22 @@ def test_matmul_with_dummy_biasadd(self): def test_first_matmul_addv2_relu_fusion(self): x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) a = tf.matmul(x, y) b = tf.matmul(x, y) c = tf.nn.relu(b) - add = tf.raw_ops.AddV2(x=a, y=c, name='addv2') - z = tf.nn.relu(add, name='op_to_store') + add = tf.raw_ops.AddV2(x=a, y=c, name="addv2") + z = tf.nn.relu(add, name="op_to_store") with tf.Session() as sess: - sess.run(z, feed_dict={x: x_data, y: y_data}) float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def @@ -464,10 +480,10 @@ def test_first_matmul_addv2_relu_fusion(self): found_quantized_matmul = False for i in output_graph.graph_def.node: - if i.op == '_QuantizedMatMul': + if i.op == "_QuantizedMatMul": found_quantized_matmul = True break - + self.assertEqual(found_quantized_matmul, True) # batchmatmul quantization disabled temporarily for its bad performance @@ -602,27 +618,31 @@ def test_matmul_biasadd_relu6_fusion(self): with g.as_default(): x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, 
shape=[2, 2]) z = tf.matmul(x, y) z = tf.nn.bias_add(z, [1, 2]) - z = tf.nn.relu6(z, name='op_to_store') + z = tf.nn.relu6(z, name="op_to_store") found_quantized_matmul = False with tf.Session() as sess: sess.run(z, feed_dict={x: x_data, y: y_data}) float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == '_QuantizedMatMul' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'Relu6', b'Dequantize']: + if i.op == "_QuantizedMatMul" and i.attr["fused_ops"].list.s == [ + b"BiasAdd", + b"Relu6", + b"Dequantize", + ]: found_quantized_matmul = True break self.assertEqual(found_quantized_matmul, True) @@ -633,58 +653,66 @@ def test_matmul_biasadd_leakyrelu_fusion(self): with g.as_default(): x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) z = tf.matmul(x, y) z = tf.nn.bias_add(z, [1, 2]) - z = tf.nn.leaky_relu(z, name='op_to_store') + z = tf.nn.leaky_relu(z, name="op_to_store") found_quantized_matmul = False with tf.Session() as sess: sess.run(z, feed_dict={x: x_data, y: y_data}) float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == '_QuantizedMatMul' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'LeakyRelu', b'Dequantize']: + if i.op == "_QuantizedMatMul" and i.attr["fused_ops"].list.s == [ + b"BiasAdd", + b"LeakyRelu", + b"Dequantize", + ]: found_quantized_matmul = True break self.assertEqual(found_quantized_matmul, True) - + @disable_random() def test_matmul_biasadd_geluapproximate_fusion(self): g = tf.Graph() with g.as_default(): x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) z = tf.matmul(x, y) z = tf.nn.bias_add(z, [1, 2]) - z = tf.nn.gelu(z, approximate=True, name='op_to_store') + z = tf.nn.gelu(z, approximate=True, name="op_to_store") found_quantized_matmul = False with tf.Session() as sess: sess.run(z, feed_dict={x: x_data, y: y_data}) float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = 
quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == '_QuantizedMatMul' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'GeluApproximate', b'Dequantize']: + if i.op == "_QuantizedMatMul" and i.attr["fused_ops"].list.s == [ + b"BiasAdd", + b"GeluApproximate", + b"Dequantize", + ]: found_quantized_matmul = True break self.assertEqual(found_quantized_matmul, True) @@ -695,27 +723,31 @@ def test_matmul_biasadd_geluexact_fusion(self): with g.as_default(): x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) z = tf.matmul(x, y) z = tf.nn.bias_add(z, [1, 2]) - z = tf.nn.gelu(z, name='op_to_store') + z = tf.nn.gelu(z, name="op_to_store") found_quantized_matmul = False with tf.Session() as sess: sess.run(z, feed_dict={x: x_data, y: y_data}) float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == '_QuantizedMatMul' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'GeluExact', b'Dequantize']: + if i.op == "_QuantizedMatMul" and i.attr["fused_ops"].list.s == [ + b"BiasAdd", + b"GeluExact", + b"Dequantize", + ]: found_quantized_matmul = True break self.assertEqual(found_quantized_matmul, True) @@ -726,27 +758,27 @@ def test_matmul_biasadd_elu_fusion(self): with g.as_default(): x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) z = tf.matmul(x, y) z = tf.nn.bias_add(z, [1, 2]) - z = tf.nn.elu(z, name='op_to_store') + z = tf.nn.elu(z, name="op_to_store") found_quantized_matmul = False with tf.Session() as sess: sess.run(z, feed_dict={x: x_data, y: y_data}) float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == '_QuantizedMatMul' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'Elu', b'Dequantize']: + if i.op == "_QuantizedMatMul" and i.attr["fused_ops"].list.s == [b"BiasAdd", b"Elu", b"Dequantize"]: found_quantized_matmul = True break self.assertEqual(found_quantized_matmul, True) 
@@ -757,27 +789,31 @@ def test_matmul_biasadd_tanh_fusion(self): with g.as_default(): x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) z = tf.matmul(x, y) z = tf.nn.bias_add(z, [1, 2]) - z = tf.math.tanh(z, name='op_to_store') + z = tf.math.tanh(z, name="op_to_store") found_quantized_matmul = False with tf.Session() as sess: sess.run(z, feed_dict={x: x_data, y: y_data}) float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == '_QuantizedMatMul' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'Tanh', b'Dequantize']: + if i.op == "_QuantizedMatMul" and i.attr["fused_ops"].list.s == [ + b"BiasAdd", + b"Tanh", + b"Dequantize", + ]: found_quantized_matmul = True break self.assertEqual(found_quantized_matmul, True) @@ -788,27 +824,31 @@ def test_matmul_biasadd_sigmoid_fusion(self): with g.as_default(): x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) z = tf.matmul(x, y) z = tf.nn.bias_add(z, [1, 2]) - z = tf.math.sigmoid(z, name='op_to_store') + z = tf.math.sigmoid(z, name="op_to_store") found_quantized_matmul = False with tf.Session() as sess: sess.run(z, feed_dict={x: x_data, y: y_data}) float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == '_QuantizedMatMul' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'Sigmoid', b'Dequantize']: + if i.op == "_QuantizedMatMul" and i.attr["fused_ops"].list.s == [ + b"BiasAdd", + b"Sigmoid", + b"Dequantize", + ]: found_quantized_matmul = True break self.assertEqual(found_quantized_matmul, True) @@ -819,10 +859,10 @@ def test_matmul_dummy_biasadd_relu_fusion(self): with g.as_default(): x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) - z = tf.matmul(x, y, name='quant_matmul') - z = tf.nn.relu(z, name='op_to_store') + z = tf.matmul(x, y, name="quant_matmul") + z = tf.nn.relu(z, name="op_to_store") found_quantized_matmul = False with tf.Session() as sess: @@ -830,16 
+870,20 @@ def test_matmul_dummy_biasadd_relu_fusion(self): float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == '_QuantizedMatMul' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'Relu', b'Dequantize']: + if i.op == "_QuantizedMatMul" and i.attr["fused_ops"].list.s == [ + b"BiasAdd", + b"Relu", + b"Dequantize", + ]: found_quantized_matmul = True break self.assertEqual(found_quantized_matmul, True) @@ -850,26 +894,30 @@ def test_matmul_dummy_biasadd_relu6_fusion(self): with g.as_default(): x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) z = tf.matmul(x, y) - z = tf.nn.relu6(z, name='op_to_store') + z = tf.nn.relu6(z, name="op_to_store") found_quantized_matmul = False with tf.Session() as sess: sess.run(z, feed_dict={x: x_data, y: y_data}) float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == '_QuantizedMatMul' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'Relu6', b'Dequantize']: + if i.op == "_QuantizedMatMul" and i.attr["fused_ops"].list.s == [ + b"BiasAdd", + b"Relu6", + b"Dequantize", + ]: found_quantized_matmul = True break self.assertEqual(found_quantized_matmul, True) @@ -880,26 +928,30 @@ def test_matmul_dummy_biasadd_leakyrelu_fusion(self): with g.as_default(): x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) z = tf.matmul(x, y) - z = tf.nn.leaky_relu(z, name='op_to_store') + z = tf.nn.leaky_relu(z, name="op_to_store") found_quantized_matmul = False with tf.Session() as sess: sess.run(z, feed_dict={x: x_data, y: y_data}) float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == 
'_QuantizedMatMul' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'LeakyRelu', b'Dequantize']: + if i.op == "_QuantizedMatMul" and i.attr["fused_ops"].list.s == [ + b"BiasAdd", + b"LeakyRelu", + b"Dequantize", + ]: found_quantized_matmul = True break self.assertEqual(found_quantized_matmul, True) @@ -910,26 +962,30 @@ def test_matmul_dummy_biasadd_geluapproximate_fusion(self): with g.as_default(): x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) z = tf.matmul(x, y) - z = tf.nn.gelu(z, approximate=True, name='op_to_store') + z = tf.nn.gelu(z, approximate=True, name="op_to_store") found_quantized_matmul = False with tf.Session() as sess: sess.run(z, feed_dict={x: x_data, y: y_data}) float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == '_QuantizedMatMul' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'GeluApproximate', b'Dequantize']: + if i.op == "_QuantizedMatMul" and i.attr["fused_ops"].list.s == [ + b"BiasAdd", + b"GeluApproximate", + b"Dequantize", + ]: found_quantized_matmul = True break self.assertEqual(found_quantized_matmul, True) @@ -940,26 +996,30 @@ def test_matmul_dummy_biasadd_geluexact_fusion(self): with g.as_default(): x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) z = tf.matmul(x, y) - z = tf.nn.gelu(z, approximate=False, name='op_to_store') + z = tf.nn.gelu(z, approximate=False, name="op_to_store") found_quantized_matmul = False with tf.Session() as sess: sess.run(z, feed_dict={x: x_data, y: y_data}) float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == '_QuantizedMatMul' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'GeluExact', b'Dequantize']: + if i.op == "_QuantizedMatMul" and i.attr["fused_ops"].list.s == [ + b"BiasAdd", + b"GeluExact", + b"Dequantize", + ]: found_quantized_matmul = True break self.assertEqual(found_quantized_matmul, True) @@ -970,26 +1030,26 @@ def test_matmul_dummy_biasadd_elu_fusion(self): with g.as_default(): x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], 
name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) z = tf.matmul(x, y) - z = tf.nn.elu(z, name='op_to_store') + z = tf.nn.elu(z, name="op_to_store") found_quantized_matmul = False with tf.Session() as sess: sess.run(z, feed_dict={x: x_data, y: y_data}) float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == '_QuantizedMatMul' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'Elu', b'Dequantize']: + if i.op == "_QuantizedMatMul" and i.attr["fused_ops"].list.s == [b"BiasAdd", b"Elu", b"Dequantize"]: found_quantized_matmul = True break self.assertEqual(found_quantized_matmul, True) @@ -1000,26 +1060,30 @@ def test_matmul_dummy_biasadd_tanh_fusion(self): with g.as_default(): x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) z = tf.matmul(x, y) - z = tf.math.tanh(z, name='op_to_store') + z = tf.math.tanh(z, name="op_to_store") found_quantized_matmul = False with tf.Session() as sess: sess.run(z, feed_dict={x: x_data, y: y_data}) float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == '_QuantizedMatMul' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'Tanh', b'Dequantize']: + if i.op == "_QuantizedMatMul" and i.attr["fused_ops"].list.s == [ + b"BiasAdd", + b"Tanh", + b"Dequantize", + ]: found_quantized_matmul = True break self.assertEqual(found_quantized_matmul, True) @@ -1030,26 +1094,30 @@ def test_matmul_dummy_biasadd_sigmoid_fusion(self): with g.as_default(): x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) z = tf.matmul(x, y) - z = tf.math.sigmoid(z, name='op_to_store') + z = tf.math.sigmoid(z, name="op_to_store") found_quantized_matmul = False with tf.Session() as sess: sess.run(z, feed_dict={x: x_data, y: y_data}) float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) 
quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == '_QuantizedMatMul' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'Sigmoid', b'Dequantize']: + if i.op == "_QuantizedMatMul" and i.attr["fused_ops"].list.s == [ + b"BiasAdd", + b"Sigmoid", + b"Dequantize", + ]: found_quantized_matmul = True break self.assertEqual(found_quantized_matmul, True) @@ -1060,12 +1128,12 @@ def test_matmul_add_const_fusion(self): with g.as_default(): x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) transpose = tf.transpose(y, perm=[1, 0]) reshape = tf.reshape(transpose, [2, 2]) - z = tf.matmul(x, reshape, name='quant_matmul') - z = tf.math.add(z, [1, 2], name='op_to_store') + z = tf.matmul(x, reshape, name="quant_matmul") + z = tf.math.add(z, [1, 2], name="op_to_store") found_quantized_matmul = False with tf.Session() as sess: @@ -1073,16 +1141,16 @@ def test_matmul_add_const_fusion(self): float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == '_QuantizedMatMul' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'Dequantize']: + if i.op == "_QuantizedMatMul" and i.attr["fused_ops"].list.s == [b"BiasAdd", b"Dequantize"]: found_quantized_matmul = True break self.assertEqual(found_quantized_matmul, True) @@ -1093,13 +1161,13 @@ def test_matmul_add_non_const_fusion(self): with g.as_default(): x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) transpose = tf.transpose(y, perm=[1, 0]) reshape = tf.reshape(transpose, [2, 2]) - z = tf.matmul(x, reshape, name='quant_matmul') - z = tf.math.add(z, x, name='addv2') - z = tf.nn.relu(z, name='op_to_store') + z = tf.matmul(x, reshape, name="quant_matmul") + z = tf.math.add(z, x, name="addv2") + z = tf.nn.relu(z, name="op_to_store") found_quantized_matmul = False with tf.Session() as sess: @@ -1107,16 +1175,16 @@ def test_matmul_add_non_const_fusion(self): float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op 
== '_QuantizedMatMul' and \ - i.attr['fused_ops'].list.s == [b'Dequantize']: + if i.op == "_QuantizedMatMul" and i.attr["fused_ops"].list.s == [b"Dequantize"]: found_quantized_matmul = True break self.assertEqual(found_quantized_matmul, True) @@ -1125,31 +1193,30 @@ def test_matmul_add_non_const_fusion(self): def test_matmul_biasadd_add_const_fusion(self): g = tf.Graph() with g.as_default(): - x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) z = tf.matmul(x, y) z = tf.nn.bias_add(z, [1, 2]) - z = tf.math.add(z, [1, 2], name='op_to_store') + z = tf.math.add(z, [1, 2], name="op_to_store") found_quantized_matmul = False - + with tf.Session() as sess: sess.run(z, feed_dict={x: x_data, y: y_data}) float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == '_QuantizedMatMul' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'Dequantize']: + if i.op == "_QuantizedMatMul" and i.attr["fused_ops"].list.s == [b"BiasAdd", b"Dequantize"]: found_quantized_matmul = True break self.assertEqual(found_quantized_matmul, True) @@ -1158,36 +1225,34 @@ def test_matmul_biasadd_add_const_fusion(self): def test_matmul_biasadd_add_non_const_fusion(self): g = tf.Graph() with g.as_default(): - x_data = np.array([[0.1, 0.2], [0.2, 0.3]]) y_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - x = tf.placeholder(tf.float32, shape=[2, 2], name='x') + x = tf.placeholder(tf.float32, shape=[2, 2], name="x") y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2]) z = tf.matmul(x, y) z = tf.nn.bias_add(z, [1, 2]) - z = tf.math.add(z, x, name='op_to_store') + z = tf.math.add(z, x, name="op_to_store") found_quantized_matmul = False - + with tf.Session() as sess: sess.run(z, feed_dict={x: x_data, y: y_data}) float_graph_def = sess.graph.as_graph_def() from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(2, 2), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(2, 2), label=True) quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=2) quantizer.model = float_graph_def output_graph = quantizer.fit() for i in output_graph.graph_def.node: - if i.op == '_QuantizedMatMul' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'Dequantize']: + if i.op == "_QuantizedMatMul" and i.attr["fused_ops"].list.s == [b"BiasAdd", b"Dequantize"]: found_quantized_matmul = True break self.assertEqual(found_quantized_matmul, True) -if __name__ == '__main__': - unittest.main() - +if __name__ == "__main__": + unittest.main() diff --git a/test/tfnewapi/test_tensorflow_graph_qdq_new_conv_fusion.py b/test/tfnewapi/test_tensorflow_graph_qdq_new_conv_fusion.py index 15fa0a6a7fd..806171b7c56 
100644 --- a/test/tfnewapi/test_tensorflow_graph_qdq_new_conv_fusion.py +++ b/test/tfnewapi/test_tensorflow_graph_qdq_new_conv_fusion.py @@ -1,18 +1,20 @@ # # -*- coding: utf-8 -*- # -import unittest -import os -import yaml -import tensorflow as tf import logging +import os +import unittest +import tensorflow as tf +import yaml from tensorflow.compat.v1 import graph_util from tensorflow.python.framework import function + from neural_compressor.adaptor.tf_utils.util import disable_random + def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: name: fake_yaml framework: tensorflow @@ -39,50 +41,51 @@ def build_fake_yaml(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() class TestTensorflowNewQdqConvFusion(unittest.TestCase): - @classmethod def setUpClass(self): build_fake_yaml() @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") @disable_random() def test_conv_biasadd_add_leakyrelu_fusion(self): logging.getLogger().info("test_conv_biasadd_add_leakyrelu_fusion") x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv = tf.nn.conv2d(x, conv_weights, strides=[1, 2, 2, 1], padding="SAME") normed = tf.compat.v1.layers.batch_normalization(conv) - conv2_weights = tf.compat.v1.get_variable("weight_conv2", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv2_weights = tf.compat.v1.get_variable( + "weight_conv2", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(x, conv2_weights, strides=[1, 2, 2, 1], padding="SAME") - sumadd = tf.raw_ops.AddV2(x=normed, y=conv2, name='addv2') - leaky_relu = tf.nn.leaky_relu(sumadd, name='op_to_store') + sumadd = tf.raw_ops.AddV2(x=normed, y=conv2, name="addv2") + leaky_relu = tf.nn.leaky_relu(sumadd, name="op_to_store") - out_name = leaky_relu.name.split(':')[0] + out_name = leaky_relu.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -90,40 +93,43 @@ def test_conv_biasadd_add_leakyrelu_fusion(self): found_conv_fusion = True for i in output_graph.graph_def.node: - if i.op == 'LeakyRelu': + if i.op == "LeakyRelu": found_conv_fusion = False break self.assertEqual(found_conv_fusion, False) @disable_random() def test_resizebilinear_bf16_input(self): - os.environ['FORCE_BF16'] = '1' + os.environ["FORCE_BF16"] = "1" x = 
tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") top_relu = tf.nn.relu(x) paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) x_pad = tf.pad(top_relu, paddings, "CONSTANT") - conv1_weights = tf.compat.v1.get_variable("weight_conv1", [3, 3, 16, 16], - initializer=tf.compat.v1.random_normal_initializer()) + conv1_weights = tf.compat.v1.get_variable( + "weight_conv1", [3, 3, 16, 16], initializer=tf.compat.v1.random_normal_initializer() + ) conv1 = tf.nn.conv2d(x_pad, conv1_weights, strides=[1, 2, 2, 1], padding="VALID") - matmul_weights = tf.compat.v1.get_variable("weight_matmul", [1, 28, 16, 32], - initializer=tf.compat.v1.random_normal_initializer()) + matmul_weights = tf.compat.v1.get_variable( + "weight_matmul", [1, 28, 16, 32], initializer=tf.compat.v1.random_normal_initializer() + ) matmul = tf.linalg.matmul(conv1, matmul_weights) - conv2_weights = tf.compat.v1.get_variable("weight_conv2", [7, 7, 32, 1], - initializer=tf.compat.v1.random_normal_initializer()) + conv2_weights = tf.compat.v1.get_variable( + "weight_conv2", [7, 7, 32, 1], initializer=tf.compat.v1.random_normal_initializer() + ) conv2 = tf.nn.conv2d(matmul, conv2_weights, strides=[1, 2, 2, 1], padding="VALID") - leaky_relu = tf.nn.leaky_relu(conv2, name='op_to_store') - resize_bili1 = tf.raw_ops.ResizeBilinear(images=leaky_relu, size=(28,28)) - resize_bili2 = tf.raw_ops.ResizeBilinear(images=resize_bili1, size=(14,14)) - out_name = resize_bili2.name.split(':')[0] + leaky_relu = tf.nn.leaky_relu(conv2, name="op_to_store") + resize_bili1 = tf.raw_ops.ResizeBilinear(images=leaky_relu, size=(28, 28)) + resize_bili2 = tf.raw_ops.ResizeBilinear(images=resize_bili1, size=(14, 14)) + out_name = resize_bili2.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 56, 56, 16), label=True) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -131,9 +137,10 @@ def test_resizebilinear_bf16_input(self): cast_counter = 0 for i in output_graph.graph_def.node: - if i.op == 'Cast': + if i.op == "Cast": cast_counter += 1 self.assertEqual(cast_counter, 2) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/test/tfnewapi/test_tensorflow_graph_qdq_pooling_fusion.py b/test/tfnewapi/test_tensorflow_graph_qdq_pooling_fusion.py index cd65f4aec4c..e50e0e0e651 100644 --- a/test/tfnewapi/test_tensorflow_graph_qdq_pooling_fusion.py +++ b/test/tfnewapi/test_tensorflow_graph_qdq_pooling_fusion.py @@ -3,16 +3,19 @@ # import os import unittest -import yaml + import numpy as np import tensorflow.compat.v1 as tf +import yaml +from tensorflow.compat.v1 import graph_util from tensorflow.python.framework import dtypes + from neural_compressor.adaptor.tensorflow import TensorflowQuery from neural_compressor.adaptor.tf_utils.util import disable_random -from tensorflow.compat.v1 import graph_util + def build_fake_yaml(): - fake_yaml = ''' + fake_yaml = """ model: 
name: fake_yaml framework: tensorflow @@ -32,9 +35,9 @@ def build_fake_yaml(): performance_only: True workspace: path: saved - ''' + """ y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: yaml.dump(y, f) f.close() @@ -43,43 +46,43 @@ class TestGraphQDQPoolingFusion(unittest.TestCase): @classmethod def setUpClass(self): build_fake_yaml() - self.op_wise_sequences = TensorflowQuery(local_config_file=os.path.join( - os.path.dirname(__file__), "../../neural_compressor/adaptor/tensorflow.yaml")).get_eightbit_patterns(True) + self.op_wise_sequences = TensorflowQuery( + local_config_file=os.path.join(os.path.dirname(__file__), "../../neural_compressor/adaptor/tensorflow.yaml") + ).get_eightbit_patterns(True) @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') + os.remove("fake_yaml.yaml") @disable_random() def test_qdq_maxpool_fusion(self): x = tf.compat.v1.placeholder(tf.float32, [1, 30, 30, 1], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [2, 2, 1, 1], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [1], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [2, 2, 1, 1], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [1], initializer=tf.compat.v1.random_normal_initializer()) x = tf.nn.relu(x) - conv = tf.nn.conv2d(x, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name='last') + conv = tf.nn.conv2d(x, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name="last") normed = tf.compat.v1.layers.batch_normalization(conv) relu = tf.nn.relu(normed) relu2 = tf.nn.relu(relu) - pool = tf.nn.max_pool(relu2, ksize=1, strides=[1, 2, 2, 1], name='maxpool', padding="SAME") - conv1 = tf.nn.conv2d(pool, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name='last') + pool = tf.nn.max_pool(relu2, ksize=1, strides=[1, 2, 2, 1], name="maxpool", padding="SAME") + conv1 = tf.nn.conv2d(pool, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name="last") conv_bias = tf.nn.bias_add(conv1, conv_bias) x = tf.nn.relu(conv_bias) - final_node = tf.nn.relu(x, name='op_to_store') + final_node = tf.nn.relu(x, name="op_to_store") - out_name = final_node.name.split(':')[0] + out_name = final_node.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 30, 30, 1), label=True) + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 30, 30, 1), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -87,41 +90,40 @@ def test_qdq_maxpool_fusion(self): found_quantized_maxpool = False for i in output_graph.graph_def.node: - if i.op == 'QuantizedMaxPool': + if i.op == "QuantizedMaxPool": found_quantized_maxpool = True break self.assertEqual(found_quantized_maxpool, True) - + @disable_random() def test_qdq_avgpool_fusion(self): x = 
tf.compat.v1.placeholder(tf.float32, [1, 30, 30, 1], name="input") - conv_weights = tf.compat.v1.get_variable("weight", [2, 2, 1, 1], - initializer=tf.compat.v1.random_normal_initializer()) - conv_bias = tf.compat.v1.get_variable("bias", [1], - initializer=tf.compat.v1.random_normal_initializer()) + conv_weights = tf.compat.v1.get_variable( + "weight", [2, 2, 1, 1], initializer=tf.compat.v1.random_normal_initializer() + ) + conv_bias = tf.compat.v1.get_variable("bias", [1], initializer=tf.compat.v1.random_normal_initializer()) x = tf.nn.relu(x) - conv = tf.nn.conv2d(x, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name='last') + conv = tf.nn.conv2d(x, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name="last") normed = tf.compat.v1.layers.batch_normalization(conv) relu = tf.nn.relu(normed) relu2 = tf.nn.relu(relu) - pool = tf.nn.avg_pool(relu2, ksize=1, strides=[1, 2, 2, 1], name='avgpool', padding="SAME") - conv1 = tf.nn.conv2d(pool, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name='last') + pool = tf.nn.avg_pool(relu2, ksize=1, strides=[1, 2, 2, 1], name="avgpool", padding="SAME") + conv1 = tf.nn.conv2d(pool, conv_weights, strides=[1, 2, 2, 1], padding="SAME", name="last") conv_bias = tf.nn.bias_add(conv1, conv_bias) x = tf.nn.relu(conv_bias) - final_node = tf.nn.relu(x, name='op_to_store') + final_node = tf.nn.relu(x, name="op_to_store") - out_name = final_node.name.split(':')[0] + out_name = final_node.name.split(":")[0] with tf.compat.v1.Session() as sess: sess.run(tf.compat.v1.global_variables_initializer()) output_graph_def = graph_util.convert_variables_to_constants( - sess=sess, - input_graph_def=sess.graph_def, - output_node_names=[out_name]) + sess=sess, input_graph_def=sess.graph_def, output_node_names=[out_name] + ) from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 30, 30, 1), label=True) + quantizer = Quantization("fake_yaml.yaml") + dataset = quantizer.dataset("dummy", shape=(100, 30, 30, 1), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = output_graph_def @@ -129,10 +131,11 @@ def test_qdq_avgpool_fusion(self): found_quantized_avgpool = False for i in output_graph.graph_def.node: - if i.op == 'QuantizedAvgPool': + if i.op == "QuantizedAvgPool": found_quantized_avgpool = True break self.assertEqual(found_quantized_avgpool, True) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/test/tfnewapi/test_tf_spr_base_distributed_metrics.py b/test/tfnewapi/test_tf_spr_base_distributed_metrics.py index bf9f6db750b..80ce2041679 100644 --- a/test/tfnewapi/test_tf_spr_base_distributed_metrics.py +++ b/test/tfnewapi/test_tf_spr_base_distributed_metrics.py @@ -1,13 +1,16 @@ """Tests for the distributed metrics.""" import os -import signal +import re import shutil +import signal import subprocess import unittest -import re + import tensorflow as tf + from neural_compressor.adaptor.tf_utils.util import version1_lt_version2 + def build_fake_ut(): fake_ut = """ import numpy as np @@ -56,7 +59,7 @@ def tearDownClass(cls): def test_mIOU(self): metrics = METRICS('tensorflow') miou = metrics['mIOU']() - miou.hvd = hvd + miou.hvd = hvd if hvd.rank() == 0: preds = np.array([0]) labels = np.array([0]) @@ -72,7 +75,7 @@ def test_mIOU(self): labels = np.array([0, 1]) else: preds = np.array([1, 1]) - labels = np.array([1, 1]) + labels = np.array([1, 
1]) miou.update(preds, labels) self.assertAlmostEqual(miou.result(), 0.58333333) @@ -116,7 +119,7 @@ def test_tensorflow_F1(self): labels = [0, 1, 1, 1] else: preds = [1, 1, 1, 1, 1, 1] - labels = [1, 1, 1, 1, 1, 1] + labels = [1, 1, 1, 1, 1, 1] F1.update(preds, labels) self.assertEqual(F1.result(), 0.9) @@ -167,7 +170,7 @@ def test_tensorflow_topk(self): top1.hvd = hvd top2.hvd = hvd top3.hvd = hvd - + if hvd.rank() == 0: predicts = [[0, 0.2, 0.9, 0.3]] labels = [[0, 1, 0, 0]] @@ -188,7 +191,7 @@ def test_tensorflow_topk(self): self.assertEqual(top1.result(), 0.0) self.assertEqual(top2.result(), 0.5) self.assertEqual(top3.result(), 1) - + # test functionality of sparse label top1.reset() top2.reset() @@ -238,7 +241,7 @@ def test_tensorflow_mAP(self): ground_truth = [ np.array([[[0.5633255 , 0.34003124, 0.69857144, 0.4009531 ], [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), - np.array([['a', 'b']]), + np.array([['a', 'b']]), np.array([[]]), np.array([b'000000397133.jpg']) ] @@ -253,7 +256,7 @@ def test_tensorflow_mAP(self): ground_truth = [ np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], [0.62706745, 0.35748824, 0.6892729 , 0.41513762]]]), - np.array([[b'dog', b'dog']]), + np.array([[b'dog', b'dog']]), np.array([[]]), np.array([b'000000397133.jpg']) ] @@ -261,7 +264,7 @@ def test_tensorflow_mAP(self): mAP.result() self.assertEqual(format(mAP.result(), '.5f'), '0.00000') - + detection = [ np.array([[[0.16117382, 0.59801614, 0.81511605, 0.7858219 ], [0.5589304 , 0. , 0.98301625, 0.520178 ], @@ -307,7 +310,7 @@ def test_tensorflow_mAP(self): [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), np.array([[]]), np.array([[44, 67, 1, 49, 51, 51, 79, 1, 47, 47, 51, 51,\\ - 56, 50, 56, 56, 79, 57, 81]]), + 56, 50, 56, 56, 79, 57, 81]]), np.array([b'000000397133.jpg']) ] ground_truth_2 = [ @@ -331,11 +334,11 @@ def test_tensorflow_mAP(self): ] mAP = metrics['mAP']() - + self.assertEqual(mAP.result(), 0) mAP.update(detection, ground_truth) - + mAP.update(detection, ground_truth) self.assertEqual(format(mAP.result(), '.5f'), '0.18182') @@ -422,7 +425,7 @@ def test_tensorflow_VOCmAP(self): ground_truth = [ np.array([[[0.5633255 , 0.34003124, 0.69857144, 0.4009531 ], [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), - np.array([['a', 'b']]), + np.array([['a', 'b']]), np.array([[]]), np.array([b'000000397133.jpg']) ] @@ -474,7 +477,7 @@ def test_tensorflow_VOCmAP(self): [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), np.array([[]]), np.array([[44, 67, 1, 49, 51, 51, 79, 1, 47, 47, 51, 51,\\ - 56, 50, 56, 56, 79, 57, 81]]), + 56, 50, 56, 56, 79, 57, 81]]), np.array([b'000000397133.jpg']) ] ground_truth_2 = [ @@ -496,11 +499,11 @@ def test_tensorflow_VOCmAP(self): np.array([[64, 62, 62, 67, 82, 52, 79, 81, 55, 55, 55, 55, 62, 55]]), np.array([b'000000037777.jpg']) ] - + self.assertEqual(mAP.result(), 0) mAP.update(detection, ground_truth) - + mAP.update(detection, ground_truth) self.assertEqual(format(mAP.result(), '.5f'), '0.18182') @@ -585,7 +588,7 @@ def test_tensorflow_COCOmAP(self): ground_truth = [ np.array([[[0.5633255 , 0.34003124, 0.69857144, 0.4009531 ], [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), - np.array([['a', 'b']]), + np.array([['a', 'b']]), np.array([[]]), np.array([b'000000397133.jpg']) ] @@ -636,7 +639,7 @@ def test_tensorflow_COCOmAP(self): [0.4763466 , 0.7769531 , 0.54334897, 0.9675937 ]]]), np.array([[]]), np.array([[44, 67, 1, 49, 51, 51, 79, 1, 47, 47, 51, 51,\\ - 56, 50, 56, 56, 79, 57, 81]]), + 56, 50, 56, 56, 79, 57, 81]]), 
np.array([b'000000397133.jpg']) ] ground_truth_2 = [ @@ -658,11 +661,11 @@ def test_tensorflow_COCOmAP(self): np.array([[64, 62, 62, 67, 82, 52, 79, 81, 55, 55, 55, 55, 62, 55]]), np.array([b'000000037777.jpg']) ] - + self.assertEqual(mAP.result(), 0) mAP.update(detection, ground_truth) - + mAP.update(detection, ground_truth) self.assertEqual(format(mAP.result(), '.5f'), '0.14149') @@ -737,7 +740,7 @@ def test__accuracy(self): labels2 = [[1, 1]] predicts3 = [[[0, 1], [0, 1], [0, 1]]] labels3 = [[[1, 0], [1, 0], [1, 0]]] - predicts4 = [[0.1, 0.9], [0.3, 0.7], [0.4, 0.6]] + predicts4 = [[0.1, 0.9], [0.3, 0.7], [0.4, 0.6]] labels4 = [1, 0, 0] metrics = METRICS('tensorflow') @@ -847,7 +850,7 @@ def test_mae(self): mae.update(predicts2, labels2) mae_result = mae.result() self.assertEqual(mae_result, 0.5) - + self.assertRaises(AssertionError, mae.update, [1], [1, 2]) self.assertRaises(AssertionError, mae.update, 1, [1,2]) self.assertRaises(AssertionError, mae.update, [1, 2], [1]) @@ -901,7 +904,7 @@ def test_loss(self): labels2 = [0] predicts3 = [0, 1] labels3 = [0, 0] - + metrics = METRICS('tensorflow') loss = metrics['Loss']() loss.hvd = hvd @@ -933,9 +936,10 @@ def test_loss(self): unittest.main() """ - with open('fake_ut.py', 'w', encoding="utf-8") as f: + with open("fake_ut.py", "w", encoding="utf-8") as f: f.write(fake_ut) + class TestDistributed(unittest.TestCase): @classmethod def setUpClass(cls): @@ -943,21 +947,22 @@ def setUpClass(cls): @classmethod def tearDownClass(cls): - os.remove('fake_ut.py') - shutil.rmtree('./saved', ignore_errors = True) - shutil.rmtree('runs', ignore_errors = True) + os.remove("fake_ut.py") + shutil.rmtree("./saved", ignore_errors=True) + shutil.rmtree("runs", ignore_errors=True) - @unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.10.0'), "Only test TF 2.10.0 or above") + @unittest.skipIf(version1_lt_version2(tf.version.VERSION, "2.10.0"), "Only test TF 2.10.0 or above") def test_distributed(self): - distributed_cmd = 'horovodrun -np 2 python fake_ut.py' - p = subprocess.Popen(distributed_cmd, preexec_fn = os.setsid, stdout = subprocess.PIPE, - stderr = subprocess.PIPE, shell=True) # nosec + distributed_cmd = "horovodrun -np 2 python fake_ut.py" + p = subprocess.Popen( + distributed_cmd, preexec_fn=os.setsid, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True + ) # nosec try: out, error = p.communicate() - matches = re.findall(r'FAILED', error.decode('utf-8')) + matches = re.findall(r"FAILED", error.decode("utf-8")) self.assertEqual(matches, []) - matches = re.findall(r'OK', error.decode('utf-8')) + matches = re.findall(r"OK", error.decode("utf-8")) self.assertTrue(len(matches) > 0) except KeyboardInterrupt: diff --git a/test/tfnewapi/test_tf_spr_base_distributed_pruning.py b/test/tfnewapi/test_tf_spr_base_distributed_pruning.py index 81735fa2ab4..1029fcc7ea1 100644 --- a/test/tfnewapi/test_tf_spr_base_distributed_pruning.py +++ b/test/tfnewapi/test_tf_spr_base_distributed_pruning.py @@ -1,18 +1,21 @@ """Tests for the TensorFlow pruning with distributed training and inference.""" +import hashlib import os -import sys -import cpuinfo -from platform import platform, system -import signal +import re import shutil +import signal import subprocess -import unittest -import re -import hashlib +import sys import time +import unittest +from platform import platform, system + +import cpuinfo import tensorflow as tf -from neural_compressor.utils import logger + from neural_compressor.adaptor.tf_utils.util import version1_lt_version2 +from 
neural_compressor.utils import logger + def build_fake_ut(): fake_ut = ''' @@ -333,7 +336,7 @@ def test_tensorflow_pruning(self): if __name__ == '__main__': unittest.main() ''' - with open('fake_ut.py', 'w', encoding="utf-8") as f: + with open("fake_ut.py", "w", encoding="utf-8") as f: f.write(fake_ut) build_fake_yaml() @@ -371,12 +374,14 @@ def build_fake_yaml(): metric: topk: 1 """ - with open('fake_yaml.yaml', 'w', encoding="utf-8") as f: + with open("fake_yaml.yaml", "w", encoding="utf-8") as f: f.write(fake_yaml) + def dir_md5_check(dir): files_list = [] md5_list = [] + def get_files_list(path, list_name): for file in sorted(os.listdir(path)): file_path = os.path.join(path, file) @@ -384,16 +389,19 @@ def get_files_list(path, list_name): get_files_list(file_path, list_name) else: list_name.append(file_path) + get_files_list(dir, files_list) for file_path in files_list: - with open(file_path, 'rb') as fp: + with open(file_path, "rb") as fp: data = fp.read() file_md5 = hashlib.md5(data).hexdigest() md5_list.append(file_md5) return md5_list + class TestDistributed(unittest.TestCase): - dst_path = './baseline_model' + dst_path = "./baseline_model" + @classmethod def setUpClass(cls): build_fake_ut() @@ -404,9 +412,12 @@ def setUpClass(cls): shutil.copytree("/tmp/.neural_compressor/inc_ut/resnet_v2/", os.getcwd(), dirs_exist_ok=True) if not os.path.exists(cls.dst_path): raise FileNotFoundError(f"'{cls.dst_path}' doesn't exist.") - elif dir_md5_check(cls.dst_path) != \ - ['65625fef42f44e6853d4d6d5e4188a49', 'a783396652bf62db3db4c9f647953175', - 'c7259753419d9fc053df5b2059aef8c0', '77f2a1045cffee9f6a43f2594a5627ba']: + elif dir_md5_check(cls.dst_path) != [ + "65625fef42f44e6853d4d6d5e4188a49", + "a783396652bf62db3db4c9f647953175", + "c7259753419d9fc053df5b2059aef8c0", + "77f2a1045cffee9f6a43f2594a5627ba", + ]: logger.warning("resnet_v2 baseline_model md5 verification failed.") raise ValueError(f"'{cls.dst_path}' md5 verification failed.") else: @@ -414,10 +425,10 @@ def setUpClass(cls): @classmethod def tearDownClass(cls): - os.remove('fake_ut.py') - os.remove('fake_yaml.yaml') - shutil.rmtree('nc_workspace', ignore_errors=True) - shutil.rmtree('baseline_model', ignore_errors=True) + os.remove("fake_ut.py") + os.remove("fake_yaml.yaml") + shutil.rmtree("nc_workspace", ignore_errors=True) + shutil.rmtree("baseline_model", ignore_errors=True) def setUp(self): logger.info(f"CPU: {cpuinfo.get_cpu_info()['brand_raw']}") @@ -426,19 +437,20 @@ def setUp(self): def tearDown(self): logger.info(f"{self._testMethodName} done.\n") - @unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.10.0'), "Only test TF 2.10.0 or above") + @unittest.skipIf(version1_lt_version2(tf.version.VERSION, "2.10.0"), "Only test TF 2.10.0 or above") def test_tf_distributed_pruning(self): - distributed_cmd = 'horovodrun -np 2 python fake_ut.py' - p = subprocess.Popen(distributed_cmd, preexec_fn=os.setsid, stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, shell=True) + distributed_cmd = "horovodrun -np 2 python fake_ut.py" + p = subprocess.Popen( + distributed_cmd, preexec_fn=os.setsid, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True + ) try: out, _ = p.communicate() for line in out.splitlines(): print(line.decode().strip()) - matches = re.findall(r'FAILED', out.decode('utf-8')) + matches = re.findall(r"FAILED", out.decode("utf-8")) self.assertEqual(matches, []) - matches = re.findall(r'OK', out.decode('utf-8')) + matches = re.findall(r"OK", out.decode("utf-8")) self.assertTrue(len(matches) > 0) except 
KeyboardInterrupt: os.killpg(os.getpgid(p.pid), signal.SIGKILL) diff --git a/test/tfnewapi/test_tf_spr_base_distributed_tf_dataloader.py b/test/tfnewapi/test_tf_spr_base_distributed_tf_dataloader.py index 88f249ba522..62f105c76b2 100644 --- a/test/tfnewapi/test_tf_spr_base_distributed_tf_dataloader.py +++ b/test/tfnewapi/test_tf_spr_base_distributed_tf_dataloader.py @@ -1,21 +1,24 @@ """Tests for Distributed TensorFlow Dataloader.""" -import numpy as np import collections import json import os +import shutil import sys -import cpuinfo import unittest -import shutil + +import cpuinfo +import numpy as np import tensorflow as tf + from neural_compressor import data -from neural_compressor.utils.create_obj_from_config import create_dataset, create_dataloader +from neural_compressor.adaptor.tf_utils.util import version1_lt_version2 +from neural_compressor.data import DATALOADERS, TRANSFORMS, Datasets from neural_compressor.data.dataloaders.dataloader import DataLoader -from neural_compressor.data import Datasets, DATALOADERS, TRANSFORMS from neural_compressor.utils import logger -from neural_compressor.adaptor.tf_utils.util import version1_lt_version2 +from neural_compressor.utils.create_obj_from_config import create_dataloader, create_dataset + -@unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.10.0'), "Only test TF 2.10.0 or above") +@unittest.skipIf(version1_lt_version2(tf.version.VERSION, "2.10.0"), "Only test TF 2.10.0 or above") class TestDistributedTFDataDataloader(unittest.TestCase): def setUp(self): os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" @@ -30,14 +33,16 @@ def tearDown(self): logger.info(f"{self._testMethodName} done.\n") def check_tf_dataset_with_batch_raise(self, batch_size, last_batch, distributed): - dataset_with_batch = tf.data.Dataset.from_tensors\ - ((tf.ones([3, 224, 224]), tf.ones([1000]))).repeat(600).batch(2) - dataloader = DATALOADERS['tensorflow']\ - (dataset_with_batch, batch_size=batch_size, last_batch=last_batch, distributed=distributed) + dataset_with_batch = ( + tf.data.Dataset.from_tensors((tf.ones([3, 224, 224]), tf.ones([1000]))).repeat(600).batch(2) + ) + dataloader = DATALOADERS["tensorflow"]( + dataset_with_batch, batch_size=batch_size, last_batch=last_batch, distributed=distributed + ) for batch in dataloader: x, y = batch - if self.count < len(dataloader)-1: - self.assertEqual(len(x), batch_size) + if self.count < len(dataloader) - 1: + self.assertEqual(len(x), batch_size) else: self.assertTrue(len(x) == dataloader.dis_dataset_len % batch_size or len(x) == batch_size) self.assertEqual(x[0].shape, (3, 224, 224)) @@ -46,12 +51,13 @@ def check_tf_dataset_with_batch_raise(self, batch_size, last_batch, distributed) self.count += 1 def check_distributed_raise(self, batch_size, last_batch, distributed): - dataloader = DATALOADERS['tensorflow']\ - (self.dataset, batch_size=batch_size, last_batch=last_batch, distributed=distributed) + dataloader = DATALOADERS["tensorflow"]( + self.dataset, batch_size=batch_size, last_batch=last_batch, distributed=distributed + ) for batch in dataloader: x, y = batch - if self.count < len(dataloader)-1: - self.assertEqual(len(x), batch_size) + if self.count < len(dataloader) - 1: + self.assertEqual(len(x), batch_size) else: self.assertTrue(len(x) == dataloader.dis_dataset_len % batch_size or len(x) == batch_size) self.assertEqual(x[0].shape, (3, 224, 224)) @@ -60,36 +66,36 @@ def check_distributed_raise(self, batch_size, last_batch, distributed): self.count += 1 def test_dis_tf_data_dataloader_1(self): - 
self.assertRaises(TypeError, self.check_tf_dataset_with_batch_raise, 32, 'rollover', True) - + self.assertRaises(TypeError, self.check_tf_dataset_with_batch_raise, 32, "rollover", True) + def test_dis_tf_data_dataloader_2(self): - self.assertRaises(TypeError, self.check_tf_dataset_with_batch_raise, 32, 'no_rollover', True) + self.assertRaises(TypeError, self.check_tf_dataset_with_batch_raise, 32, "no_rollover", True) def test_dis_tf_data_dataloader_3(self): - self.assertRaises(TypeError, self.check_tf_dataset_with_batch_raise, 1, 'rollover', True) - + self.assertRaises(TypeError, self.check_tf_dataset_with_batch_raise, 1, "rollover", True) + def test_dis_tf_data_dataloader_4(self): - self.assertRaises(TypeError, self.check_tf_dataset_with_batch_raise, 1, 'no_rollover', True) + self.assertRaises(TypeError, self.check_tf_dataset_with_batch_raise, 1, "no_rollover", True) def test_dis_tf_data_dataloader_5(self): - self.assertRaises(EnvironmentError, self.check_distributed_raise, 32, 'rollover', True) - + self.assertRaises(EnvironmentError, self.check_distributed_raise, 32, "rollover", True) + def test_dis_tf_data_dataloader_6(self): - self.assertRaises(EnvironmentError, self.check_distributed_raise, 32, 'no_rollover', True) + self.assertRaises(EnvironmentError, self.check_distributed_raise, 32, "no_rollover", True) def test_dis_tf_data_dataloader_7(self): - self.assertRaises(EnvironmentError, self.check_distributed_raise, 1, 'rollover', True) - + self.assertRaises(EnvironmentError, self.check_distributed_raise, 1, "rollover", True) + def test_dis_tf_data_dataloader_8(self): - self.assertRaises(EnvironmentError, self.check_distributed_raise, 1, 'no_rollover', True) + self.assertRaises(EnvironmentError, self.check_distributed_raise, 1, "no_rollover", True) def test_dis_tf_data_dataloader_9(self): batch_size = 32 - dataloader = DATALOADERS['tensorflow'](self.dataset, batch_size=batch_size, last_batch='rollover') + dataloader = DATALOADERS["tensorflow"](self.dataset, batch_size=batch_size, last_batch="rollover") for batch in dataloader: x, y = batch - if self.count < len(dataloader)-1: - self.assertEqual(len(x), batch_size) + if self.count < len(dataloader) - 1: + self.assertEqual(len(x), batch_size) else: self.assertTrue(len(x) == dataloader.dis_dataset_len % batch_size or len(x) == batch_size) self.assertEqual(x[0].shape, (3, 224, 224)) @@ -99,11 +105,11 @@ def test_dis_tf_data_dataloader_9(self): def test_dis_tf_data_dataloader_10(self): batch_size = 32 - dataloader = DATALOADERS['tensorflow'](self.dataset, batch_size=batch_size, last_batch='no_rollover') + dataloader = DATALOADERS["tensorflow"](self.dataset, batch_size=batch_size, last_batch="no_rollover") for batch in dataloader: x, y = batch - if self.count < len(dataloader)-1: - self.assertEqual(len(x), batch_size) + if self.count < len(dataloader) - 1: + self.assertEqual(len(x), batch_size) else: self.assertTrue(len(x) == dataloader.dis_dataset_len % batch_size or len(x) == batch_size) self.assertEqual(x[0].shape, (3, 224, 224)) @@ -113,11 +119,11 @@ def test_dis_tf_data_dataloader_10(self): def test_dis_tf_data_dataloader_11(self): batch_size = 1 - dataloader = DATALOADERS['tensorflow'](self.dataset, batch_size=batch_size, last_batch='rollover') + dataloader = DATALOADERS["tensorflow"](self.dataset, batch_size=batch_size, last_batch="rollover") for batch in dataloader: x, y = batch - if self.count < len(dataloader)-1: - self.assertEqual(len(x), batch_size) + if self.count < len(dataloader) - 1: + self.assertEqual(len(x), batch_size) else: 
self.assertTrue(len(x) == dataloader.dis_dataset_len % batch_size or len(x) == batch_size) self.assertEqual(x[0].shape, (3, 224, 224)) @@ -127,11 +133,11 @@ def test_dis_tf_data_dataloader_11(self): def test_dis_tf_data_dataloader_12(self): batch_size = 1 - dataloader = DATALOADERS['tensorflow'](self.dataset, batch_size=batch_size, last_batch='no_rollover') + dataloader = DATALOADERS["tensorflow"](self.dataset, batch_size=batch_size, last_batch="no_rollover") for batch in dataloader: x, y = batch - if self.count < len(dataloader)-1: - self.assertEqual(len(x), batch_size) + if self.count < len(dataloader) - 1: + self.assertEqual(len(x), batch_size) else: self.assertTrue(len(x) == dataloader.dis_dataset_len % batch_size or len(x) == batch_size) self.assertEqual(x[0].shape, (3, 224, 224)) @@ -141,11 +147,11 @@ def test_dis_tf_data_dataloader_12(self): def test_dis_tf_data_dataloader_13(self): batch_size = 600 - dataloader = DATALOADERS['tensorflow'](self.dataset, batch_size=batch_size, last_batch='rollover') + dataloader = DATALOADERS["tensorflow"](self.dataset, batch_size=batch_size, last_batch="rollover") for batch in dataloader: x, y = batch - if self.count < len(dataloader)-1: - self.assertEqual(len(x), batch_size) + if self.count < len(dataloader) - 1: + self.assertEqual(len(x), batch_size) else: self.assertTrue(len(x) == dataloader.dis_dataset_len % batch_size or len(x) == batch_size) self.assertEqual(x[0].shape, (3, 224, 224)) @@ -155,11 +161,11 @@ def test_dis_tf_data_dataloader_13(self): def test_dis_tf_data_dataloader_14(self): batch_size = 600 - dataloader = DATALOADERS['tensorflow'](self.dataset, batch_size=batch_size, last_batch='no_rollover') + dataloader = DATALOADERS["tensorflow"](self.dataset, batch_size=batch_size, last_batch="no_rollover") for batch in dataloader: x, y = batch - if self.count < len(dataloader)-1: - self.assertEqual(len(x), batch_size) + if self.count < len(dataloader) - 1: + self.assertEqual(len(x), batch_size) else: self.assertTrue(len(x) == dataloader.dis_dataset_len % batch_size or len(x) == batch_size) self.assertEqual(x[0].shape, (3, 224, 224)) @@ -167,13 +173,14 @@ def test_dis_tf_data_dataloader_14(self): self.assertIsInstance(x, np.ndarray) self.count += 1 -@unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.10.0'), "Only test TF 2.10.0 or above") + +@unittest.skipIf(version1_lt_version2(tf.version.VERSION, "2.10.0"), "Only test TF 2.10.0 or above") class TestDefaultDataLoaderSequentialSampler(unittest.TestCase): @classmethod def tearDownClass(cls): - if os.path.exists('minist'): - shutil.rmtree('minist') - + if os.path.exists("minist"): + shutil.rmtree("minist") + def setUp(self): self.count = 0 logger.info(f"CPU: {cpuinfo.get_cpu_info()['brand_raw']}") @@ -184,30 +191,30 @@ def tearDown(self): def check_get_len_raise(self, batch_size, last_batch, distributed): dataloader_args = { - 'batch_size': batch_size, - 'dataset': {"MNIST": {'root': './minist', 'train':False, 'download':True}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None, - 'last_batch': last_batch, - 'distributed': distributed + "batch_size": batch_size, + "dataset": {"MNIST": {"root": "./minist", "train": False, "download": True}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, + "last_batch": last_batch, + "distributed": distributed, } - dataloader = create_dataloader('tensorflow', dataloader_args) + dataloader = create_dataloader("tensorflow", dataloader_args) len_dataloader = len(dataloader) def check_distributed_raise(self, batch_size, last_batch, 
distributed): dataloader_args = { - 'batch_size': batch_size, - 'dataset': {"MNIST": {'root': './minist', 'train':False, 'download':True}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None, - 'last_batch': last_batch, - 'distributed': distributed + "batch_size": batch_size, + "dataset": {"MNIST": {"root": "./minist", "train": False, "download": True}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, + "last_batch": last_batch, + "distributed": distributed, } - dataloader = create_dataloader('tensorflow', dataloader_args) + dataloader = create_dataloader("tensorflow", dataloader_args) for batch in dataloader: x, y = batch - if self.count < len(dataloader)-1: - self.assertEqual(len(x), batch_size) + if self.count < len(dataloader) - 1: + self.assertEqual(len(x), batch_size) else: self.assertTrue(len(x) == dataloader.dis_dataset_len % batch_size or len(x) == batch_size) self.assertEqual(x[0].shape, (24, 24)) @@ -216,43 +223,43 @@ def check_distributed_raise(self, batch_size, last_batch, distributed): self.count += 1 def test_sequential_sampler1(self): - self.assertRaises(EnvironmentError, self.check_get_len_raise, 32, 'rollover', True) + self.assertRaises(EnvironmentError, self.check_get_len_raise, 32, "rollover", True) def test_sequential_sampler2(self): - self.assertRaises(EnvironmentError, self.check_get_len_raise, 32, 'no_rollover', True) + self.assertRaises(EnvironmentError, self.check_get_len_raise, 32, "no_rollover", True) def test_sequential_sampler3(self): - self.assertRaises(EnvironmentError, self.check_get_len_raise, 1, 'rollover', True) + self.assertRaises(EnvironmentError, self.check_get_len_raise, 1, "rollover", True) def test_sequential_sampler4(self): - self.assertRaises(EnvironmentError, self.check_get_len_raise, 1, 'no_rollover', True) + self.assertRaises(EnvironmentError, self.check_get_len_raise, 1, "no_rollover", True) def test_sequential_sampler5(self): - self.assertRaises(EnvironmentError, self.check_distributed_raise, 32, 'rollover', True) + self.assertRaises(EnvironmentError, self.check_distributed_raise, 32, "rollover", True) def test_sequential_sampler6(self): - self.assertRaises(EnvironmentError, self.check_distributed_raise, 32, 'no_rollover', True) + self.assertRaises(EnvironmentError, self.check_distributed_raise, 32, "no_rollover", True) def test_sequential_sampler7(self): - self.assertRaises(EnvironmentError, self.check_distributed_raise, 1, 'rollover', True) + self.assertRaises(EnvironmentError, self.check_distributed_raise, 1, "rollover", True) def test_sequential_sampler8(self): - self.assertRaises(EnvironmentError, self.check_distributed_raise, 1, 'no_rollover', True) + self.assertRaises(EnvironmentError, self.check_distributed_raise, 1, "no_rollover", True) def test_sequential_sampler9(self): batch_size = 3332 dataloader_args = { - 'batch_size': batch_size, - 'dataset': {"MNIST": {'root': './minist', 'train':False, 'download':True}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None, - 'last_batch': 'rollover' + "batch_size": batch_size, + "dataset": {"MNIST": {"root": "./minist", "train": False, "download": True}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, + "last_batch": "rollover", } - dataloader = create_dataloader('tensorflow', dataloader_args) + dataloader = create_dataloader("tensorflow", dataloader_args) for batch in dataloader: x, y = batch - if self.count < len(dataloader)-1: - self.assertEqual(len(x), batch_size) + if self.count < len(dataloader) - 1: + self.assertEqual(len(x), batch_size) else: 
self.assertTrue(len(x) == dataloader.dis_dataset_len % batch_size or len(x) == batch_size) self.assertEqual(x[0].shape, (24, 24)) @@ -263,17 +270,17 @@ def test_sequential_sampler9(self): def test_sequential_sampler10(self): batch_size = 3332 dataloader_args = { - 'batch_size': batch_size, - 'dataset': {"MNIST": {'root': './minist', 'train':False, 'download':True}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None, - 'last_batch': 'no_rollover' + "batch_size": batch_size, + "dataset": {"MNIST": {"root": "./minist", "train": False, "download": True}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, + "last_batch": "no_rollover", } - dataloader = create_dataloader('tensorflow', dataloader_args) + dataloader = create_dataloader("tensorflow", dataloader_args) for batch in dataloader: x, y = batch - if self.count < len(dataloader)-1: - self.assertEqual(len(x), batch_size) + if self.count < len(dataloader) - 1: + self.assertEqual(len(x), batch_size) else: self.assertTrue(len(x) == dataloader.dis_dataset_len % batch_size or len(x) == batch_size) self.assertEqual(x[0].shape, (24, 24)) @@ -284,17 +291,17 @@ def test_sequential_sampler10(self): def test_sequential_sampler11(self): batch_size = 1 dataloader_args = { - 'batch_size': batch_size, - 'dataset': {"MNIST": {'root': './minist', 'train':False, 'download':True}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None, - 'last_batch': 'rollover' + "batch_size": batch_size, + "dataset": {"MNIST": {"root": "./minist", "train": False, "download": True}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, + "last_batch": "rollover", } - dataloader = create_dataloader('tensorflow', dataloader_args) + dataloader = create_dataloader("tensorflow", dataloader_args) for batch in dataloader: x, y = batch - if self.count < len(dataloader)-1: - self.assertEqual(len(x), batch_size) + if self.count < len(dataloader) - 1: + self.assertEqual(len(x), batch_size) else: self.assertTrue(len(x) == dataloader.dis_dataset_len % batch_size or len(x) == batch_size) self.assertEqual(x[0].shape, (24, 24)) @@ -305,17 +312,17 @@ def test_sequential_sampler11(self): def test_sequential_sampler12(self): batch_size = 1 dataloader_args = { - 'batch_size': batch_size, - 'dataset': {"MNIST": {'root': './minist', 'train':False, 'download':True}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None, - 'last_batch': 'no_rollover' + "batch_size": batch_size, + "dataset": {"MNIST": {"root": "./minist", "train": False, "download": True}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, + "last_batch": "no_rollover", } - dataloader = create_dataloader('tensorflow', dataloader_args) + dataloader = create_dataloader("tensorflow", dataloader_args) for batch in dataloader: x, y = batch - if self.count < len(dataloader)-1: - self.assertEqual(len(x), batch_size) + if self.count < len(dataloader) - 1: + self.assertEqual(len(x), batch_size) else: self.assertTrue(len(x) == dataloader.dis_dataset_len % batch_size or len(x) == batch_size) self.assertEqual(x[0].shape, (24, 24)) @@ -326,17 +333,17 @@ def test_sequential_sampler12(self): def test_sequential_sampler13(self): batch_size = 10000 dataloader_args = { - 'batch_size': batch_size, - 'dataset': {"MNIST": {'root': './minist', 'train':False, 'download':True}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None, - 'last_batch': 'rollover' + "batch_size": batch_size, + "dataset": {"MNIST": {"root": "./minist", "train": False, "download": True}}, + "transform": {"Resize": {"size": 24}}, + 
"filter": None, + "last_batch": "rollover", } - dataloader = create_dataloader('tensorflow', dataloader_args) + dataloader = create_dataloader("tensorflow", dataloader_args) for batch in dataloader: x, y = batch - if self.count < len(dataloader)-1: - self.assertEqual(len(x), batch_size) + if self.count < len(dataloader) - 1: + self.assertEqual(len(x), batch_size) else: self.assertTrue(len(x) == dataloader.dis_dataset_len % batch_size or len(x) == batch_size) self.assertEqual(x[0].shape, (24, 24)) @@ -347,17 +354,17 @@ def test_sequential_sampler13(self): def test_sequential_sampler14(self): batch_size = 10000 dataloader_args = { - 'batch_size': batch_size, - 'dataset': {"MNIST": {'root': './minist', 'train':False, 'download':True}}, - 'transform': {'Resize': {'size': 24}}, - 'filter': None, - 'last_batch': 'no_rollover' + "batch_size": batch_size, + "dataset": {"MNIST": {"root": "./minist", "train": False, "download": True}}, + "transform": {"Resize": {"size": 24}}, + "filter": None, + "last_batch": "no_rollover", } - dataloader = create_dataloader('tensorflow', dataloader_args) + dataloader = create_dataloader("tensorflow", dataloader_args) for batch in dataloader: x, y = batch - if self.count < len(dataloader)-1: - self.assertEqual(len(x), batch_size) + if self.count < len(dataloader) - 1: + self.assertEqual(len(x), batch_size) else: self.assertTrue(len(x) == dataloader.dis_dataset_len % batch_size or len(x) == batch_size) self.assertEqual(x[0].shape, (24, 24)) @@ -365,17 +372,18 @@ def test_sequential_sampler14(self): self.assertIsInstance(x, np.ndarray) self.count += 1 -@unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.10.0'), "Only test TF 2.10.0 or above") + +@unittest.skipIf(version1_lt_version2(tf.version.VERSION, "2.10.0"), "Only test TF 2.10.0 or above") class TestDefaultDataLoaderIterableSampler(unittest.TestCase): class iter_dataset(object): def __iter__(self): sample_size = 250 - for i in range(1, sample_size+1): + for i in range(1, sample_size + 1): yield np.array([i]) def setUp(self): self.rank = 0 - self.size = 1 + self.size = 1 self.count = 1 self.dataset = self.iter_dataset() logger.info(f"CPU: {cpuinfo.get_cpu_info()['brand_raw']}") @@ -385,160 +393,210 @@ def tearDown(self): logger.info(f"{self._testMethodName} done.\n") def check_get_len_raise(self, batch_size, last_batch, distributed): - dataloader = DATALOADERS['tensorflow']\ - (self.dataset, batch_size=batch_size, last_batch=last_batch, distributed=distributed) + dataloader = DATALOADERS["tensorflow"]( + self.dataset, batch_size=batch_size, last_batch=last_batch, distributed=distributed + ) len_dataloader = len(dataloader) def check_distributed_raise(self, batch_size, last_batch, distributed): - dataloader = DATALOADERS['tensorflow']\ - (self.dataset, batch_size=batch_size, last_batch=last_batch, distributed=distributed) + dataloader = DATALOADERS["tensorflow"]( + self.dataset, batch_size=batch_size, last_batch=last_batch, distributed=distributed + ) for batch in dataloader: if self.count < len(dataloader): self.assertEqual(len(batch), batch_size) - self.assertEqual(self.count*batch_size*self.size-self.size+self.rank+1, batch[-1][0]) + self.assertEqual(self.count * batch_size * self.size - self.size + self.rank + 1, batch[-1][0]) else: self.assertTrue(len(batch) == dataloader.dis_dataset_len % batch_size or len(batch) == batch_size) - self.assertEqual(((self.count-1)*batch_size+len(batch)-1)*self.size+self.rank+1, batch[-1][0]) + self.assertEqual( + ((self.count - 1) * batch_size + len(batch) - 1) * 
self.size + self.rank + 1, batch[-1][0] + ) break self.count += 1 - + def test_iterable_sampler1(self): - self.assertRaises(EnvironmentError, self.check_get_len_raise, 32, 'rollover', True) + self.assertRaises(EnvironmentError, self.check_get_len_raise, 32, "rollover", True) def test_iterable_sampler2(self): - self.assertRaises(EnvironmentError, self.check_get_len_raise, 32, 'no_rollover', True) + self.assertRaises(EnvironmentError, self.check_get_len_raise, 32, "no_rollover", True) def test_iterable_sampler3(self): - self.assertRaises(EnvironmentError, self.check_get_len_raise, 1, 'rollover', True) + self.assertRaises(EnvironmentError, self.check_get_len_raise, 1, "rollover", True) def test_iterable_sampler4(self): - self.assertRaises(EnvironmentError, self.check_get_len_raise, 1, 'no_rollover', True) + self.assertRaises(EnvironmentError, self.check_get_len_raise, 1, "no_rollover", True) def test_iterable_sampler5(self): - self.assertRaises(EnvironmentError, self.check_distributed_raise, 32, 'rollover', True) + self.assertRaises(EnvironmentError, self.check_distributed_raise, 32, "rollover", True) def test_iterable_sampler6(self): - self.assertRaises(EnvironmentError, self.check_distributed_raise, 32, 'no_rollover', True) + self.assertRaises(EnvironmentError, self.check_distributed_raise, 32, "no_rollover", True) def test_iterable_sampler7(self): - self.assertRaises(EnvironmentError, self.check_distributed_raise, 1, 'rollover', True) + self.assertRaises(EnvironmentError, self.check_distributed_raise, 1, "rollover", True) def test_iterable_sampler8(self): - self.assertRaises(EnvironmentError, self.check_distributed_raise, 1, 'no_rollover', True) + self.assertRaises(EnvironmentError, self.check_distributed_raise, 1, "no_rollover", True) def test_iterable_sampler9(self): batch_size = 128 - last_batch = 'rollover' - dataloader = DATALOADERS['tensorflow'](self.dataset, batch_size=batch_size, last_batch=last_batch) + last_batch = "rollover" + dataloader = DATALOADERS["tensorflow"](self.dataset, batch_size=batch_size, last_batch=last_batch) for batch in dataloader: if self.count < len(dataloader): self.assertEqual(len(batch), batch_size) - self.assertEqual(self.count*batch_size*self.size-self.size+self.rank+1, batch[-1][0]) + self.assertEqual(self.count * batch_size * self.size - self.size + self.rank + 1, batch[-1][0]) else: self.assertTrue(len(batch) == dataloader.dis_dataset_len % batch_size or len(batch) == batch_size) - self.assertEqual(((self.count-1)*batch_size+len(batch)-1)*self.size+self.rank+1, batch[-1][0]) + self.assertEqual( + ((self.count - 1) * batch_size + len(batch) - 1) * self.size + self.rank + 1, batch[-1][0] + ) break self.count += 1 def test_iterable_sampler10(self): batch_size = 128 - last_batch = 'no_rollover' - dataloader = DATALOADERS['tensorflow'](self.dataset, batch_size=batch_size, last_batch=last_batch) + last_batch = "no_rollover" + dataloader = DATALOADERS["tensorflow"](self.dataset, batch_size=batch_size, last_batch=last_batch) for batch in dataloader: if self.count < len(dataloader): self.assertEqual(len(batch), batch_size) - self.assertEqual(self.count*batch_size*self.size-self.size+self.rank+1, batch[-1][0]) + self.assertEqual(self.count * batch_size * self.size - self.size + self.rank + 1, batch[-1][0]) else: self.assertTrue(len(batch) == dataloader.dis_dataset_len % batch_size or len(batch) == batch_size) - self.assertEqual(((self.count-1)*batch_size+len(batch)-1)*self.size+self.rank+1, batch[-1][0]) + self.assertEqual( + ((self.count - 1) * batch_size + 
len(batch) - 1) * self.size + self.rank + 1, batch[-1][0] + ) break self.count += 1 def test_iterable_sampler11(self): batch_size = 1 - last_batch = 'rollover' - dataloader = DATALOADERS['tensorflow'](self.dataset, batch_size=batch_size, last_batch=last_batch) + last_batch = "rollover" + dataloader = DATALOADERS["tensorflow"](self.dataset, batch_size=batch_size, last_batch=last_batch) for batch in dataloader: if self.count < len(dataloader): self.assertEqual(len(batch), batch_size) - self.assertEqual(self.count*batch_size*self.size-self.size+self.rank+1, batch[-1][0]) + self.assertEqual(self.count * batch_size * self.size - self.size + self.rank + 1, batch[-1][0]) else: self.assertTrue(len(batch) == dataloader.dis_dataset_len % batch_size or len(batch) == batch_size) - self.assertEqual(((self.count-1)*batch_size+len(batch)-1)*self.size+self.rank+1, batch[-1][0]) + self.assertEqual( + ((self.count - 1) * batch_size + len(batch) - 1) * self.size + self.rank + 1, batch[-1][0] + ) break self.count += 1 def test_iterable_sampler12(self): batch_size = 1 - last_batch = 'no_rollover' - dataloader = DATALOADERS['tensorflow'](self.dataset, batch_size=batch_size, last_batch=last_batch) + last_batch = "no_rollover" + dataloader = DATALOADERS["tensorflow"](self.dataset, batch_size=batch_size, last_batch=last_batch) for batch in dataloader: if self.count < len(dataloader): self.assertEqual(len(batch), batch_size) - self.assertEqual(self.count*batch_size*self.size-self.size+self.rank+1, batch[-1][0]) + self.assertEqual(self.count * batch_size * self.size - self.size + self.rank + 1, batch[-1][0]) else: self.assertTrue(len(batch) == dataloader.dis_dataset_len % batch_size or len(batch) == batch_size) - self.assertEqual(((self.count-1)*batch_size+len(batch)-1)*self.size+self.rank+1, batch[-1][0]) + self.assertEqual( + ((self.count - 1) * batch_size + len(batch) - 1) * self.size + self.rank + 1, batch[-1][0] + ) break self.count += 1 def test_iterable_sampler13(self): batch_size = 1000 - last_batch = 'rollover' - dataloader = DATALOADERS['tensorflow'](self.dataset, batch_size=batch_size, last_batch=last_batch) + last_batch = "rollover" + dataloader = DATALOADERS["tensorflow"](self.dataset, batch_size=batch_size, last_batch=last_batch) for batch in dataloader: if self.count < len(dataloader): self.assertEqual(len(batch), batch_size) - self.assertEqual(self.count*batch_size*self.size-self.size+self.rank+1, batch[-1][0]) + self.assertEqual(self.count * batch_size * self.size - self.size + self.rank + 1, batch[-1][0]) else: self.assertTrue(len(batch) == dataloader.dis_dataset_len % batch_size or len(batch) == batch_size) - self.assertEqual(((self.count-1)*batch_size+len(batch)-1)*self.size+self.rank+1, batch[-1][0]) + self.assertEqual( + ((self.count - 1) * batch_size + len(batch) - 1) * self.size + self.rank + 1, batch[-1][0] + ) break self.count += 1 def test_iterable_sampler14(self): batch_size = 1000 - last_batch = 'no_rollover' - dataloader = DATALOADERS['tensorflow'](self.dataset, batch_size=batch_size, last_batch=last_batch) + last_batch = "no_rollover" + dataloader = DATALOADERS["tensorflow"](self.dataset, batch_size=batch_size, last_batch=last_batch) for batch in dataloader: if self.count < len(dataloader): self.assertEqual(len(batch), batch_size) - self.assertEqual(self.count*batch_size*self.size-self.size+self.rank+1, batch[-1][0]) + self.assertEqual(self.count * batch_size * self.size - self.size + self.rank + 1, batch[-1][0]) else: self.assertTrue(len(batch) == dataloader.dis_dataset_len % 
batch_size or len(batch) == batch_size) - self.assertEqual(((self.count-1)*batch_size+len(batch)-1)*self.size+self.rank+1, batch[-1][0]) + self.assertEqual( + ((self.count - 1) * batch_size + len(batch) - 1) * self.size + self.rank + 1, batch[-1][0] + ) break self.count += 1 -@unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.10.0'), "Only test TF 2.10.0 or above") + +@unittest.skipIf(version1_lt_version2(tf.version.VERSION, "2.10.0"), "Only test TF 2.10.0 or above") class TestTensorflowBertDataLoader(unittest.TestCase): - label = [{ - "paragraphs0":[ - {'context': - 'Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season.', - 'qas': [{ - 'answers': [ - {'answer_start': 177, 'text': 'Denver Broncos'}, - {'answer_start': 177, 'text': 'Denver Broncos'}, - {'answer_start': 177, 'text': 'Denver Broncos'}], - 'question': 'Which NFL team represented the AFC at Super Bowl 50?', - 'id': '56be4db0acb8001400a502ec'}] - } - ] - }] + label = [ + { + "paragraphs0": [ + { + "context": "Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season.", + "qas": [ + { + "answers": [ + {"answer_start": 177, "text": "Denver Broncos"}, + {"answer_start": 177, "text": "Denver Broncos"}, + {"answer_start": 177, "text": "Denver Broncos"}, + ], + "question": "Which NFL team represented the AFC at Super Bowl 50?", + "id": "56be4db0acb8001400a502ec", + } + ], + } + ] + } + ] unique_id = 1000000000 - input_ids = [101, 2029, 5088, 2136, 3421, 1996, 10511, 2012, 3565, 4605, 2753, 1029, 102, 3565, 4605, 2753,\ - 1007, 2005, 1996, 2325, 2161, 1012, 1996, 2137, 2374, 3034, 1006] + input_ids = [ + 101, + 2029, + 5088, + 2136, + 3421, + 1996, + 10511, + 2012, + 3565, + 4605, + 2753, + 1029, + 102, + 3565, + 4605, + 2753, + 1007, + 2005, + 1996, + 2325, + 2161, + 1012, + 1996, + 2137, + 2374, + 3034, + 1006, + ] input_mask = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] segment_ids = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] - fake_json = json.dumps({'data': label, 'version': '1.1'}) - with open('dev.json', 'w') as f: - f.write(fake_json) - + fake_json = json.dumps({"data": label, "version": "1.1"}) + with open("dev.json", "w") as f: + f.write(fake_json) + @classmethod def tearDownClass(cls): - os.remove('test.record') - os.remove('dev.json') + os.remove("test.record") + os.remove("dev.json") def setUp(self): logger.info(f"CPU: {cpuinfo.get_cpu_info()['brand_raw']}") @@ -548,79 +606,60 @@ def tearDown(self): logger.info(f"{self._testMethodName} done.\n") def check_not_implement(self, batch_size, distributed): - with tf.io.TFRecordWriter('./test.record') as writer: + with tf.io.TFRecordWriter("./test.record") as writer: features = collections.OrderedDict() - features["unique_ids"] = tf.train.Feature( - int64_list=tf.train.Int64List(value=list([self.unique_id]))) - features["input_ids"] = tf.train.Feature( - int64_list=tf.train.Int64List(value=list(self.input_ids))) - features["input_mask"] = tf.train.Feature( - int64_list=tf.train.Int64List(value=list(self.input_mask))) - features["segment_ids"] = tf.train.Feature( - int64_list=tf.train.Int64List(value=list(self.segment_ids))) + features["unique_ids"] = tf.train.Feature(int64_list=tf.train.Int64List(value=list([self.unique_id]))) + features["input_ids"] = tf.train.Feature(int64_list=tf.train.Int64List(value=list(self.input_ids))) + features["input_mask"] 
= tf.train.Feature(int64_list=tf.train.Int64List(value=list(self.input_mask))) + features["segment_ids"] = tf.train.Feature(int64_list=tf.train.Int64List(value=list(self.segment_ids))) tf_example = tf.train.Example(features=tf.train.Features(feature=features)) writer.write(tf_example.SerializeToString()) eval_dataset = create_dataset( - 'tensorflow', - {'bert':{'root':'test.record', 'label_file': './dev.json'}}, - None, - None) - dataloader = DATALOADERS['tensorflow']\ - (dataset=eval_dataset, batch_size=batch_size, distributed=distributed) + "tensorflow", {"bert": {"root": "test.record", "label_file": "./dev.json"}}, None, None + ) + dataloader = DATALOADERS["tensorflow"](dataset=eval_dataset, batch_size=batch_size, distributed=distributed) def test_tf_bert_dataloader_1(self): self.assertRaises(NotImplementedError, self.check_not_implement, 32, True) - + def test_tf_bert_dataloader_2(self): batch_size = 128 - with tf.io.TFRecordWriter('./test.record') as writer: + with tf.io.TFRecordWriter("./test.record") as writer: features = collections.OrderedDict() - features["unique_ids"] = tf.train.Feature( - int64_list=tf.train.Int64List(value=list([self.unique_id]))) - features["input_ids"] = tf.train.Feature( - int64_list=tf.train.Int64List(value=list(self.input_ids))) - features["input_mask"] = tf.train.Feature( - int64_list=tf.train.Int64List(value=list(self.input_mask))) - features["segment_ids"] = tf.train.Feature( - int64_list=tf.train.Int64List(value=list(self.segment_ids))) + features["unique_ids"] = tf.train.Feature(int64_list=tf.train.Int64List(value=list([self.unique_id]))) + features["input_ids"] = tf.train.Feature(int64_list=tf.train.Int64List(value=list(self.input_ids))) + features["input_mask"] = tf.train.Feature(int64_list=tf.train.Int64List(value=list(self.input_mask))) + features["segment_ids"] = tf.train.Feature(int64_list=tf.train.Int64List(value=list(self.segment_ids))) tf_example = tf.train.Example(features=tf.train.Features(feature=features)) writer.write(tf_example.SerializeToString()) eval_dataset = create_dataset( - 'tensorflow', - {'bert':{'root':'test.record', 'label_file': './dev.json'}}, - None, - None) - dataloader = DATALOADERS['tensorflow'](dataset=eval_dataset, batch_size=batch_size) + "tensorflow", {"bert": {"root": "test.record", "label_file": "./dev.json"}}, None, None + ) + dataloader = DATALOADERS["tensorflow"](dataset=eval_dataset, batch_size=batch_size) for inputs, labels in dataloader: - self.assertEqual(inputs[0], 'test.record') + self.assertEqual(inputs[0], "test.record") self.assertEqual(inputs[1], batch_size) self.assertEqual(len(labels), 1) def test_tf_bert_dataloader_3(self): batch_size = 1 - with tf.io.TFRecordWriter('./test.record') as writer: + with tf.io.TFRecordWriter("./test.record") as writer: features = collections.OrderedDict() - features["unique_ids"] = tf.train.Feature( - int64_list=tf.train.Int64List(value=list([self.unique_id]))) - features["input_ids"] = tf.train.Feature( - int64_list=tf.train.Int64List(value=list(self.input_ids))) - features["input_mask"] = tf.train.Feature( - int64_list=tf.train.Int64List(value=list(self.input_mask))) - features["segment_ids"] = tf.train.Feature( - int64_list=tf.train.Int64List(value=list(self.segment_ids))) + features["unique_ids"] = tf.train.Feature(int64_list=tf.train.Int64List(value=list([self.unique_id]))) + features["input_ids"] = tf.train.Feature(int64_list=tf.train.Int64List(value=list(self.input_ids))) + features["input_mask"] = 
tf.train.Feature(int64_list=tf.train.Int64List(value=list(self.input_mask))) + features["segment_ids"] = tf.train.Feature(int64_list=tf.train.Int64List(value=list(self.segment_ids))) tf_example = tf.train.Example(features=tf.train.Features(feature=features)) writer.write(tf_example.SerializeToString()) eval_dataset = create_dataset( - 'tensorflow', - {'bert':{'root':'test.record', 'label_file': './dev.json'}}, - None, - None) - dataloader = DATALOADERS['tensorflow'](dataset=eval_dataset, batch_size=batch_size) + "tensorflow", {"bert": {"root": "test.record", "label_file": "./dev.json"}}, None, None + ) + dataloader = DATALOADERS["tensorflow"](dataset=eval_dataset, batch_size=batch_size) for inputs, labels in dataloader: - self.assertEqual(inputs[0], 'test.record') + self.assertEqual(inputs[0], "test.record") self.assertEqual(inputs[1], batch_size) self.assertEqual(len(labels), 1) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/utils/test_huggingface.py b/test/utils/test_huggingface.py index 07893394255..634f17b96a6 100644 --- a/test/utils/test_huggingface.py +++ b/test/utils/test_huggingface.py @@ -1,11 +1,14 @@ -"""Tests for downloading int8 model from huggingface model hub""" +"""Tests for downloading int8 model from huggingface model hub.""" +import shutil import unittest + import torch -import shutil import transformers + from neural_compressor.model import Model from neural_compressor.utils.load_huggingface import save_for_huggingface_upstream + class TestQuantization(unittest.TestCase): @classmethod def tearDownClass(self): @@ -13,31 +16,33 @@ def tearDownClass(self): def test_int8_huggingface_model(self): from neural_compressor.utils.load_huggingface import OptimizedModel - model_name_or_path = 'Intel/distilbert-base-uncased-finetuned-sst-2-english-int8-static' + + model_name_or_path = "Intel/distilbert-base-uncased-finetuned-sst-2-english-int8-static" tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path) model = OptimizedModel.from_pretrained( - model_name_or_path, - from_tf=bool(".ckpt" in model_name_or_path), - config=None, - cache_dir=None, - revision=None, - use_auth_token=None, - ) + model_name_or_path, + from_tf=bool(".ckpt" in model_name_or_path), + config=None, + cache_dir=None, + revision=None, + use_auth_token=None, + ) stat = model.state_dict() - self.assertTrue(stat['classifier.module._packed_params.dtype'] == torch.qint8) + self.assertTrue(stat["classifier.module._packed_params.dtype"] == torch.qint8) from huggingface_hub import hf_hub_download + resolved_weights_file = hf_hub_download( repo_id=model_name_or_path, - filename='pytorch_model.bin', + filename="pytorch_model.bin", ) - q_config = torch.load(resolved_weights_file)['best_configure'] + q_config = torch.load(resolved_weights_file)["best_configure"] inc_model = Model(model) inc_model.q_config = q_config - save_for_huggingface_upstream(inc_model, tokenizer, 'saved_results') - load_model = OptimizedModel.from_pretrained('saved_results') + save_for_huggingface_upstream(inc_model, tokenizer, "saved_results") + load_model = OptimizedModel.from_pretrained("saved_results") if __name__ == "__main__": diff --git a/test/utils/test_layer_histogram.py b/test/utils/test_layer_histogram.py index fedb26f73da..28397654ff6 100644 --- a/test/utils/test_layer_histogram.py +++ b/test/utils/test_layer_histogram.py @@ -1,11 +1,14 @@ """Tests for collecting layer histogram.""" -from neural_compressor.utils.collect_layer_histogram import LayerHistogramCollector +import 
unittest from collections import OrderedDict -from neural_compressor.utils import logger + import numpy as np import torch import torch.nn as nn -import unittest + +from neural_compressor.utils import logger +from neural_compressor.utils.collect_layer_histogram import LayerHistogramCollector + class InvertedResidual(nn.Module): def __init__(self, inp, oup, stride, expand_ratio): @@ -42,25 +45,18 @@ def forward(self, x): else: return self.conv(x) + class BuildFakeModel(nn.Module): def conv_1x1_bn(self, inp, oup): - return nn.Sequential( - nn.Conv2d(inp, oup, 1, 1, 0, bias=False), - nn.BatchNorm2d(oup), - nn.ReLU6(inplace=True) - ) + return nn.Sequential(nn.Conv2d(inp, oup, 1, 1, 0, bias=False), nn.BatchNorm2d(oup), nn.ReLU6(inplace=True)) def conv_bn(self, inp, oup, stride): - return nn.Sequential( - nn.Conv2d(inp, oup, 3, stride, 1, bias=False), - nn.BatchNorm2d(oup), - nn.ReLU6(inplace=True) - ) + return nn.Sequential(nn.Conv2d(inp, oup, 3, stride, 1, bias=False), nn.BatchNorm2d(oup), nn.ReLU6(inplace=True)) def make_divisible(self, x, divisor=8): - return int(np.ceil(x * 1. / divisor) * divisor) + return int(np.ceil(x * 1.0 / divisor) * divisor) - def __init__(self, n_class=1000, input_size=224, width_mult=1.): + def __init__(self, n_class=1000, input_size=224, width_mult=1.0): super().__init__() block = InvertedResidual input_channel = 32 @@ -96,6 +92,7 @@ def forward(self, x): x = self.classifier(x) return x + class CollectLayerHistogram(unittest.TestCase): def setUp(self): model = BuildFakeModel(width_mult=1) @@ -104,20 +101,23 @@ def setUp(self): for key, value in model.state_dict().items(): if not value.ndim: value = np.expand_dims(value, axis=0) - if i>200: + if i > 200: pass else: include_layer[key] = np.array(value, dtype=np.float32) layer_tensor[key] = np.array(value, dtype=np.float32) i += 1 - self.layer_histogram_collector = LayerHistogramCollector \ - (num_bins=8001, layer_tensor=layer_tensor, include_layer=include_layer, logger=logger) - + self.layer_histogram_collector = LayerHistogramCollector( + num_bins=8001, layer_tensor=layer_tensor, include_layer=include_layer, logger=logger + ) + def test_layer_histogram(self): self.layer_histogram_collector.collect() - self.assertEqual(self.layer_histogram_collector.layer_tensor.keys() \ - & self.layer_histogram_collector.include_layer.keys(), \ - self.layer_histogram_collector.hist_dict.keys()) + self.assertEqual( + self.layer_histogram_collector.layer_tensor.keys() & self.layer_histogram_collector.include_layer.keys(), + self.layer_histogram_collector.hist_dict.keys(), + ) + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/utils/test_logger.py b/test/utils/test_logger.py index 22d81c93265..c870038fb87 100644 --- a/test/utils/test_logger.py +++ b/test/utils/test_logger.py @@ -1,7 +1,9 @@ """Tests for logging utilities.""" -from neural_compressor.utils import logger import unittest +from neural_compressor.utils import logger + + class TestLogger(unittest.TestCase): def test_logger(self): logger.log(0, "call logger log function.") @@ -20,15 +22,13 @@ def test_logger(self): logger.warning({"msg": "call logger warning function"}) logger.warning(["call logger warning function", "done"]) logger.warning(("call logger warning function", "done")) - logger.warning({"msg": {('bert', "embedding"): {'weight': {'dtype': ['unint8', 'int8']}}}}) - logger.warning({"msg": {('bert', "embedding"): {'op': ('a', 'b')}}}) + logger.warning({"msg": {("bert", "embedding"): {"weight": {"dtype": ["unint8", "int8"]}}}}) 
+ logger.warning({"msg": {("bert", "embedding"): {"op": ("a", "b")}}}) # the following log will not be prettified logger.warning([{"msg": "call logger warning function"}, {"msg2": "done"}]) logger.warning(({"msg": "call logger warning function"}, {"msg2": "done"})) - logger.warning(({"msg": [{"sub_msg":"call logger"}, - {"sub_msg2":"call warning function"}]}, - {"msg2": "done"})) + logger.warning(({"msg": [{"sub_msg": "call logger"}, {"sub_msg2": "call warning function"}]}, {"msg2": "done"})) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main()